From 9b1dd69ae6ab47cec2a7f071a3f47e99d30dc829 Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 7 Jun 2015 22:35:56 -0400 Subject: [PATCH] Brian's HW --- program_classifier.ipynb | 2144 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 2144 insertions(+) create mode 100644 program_classifier.ipynb diff --git a/program_classifier.ipynb b/program_classifier.ipynb new file mode 100644 index 0000000..01010fe --- /dev/null +++ b/program_classifier.ipynb @@ -0,0 +1,2144 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make an array where:\n", + "* column1 is index\n", + "* column2 is language\n", + "\n", + "### Do this by using glob on the pro-lan dir. \n", + "\n", + "### Maybe run an Unselective Classifier to get labels" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import glob\n", + "import os\n", + "import csv\n", + "import pandas as pd\n", + "import re\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "names = [os.path.basename(x) for x in glob.glob('benchmarksgame-2014-08-31/*/*/*/*')]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "names = []\n", + "txtf = []\n", + "\n", + "for file in glob.glob('benchmarksgame-2014-08-31/*/*/*/*/*/*'):\n", + "# names.append(os.path.basename(file))\n", + " names.append(os.path.splitext(file)[-2])\n", + " with open(file, encoding=\"ISO-8859-1\") as some_file:\n", + " txtf.append(some_file.read())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "8410\n", + "8410\n" + ] + } + ], + "source": [ + "txt = pd.Series(txtf); print(len(txtf))\n", + "names = pd.Series(names); print(len(names))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "8409 benchmarksgame-2014-08-31/benchmarksgame/websi...\n", + "dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "names[-1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "extensions = []\n", + "for file in names:\n", + " extensions.append(os.path.splitext(file)[-1])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "exten2 = []\n", + "for file in extensions:\n", + " file = re.sub('\\.','',file)\n", + " exten2.append(file)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "exten = pd.Series(exten2)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df = pd.DataFrame()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df = df.append([exten, txt]).T" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extentxt
0ats<span class=\"hl com\">(*</span>\\n<span class=\"h...
1ats\\nWed, 23 Jan 2013 05:50:58 GMT\\n\\nMAKE:\\n/usr...
2cint<span class=\"hl com\">/* The Computer Language ...
3cint
4clojure<span class=\"hl slc\">;; The Computer Language ...
5clojure\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...
6csharp<span class=\"hl com\">/*</span>\\n<span class...
7csharp\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b...
8dart<span class=\"hl com\">/* The Computer Language ...
9dart\\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN...
10fpascal<span class=\"hl com\">(*</span>\\n<span class=\"h...
11fpascal\\nFri, 25 Apr 2014 02:25:51 GMT\\n\\nMAKE:\\nmv b...
12fsharp<span class=\"hl com\">(*</span>\\n<span class...
13fsharp\\nThu, 14 Aug 2014 10:19:29 GMT\\n\\nMAKE:\\nmv b...
14gcc<span class=\"hl com\">/* The Computer Language ...
15gcc\\nThu, 24 Apr 2014 01:10:05 GMT\\n\\nMAKE:\\n/usr...
16ghc<span class=\"hl slc\">--</span>\\n<span class=\"h...
17ghc\\nThu, 17 Apr 2014 00:13:33 GMT\\n\\nMAKE:\\nmv b...
18gnat<span class=\"hl slc\">-- The Computer Language ...
19gnat\\nSat, 26 Apr 2014 16:51:41 GMT\\n\\nMAKE:\\n/usr...
20go<span class=\"hl com\">/* The Computer Language ...
21go\\nThu, 19 Jun 2014 02:22:05 GMT\\n\\nMAKE:\\n/usr...
22hipe<span class=\"hl slc\">% The Computer Language B...
23hipe\\nMon, 04 Mar 2013 21:40:14 GMT\\n\\nMAKE:\\nmv b...
24ifc<span class=\"hl slc\">! -*- mode: f90 -*-</span...
25ifc\\nTue, 15 Jan 2013 06:41:34 GMT\\n\\nMAKE:\\n/usr...
26java<span class=\"hl com\">/* The Computer Language ...
27java\\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b...
28jruby<span class=\"hl slc\"># The Computer Language S...
29jruby\\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b...
.........
8380<p>java version \"1.8.0\"<br/>\\nJava(TM) SE Runt...
8381<p>Mono JIT compiler version 3.8.1 (master/db3...
8382<p>Dart VM version: 1.6.0 (Tue Aug 26 14:02:07...
8383<p>Erlang R16B (erts-5.10.1) [source] [64-bit]...
8384<p>Free Pascal Compiler version 2.6.4 [2014/03...
8385<p>F# Compiler for F# 3.1 (Open Source Edition...
8386<p>gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)<...
8387<p>The Glorious Glasgow Haskell Compilation Sy...
8388<p>GNAT 4.6</p>\\n<p>gcc version 4.8.2 (Ubuntu ...
8389<p>go version go1.3 linux/amd64</p>\\n
8390<p>gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)<...
8391<p>HipHop VM 3.0.0-dev (rel)<br/>\\nCompiler: h...
8392<p>Erlang R16B (erts-5.10.1) [source] [64-bit]...
8393<p>Intel(R) Fortran Intel(R) 64 Compiler XE fo...
8394<p>java version \"1.8.0\"<br/>\\nJava(TM) SE Runt...
8395<p>jruby 1.7.11 (1.9.3p392) 2014-02-24 86339bb...
8396<p>Lua 5.1.2 Copyright (C) 1994-2007 Lua.org,...
8397<p>The OCaml native-code compiler, version 4.0...
8398<p>Mozart Compiler 1.4.0 (20080704) playing Oz...
8399<p>This is perl 5, version 18, subversion 0 (v...
8400<p>PHP 5.5.0 (cli) (built: Jun 25 2013 23:17:0...
8401<p>Python 3.4.0 (default, Mar 17 2014, 08:05:2...
8402<p>Welcome to Racket v6.0.</p>\\n
8403<p>ruby 1.8.7 (2008-08-11 patchlevel 72) [x86_...
8404<p>rustc 0.11.0 (aa1163b92de7717eb7c5eba002b40...
8405<p>This is SBCL 1.2.0, an implementation of AN...
8406<p>java version \"1.8.0\"<br/>\\nJava(TM) SE Runt...
8407<p>V8 version 1.3.10 [console: dumb]</p>\\n
8408<p>VisualWorks(R) 7.7 beta2 Nov 16 2009<br/>\\n...
8409<p>ruby 2.1.0p0 (2013-12-25 revision 44422) [x...
\n", + "

8410 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " exten txt\n", + "0 ats (*\\n/* The Computer Language ...\n", + "3 cint \n", + "4 clojure ;; The Computer Language ...\n", + "5 clojure \\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...\n", + "6 csharp /*\\n/* The Computer Language ...\n", + "9 dart \\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN...\n", + "10 fpascal (*\\n(*\\n/* The Computer Language ...\n", + "15 gcc \\nThu, 24 Apr 2014 01:10:05 GMT\\n\\nMAKE:\\n/usr...\n", + "16 ghc --\\n-- The Computer Language ...\n", + "19 gnat \\nSat, 26 Apr 2014 16:51:41 GMT\\n\\nMAKE:\\n/usr...\n", + "20 go /* The Computer Language ...\n", + "21 go \\nThu, 19 Jun 2014 02:22:05 GMT\\n\\nMAKE:\\n/usr...\n", + "22 hipe % The Computer Language B...\n", + "23 hipe \\nMon, 04 Mar 2013 21:40:14 GMT\\n\\nMAKE:\\nmv b...\n", + "24 ifc ! -*- mode: f90 -*-/* The Computer Language ...\n", + "27 java \\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b...\n", + "28 jruby # The Computer Language S...\n", + "29 jruby \\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b...\n", + "... ... ...\n", + "8380

java version \"1.8.0\"
\\nJava(TM) SE Runt...\n", + "8381

Mono JIT compiler version 3.8.1 (master/db3...\n", + "8382

Dart VM version: 1.6.0 (Tue Aug 26 14:02:07...\n", + "8383

Erlang R16B (erts-5.10.1) [source] [64-bit]...\n", + "8384

Free Pascal Compiler version 2.6.4 [2014/03...\n", + "8385

F# Compiler for F# 3.1 (Open Source Edition...\n", + "8386

gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)<...\n", + "8387

The Glorious Glasgow Haskell Compilation Sy...\n", + "8388

GNAT 4.6

\\n

gcc version 4.8.2 (Ubuntu ...\n", + "8389

go version go1.3 linux/amd64

\\n\n", + "8390

gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)<...\n", + "8391

HipHop VM 3.0.0-dev (rel)
\\nCompiler: h...\n", + "8392

Erlang R16B (erts-5.10.1) [source] [64-bit]...\n", + "8393

Intel(R) Fortran Intel(R) 64 Compiler XE fo...\n", + "8394

java version \"1.8.0\"
\\nJava(TM) SE Runt...\n", + "8395

jruby 1.7.11 (1.9.3p392) 2014-02-24 86339bb...\n", + "8396

Lua 5.1.2 Copyright (C) 1994-2007 Lua.org,...\n", + "8397

The OCaml native-code compiler, version 4.0...\n", + "8398

Mozart Compiler 1.4.0 (20080704) playing Oz...\n", + "8399

This is perl 5, version 18, subversion 0 (v...\n", + "8400

PHP 5.5.0 (cli) (built: Jun 25 2013 23:17:0...\n", + "8401

Python 3.4.0 (default, Mar 17 2014, 08:05:2...\n", + "8402

Welcome to Racket v6.0.

\\n\n", + "8403

ruby 1.8.7 (2008-08-11 patchlevel 72) [x86_...\n", + "8404

rustc 0.11.0 (aa1163b92de7717eb7c5eba002b40...\n", + "8405

This is SBCL 1.2.0, an implementation of AN...\n", + "8406

java version \"1.8.0\"
\\nJava(TM) SE Runt...\n", + "8407

V8 version 1.3.10 [console: dumb]

\\n\n", + "8408

VisualWorks(R) 7.7 beta2 Nov 16 2009
\\n...\n", + "8409

ruby 2.1.0p0 (2013-12-25 revision 44422) [x...\n", + "\n", + "[8410 rows x 2 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns = ['exten', 'txt']; df" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df['exten'].replace('', np.nan, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df['txt'].replace('',np.nan, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extentxt
0ats<span class=\"hl com\">(*</span>\\n<span class=\"h...
1ats\\nWed, 23 Jan 2013 05:50:58 GMT\\n\\nMAKE:\\n/usr...
2cint<span class=\"hl com\">/* The Computer Language ...
4clojure<span class=\"hl slc\">;; The Computer Language ...
5clojure\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...
6csharp<span class=\"hl com\">/*</span>\\n<span class...
7csharp\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b...
8dart<span class=\"hl com\">/* The Computer Language ...
9dart\\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN...
10fpascal<span class=\"hl com\">(*</span>\\n<span class=\"h...
11fpascal\\nFri, 25 Apr 2014 02:25:51 GMT\\n\\nMAKE:\\nmv b...
12fsharp<span class=\"hl com\">(*</span>\\n<span class...
13fsharp\\nThu, 14 Aug 2014 10:19:29 GMT\\n\\nMAKE:\\nmv b...
14gcc<span class=\"hl com\">/* The Computer Language ...
15gcc\\nThu, 24 Apr 2014 01:10:05 GMT\\n\\nMAKE:\\n/usr...
16ghc<span class=\"hl slc\">--</span>\\n<span class=\"h...
17ghc\\nThu, 17 Apr 2014 00:13:33 GMT\\n\\nMAKE:\\nmv b...
18gnat<span class=\"hl slc\">-- The Computer Language ...
19gnat\\nSat, 26 Apr 2014 16:51:41 GMT\\n\\nMAKE:\\n/usr...
20go<span class=\"hl com\">/* The Computer Language ...
21go\\nThu, 19 Jun 2014 02:22:05 GMT\\n\\nMAKE:\\n/usr...
22hipe<span class=\"hl slc\">% The Computer Language B...
23hipe\\nMon, 04 Mar 2013 21:40:14 GMT\\n\\nMAKE:\\nmv b...
24ifc<span class=\"hl slc\">! -*- mode: f90 -*-</span...
25ifc\\nTue, 15 Jan 2013 06:41:34 GMT\\n\\nMAKE:\\n/usr...
26java<span class=\"hl com\">/* The Computer Language ...
27java\\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b...
28jruby<span class=\"hl slc\"># The Computer Language S...
29jruby\\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b...
30ocaml<span class=\"hl com\">(* The Computer Language ...
.........
8346fsharp\\nWed, 13 Aug 2014 23:53:57 GMT\\n\\nMAKE:\\nmv t...
8347gcc<span class=\"hl com\">/* The Computer Language ...
8348gcc\\nSun, 20 Apr 2014 19:45:51 GMT\\n\\nMAKE:\\n/usr...
8349gnat<span class=\"hl slc\">-- The Computer Language ...
8350gnat\\nSun, 27 Apr 2014 03:03:03 GMT\\n\\nMAKE:\\n/usr...
8351go<span class=\"hl com\">/* The Computer Language ...
8352go\\nThu, 14 Jun 2012 22:19:30 GMT\\n\\nMAKE:\\n/usr...
8353gpp<span class=\"hl com\">/*</span>\\n<span class=\"h...
8354gpp\\nSun, 20 Apr 2014 19:33:58 GMT\\n\\nMAKE:\\n/usr...
8355java<span class=\"hl com\">/**</span>\\n<span class=\"...
8356java\\nTue, 18 Mar 2014 23:07:24 GMT\\n\\nMAKE:\\nmv t...
8357javaxint<span class=\"hl com\">/**</span>\\n<span class=\"...
8358javaxint\\nSat, 05 Mar 2011 12:09:56 GMT\\n\\nMAKE:\\nmv t...
8359gnat<span class=\"hl slc\">-- The Computer Language ...
8360gnat\\nSun, 27 Apr 2014 03:11:21 GMT\\n\\nMAKE:\\n/usr...
8361go<span class=\"hl com\">/* The Computer Language ...
8362go\\nThu, 19 Jun 2014 06:44:38 GMT\\n\\nMAKE:\\n/usr...
8363gpp<span class=\"hl com\">/*\\n</span>\\n<span class=...
8364gpp\\nSun, 20 Apr 2014 19:37:12 GMT\\n\\nMAKE:\\n/usr...
8365java<span class=\"hl com\">/**</span>\\n<span class=\"...
8366java\\nTue, 18 Mar 2014 23:05:24 GMT\\n\\nMAKE:\\nmv t...
8367javaxint<span class=\"hl com\">/**</span>\\n<span class=\"...
8368javaxint\\nSat, 05 Mar 2011 11:58:19 GMT\\n\\nMAKE:\\nmv t...
8369java<span class=\"hl com\">/**</span>\\n<span class=\"...
8370java\\nTue, 18 Mar 2014 22:50:47 GMT\\n\\nMAKE:\\nmv t...
8371javaxint<span class=\"hl com\">/**</span>\\n<span class=\"...
8372javaxint\\nSat, 05 Mar 2011 12:25:54 GMT\\n\\nMAKE:\\nmv t...
8373java<span class=\"hl com\">/**\\n</span>\\n<span class...
8374java\\nTue, 18 Mar 2014 22:51:36 GMT\\n\\nMAKE:\\nmv t...
8377csvBZh91AY&SY‘bpy\u0002p\u000b", + "]€x\u0010Beà\u0010\b?ÿßpaµÞ±J¢©J)T\u0014 (\u0000\u0005...
\n", + "

8275 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " exten txt\n", + "0 ats (*\\n/* The Computer Language ...\n", + "4 clojure ;; The Computer Language ...\n", + "5 clojure \\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...\n", + "6 csharp /*\\n/* The Computer Language ...\n", + "9 dart \\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN...\n", + "10 fpascal (*\\n(*\\n/* The Computer Language ...\n", + "15 gcc \\nThu, 24 Apr 2014 01:10:05 GMT\\n\\nMAKE:\\n/usr...\n", + "16 ghc --\\n-- The Computer Language ...\n", + "19 gnat \\nSat, 26 Apr 2014 16:51:41 GMT\\n\\nMAKE:\\n/usr...\n", + "20 go /* The Computer Language ...\n", + "21 go \\nThu, 19 Jun 2014 02:22:05 GMT\\n\\nMAKE:\\n/usr...\n", + "22 hipe % The Computer Language B...\n", + "23 hipe \\nMon, 04 Mar 2013 21:40:14 GMT\\n\\nMAKE:\\nmv b...\n", + "24 ifc ! -*- mode: f90 -*-/* The Computer Language ...\n", + "27 java \\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b...\n", + "28 jruby # The Computer Language S...\n", + "29 jruby \\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b...\n", + "30 ocaml (* The Computer Language ...\n", + "... ... ...\n", + "8346 fsharp \\nWed, 13 Aug 2014 23:53:57 GMT\\n\\nMAKE:\\nmv t...\n", + "8347 gcc /* The Computer Language ...\n", + "8348 gcc \\nSun, 20 Apr 2014 19:45:51 GMT\\n\\nMAKE:\\n/usr...\n", + "8349 gnat -- The Computer Language ...\n", + "8350 gnat \\nSun, 27 Apr 2014 03:03:03 GMT\\n\\nMAKE:\\n/usr...\n", + "8351 go /* The Computer Language ...\n", + "8352 go \\nThu, 14 Jun 2012 22:19:30 GMT\\n\\nMAKE:\\n/usr...\n", + "8353 gpp /*\\n/**\\n/**\\n-- The Computer Language ...\n", + "8360 gnat \\nSun, 27 Apr 2014 03:11:21 GMT\\n\\nMAKE:\\n/usr...\n", + "8361 go /* The Computer Language ...\n", + "8362 go \\nThu, 19 Jun 2014 06:44:38 GMT\\n\\nMAKE:\\n/usr...\n", + "8363 gpp /*\\n\\n/**\\n/**\\n/**\\n/**\\n/**\\n\\n\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extentxt
2cint<span class=\"hl com\">/* The Computer Language ...
4clojure<span class=\"hl slc\">;; The Computer Language ...
5clojure\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...
6csharp<span class=\"hl com\">/*</span>\\n<span class...
7csharp\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b...
8dart<span class=\"hl com\">/* The Computer Language ...
9dart\\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN...
26java<span class=\"hl com\">/* The Computer Language ...
27java\\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b...
28jruby<span class=\"hl slc\"># The Computer Language S...
29jruby\\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b...
30ocaml<span class=\"hl com\">(* The Computer Language ...
31ocaml\\nThu, 12 Sep 2013 20:23:23 GMT\\n\\nMAKE:\\nmv b...
34perl<span class=\"hl slc\"># The Computer Language B...
35perl\\nTue, 21 May 2013 17:44:06 GMT\\n\\nCOMMAND LIN...
36php<span class=\"hl opt\">&lt;</span>?php \\n<span c...
37php\\nTue, 25 Jun 2013 20:43:25 GMT\\n\\nCOMMAND LIN...
38racket<span class=\"hl opt\">#</span>lang racket<span ...
39racket\\nFri, 28 Feb 2014 04:34:37 GMT\\n\\nCOMMAND LIN...
40rust<span class=\"hl slc\">// The Computer Language ...
41rust\\nTue, 08 Jul 2014 00:10:57 GMT\\n\\nMAKE:\\n/usr...
42sbcl<span class=\"hl slc\">;; The Computer Languag...
43sbcl\\nSat, 31 May 2014 04:02:44 GMT\\n\\nMAKE:\\ncp: ...
44scala<span class=\"hl com\">/* The Computer Language ...
45scala\\nWed, 19 Mar 2014 08:28:18 GMT\\n\\nMAKE:\\nmv b...
50yarv<span class=\"hl slc\"># The Computer Language B...
51yarv\\nTue, 07 Jan 2014 21:26:00 GMT\\n\\nCOMMAND LIN...
52clojure<span class=\"hl slc\">;; The Computer Language ...
53clojure\\nFri, 18 Apr 2014 21:23:34 GMT\\n\\nMAKE:\\nmv b...
54csharp<span class=\"hl com\">/* The Computer Language ...
.........
8301csharp<span class=\"hl com\">/* The Computer Langua...
8302csharp\\nWed, 13 Aug 2014 21:59:29 GMT\\n\\nMAKE:\\nmv t...
8311java<span class=\"hl com\">/**</span>\\n<span class=\"...
8312java\\nTue, 18 Mar 2014 22:57:55 GMT\\n\\nMAKE:\\nmv t...
8315jruby<span class=\"hl slc\"># The Computer Language B...
8316jruby\\nSun, 30 Mar 2014 03:39:44 GMT\\n\\nMAKE:\\nmv t...
8317ocaml<span class=\"hl com\">(* The Computer Language ...
8318ocaml\\nFri, 13 Sep 2013 03:27:18 GMT\\n\\nMAKE:\\nmv t...
8319python3<span class=\"hl slc\"># The Computer Language B...
8320python3\\nTue, 18 Mar 2014 06:12:43 GMT\\n\\nMAKE:\\nmv t...
8321sbcl<span class=\"hl slc\">;;; The Computer Language...
8322sbcl\\nSun, 01 Jun 2014 01:39:42 GMT\\n\\nMAKE:\\ncp: ...
8323yarv<span class=\"hl slc\"># The Computer Language B...
8324yarv\\nThu, 09 Jan 2014 05:13:53 GMT\\n\\nCOMMAND LIN...
8335java<span class=\"hl com\">/**</span>\\n<span class=\"...
8336java\\nTue, 18 Mar 2014 22:59:09 GMT\\n\\nMAKE:\\nmv t...
8339ocaml<span class=\"hl com\">(* The Computer Language ...
8340ocaml\\nFri, 13 Sep 2013 03:13:05 GMT\\n\\nMAKE:\\nmv t...
8341perl<span class=\"hl slc\"># The Computer Language B...
8342perl\\nTue, 21 May 2013 09:44:09 GMT\\n\\nCOMMAND LIN...
8343python3<span class=\"hl slc\"># The Computer Language B...
8344python3\\nTue, 18 Mar 2014 06:18:49 GMT\\n\\nMAKE:\\nmv t...
8355java<span class=\"hl com\">/**</span>\\n<span class=\"...
8356java\\nTue, 18 Mar 2014 23:07:24 GMT\\n\\nMAKE:\\nmv t...
8365java<span class=\"hl com\">/**</span>\\n<span class=\"...
8366java\\nTue, 18 Mar 2014 23:05:24 GMT\\n\\nMAKE:\\nmv t...
8369java<span class=\"hl com\">/**</span>\\n<span class=\"...
8370java\\nTue, 18 Mar 2014 22:50:47 GMT\\n\\nMAKE:\\nmv t...
8373java<span class=\"hl com\">/**\\n</span>\\n<span class...
8374java\\nTue, 18 Mar 2014 22:51:36 GMT\\n\\nMAKE:\\nmv t...
\n", + "

4565 rows × 2 columns

\n", + "" + ], + "text/plain": [ + " exten txt\n", + "2 cint /* The Computer Language ...\n", + "4 clojure ;; The Computer Language ...\n", + "5 clojure \\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...\n", + "6 csharp /*\\n/* The Computer Language ...\n", + "9 dart \\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN...\n", + "26 java /* The Computer Language ...\n", + "27 java \\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b...\n", + "28 jruby # The Computer Language S...\n", + "29 jruby \\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b...\n", + "30 ocaml (* The Computer Language ...\n", + "31 ocaml \\nThu, 12 Sep 2013 20:23:23 GMT\\n\\nMAKE:\\nmv b...\n", + "34 perl # The Computer Language B...\n", + "35 perl \\nTue, 21 May 2013 17:44:06 GMT\\n\\nCOMMAND LIN...\n", + "36 php <?php \\n#lang racket// The Computer Language ...\n", + "41 rust \\nTue, 08 Jul 2014 00:10:57 GMT\\n\\nMAKE:\\n/usr...\n", + "42 sbcl ;; The Computer Languag...\n", + "43 sbcl \\nSat, 31 May 2014 04:02:44 GMT\\n\\nMAKE:\\ncp: ...\n", + "44 scala /* The Computer Language ...\n", + "45 scala \\nWed, 19 Mar 2014 08:28:18 GMT\\n\\nMAKE:\\nmv b...\n", + "50 yarv # The Computer Language B...\n", + "51 yarv \\nTue, 07 Jan 2014 21:26:00 GMT\\n\\nCOMMAND LIN...\n", + "52 clojure ;; The Computer Language ...\n", + "53 clojure \\nFri, 18 Apr 2014 21:23:34 GMT\\n\\nMAKE:\\nmv b...\n", + "54 csharp /* The Computer Language ...\n", + "... ... ...\n", + "8301 csharp /* The Computer Langua...\n", + "8302 csharp \\nWed, 13 Aug 2014 21:59:29 GMT\\n\\nMAKE:\\nmv t...\n", + "8311 java /**\\n# The Computer Language B...\n", + "8316 jruby \\nSun, 30 Mar 2014 03:39:44 GMT\\n\\nMAKE:\\nmv t...\n", + "8317 ocaml (* The Computer Language ...\n", + "8318 ocaml \\nFri, 13 Sep 2013 03:27:18 GMT\\n\\nMAKE:\\nmv t...\n", + "8319 python3 # The Computer Language B...\n", + "8320 python3 \\nTue, 18 Mar 2014 06:12:43 GMT\\n\\nMAKE:\\nmv t...\n", + "8321 sbcl ;;; The Computer Language...\n", + "8322 sbcl \\nSun, 01 Jun 2014 01:39:42 GMT\\n\\nMAKE:\\ncp: ...\n", + "8323 yarv # The Computer Language B...\n", + "8324 yarv \\nThu, 09 Jan 2014 05:13:53 GMT\\n\\nCOMMAND LIN...\n", + "8335 java /**\\n(* The Computer Language ...\n", + "8340 ocaml \\nFri, 13 Sep 2013 03:13:05 GMT\\n\\nMAKE:\\nmv t...\n", + "8341 perl # The Computer Language B...\n", + "8342 perl \\nTue, 21 May 2013 09:44:09 GMT\\n\\nCOMMAND LIN...\n", + "8343 python3 # The Computer Language B...\n", + "8344 python3 \\nTue, 18 Mar 2014 06:18:49 GMT\\n\\nMAKE:\\nmv t...\n", + "8355 java /**\\n/**\\n/**\\n/**\\n\\n\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extentxt
2c++<span class=\"hl com\">/* The Computer Language ...
4clojure<span class=\"hl slc\">;; The Computer Language ...
5clojure\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...
6c#<span class=\"hl com\">/*</span>\\n<span class...
7c#\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b...
\n", + "" + ], + "text/plain": [ + " exten txt\n", + "2 c++ /* The Computer Language ...\n", + "4 clojure ;; The Computer Language ...\n", + "5 clojure \\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...\n", + "6 c# /*\\n\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extentxtnonwordcurly%period%
0c++<span class=\"hl com\">/* The Computer Language ...150.0007720.001029
1clojure<span class=\"hl slc\">;; The Computer Language ...350.0000000.001311
2clojure\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...40.0000000.023649
3c#<span class=\"hl com\">/*</span>\\n<span class...150.0018690.002419
4c#\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b...30.0000000.009447
\n", + "" + ], + "text/plain": [ + " exten txt nonword \\\n", + "0 c++ /* The Computer Language ... 15 \n", + "1 clojure ;; The Computer Language ... 35 \n", + "2 clojure \\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b... 4 \n", + "3 c# /*\\n