diff --git a/program_classifier.ipynb b/program_classifier.ipynb new file mode 100644 index 0000000..01010fe --- /dev/null +++ b/program_classifier.ipynb @@ -0,0 +1,2144 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make an array where:\n", + "* column1 is index\n", + "* column2 is language\n", + "\n", + "### Do this by using glob on the pro-lan dir. \n", + "\n", + "### Maybe run an Unselective Classifier to get labels" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import glob\n", + "import os\n", + "import csv\n", + "import pandas as pd\n", + "import re\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "names = [os.path.basename(x) for x in glob.glob('benchmarksgame-2014-08-31/*/*/*/*')]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "names = []\n", + "txtf = []\n", + "\n", + "for file in glob.glob('benchmarksgame-2014-08-31/*/*/*/*/*/*'):\n", + "# names.append(os.path.basename(file))\n", + " names.append(os.path.splitext(file)[-2])\n", + " with open(file, encoding=\"ISO-8859-1\") as some_file:\n", + " txtf.append(some_file.read())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "8410\n", + "8410\n" + ] + } + ], + "source": [ + "txt = pd.Series(txtf); print(len(txtf))\n", + "names = pd.Series(names); print(len(names))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "8409 benchmarksgame-2014-08-31/benchmarksgame/websi...\n", + "dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "names[-1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "extensions = []\n", + "for file in names:\n", + " extensions.append(os.path.splitext(file)[-1])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "exten2 = []\n", + "for file in extensions:\n", + " file = re.sub('\\.','',file)\n", + " exten2.append(file)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "exten = pd.Series(exten2)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df = pd.DataFrame()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df = df.append([exten, txt]).T" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | exten | \n", + "txt | \n", + "
|---|---|---|
| 0 | \n", + "ats | \n", + "<span class=\"hl com\">(*</span>\\n<span class=\"h... | \n", + "
| 1 | \n", + "ats | \n", + "\\nWed, 23 Jan 2013 05:50:58 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 2 | \n", + "cint | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 3 | \n", + "cint | \n", + "\n", + " |
| 4 | \n", + "clojure | \n", + "<span class=\"hl slc\">;; The Computer Language ... | \n", + "
| 5 | \n", + "clojure | \n", + "\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 6 | \n", + "csharp | \n", + "<span class=\"hl com\">/*</span>\\n<span class... | \n", + "
| 7 | \n", + "csharp | \n", + "\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 8 | \n", + "dart | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 9 | \n", + "dart | \n", + "\\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN... | \n", + "
| 10 | \n", + "fpascal | \n", + "<span class=\"hl com\">(*</span>\\n<span class=\"h... | \n", + "
| 11 | \n", + "fpascal | \n", + "\\nFri, 25 Apr 2014 02:25:51 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 12 | \n", + "fsharp | \n", + "<span class=\"hl com\">(*</span>\\n<span class... | \n", + "
| 13 | \n", + "fsharp | \n", + "\\nThu, 14 Aug 2014 10:19:29 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 14 | \n", + "gcc | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 15 | \n", + "gcc | \n", + "\\nThu, 24 Apr 2014 01:10:05 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 16 | \n", + "ghc | \n", + "<span class=\"hl slc\">--</span>\\n<span class=\"h... | \n", + "
| 17 | \n", + "ghc | \n", + "\\nThu, 17 Apr 2014 00:13:33 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 18 | \n", + "gnat | \n", + "<span class=\"hl slc\">-- The Computer Language ... | \n", + "
| 19 | \n", + "gnat | \n", + "\\nSat, 26 Apr 2014 16:51:41 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 20 | \n", + "go | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 21 | \n", + "go | \n", + "\\nThu, 19 Jun 2014 02:22:05 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 22 | \n", + "hipe | \n", + "<span class=\"hl slc\">% The Computer Language B... | \n", + "
| 23 | \n", + "hipe | \n", + "\\nMon, 04 Mar 2013 21:40:14 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 24 | \n", + "ifc | \n", + "<span class=\"hl slc\">! -*- mode: f90 -*-</span... | \n", + "
| 25 | \n", + "ifc | \n", + "\\nTue, 15 Jan 2013 06:41:34 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 26 | \n", + "java | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 27 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 28 | \n", + "jruby | \n", + "<span class=\"hl slc\"># The Computer Language S... | \n", + "
| 29 | \n", + "jruby | \n", + "\\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "
| 8380 | \n", + "\n", + " | <p>java version \"1.8.0\"<br/>\\nJava(TM) SE Runt... | \n", + "
| 8381 | \n", + "\n", + " | <p>Mono JIT compiler version 3.8.1 (master/db3... | \n", + "
| 8382 | \n", + "\n", + " | <p>Dart VM version: 1.6.0 (Tue Aug 26 14:02:07... | \n", + "
| 8383 | \n", + "\n", + " | <p>Erlang R16B (erts-5.10.1) [source] [64-bit]... | \n", + "
| 8384 | \n", + "\n", + " | <p>Free Pascal Compiler version 2.6.4 [2014/03... | \n", + "
| 8385 | \n", + "\n", + " | <p>F# Compiler for F# 3.1 (Open Source Edition... | \n", + "
| 8386 | \n", + "\n", + " | <p>gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)<... | \n", + "
| 8387 | \n", + "\n", + " | <p>The Glorious Glasgow Haskell Compilation Sy... | \n", + "
| 8388 | \n", + "\n", + " | <p>GNAT 4.6</p>\\n<p>gcc version 4.8.2 (Ubuntu ... | \n", + "
| 8389 | \n", + "\n", + " | <p>go version go1.3 linux/amd64</p>\\n | \n", + "
| 8390 | \n", + "\n", + " | <p>gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)<... | \n", + "
| 8391 | \n", + "\n", + " | <p>HipHop VM 3.0.0-dev (rel)<br/>\\nCompiler: h... | \n", + "
| 8392 | \n", + "\n", + " | <p>Erlang R16B (erts-5.10.1) [source] [64-bit]... | \n", + "
| 8393 | \n", + "\n", + " | <p>Intel(R) Fortran Intel(R) 64 Compiler XE fo... | \n", + "
| 8394 | \n", + "\n", + " | <p>java version \"1.8.0\"<br/>\\nJava(TM) SE Runt... | \n", + "
| 8395 | \n", + "\n", + " | <p>jruby 1.7.11 (1.9.3p392) 2014-02-24 86339bb... | \n", + "
| 8396 | \n", + "\n", + " | <p>Lua 5.1.2 Copyright (C) 1994-2007 Lua.org,... | \n", + "
| 8397 | \n", + "\n", + " | <p>The OCaml native-code compiler, version 4.0... | \n", + "
| 8398 | \n", + "\n", + " | <p>Mozart Compiler 1.4.0 (20080704) playing Oz... | \n", + "
| 8399 | \n", + "\n", + " | <p>This is perl 5, version 18, subversion 0 (v... | \n", + "
| 8400 | \n", + "\n", + " | <p>PHP 5.5.0 (cli) (built: Jun 25 2013 23:17:0... | \n", + "
| 8401 | \n", + "\n", + " | <p>Python 3.4.0 (default, Mar 17 2014, 08:05:2... | \n", + "
| 8402 | \n", + "\n", + " | <p>Welcome to Racket v6.0.</p>\\n | \n", + "
| 8403 | \n", + "\n", + " | <p>ruby 1.8.7 (2008-08-11 patchlevel 72) [x86_... | \n", + "
| 8404 | \n", + "\n", + " | <p>rustc 0.11.0 (aa1163b92de7717eb7c5eba002b40... | \n", + "
| 8405 | \n", + "\n", + " | <p>This is SBCL 1.2.0, an implementation of AN... | \n", + "
| 8406 | \n", + "\n", + " | <p>java version \"1.8.0\"<br/>\\nJava(TM) SE Runt... | \n", + "
| 8407 | \n", + "\n", + " | <p>V8 version 1.3.10 [console: dumb]</p>\\n | \n", + "
| 8408 | \n", + "\n", + " | <p>VisualWorks(R) 7.7 beta2 Nov 16 2009<br/>\\n... | \n", + "
| 8409 | \n", + "\n", + " | <p>ruby 2.1.0p0 (2013-12-25 revision 44422) [x... | \n", + "
8410 rows × 2 columns
\n", + "java version \"1.8.0\"
\\nJava(TM) SE Runt...\n",
+ "8381
Mono JIT compiler version 3.8.1 (master/db3...\n", + "8382
Dart VM version: 1.6.0 (Tue Aug 26 14:02:07...\n", + "8383
Erlang R16B (erts-5.10.1) [source] [64-bit]...\n", + "8384
Free Pascal Compiler version 2.6.4 [2014/03...\n", + "8385
F# Compiler for F# 3.1 (Open Source Edition...\n", + "8386
gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)<...\n", + "8387
The Glorious Glasgow Haskell Compilation Sy...\n", + "8388
GNAT 4.6
\\ngcc version 4.8.2 (Ubuntu ...\n", + "8389
go version go1.3 linux/amd64
\\n\n", + "8390gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)<...\n", + "8391
HipHop VM 3.0.0-dev (rel)
\\nCompiler: h...\n",
+ "8392
Erlang R16B (erts-5.10.1) [source] [64-bit]...\n", + "8393
Intel(R) Fortran Intel(R) 64 Compiler XE fo...\n", + "8394
java version \"1.8.0\"
\\nJava(TM) SE Runt...\n",
+ "8395
jruby 1.7.11 (1.9.3p392) 2014-02-24 86339bb...\n", + "8396
Lua 5.1.2 Copyright (C) 1994-2007 Lua.org,...\n", + "8397
The OCaml native-code compiler, version 4.0...\n", + "8398
Mozart Compiler 1.4.0 (20080704) playing Oz...\n", + "8399
This is perl 5, version 18, subversion 0 (v...\n", + "8400
PHP 5.5.0 (cli) (built: Jun 25 2013 23:17:0...\n", + "8401
Python 3.4.0 (default, Mar 17 2014, 08:05:2...\n", + "8402
Welcome to Racket v6.0.
\\n\n", + "8403ruby 1.8.7 (2008-08-11 patchlevel 72) [x86_...\n", + "8404
rustc 0.11.0 (aa1163b92de7717eb7c5eba002b40...\n", + "8405
This is SBCL 1.2.0, an implementation of AN...\n", + "8406
java version \"1.8.0\"
\\nJava(TM) SE Runt...\n",
+ "8407
V8 version 1.3.10 [console: dumb]
\\n\n", + "8408VisualWorks(R) 7.7 beta2 Nov 16 2009
\\n...\n",
+ "8409
ruby 2.1.0p0 (2013-12-25 revision 44422) [x...\n", + "\n", + "[8410 rows x 2 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns = ['exten', 'txt']; df" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df['exten'].replace('', np.nan, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df['txt'].replace('',np.nan, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | exten | \n", + "txt | \n", + "
|---|---|---|
| 0 | \n", + "ats | \n", + "<span class=\"hl com\">(*</span>\\n<span class=\"h... | \n", + "
| 1 | \n", + "ats | \n", + "\\nWed, 23 Jan 2013 05:50:58 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 2 | \n", + "cint | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 4 | \n", + "clojure | \n", + "<span class=\"hl slc\">;; The Computer Language ... | \n", + "
| 5 | \n", + "clojure | \n", + "\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 6 | \n", + "csharp | \n", + "<span class=\"hl com\">/*</span>\\n<span class... | \n", + "
| 7 | \n", + "csharp | \n", + "\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 8 | \n", + "dart | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 9 | \n", + "dart | \n", + "\\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN... | \n", + "
| 10 | \n", + "fpascal | \n", + "<span class=\"hl com\">(*</span>\\n<span class=\"h... | \n", + "
| 11 | \n", + "fpascal | \n", + "\\nFri, 25 Apr 2014 02:25:51 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 12 | \n", + "fsharp | \n", + "<span class=\"hl com\">(*</span>\\n<span class... | \n", + "
| 13 | \n", + "fsharp | \n", + "\\nThu, 14 Aug 2014 10:19:29 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 14 | \n", + "gcc | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 15 | \n", + "gcc | \n", + "\\nThu, 24 Apr 2014 01:10:05 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 16 | \n", + "ghc | \n", + "<span class=\"hl slc\">--</span>\\n<span class=\"h... | \n", + "
| 17 | \n", + "ghc | \n", + "\\nThu, 17 Apr 2014 00:13:33 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 18 | \n", + "gnat | \n", + "<span class=\"hl slc\">-- The Computer Language ... | \n", + "
| 19 | \n", + "gnat | \n", + "\\nSat, 26 Apr 2014 16:51:41 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 20 | \n", + "go | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 21 | \n", + "go | \n", + "\\nThu, 19 Jun 2014 02:22:05 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 22 | \n", + "hipe | \n", + "<span class=\"hl slc\">% The Computer Language B... | \n", + "
| 23 | \n", + "hipe | \n", + "\\nMon, 04 Mar 2013 21:40:14 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 24 | \n", + "ifc | \n", + "<span class=\"hl slc\">! -*- mode: f90 -*-</span... | \n", + "
| 25 | \n", + "ifc | \n", + "\\nTue, 15 Jan 2013 06:41:34 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 26 | \n", + "java | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 27 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 28 | \n", + "jruby | \n", + "<span class=\"hl slc\"># The Computer Language S... | \n", + "
| 29 | \n", + "jruby | \n", + "\\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 30 | \n", + "ocaml | \n", + "<span class=\"hl com\">(* The Computer Language ... | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "
| 8346 | \n", + "fsharp | \n", + "\\nWed, 13 Aug 2014 23:53:57 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8347 | \n", + "gcc | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 8348 | \n", + "gcc | \n", + "\\nSun, 20 Apr 2014 19:45:51 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 8349 | \n", + "gnat | \n", + "<span class=\"hl slc\">-- The Computer Language ... | \n", + "
| 8350 | \n", + "gnat | \n", + "\\nSun, 27 Apr 2014 03:03:03 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 8351 | \n", + "go | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 8352 | \n", + "go | \n", + "\\nThu, 14 Jun 2012 22:19:30 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 8353 | \n", + "gpp | \n", + "<span class=\"hl com\">/*</span>\\n<span class=\"h... | \n", + "
| 8354 | \n", + "gpp | \n", + "\\nSun, 20 Apr 2014 19:33:58 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 8355 | \n", + "java | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8356 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 23:07:24 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8357 | \n", + "javaxint | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8358 | \n", + "javaxint | \n", + "\\nSat, 05 Mar 2011 12:09:56 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8359 | \n", + "gnat | \n", + "<span class=\"hl slc\">-- The Computer Language ... | \n", + "
| 8360 | \n", + "gnat | \n", + "\\nSun, 27 Apr 2014 03:11:21 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 8361 | \n", + "go | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 8362 | \n", + "go | \n", + "\\nThu, 19 Jun 2014 06:44:38 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 8363 | \n", + "gpp | \n", + "<span class=\"hl com\">/*\\n</span>\\n<span class=... | \n", + "
| 8364 | \n", + "gpp | \n", + "\\nSun, 20 Apr 2014 19:37:12 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 8365 | \n", + "java | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8366 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 23:05:24 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8367 | \n", + "javaxint | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8368 | \n", + "javaxint | \n", + "\\nSat, 05 Mar 2011 11:58:19 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8369 | \n", + "java | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8370 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 22:50:47 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8371 | \n", + "javaxint | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8372 | \n", + "javaxint | \n", + "\\nSat, 05 Mar 2011 12:25:54 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8373 | \n", + "java | \n", + "<span class=\"hl com\">/**\\n</span>\\n<span class... | \n", + "
| 8374 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 22:51:36 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8377 | \n", + "csv | \n", + "BZh91AY&SYbpy\u0002p\u000b", + "]x\u0010Beà\u0010\b?ÿßpaµÞ±J¢©J)T\u0014 (\u0000\u0005... | \n", + "
8275 rows × 2 columns
\n", + "| \n", + " | exten | \n", + "txt | \n", + "
|---|---|---|
| 2 | \n", + "cint | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 4 | \n", + "clojure | \n", + "<span class=\"hl slc\">;; The Computer Language ... | \n", + "
| 5 | \n", + "clojure | \n", + "\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 6 | \n", + "csharp | \n", + "<span class=\"hl com\">/*</span>\\n<span class... | \n", + "
| 7 | \n", + "csharp | \n", + "\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 8 | \n", + "dart | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 9 | \n", + "dart | \n", + "\\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN... | \n", + "
| 26 | \n", + "java | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 27 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 28 | \n", + "jruby | \n", + "<span class=\"hl slc\"># The Computer Language S... | \n", + "
| 29 | \n", + "jruby | \n", + "\\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 30 | \n", + "ocaml | \n", + "<span class=\"hl com\">(* The Computer Language ... | \n", + "
| 31 | \n", + "ocaml | \n", + "\\nThu, 12 Sep 2013 20:23:23 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 34 | \n", + "perl | \n", + "<span class=\"hl slc\"># The Computer Language B... | \n", + "
| 35 | \n", + "perl | \n", + "\\nTue, 21 May 2013 17:44:06 GMT\\n\\nCOMMAND LIN... | \n", + "
| 36 | \n", + "php | \n", + "<span class=\"hl opt\"><</span>?php \\n<span c... | \n", + "
| 37 | \n", + "php | \n", + "\\nTue, 25 Jun 2013 20:43:25 GMT\\n\\nCOMMAND LIN... | \n", + "
| 38 | \n", + "racket | \n", + "<span class=\"hl opt\">#</span>lang racket<span ... | \n", + "
| 39 | \n", + "racket | \n", + "\\nFri, 28 Feb 2014 04:34:37 GMT\\n\\nCOMMAND LIN... | \n", + "
| 40 | \n", + "rust | \n", + "<span class=\"hl slc\">// The Computer Language ... | \n", + "
| 41 | \n", + "rust | \n", + "\\nTue, 08 Jul 2014 00:10:57 GMT\\n\\nMAKE:\\n/usr... | \n", + "
| 42 | \n", + "sbcl | \n", + "<span class=\"hl slc\">;; The Computer Languag... | \n", + "
| 43 | \n", + "sbcl | \n", + "\\nSat, 31 May 2014 04:02:44 GMT\\n\\nMAKE:\\ncp: ... | \n", + "
| 44 | \n", + "scala | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 45 | \n", + "scala | \n", + "\\nWed, 19 Mar 2014 08:28:18 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 50 | \n", + "yarv | \n", + "<span class=\"hl slc\"># The Computer Language B... | \n", + "
| 51 | \n", + "yarv | \n", + "\\nTue, 07 Jan 2014 21:26:00 GMT\\n\\nCOMMAND LIN... | \n", + "
| 52 | \n", + "clojure | \n", + "<span class=\"hl slc\">;; The Computer Language ... | \n", + "
| 53 | \n", + "clojure | \n", + "\\nFri, 18 Apr 2014 21:23:34 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 54 | \n", + "csharp | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "
| 8301 | \n", + "csharp | \n", + "<span class=\"hl com\">/* The Computer Langua... | \n", + "
| 8302 | \n", + "csharp | \n", + "\\nWed, 13 Aug 2014 21:59:29 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8311 | \n", + "java | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8312 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 22:57:55 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8315 | \n", + "jruby | \n", + "<span class=\"hl slc\"># The Computer Language B... | \n", + "
| 8316 | \n", + "jruby | \n", + "\\nSun, 30 Mar 2014 03:39:44 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8317 | \n", + "ocaml | \n", + "<span class=\"hl com\">(* The Computer Language ... | \n", + "
| 8318 | \n", + "ocaml | \n", + "\\nFri, 13 Sep 2013 03:27:18 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8319 | \n", + "python3 | \n", + "<span class=\"hl slc\"># The Computer Language B... | \n", + "
| 8320 | \n", + "python3 | \n", + "\\nTue, 18 Mar 2014 06:12:43 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8321 | \n", + "sbcl | \n", + "<span class=\"hl slc\">;;; The Computer Language... | \n", + "
| 8322 | \n", + "sbcl | \n", + "\\nSun, 01 Jun 2014 01:39:42 GMT\\n\\nMAKE:\\ncp: ... | \n", + "
| 8323 | \n", + "yarv | \n", + "<span class=\"hl slc\"># The Computer Language B... | \n", + "
| 8324 | \n", + "yarv | \n", + "\\nThu, 09 Jan 2014 05:13:53 GMT\\n\\nCOMMAND LIN... | \n", + "
| 8335 | \n", + "java | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8336 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 22:59:09 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8339 | \n", + "ocaml | \n", + "<span class=\"hl com\">(* The Computer Language ... | \n", + "
| 8340 | \n", + "ocaml | \n", + "\\nFri, 13 Sep 2013 03:13:05 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8341 | \n", + "perl | \n", + "<span class=\"hl slc\"># The Computer Language B... | \n", + "
| 8342 | \n", + "perl | \n", + "\\nTue, 21 May 2013 09:44:09 GMT\\n\\nCOMMAND LIN... | \n", + "
| 8343 | \n", + "python3 | \n", + "<span class=\"hl slc\"># The Computer Language B... | \n", + "
| 8344 | \n", + "python3 | \n", + "\\nTue, 18 Mar 2014 06:18:49 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8355 | \n", + "java | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8356 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 23:07:24 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8365 | \n", + "java | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8366 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 23:05:24 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8369 | \n", + "java | \n", + "<span class=\"hl com\">/**</span>\\n<span class=\"... | \n", + "
| 8370 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 22:50:47 GMT\\n\\nMAKE:\\nmv t... | \n", + "
| 8373 | \n", + "java | \n", + "<span class=\"hl com\">/**\\n</span>\\n<span class... | \n", + "
| 8374 | \n", + "java | \n", + "\\nTue, 18 Mar 2014 22:51:36 GMT\\n\\nMAKE:\\nmv t... | \n", + "
4565 rows × 2 columns
\n", + "" + ], + "text/plain": [ + " exten txt\n", + "2 cint /* The Computer Language ...\n", + "4 clojure ;; The Computer Language ...\n", + "5 clojure \\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b...\n", + "6 csharp /*\\n/* The Computer Language ...\n", + "9 dart \\nThu, 28 Aug 2014 23:40:40 GMT\\n\\nCOMMAND LIN...\n", + "26 java /* The Computer Language ...\n", + "27 java \\nTue, 18 Mar 2014 23:34:26 GMT\\n\\nMAKE:\\nmv b...\n", + "28 jruby # The Computer Language S...\n", + "29 jruby \\nSat, 29 Mar 2014 19:58:45 GMT\\n\\nMAKE:\\nmv b...\n", + "30 ocaml (* The Computer Language ...\n", + "31 ocaml \\nThu, 12 Sep 2013 20:23:23 GMT\\n\\nMAKE:\\nmv b...\n", + "34 perl # The Computer Language B...\n", + "35 perl \\nTue, 21 May 2013 17:44:06 GMT\\n\\nCOMMAND LIN...\n", + "36 php <?php \\n#lang racket// The Computer Language ...\n", + "41 rust \\nTue, 08 Jul 2014 00:10:57 GMT\\n\\nMAKE:\\n/usr...\n", + "42 sbcl ;; The Computer Languag...\n", + "43 sbcl \\nSat, 31 May 2014 04:02:44 GMT\\n\\nMAKE:\\ncp: ...\n", + "44 scala /* The Computer Language ...\n", + "45 scala \\nWed, 19 Mar 2014 08:28:18 GMT\\n\\nMAKE:\\nmv b...\n", + "50 yarv # The Computer Language B...\n", + "51 yarv \\nTue, 07 Jan 2014 21:26:00 GMT\\n\\nCOMMAND LIN...\n", + "52 clojure ;; The Computer Language ...\n", + "53 clojure \\nFri, 18 Apr 2014 21:23:34 GMT\\n\\nMAKE:\\nmv b...\n", + "54 csharp /* The Computer Language ...\n", + "... ... ...\n", + "8301 csharp /* The Computer Langua...\n", + "8302 csharp \\nWed, 13 Aug 2014 21:59:29 GMT\\n\\nMAKE:\\nmv t...\n", + "8311 java /**\\n# The Computer Language B...\n", + "8316 jruby \\nSun, 30 Mar 2014 03:39:44 GMT\\n\\nMAKE:\\nmv t...\n", + "8317 ocaml (* The Computer Language ...\n", + "8318 ocaml \\nFri, 13 Sep 2013 03:27:18 GMT\\n\\nMAKE:\\nmv t...\n", + "8319 python3 # The Computer Language B...\n", + "8320 python3 \\nTue, 18 Mar 2014 06:12:43 GMT\\n\\nMAKE:\\nmv t...\n", + "8321 sbcl ;;; The Computer Language...\n", + "8322 sbcl \\nSun, 01 Jun 2014 01:39:42 GMT\\n\\nMAKE:\\ncp: ...\n", + "8323 yarv # The Computer Language B...\n", + "8324 yarv \\nThu, 09 Jan 2014 05:13:53 GMT\\n\\nCOMMAND LIN...\n", + "8335 java /**\\n(* The Computer Language ...\n", + "8340 ocaml \\nFri, 13 Sep 2013 03:13:05 GMT\\n\\nMAKE:\\nmv t...\n", + "8341 perl # The Computer Language B...\n", + "8342 perl \\nTue, 21 May 2013 09:44:09 GMT\\n\\nCOMMAND LIN...\n", + "8343 python3 # The Computer Language B...\n", + "8344 python3 \\nTue, 18 Mar 2014 06:18:49 GMT\\n\\nMAKE:\\nmv t...\n", + "8355 java /**\\n/**\\n/**\\n/**\\n\\n\n", + "| \n", + " | exten | \n", + "txt | \n", + "
|---|---|---|
| 2 | \n", + "c++ | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "
| 4 | \n", + "clojure | \n", + "<span class=\"hl slc\">;; The Computer Language ... | \n", + "
| 5 | \n", + "clojure | \n", + "\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| 6 | \n", + "c# | \n", + "<span class=\"hl com\">/*</span>\\n<span class... | \n", + "
| 7 | \n", + "c# | \n", + "\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b... | \n", + "
| \n", + " | exten | \n", + "txt | \n", + "nonword | \n", + "curly% | \n", + "period% | \n", + "
|---|---|---|---|---|---|
| 0 | \n", + "c++ | \n", + "<span class=\"hl com\">/* The Computer Language ... | \n", + "15 | \n", + "0.000772 | \n", + "0.001029 | \n", + "
| 1 | \n", + "clojure | \n", + "<span class=\"hl slc\">;; The Computer Language ... | \n", + "35 | \n", + "0.000000 | \n", + "0.001311 | \n", + "
| 2 | \n", + "clojure | \n", + "\\nFri, 18 Apr 2014 21:29:20 GMT\\n\\nMAKE:\\nmv b... | \n", + "4 | \n", + "0.000000 | \n", + "0.023649 | \n", + "
| 3 | \n", + "c# | \n", + "<span class=\"hl com\">/*</span>\\n<span class... | \n", + "15 | \n", + "0.001869 | \n", + "0.002419 | \n", + "
| 4 | \n", + "c# | \n", + "\\nThu, 14 Aug 2014 10:17:17 GMT\\n\\nMAKE:\\nmv b... | \n", + "3 | \n", + "0.000000 | \n", + "0.009447 | \n", + "