diff --git a/.gitignore b/.gitignore index f00dbf2..8143a15 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,5 @@ docs/_build/ # PyBuilder target/ +benchmarksgame-scm-latest.tar.gz +bench diff --git a/bs4_testing.ipynb b/bs4_testing.ipynb new file mode 100644 index 0000000..c40c30f --- /dev/null +++ b/bs4_testing.ipynb @@ -0,0 +1,2482 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup\n", + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "html_doc = \"\"\"\n", + "The Dormouse's story\n", + "\n", + "

The Dormouse's story

\n", + "\n", + "

Once upon a time there were three little sisters; and their names were\n", + "Elsie,\n", + "Lacie and\n", + "Tillie;\n", + "and they lived at the bottom of a well.

\n", + "\n", + "

...

\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + " \n", + " The Dormouse's story\n", + " \n", + " \n", + " \n", + "

\n", + " \n", + " The Dormouse's story\n", + " \n", + "

\n", + "

\n", + " Once upon a time there were three little sisters; and their names were\n", + " \n", + " Elsie\n", + " \n", + " ,\n", + " \n", + " Lacie\n", + " \n", + " and\n", + " \n", + " Tillie\n", + " \n", + " ;\n", + "and they lived at the bottom of a well.\n", + "

\n", + "

\n", + " ...\n", + "

\n", + " \n", + "\n" + ] + } + ], + "source": [ + "soup = BeautifulSoup(html_doc)\n", + "\n", + "print(soup.prettify())" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "soup.title\n", + "# The Dormouse's story\n", + "\n", + "soup.title.name\n", + "# u'title'\n", + "\n", + "soup.title.string\n", + "# u'The Dormouse's story'\n", + "\n", + "soup.title.parent.name\n", + "# u'head'\n", + "\n", + "soup.p\n", + "#

The Dormouse's story

\n", + "\n", + "soup.p['class']\n", + "# u'title'\n", + "\n", + "soup.a\n", + "# Elsie\n", + "\n", + "soup.find_all('a')\n", + "# [Elsie,\n", + "# Lacie,\n", + "# Tillie]\n", + "\n", + "soup.find(id=\"link3\")\n", + "# Tillie\n", + "\n", + "\n", + "####Extract links####\n", + "for link in soup.find_all('a'):\n", + " print(link.get('href'))\n", + "# http://example.com/elsie\n", + "# http://example.com/lacie\n", + "# http://example.com/tillie\n", + "\n", + "####Extract text####\n", + "print(soup.get_text())\n", + "# The Dormouse's story\n", + "#\n", + "# The Dormouse's story\n", + "#\n", + "# Once upon a time there were three little sisters; and their names were\n", + "# Elsie,\n", + "# Lacie and\n", + "# Tillie;\n", + "# and they lived at the bottom of a well.\n", + "#\n", + "# ..." + ] + }, + { + "cell_type": "code", + "execution_count": 196, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n", + "#mw-head\n", + "#p-search\n", + "/wiki/Category:Solutions_by_Programming_Task\n", + "/wiki/Rosetta_Code:Solve_a_Task\n", + "/wiki/Rosetta_Code:Multiple_passes\n", + "/wiki/Rosetta_Code:Extra_credit\n", + "/wiki/Talk:100_doors\n", + "/wiki/Rosetta_Code:Optimization\n", + "#4DOS_Batch\n", + "#6502_Assembly\n", + "#68000_Assembly\n", + "#8086_Assembly\n", + "#8th\n", + "#ABAP\n", + "#ACL2\n", + "#ActionScript\n", + "#Acurity_Architect\n", + "#Ada\n", + "#Aikido\n", + "#ALGOL_68\n", + "#ALGOL_W\n", + "#AmigaE\n", + "#APL\n", + "#AppleScript\n", + "#Arbre\n", + "#Argile\n", + "#ATS\n", + "#AutoHotkey\n", + "#Standard_Approach\n", + "#Alternative_Approach\n", + "#Optimized\n", + "#AutoIt\n", + "#Axiom\n", + "#AWK\n", + "#BASIC\n", + "#BASIC256\n", + "#Batch_File\n", + "#BBC_BASIC\n", + "#bc\n", + "#Befunge\n", + "#BlitzMax\n", + "#Bracmat\n", + "#Burlesque\n", + "#C\n", + "#unoptimized\n", + "#optimized_2\n", + "#C.2B.2B\n", + "#C.23\n", + "#Unoptimized_with_Modulus_.25_Operator\n", + 
"#Optimized_for_Increments\n", + "#Optimized_for_Orthogonality\n", + "#Unoptimized_but_Concise\n", + "#Optimized_for_brevity\n", + "#C1R\n", + "#Cach.C3.A9_ObjectScript\n", + "#Clarion\n", + "#CLIPS\n", + "#Clojure\n", + "#COBOL\n", + "#Coco\n", + "#CoffeeScript\n", + "#ColdFusion\n", + "#Common_Lisp\n", + "#Component_Pascal\n", + "#Coq\n", + "#Crystal\n", + "#D\n", + "#Dart\n", + "#DCL\n", + "#Delphi\n", + "#D.C3.A9j.C3.A0_Vu\n", + "#DWScript\n", + "#Dylan\n", + "#E\n", + "#ECL\n", + "#Eero\n", + "#EGL\n", + "#Eiffel\n", + "#Ela\n", + "#Elixir\n", + "#Emacs_Lisp\n", + "#Erlang\n", + "#ERRE\n", + "#Euler_Math_Toolbox\n", + "#Euphoria\n", + "#F.23\n", + "#Factor\n", + "#Falcon\n", + "#Fantom\n", + "#FBSL\n", + "#friendly_interactive_shell\n", + "#Forth\n", + "#Fortran\n", + "#Frink\n", + "#FunL\n", + "#Unoptimized_2\n", + "#Optimized_3\n", + "#GAP\n", + "#GML\n", + "#Go\n", + "#Golfscript\n", + "#Gosu\n", + "#Groovy\n", + "#Harbour\n", + "#Haskell\n", + "#Haxe\n", + "#HicEst\n", + "#Hy\n", + "#Icon_and_Unicon\n", + "#Inform_7\n", + "#Informix_4GL\n", + "#Io\n", + "#Ioke\n", + "#J\n", + "#Java\n", + "#JavaScript\n", + "#ES5\n", + "#unoptimized_3\n", + "#optimized_4\n", + "#ES6\n", + "#jq\n", + "#Julia\n", + "#K\n", + "#Kotlin\n", + "#LabVIEW\n", + "#Lasso\n", + "#Loop\n", + "#Lhogho\n", + "#Liberty_BASIC\n", + "#LiveCode\n", + "#Logo\n", + "#LOLCODE\n", + "#Lua\n", + "#M4\n", + "#Maple\n", + "#Mathematica\n", + "#MATLAB_.2F_Octave\n", + "#Iterative_Method\n", + "#Vectorized_Method\n", + "#Known-Result_Method\n", + "#Maxima\n", + "#MAXScript\n", + "#Mercury\n", + "#Metafont\n", + "#MIPS_Assembly\n", + "#Mirah\n", + "#mIRC_Scripting_Language\n", + "#ML.2FI\n", + "#MMIX\n", + "#Modula-2\n", + "#Modula-3\n", + "#MOO\n", + "#MoonScript\n", + "#MUMPS\n", + "#NetRexx\n", + "#NewLisp\n", + "#Nim\n", + "#Objeck\n", + "#Objective-C\n", + "#OCaml\n", + "#Octave\n", + "#Oforth\n", + "#ooRexx\n", + "#OpenEdge.2FProgress\n", + "#OxygenBasic\n", + "#Oz\n", + "#PARI.2FGP\n", + 
"#Pascal\n", + "#Perl\n", + "#Perl5i\n", + "#Perl_6\n", + "#PHL\n", + "#unoptimized_4\n", + "#optimized_5\n", + "#PHP\n", + "#PicoLisp\n", + "#Piet\n", + "#Pike\n", + "#PL.2FI\n", + "#PL.2FSQL\n", + "#Pop11\n", + "#PostScript\n", + "#Potion\n", + "#PowerShell\n", + "#unoptimized_5\n", + "#Alternative_Method\n", + "#unoptimized_Pipeline\n", + "#unoptimized_Pipeline_2\n", + "#unoptimized_Pipeline_3_.28dynamically_build_pipeline.29\n", + "#Using_Powershell_Workflow_for_Parallelism\n", + "#optimized_6\n", + "#ProDOS\n", + "#Prolog\n", + "#unoptimized_6\n", + "#optimized_7\n", + "#PureBasic\n", + "#Python\n", + "#Q\n", + "#R\n", + "#Racket\n", + "#RapidQ\n", + "#REALbasic\n", + "#REBOL\n", + "#Unoptimized_7\n", + "#Optimized_8\n", + "#Retro\n", + "#REXX\n", + "#version_1\n", + "#version_2.2C_the_hard_way\n", + "#version_3.2C_the_easy_way\n", + "#version_4.2C_easy_way.2C_1.2C000_doors\n", + "#Ruby\n", + "#Run_BASIC\n", + "#Rust\n", + "#S-lang\n", + "#Salmon\n", + "#SAS\n", + "#Scala\n", + "#Sather\n", + "#Scheme\n", + "#Seed7\n", + "#SETL\n", + "#Sidef\n", + "#Slate\n", + "#Smalltalk\n", + "#SNOBOL4\n", + "#Sparkling\n", + "#SQL\n", + "#Swift\n", + "#Tcl\n", + "#TI-83_BASIC\n", + "#Unoptimized_8\n", + "#Optimized_9\n", + "#TI-89_BASIC\n", + "#TorqueScript\n", + "#TSE_SAL\n", + "#TUSCRIPT\n", + "#TXR\n", + "#Uniface\n", + "#UNIX_Shell\n", + "#Ursala\n", + "#Vala\n", + "#VBA\n", + "#VBScript\n", + "#Vedit_macro_language\n", + "#VHDL\n", + "#Visual_Basic_.NET\n", + "#Wart\n", + "#Wortel\n", + "#Wrapl\n", + "#XPL0\n", + "#XSLT_1.0\n", + "#XSLT_2.0\n", + "#Yorick\n", + "#zkl\n", + "#ZX_Spectrum_Basic\n", + "/mw/index.php?title=100_doors&action=edit§ion=1\n", + "/wiki/Category:4DOS_Batch\n", + "/mw/index.php?title=100_doors&action=edit§ion=2\n", + "/wiki/Category:6502_Assembly\n", + "http://www.6502asm.com/beta/index.html\n", + "http://www.6502asm.com/\n", + "/mw/index.php?title=100_doors&action=edit§ion=3\n", + "/wiki/Category:68000_Assembly\n", + "http://www.easy68k.com/\n", 
+ "/mw/index.php?title=100_doors&action=edit§ion=4\n", + "/wiki/Category:8086_Assembly\n", + "/wiki/100_doors/8086_Assembly\n", + "/mw/index.php?title=100_doors&action=edit§ion=5\n", + "/wiki/Category:8th\n", + "/mw/index.php?title=100_doors&action=edit§ion=6\n", + "/wiki/Category:ABAP\n", + "http://help.sap.com/abapdocu/en/ABAPFORM.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDATA.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDATA.htm\n", + "http://help.sap.com/abapdocu/en/ABAPFIELD-SYMBOLS.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDO.htm\n", + "http://help.sap.com/abapdocu/en/ABAPAPPEND.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDDO.htm\n", + "http://help.sap.com/abapdocu/en/ABAPWHILE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPWHILE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPELSE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPADD.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDWHILE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDWHILE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPLOOP.htm\n", + "http://help.sap.com/abapdocu/en/ABAPIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPWRITE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDLOOP.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDFORM.htm\n", + "http://help.sap.com/abapdocu/en/ABAPFORM.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDATA.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDATA.htm\n", + "http://help.sap.com/abapdocu/en/ABAPFIELD-SYMBOLS.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDO.htm\n", + "http://help.sap.com/abapdocu/en/ABAPAPPEND.htm\n", + "http://help.sap.com/abapdocu/en/ABAPIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPADD.htm\n", + "http://help.sap.com/abapdocu/en/ABAPWRITE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDDO.htm\n", + 
"http://help.sap.com/abapdocu/en/ABAPENDFORM.htm\n", + "/mw/index.php?title=100_doors&action=edit§ion=7\n", + "/wiki/Category:ACL2\n", + "/mw/index.php?title=100_doors&action=edit§ion=8\n", + "/wiki/Category:ActionScript\n", + "/wiki/ActionScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=9\n", + "/wiki/Category:Acurity_Architect\n", + "/mw/index.php?title=100_doors&action=edit§ion=10\n", + "/wiki/Category:Ada\n", + "/mw/index.php?title=100_doors&action=edit§ion=11\n", + "/wiki/Category:Aikido\n", + "/mw/index.php?title=100_doors&action=edit§ion=12\n", + "/wiki/Category:ALGOL_68\n", + "/mw/index.php?title=100_doors&action=edit§ion=13\n", + "/wiki/Category:ALGOL_W\n", + "/mw/index.php?title=100_doors&action=edit§ion=14\n", + "/wiki/Category:AmigaE\n", + "/mw/index.php?title=100_doors&action=edit§ion=15\n", + "/wiki/Category:APL\n", + "/wiki/GNU_APL\n", + "/mw/index.php?title=100_doors&action=edit§ion=16\n", + "/wiki/Category:AppleScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=17\n", + "/wiki/Category:Arbre\n", + "/mw/index.php?title=100_doors&action=edit§ion=18\n", + "/wiki/Category:Argile\n", + "/mw/index.php?title=100_doors&action=edit§ion=19\n", + "/wiki/Category:ATS\n", + "/mw/index.php?title=100_doors&action=edit§ion=20\n", + "/wiki/Category:AutoHotkey\n", + "/mw/index.php?title=100_doors&action=edit§ion=21\n", + "http://www.autohotkey.com/docs/commands/Loop.htm\n", + "http://www.autohotkey.com/docs/commands/Loop.htm\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/commands/Loop.htm\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/commands/MsgBox.htm\n", + "/mw/index.php?title=100_doors&action=edit§ion=22\n", + "http://www.autohotkey.com/docs/commands/Loop.htm\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + 
"http://www.autohotkey.com/docs/commands/MsgBox.htm\n", + "http://www.autohotkey.com/docs/Functions.htm#BuiltIn\n", + "/mw/index.php?title=100_doors&action=edit§ion=23\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/commands/MsgBox.htm\n", + "/mw/index.php?title=100_doors&action=edit§ion=24\n", + "/wiki/Category:AutoIt\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/functions/ConsoleWrite.htm\n", + "http://www.autoitscript.com/autoit3/docs/functions/Number.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/functions/Mod.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/functions/ConsoleWrite.htm\n", + "http://www.autoitscript.com/autoit3/docs/macros.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "/mw/index.php?title=100_doors&action=edit§ion=25\n", + "/wiki/Category:Axiom\n", + "/mw/index.php?title=100_doors&action=edit§ion=26\n", + "/wiki/Category:AWK\n", + "/mw/index.php?title=100_doors&action=edit§ion=27\n", + "/wiki/Category:BASIC\n", + "/mw/index.php?title=QBASIC,_QB64&action=edit&redlink=1\n", + "http://www.qbasicnews.com/qboho/qckdefint.shtml\n", + "http://www.qbasicnews.com/qboho/qckconst.shtml\n", + 
"http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qcklet.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckend.shtml\n", + "/wiki/QuickBasic\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckend.shtml\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckint.shtml\n", + "http://www.qbasicnews.com/qboho/qcksqr.shtml\n", + "http://www.qbasicnews.com/qboho/qcksqr.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckend.shtml\n", + "http://www.qbasicnews.com/qboho/qckcls.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckend.shtml\n", + "/mw/index.php?title=100_doors&action=edit§ion=28\n", + "/wiki/Category:BASIC256\n", + "/mw/index.php?title=100_doors&action=edit§ion=29\n", + "/wiki/Category:Batch_File\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/setlocal.html\n", + "http://www.ss64.com/nt/for.html\n", + "http://www.ss64.com/nt/in.html\n", + "http://www.ss64.com/nt/do.html\n", + "http://www.ss64.com/nt/for.html\n", + 
"http://www.ss64.com/nt/in.html\n", + "http://www.ss64.com/nt/do.html\n", + "http://www.ss64.com/nt/set.html\n", + "http://www.ss64.com/nt/for.html\n", + "http://www.ss64.com/nt/in.html\n", + "http://www.ss64.com/nt/do.html\n", + "http://www.ss64.com/nt/if.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/else.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/setlocal.html\n", + "http://www.ss64.com/nt/set.html\n", + "http://www.ss64.com/nt/for.html\n", + "http://www.ss64.com/nt/in.html\n", + "http://www.ss64.com/nt/do.html\n", + "http://www.ss64.com/nt/if.html\n", + "http://www.ss64.com/nt/neq.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/else.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/set.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=30\n", + "/wiki/Category:BBC_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=31\n", + "/wiki/Category:Bc\n", + "/mw/index.php?title=100_doors&action=edit§ion=32\n", + "/wiki/Category:Befunge\n", + "/wiki/CCBI\n", + "/mw/index.php?title=100_doors&action=edit§ion=33\n", + "/wiki/Category:BlitzMax\n", + "/wiki/BlitzMax\n", + "/mw/index.php?title=100_doors&action=edit§ion=34\n", + "/wiki/Category:Bracmat\n", + "/mw/index.php?title=100_doors&action=edit§ion=35\n", + "/wiki/Category:Burlesque\n", + "/mw/index.php?title=100_doors&action=edit§ion=36\n", + "/wiki/Category:C\n", + "/mw/index.php?title=100_doors&action=edit§ion=37\n", + "/wiki/Library/C_Runtime\n", + "/wiki/Library/C_Runtime/printf\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "/wiki/Library/C_Runtime\n", + "/wiki/Library/C_Runtime/printf\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=38\n", + "/wiki/Library/C_Runtime\n", + "/wiki/Library/C_Runtime/printf\n", + 
"http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=39\n", + "/wiki/Category:C%2B%2B\n", + "/wiki/GCC\n", + "/mw/index.php?title=100_doors&action=edit§ion=40\n", + "/wiki/Category:C_sharp\n", + "/mw/index.php?title=100_doors&action=edit§ion=41\n", + "http://www.google.com/search?q=new+msdn.microsoft.com\n", + "/mw/index.php?title=100_doors&action=edit§ion=42\n", + "/mw/index.php?title=100_doors&action=edit§ion=43\n", + "http://www.google.com/search?q=new+msdn.microsoft.com\n", + "/mw/index.php?title=100_doors&action=edit§ion=44\n", + "http://www.google.com/search?q=new+msdn.microsoft.com\n", + "/mw/index.php?title=100_doors&action=edit§ion=45\n", + "/mw/index.php?title=100_doors&action=edit§ion=46\n", + "/wiki/Category:C1R\n", + "/mw/index.php?title=100_doors&action=edit§ion=47\n", + "/wiki/Category:Cach%C3%A9_ObjectScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=48\n", + "/wiki/Category:Clarion\n", + "/mw/index.php?title=100_doors&action=edit§ion=49\n", + "/wiki/Category:CLIPS\n", + "/mw/index.php?title=100_doors&action=edit§ion=50\n", + "/wiki/Category:Clojure\n", + "/mw/index.php?title=100_doors&action=edit§ion=51\n", + "/wiki/Category:COBOL\n", + "/mw/index.php?title=100_doors&action=edit§ion=52\n", + "/wiki/Category:Coco\n", + "/mw/index.php?title=100_doors&action=edit§ion=53\n", + "/wiki/Category:CoffeeScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=54\n", + "/wiki/Category:ColdFusion\n", + "/mw/index.php?title=100_doors&action=edit§ion=55\n", + "/wiki/Category:Common_Lisp\n", + "/mw/index.php?title=100_doors&action=edit§ion=56\n", + "/wiki/Category:Component_Pascal\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=57\n", + "/wiki/Category:Coq\n", + "https://github.com/spanjel/rosetta\n", + "/mw/index.php?title=100_doors&action=edit§ion=58\n", + "/wiki/Category:Crystal\n", + "/mw/index.php?title=100_doors&action=edit§ion=59\n", + "/wiki/Category:D\n", + "/mw/index.php?title=100_doors&action=edit§ion=60\n", + "/wiki/Category:Dart\n", + "/mw/index.php?title=100_doors&action=edit§ion=61\n", + "/wiki/Category:DCL\n", + "/mw/index.php?title=100_doors&action=edit§ion=62\n", + "/wiki/Category:Delphi\n", + "#Pascal\n", + "/mw/index.php?title=100_doors&action=edit§ion=63\n", + "/wiki/Category:D%C3%A9j%C3%A0_Vu\n", + "/mw/index.php?title=100_doors&action=edit§ion=64\n", + "/wiki/Category:DWScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=65\n", + "/wiki/Category:Dylan\n", + "/mw/index.php?title=100_doors&action=edit§ion=66\n", + "/wiki/Category:E\n", + "/wiki/E-on-Java\n", + "http://wiki.erights.org/wiki/var\n", + "http://wiki.erights.org/wiki/var\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/for\n", + "http://wiki.erights.org/wiki/in\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/E\n", + "http://wiki.erights.org/wiki/fn\n", + "http://wiki.erights.org/wiki/fn\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/to\n", + "http://wiki.erights.org/wiki/bind\n", + "http://wiki.erights.org/wiki/true\n", + "http://wiki.erights.org/wiki/match\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/timer\n", + "http://wiki.erights.org/wiki/timer\n", + "http://wiki.erights.org/wiki/fn\n", + "http://wiki.erights.org/wiki/if\n", + "http://wiki.erights.org/wiki/if\n", + "http://wiki.erights.org/wiki/else\n", + "http://wiki.erights.org/wiki/else\n", + "http://wiki.erights.org/wiki/interp\n", + "/mw/index.php?title=100_doors&action=edit§ion=67\n", + 
"/wiki/Category:ECL\n", + "/mw/index.php?title=100_doors&action=edit§ion=68\n", + "/wiki/Category:Eero\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/puts.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/puts.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=69\n", + "/wiki/Category:EGL\n", + "/mw/index.php?title=100_doors&action=edit§ion=70\n", + "/wiki/Category:Eiffel\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+none&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+linked_list&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+integer&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+integer&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+boolean&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+integer&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+boolean&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+boolean&btnI=I%27m+Feeling+Lucky\n", + "/mw/index.php?title=100_doors&action=edit§ion=71\n", + "/wiki/Category:Ela\n", + "/mw/index.php?title=100_doors&action=edit§ion=72\n", + "/wiki/Category:Elixir\n", + "/mw/index.php?title=100_doors&action=edit§ion=73\n", + "/wiki/Category:Emacs_Lisp\n", + "/mw/index.php?title=100_doors&action=edit§ion=74\n", + "/wiki/Category:Erlang\n", + "http://erlang.org/doc/man/lists.html\n", + "http://erlang.org/doc/man/lists.html\n", + 
"http://erlang.org/doc/man/lists.html\n", + "http://erlang.org/doc/man/math.html\n", + "http://erlang.org/doc/man/io.html\n", + "http://erlang.org/doc/man/io.html\n", + "http://erlang.org/doc/man/lists.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=75\n", + "/wiki/Category:ERRE\n", + "/mw/index.php?title=100_doors&action=edit§ion=76\n", + "/wiki/Category:Euler_Math_Toolbox\n", + "/mw/index.php?title=100_doors&action=edit§ion=77\n", + "/wiki/Category:Euphoria\n", + "/mw/index.php?title=100_doors&action=edit§ion=78\n", + "/wiki/Category:F_Sharp\n", + "http://research.microsoft.com/en-us/um/cambridge/projects/fsharp/manual/namespaces.html\n", + "http://research.microsoft.com/en-us/um/cambridge/projects/fsharp/manual/namespaces.html\n", + "http://research.microsoft.com/en-us/um/cambridge/projects/fsharp/manual/namespaces.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=79\n", + "/wiki/Category:Factor\n", + "/mw/index.php?title=100_doors&action=edit§ion=80\n", + "/wiki/Category:Falcon\n", + "http://falconpl.org/project_docs/core/functions.html#arrayBuffer\n", + "http://falconpl.org/project_docs/core/functions.html#len\n", + "http://falconpl.org/project_docs/core/functions.html#len\n", + "http://falconpl.org/project_docs/core/functions.html#len\n", + "http://falconpl.org/project_docs/core/functions.html#fract\n", + "/mw/index.php?title=100_doors&action=edit§ion=81\n", + "/wiki/Category:Fantom\n", + "/mw/index.php?title=100_doors&action=edit§ion=82\n", + "/wiki/Category:FBSL\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckas.shtml\n", + "http://www.qbasicnews.com/qboho/qckinteger.shtml\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + 
"http://www.qbasicnews.com/qboho/qckend.shtml\n", + "/mw/index.php?title=100_doors&action=edit§ion=83\n", + "/wiki/Category:Friendly_interactive_shell\n", + "/mw/index.php?title=100_doors&action=edit§ion=84\n", + "/wiki/Category:Forth\n", + "/mw/index.php?title=100_doors&action=edit§ion=85\n", + "/wiki/Category:Fortran\n", + "/wiki/Fortran\n", + "/mw/index.php?title=100_doors&action=edit§ion=86\n", + "/wiki/Category:Frink\n", + "/mw/index.php?title=100_doors&action=edit§ion=87\n", + "/wiki/Category:FunL\n", + "/mw/index.php?title=100_doors&action=edit§ion=88\n", + "/mw/index.php?title=100_doors&action=edit§ion=89\n", + "/mw/index.php?title=100_doors&action=edit§ion=90\n", + "/wiki/Category:GAP\n", + "/mw/index.php?title=100_doors&action=edit§ion=91\n", + "/wiki/Category:GML\n", + "/mw/index.php?title=100_doors&action=edit§ion=92\n", + "/wiki/Category:Go\n", + "/mw/index.php?title=100_doors&action=edit§ion=93\n", + "/wiki/Category:Golfscript\n", + "/mw/index.php?title=100_doors&action=edit§ion=94\n", + "/wiki/Category:Gosu\n", + "/mw/index.php?title=100_doors&action=edit§ion=95\n", + "/wiki/Category:Groovy\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20false\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20step\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20true\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20println\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20println\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20println\n", + "/mw/index.php?title=100_doors&action=edit§ion=96\n", + 
"/wiki/Category:Harbour\n", + "/mw/index.php?title=100_doors&action=edit§ion=97\n", + "/wiki/Category:Haskell\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Show\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Int\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:zipWith\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:cycle\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:id\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:foldl\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Show\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Int\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:zipWith\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:mod\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:otherwise\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:foldr\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Eq\n", + "/mw/index.php?title=100_doors&action=edit§ion=98\n", + "/wiki/Category:Haxe\n", + "/mw/index.php?title=100_doors&action=edit§ion=99\n", + "/wiki/Category:HicEst\n", + "/mw/index.php?title=100_doors&action=edit§ion=100\n", + "/wiki/Category:Hy\n", + "/wiki/100_doors#Coco\n", + "/mw/index.php?title=100_doors&action=edit§ion=101\n", + "/wiki/Category:Icon\n", + "/wiki/Category:Unicon\n", + "/mw/index.php?title=100_doors&action=edit§ion=102\n", + "/wiki/Category:Inform_7\n", + "/mw/index.php?title=Z-machine&action=edit&redlink=1\n", + "/mw/index.php?title=Glulx_virtual_machine&action=edit&redlink=1\n", + "/mw/index.php?title=100_doors&action=edit§ion=103\n", + "/wiki/Category:Informix_4GL\n", + "/mw/index.php?title=100_doors&action=edit§ion=104\n", + "/wiki/Category:Io\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=105\n", + "/wiki/Category:Ioke\n", + "/mw/index.php?title=100_doors&action=edit§ion=106\n", + "/wiki/Category:J\n", + "/mw/index.php?title=100_doors&action=edit§ion=107\n", + "/wiki/Category:Java\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Amath+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "/mw/index.php?title=100_doors&action=edit§ion=108\n", + "/wiki/Category:JavaScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=109\n", + "/mw/index.php?title=100_doors&action=edit§ion=110\n", + "/mw/index.php?title=100_doors&action=edit§ion=111\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=112\n", + "/mw/index.php?title=100_doors&action=edit§ion=113\n", + "/wiki/Category:Jq\n", + "/mw/index.php?title=100_doors&action=edit§ion=114\n", + "/wiki/Category:Julia\n", + "/mw/index.php?title=100_doors&action=edit§ion=115\n", + "/wiki/Category:K\n", + "/mw/index.php?title=100_doors&action=edit§ion=116\n", + "/wiki/Category:Kotlin\n", + "/mw/index.php?title=100_doors&action=edit§ion=117\n", + "/wiki/Category:LabVIEW\n", + "http://zone.ni.com/devzone/cda/tut/p/id/9330\n", + "/wiki/LabVIEW\n", + "/wiki/File:100doors.png\n", + "http://zone.ni.com/devzone/cda/tut/p/id/9330\n", + "/wiki/LabVIEW\n", + "/wiki/File:LabVIEW_100_doors.png\n", + "/mw/index.php?title=100_doors&action=edit§ion=118\n", + "/wiki/Category:Lasso\n", + "/mw/index.php?title=100_doors&action=edit§ion=119\n", + "/mw/index.php?title=100_doors&action=edit§ion=120\n", + "/wiki/Category:Lhogho\n", + "/mw/index.php?title=100_doors&action=edit§ion=121\n", + "/wiki/Category:Liberty_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=122\n", + "/wiki/Category:LiveCode\n", + "/mw/index.php?title=100_doors&action=edit§ion=123\n", + "/wiki/Category:Logo\n", + "/mw/index.php?title=100_doors&action=edit§ion=124\n", + "/wiki/Category:LOLCODE\n", + "/mw/index.php?title=100_doors&action=edit§ion=125\n", + "/wiki/Category:Lua\n", + "/mw/index.php?title=100_doors&action=edit§ion=126\n", + "/wiki/Category:M4\n", + "/mw/index.php?title=100_doors&action=edit§ion=127\n", + "/wiki/Category:Maple\n", + "/mw/index.php?title=100_doors&action=edit§ion=128\n", + "/wiki/Category:Mathematica\n", + "/mw/index.php?title=100_doors&action=edit§ion=129\n", + "/wiki/Category:MATLAB\n", + "/wiki/Category:Octave\n", + "/mw/index.php?title=100_doors&action=edit§ion=130\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/zeros.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + 
"http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/sqrt.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/floor.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/sqrt.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/zeros.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/sqrt.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=131\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/logical.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/find.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/find.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=132\n", + "/mw/index.php?title=100_doors&action=edit§ion=133\n", + "/wiki/Category:Maxima\n", + "/mw/index.php?title=100_doors&action=edit§ion=134\n", + "/wiki/Category:MAXScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=135\n", + "/wiki/Category:Mercury\n", + "/mw/index.php?title=100_doors&action=edit§ion=136\n", + "/wiki/Category:Metafont\n", + "/mw/index.php?title=100_doors&action=edit§ion=137\n", + "/wiki/Category:MIPS_Assembly\n", + "/mw/index.php?title=100_doors&action=edit§ion=138\n", + "/wiki/Category:Mirah\n", + "/mw/index.php?title=100_doors&action=edit§ion=139\n", + "/wiki/Category:MIRC_Scripting_Language\n", + "http://www.mirc.com/echo\n", + "http://www.mirc.com/echo\n", + "/mw/index.php?title=100_doors&action=edit§ion=140\n", + "/wiki/Category:ML/I\n", + "/mw/index.php?title=100_doors&action=edit§ion=141\n", + "/wiki/Category:MMIX\n", + "/wiki/100_doors/MMIX\n", + "/mw/index.php?title=100_doors&action=edit§ion=142\n", + "/wiki/Category:Modula-2\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=143\n", + "/wiki/Category:Modula-3\n", + "/mw/index.php?title=100_doors&action=edit§ion=144\n", + "/wiki/Category:MOO\n", + "/mw/index.php?title=100_doors&action=edit§ion=145\n", + "/wiki/Category:MoonScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=146\n", + "/wiki/Category:MUMPS\n", + "/mw/index.php?title=100_doors&action=edit§ion=147\n", + "/wiki/Category:NetRexx\n", + "/wiki/100_doors#Java\n", + "/wiki/100_doors#Java\n", + "/mw/index.php?title=100_doors&action=edit§ion=148\n", + "/wiki/Category:NewLisp\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#define\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#let\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#int\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#sqrt\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#if\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#string\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#string\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#dolist\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#map\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#sequence\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#println\n", + "/mw/index.php?title=100_doors&action=edit§ion=149\n", + "/wiki/Category:Nim\n", + "/mw/index.php?title=100_doors&action=edit§ion=150\n", + "/wiki/Category:Objeck\n", + "/mw/index.php?title=100_doors&action=edit§ion=151\n", + "/wiki/Category:Objective-C\n", + "/mw/index.php?title=100_doors&action=edit§ion=152\n", + "/wiki/Category:OCaml\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Array.html\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Printf.html\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Array.html\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Pervasives.html#VALint_of_float\n", + 
"http://caml.inria.fr/pub/docs/manual-ocaml/libref/Pervasives.html#VALsqrt\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Pervasives.html#VALfloat_of_int\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Array.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=153\n", + "/wiki/Category:Octave\n", + "http://octave.sourceforge.net/octave/function/false.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/j.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/j.html\n", + "http://octave.sourceforge.net/octave/function/j.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/printf.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=154\n", + "/wiki/Category:Oforth\n", + "/mw/index.php?title=100_doors&action=edit§ion=155\n", + "/wiki/Category:OoRexx\n", + "/mw/index.php?title=100_doors&action=edit§ion=156\n", + "/wiki/Category:OpenEdge/Progress\n", + "/mw/index.php?title=100_doors&action=edit§ion=157\n", + "/wiki/Category:OxygenBasic\n", + "/mw/index.php?title=100_doors&action=edit§ion=158\n", + "/wiki/Category:Oz\n", + "/mw/index.php?title=100_doors&action=edit§ion=159\n", + "/wiki/Category:PARI/GP\n", + "/mw/index.php?title=100_doors&action=edit§ion=160\n", + "/wiki/Category:Pascal\n", + "/mw/index.php?title=100_doors&action=edit§ion=161\n", + "/wiki/Category:Perl\n", + "/wiki/Perl\n", + "http://perldoc.perl.org/functions/print.html\n", + "/wiki/Perl\n", + "http://perldoc.perl.org/functions/map.html\n", + "http://perldoc.perl.org/functions/int.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "http://perldoc.perl.org/functions/join.html\n", + 
"http://perldoc.perl.org/functions/map.html\n", + "http://perldoc.perl.org/functions/grep.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "/wiki/Perl\n", + "http://perldoc.perl.org/functions/print.html\n", + "http://perldoc.perl.org/functions/map.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "http://perldoc.perl.org/functions/qw.html\n", + "http://perldoc.perl.org/functions/int.html\n", + "http://perldoc.perl.org/functions/sqrt.html\n", + "http://perldoc.perl.org/functions/sqrt.html\n", + "http://perldoc.perl.org/functions/sqrt.html\n", + "http://perldoc.perl.org/functions/int.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=162\n", + "/wiki/Category:Perl5i\n", + "/mw/index.php?title=100_doors&action=edit§ion=163\n", + "/wiki/Category:Perl_6\n", + "/wiki/Rakudo\n", + "/mw/index.php?title=100_doors&action=edit§ion=164\n", + "/wiki/Category:PHL\n", + "/mw/index.php?title=100_doors&action=edit§ion=165\n", + "/mw/index.php?title=100_doors&action=edit§ion=166\n", + "/wiki/100_doors#C.23\n", + "/mw/index.php?title=100_doors&action=edit§ion=167\n", + "/wiki/Category:PHP\n", + "http://www.thomporter.com/100doors.php\n", + "http://www.php.net/sqrt\n", + "http://www.php.net/ceil\n", + "http://www.php.net/array\n", + "http://www.php.net/array_fill\n", + "http://www.php.net/printf\n", + "/mw/index.php?title=100_doors&action=edit§ion=168\n", + "/wiki/Category:PicoLisp\n", + "/mw/index.php?title=100_doors&action=edit§ion=169\n", + "/wiki/Category:Piet\n", + "http://www.toothycat.net/~sham/piet/100doors.png\n", + "/mw/index.php?title=100_doors&action=edit§ion=170\n", + "/wiki/Category:Pike\n", + "/mw/index.php?title=100_doors&action=edit§ion=171\n", + "/wiki/Category:PL/I\n", + "/mw/index.php?title=100_doors&action=edit§ion=172\n", + "/wiki/Category:PL/SQL\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=DECLARE\n", + 
"http://www.oracle.com/pls/db92/db92.drilldown?word=TYPE\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IS\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=OF\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=BOOLEAN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=BEGIN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FOR\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=COUNT\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FALSE\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FOR\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FOR\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IF\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=MOD\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=THEN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=NOT\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IF\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FOR\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=COUNT\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + 
"http://www.oracle.com/pls/db92/db92.drilldown?word=DBMS_OUTPUT\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=CASE\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=WHEN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=THEN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=ELSE\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "/mw/index.php?title=100_doors&action=edit§ion=173\n", + "/wiki/Category:Pop11\n", + "/mw/index.php?title=100_doors&action=edit§ion=174\n", + "/wiki/Category:PostScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=175\n", + "/wiki/Category:Potion\n", + "/mw/index.php?title=100_doors&action=edit§ion=176\n", + "/wiki/Category:PowerShell\n", + "/mw/index.php?title=100_doors&action=edit§ion=177\n", + "/mw/index.php?title=100_doors&action=edit§ion=178\n", + "/mw/index.php?title=100_doors&action=edit§ion=179\n", + "about:blank\n", + "about:blank\n", + "/mw/index.php?title=100_doors&action=edit§ion=180\n", + "about:blank\n", + "about:blank\n", + "about:blank\n", + "about:blank\n", + "/mw/index.php?title=100_doors&action=edit§ion=181\n", + "about:blank\n", + "about:blank\n", + "about:blank\n", + "about:blank\n", + "/mw/index.php?title=100_doors&action=edit§ion=182\n", + "/mw/index.php?title=100_doors&action=edit§ion=183\n", + "about:blank\n", + "about:blank\n", + "/mw/index.php?title=100_doors&action=edit§ion=184\n", + "/wiki/Category:ProDOS\n", + "/mw/index.php?title=100_doors&action=edit§ion=185\n", + "/wiki/Category:Prolog\n", + "/mw/index.php?title=100_doors&action=edit§ion=186\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + 
"http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=187\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=188\n", + "/wiki/Category:PureBasic\n", + "/mw/index.php?title=100_doors&action=edit§ion=189\n", + "/wiki/Category:Python\n", + "/wiki/Python\n", + "/wiki/Python\n", + "/mw/index.php?title=100_doors&action=edit§ion=190\n", + "/wiki/Category:Q\n", + "/mw/index.php?title=100_doors&action=edit§ion=191\n", + "/wiki/Category:R\n", + "/mw/index.php?title=100_doors&action=edit§ion=192\n", + "/wiki/Category:Racket\n", + "/wiki/File:100doors_rkt.png\n", + "/mw/index.php?title=100_doors&action=edit§ion=193\n", + "/wiki/Category:RapidQ\n", + "/mw/index.php?title=100_doors&action=edit§ion=194\n", + 
"/wiki/Category:REALbasic\n", + "/mw/index.php?title=100_doors&action=edit§ion=195\n", + "/wiki/Category:REBOL\n", + "/mw/index.php?title=100_doors&action=edit§ion=196\n", + "/mw/index.php?title=100_doors&action=edit§ion=197\n", + "/mw/index.php?title=100_doors&action=edit§ion=198\n", + "/wiki/Category:Retro\n", + "/mw/index.php?title=100_doors&action=edit§ion=199\n", + "/wiki/Category:REXX\n", + "/mw/index.php?title=100_doors&action=edit§ion=200\n", + "/mw/index.php?title=100_doors&action=edit§ion=201\n", + "/mw/index.php?title=100_doors&action=edit§ion=202\n", + "/mw/index.php?title=100_doors&action=edit§ion=203\n", + "/mw/index.php?title=100_doors&action=edit§ion=204\n", + "/wiki/Category:Ruby\n", + "/mw/index.php?title=100_doors&action=edit§ion=205\n", + "/wiki/Category:Run_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=206\n", + "/wiki/Category:Rust\n", + "/mw/index.php?title=100_doors&action=edit§ion=207\n", + "/wiki/Category:S-lang\n", + "/mw/index.php?title=100_doors&action=edit§ion=208\n", + "/wiki/Category:Salmon\n", + "/mw/index.php?title=100_doors&action=edit§ion=209\n", + "/wiki/Category:SAS\n", + "/mw/index.php?title=100_doors&action=edit§ion=210\n", + "/wiki/Category:Scala\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "/mw/index.php?title=100_doors&action=edit§ion=211\n", + "/wiki/Category:Sather\n", + "/mw/index.php?title=100_doors&action=edit§ion=212\n", + "/wiki/Category:Scheme\n", + "/mw/index.php?title=100_doors&action=edit§ion=213\n", + "/wiki/Category:Seed7\n", + "/mw/index.php?title=100_doors&action=edit§ion=214\n", + "/wiki/Category:SETL\n", 
+ "/mw/index.php?title=100_doors&action=edit§ion=215\n", + "/wiki/Category:Sidef\n", + "/mw/index.php?title=100_doors&action=edit§ion=216\n", + "/wiki/Category:Slate\n", + "/mw/index.php?title=100_doors&action=edit§ion=217\n", + "/wiki/Category:Smalltalk\n", + "/wiki/GNU_Smalltalk\n", + "/mw/index.php?title=Squeak_Smalltalk&action=edit&redlink=1\n", + "/mw/index.php?title=100_doors&action=edit§ion=218\n", + "/wiki/Category:SNOBOL4\n", + "/mw/index.php?title=100_doors&action=edit§ion=219\n", + "/wiki/Category:Sparkling\n", + "/mw/index.php?title=100_doors&action=edit§ion=220\n", + "/wiki/Category:SQL\n", + "/mw/index.php?title=100_doors&action=edit§ion=221\n", + "/wiki/Category:Swift\n", + "/mw/index.php?title=100_doors&action=edit§ion=222\n", + "/wiki/Category:Tcl\n", + "/wiki/Category:Tk\n", + "/mw/index.php?title=100_doors&action=edit§ion=223\n", + "/wiki/Category:TI-83_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=224\n", + "/mw/index.php?title=100_doors&action=edit§ion=225\n", + "/mw/index.php?title=100_doors&action=edit§ion=226\n", + "/wiki/Category:TI-89_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=227\n", + "/wiki/Category:TorqueScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=228\n", + "/wiki/Category:TSE_SAL\n", + "/mw/index.php?title=100_doors&action=edit§ion=229\n", + "/wiki/Category:TUSCRIPT\n", + "/mw/index.php?title=100_doors&action=edit§ion=230\n", + "/wiki/Category:TXR\n", + "/mw/index.php?title=100_doors&action=edit§ion=231\n", + "/wiki/Category:Uniface\n", + "/mw/index.php?title=Uniface_9.6&action=edit&redlink=1\n", + "/mw/index.php?title=100_doors&action=edit§ion=232\n", + "/wiki/Category:UNIX_Shell\n", + "/wiki/Bourne_Again_SHell\n", + "/mw/index.php?title=100_doors&action=edit§ion=233\n", + "/wiki/Category:Ursala\n", + "/mw/index.php?title=100_doors&action=edit§ion=234\n", + "/wiki/Category:Vala\n", + "/mw/index.php?title=100_doors&action=edit§ion=235\n", + "/wiki/Category:VBA\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=236\n", + "/wiki/Category:VBScript\n", + "/wiki/Windows_Script_Host\n", + "/mw/index.php?title=100_doors&action=edit§ion=237\n", + "/wiki/Category:Vedit_macro_language\n", + "/mw/index.php?title=100_doors&action=edit§ion=238\n", + "/wiki/Category:VHDL\n", + "/mw/index.php?title=100_doors&action=edit§ion=239\n", + "/wiki/Category:Visual_Basic_.NET\n", + "/wiki/Visual_Basic_.NET\n", + "/mw/index.php?title=100_doors&action=edit§ion=240\n", + "/wiki/Category:Wart\n", + "/mw/index.php?title=100_doors&action=edit§ion=241\n", + "/wiki/Category:Wortel\n", + "/wiki/100_doors#JavaScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=242\n", + "/wiki/Category:Wrapl\n", + "/mw/index.php?title=100_doors&action=edit§ion=243\n", + "/wiki/Category:XPL0\n", + "/mw/index.php?title=100_doors&action=edit§ion=244\n", + "/wiki/Category:XSLT_1.0\n", + "/wiki/100_doors/XSLT\n", + "/mw/index.php?title=100_doors&action=edit§ion=245\n", + "/wiki/Category:XSLT_2.0\n", + "/mw/index.php?title=100_doors&action=edit§ion=246\n", + "/wiki/Category:Yorick\n", + "/mw/index.php?title=100_doors&action=edit§ion=247\n", + "/wiki/Category:Zkl\n", + "/mw/index.php?title=100_doors&action=edit§ion=248\n", + "/wiki/Category:ZX_Spectrum_Basic\n", + "http://rosettacode.org/mw/index.php?title=100_doors&oldid=205312\n", + "/wiki/Special:Categories\n", + "/wiki/Category:Programming_Tasks\n", + "/wiki/Category:Solutions_by_Programming_Task\n", + "/wiki/Category:4DOS_Batch\n", + "/wiki/Category:6502_Assembly\n", + "/wiki/Category:68000_Assembly\n", + "/wiki/Category:8086_Assembly\n", + "/wiki/Category:8th\n", + "/wiki/Category:ABAP\n", + "/wiki/Category:ACL2\n", + "/wiki/Category:ActionScript\n", + "/wiki/Category:Acurity_Architect\n", + "/wiki/Category:Ada\n", + "/wiki/Category:Aikido\n", + "/wiki/Category:ALGOL_68\n", + "/wiki/Category:ALGOL_W\n", + "/wiki/Category:AmigaE\n", + "/wiki/Category:APL\n", + "/wiki/Category:AppleScript\n", + "/wiki/Category:Arbre\n", 
+ "/wiki/Category:Argile\n", + "/wiki/Category:ATS\n", + "/wiki/Category:AutoHotkey\n", + "/wiki/Category:AutoIt\n", + "/wiki/Category:Axiom\n", + "/wiki/Category:AWK\n", + "/wiki/Category:BASIC\n", + "/wiki/Category:BASIC256\n", + "/wiki/Category:Batch_File\n", + "/wiki/Category:BBC_BASIC\n", + "/wiki/Category:Bc\n", + "/wiki/Category:Befunge\n", + "/wiki/Category:BlitzMax\n", + "/wiki/Category:Bracmat\n", + "/wiki/Category:Burlesque\n", + "/wiki/Category:C\n", + "/mw/index.php?title=Category:C_Runtime&action=edit&redlink=1\n", + "/wiki/Category:C%2B%2B\n", + "/wiki/Category:C_sharp\n", + "/wiki/Category:C1R\n", + "/wiki/Category:Cach%C3%A9_ObjectScript\n", + "/wiki/Category:Clarion\n", + "/wiki/Category:CLIPS\n", + "/wiki/Category:Clojure\n", + "/wiki/Category:COBOL\n", + "/wiki/Category:Coco\n", + "/wiki/Category:CoffeeScript\n", + "/wiki/Category:ColdFusion\n", + "/wiki/Category:Common_Lisp\n", + "/wiki/Category:Component_Pascal\n", + "/wiki/Category:Coq\n", + "/wiki/Category:Crystal\n", + "/wiki/Category:D\n", + "/wiki/Category:Dart\n", + "/wiki/Category:DCL\n", + "/wiki/Category:Delphi\n", + "/wiki/Category:D%C3%A9j%C3%A0_Vu\n", + "/wiki/Category:DWScript\n", + "/wiki/Category:Dylan\n", + "/wiki/Category:E\n", + "/wiki/Category:ECL\n", + "/wiki/Category:Eero\n", + "/wiki/Category:EGL\n", + "/wiki/Category:Eiffel\n", + "/wiki/Category:Ela\n", + "/wiki/Category:Elixir\n", + "/wiki/Category:Emacs_Lisp\n", + "/wiki/Category:Erlang\n", + "/wiki/Category:ERRE\n", + "/wiki/Category:Euler_Math_Toolbox\n", + "/wiki/Category:Euphoria\n", + "/wiki/Category:F_Sharp\n", + "/wiki/Category:Factor\n", + "/wiki/Category:Falcon\n", + "/wiki/Category:Fantom\n", + "/wiki/Category:FBSL\n", + "/wiki/Category:Friendly_interactive_shell\n", + "/wiki/Category:Forth\n", + "/wiki/Category:Fortran\n", + "/wiki/Category:Frink\n", + "/wiki/Category:FunL\n", + "/wiki/Category:GAP\n", + "/wiki/Category:GML\n", + "/wiki/Category:Go\n", + "/wiki/Category:Golfscript\n", + 
"/wiki/Category:Gosu\n", + "/wiki/Category:Groovy\n", + "/wiki/Category:Harbour\n", + "/wiki/Category:Haskell\n", + "/wiki/Category:Haxe\n", + "/wiki/Category:HicEst\n", + "/wiki/Category:Hy\n", + "/wiki/Category:Icon\n", + "/wiki/Category:Unicon\n", + "/wiki/Category:Inform_7\n", + "/wiki/Category:Informix_4GL\n", + "/wiki/Category:Io\n", + "/wiki/Category:Ioke\n", + "/wiki/Category:J\n", + "/wiki/Category:Java\n", + "/wiki/Category:JavaScript\n", + "/wiki/Category:Jq\n", + "/wiki/Category:Julia\n", + "/wiki/Category:K\n", + "/wiki/Category:Kotlin\n", + "/wiki/Category:LabVIEW\n", + "/wiki/Category:Lasso\n", + "/wiki/Category:Lhogho\n", + "/wiki/Category:Liberty_BASIC\n", + "/wiki/Category:LiveCode\n", + "/wiki/Category:Logo\n", + "/wiki/Category:LOLCODE\n", + "/wiki/Category:Lua\n", + "/wiki/Category:M4\n", + "/wiki/Category:Maple\n", + "/wiki/Category:Mathematica\n", + "/wiki/Category:MATLAB\n", + "/wiki/Category:Octave\n", + "/wiki/Category:Maxima\n", + "/wiki/Category:MAXScript\n", + "/wiki/Category:Mercury\n", + "/wiki/Category:Metafont\n", + "/wiki/Category:MIPS_Assembly\n", + "/wiki/Category:Mirah\n", + "/wiki/Category:MIRC_Scripting_Language\n", + "/wiki/Category:ML/I\n", + "/wiki/Category:MMIX\n", + "/wiki/Category:Modula-2\n", + "/wiki/Category:Modula-3\n", + "/wiki/Category:MOO\n", + "/wiki/Category:MoonScript\n", + "/wiki/Category:MUMPS\n", + "/wiki/Category:NetRexx\n", + "/wiki/Category:NewLisp\n", + "/wiki/Category:Nim\n", + "/wiki/Category:Objeck\n", + "/wiki/Category:Objective-C\n", + "/wiki/Category:OCaml\n", + "/wiki/Category:Oforth\n", + "/wiki/Category:OoRexx\n", + "/wiki/Category:OpenEdge/Progress\n", + "/wiki/Category:OxygenBasic\n", + "/wiki/Category:Oz\n", + "/wiki/Category:PARI/GP\n", + "/wiki/Category:Pascal\n", + "/wiki/Category:Perl\n", + "/wiki/Category:Perl5i\n", + "/wiki/Category:Perl_6\n", + "/wiki/Category:PHL\n", + "/wiki/Category:PHP\n", + "/wiki/Category:PicoLisp\n", + "/wiki/Category:Piet\n", + "/wiki/Category:Pike\n", + 
"/wiki/Category:PL/I\n", + "/wiki/Category:PL/SQL\n", + "/wiki/Category:Pop11\n", + "/wiki/Category:PostScript\n", + "/wiki/Category:Potion\n", + "/wiki/Category:PowerShell\n", + "/wiki/Category:ProDOS\n", + "/wiki/Category:Prolog\n", + "/wiki/Category:PureBasic\n", + "/wiki/Category:Python\n", + "/wiki/Category:Q\n", + "/wiki/Category:R\n", + "/wiki/Category:Racket\n", + "/wiki/Category:RapidQ\n", + "/wiki/Category:REALbasic\n", + "/wiki/Category:REBOL\n", + "/wiki/Category:Retro\n", + "/wiki/Category:REXX\n", + "/wiki/Category:Ruby\n", + "/wiki/Category:Run_BASIC\n", + "/wiki/Category:Rust\n", + "/wiki/Category:S-lang\n", + "/wiki/Category:Salmon\n", + "/wiki/Category:SAS\n", + "/wiki/Category:Scala\n", + "/wiki/Category:Sather\n", + "/wiki/Category:Scheme\n", + "/wiki/Category:Seed7\n", + "/wiki/Category:SETL\n", + "/wiki/Category:Sidef\n", + "/wiki/Category:Slate\n", + "/wiki/Category:Smalltalk\n", + "/wiki/Category:SNOBOL4\n", + "/wiki/Category:Sparkling\n", + "/wiki/Category:SQL\n", + "/wiki/Category:Swift\n", + "/wiki/Category:Tcl\n", + "/wiki/Category:Tk\n", + "/wiki/Category:TI-83_BASIC\n", + "/wiki/Category:TI-89_BASIC\n", + "/wiki/Category:TorqueScript\n", + "/wiki/Category:TSE_SAL\n", + "/wiki/Category:TUSCRIPT\n", + "/wiki/Category:TXR\n", + "/wiki/Category:Uniface\n", + "/wiki/Category:UNIX_Shell\n", + "/wiki/Category:Ursala\n", + "/wiki/Category:Vala\n", + "/wiki/Category:VBA\n", + "/wiki/Category:VBScript\n", + "/wiki/Category:Vedit_macro_language\n", + "/wiki/Category:VHDL\n", + "/wiki/Category:Visual_Basic_.NET\n", + "/wiki/Category:Wart\n", + "/wiki/Category:Wortel\n", + "/wiki/Category:Wrapl\n", + "/wiki/Category:XPL0\n", + "/wiki/Category:XSLT_1.0\n", + "/wiki/Category:XSLT_2.0\n", + "/wiki/Category:Yorick\n", + "/wiki/Category:Zkl\n", + "/wiki/Category:GUISS/Omit\n", + "/wiki/Category:ZX_Spectrum_Basic\n", + "/mw/index.php?title=Special:UserLogin&returnto=100+doors&type=signup\n", + "/mw/index.php?title=Special:UserLogin&returnto=100+doors\n", 
+ "/wiki/100_doors\n", + "/wiki/Talk:100_doors\n", + "#\n", + "/wiki/100_doors\n", + "/mw/index.php?title=100_doors&action=edit\n", + "/mw/index.php?title=100_doors&action=history\n", + "#\n", + "/wiki/Rosetta_Code\n", + "http://twitter.com/rosettacode\n", + "/wiki/Special:WebChat\n", + "http://rosettacode.org/planet\n", + "/wiki/Rosetta_Code:Village_Pump\n", + "/wiki/Rosetta_Code:Finances\n", + "/wiki/Category:Programming_Languages\n", + "/wiki/Category:Programming_Tasks\n", + "/wiki/Special:RecentChanges\n", + "/wiki/Help:Similar_Sites\n", + "/wiki/Special:Random\n", + "https://twitter.com/share\n", + "/wiki/Special:WhatLinksHere/100_doors\n", + "/wiki/Special:RecentChangesLinked/100_doors\n", + "/wiki/Special:SpecialPages\n", + "/mw/index.php?title=100_doors&printable=yes\n", + "/mw/index.php?title=100_doors&oldid=205312\n", + "/wiki/Special:Browse/100_doors\n", + "http://www.gnu.org/licenses/fdl-1.2.html\n", + "/wiki/Rosetta_Code:Privacy_policy\n", + "/wiki/Rosetta_Code:About\n", + "/wiki/Rosetta_Code:General_disclaimer\n", + "http://www.gnu.org/licenses/fdl-1.2.html\n", + "//www.mediawiki.org/\n", + "http://www.semantic-mediawiki.org/wiki/Semantic_MediaWiki\n" + ] + } + ], + "source": [ + "#url = input(\"Enter a website to extract the URL's from: \")\n", + "#url = 'rosettacode.org/wiki/Category:C'\n", + "url = 'rosettacode.org/wiki/100_doors'\n", + "r = requests.get(\"http://\" +url)\n", + "\n", + "data = r.text\n", + "\n", + "soup = BeautifulSoup(data)\n", + "\n", + "for link in soup.find_all('a'):\n", + " print(link.get('href'))" + ] + }, + { + "cell_type": "code", + "execution_count": 203, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# c_soup = BeautifulSoup(\n", + "x = soup.find(\"pre\", class_=\"c highlighted_source\")\n", + "#x = soup.select('pre')[0]\n", + "# dir(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "#include \n", + "int main(){ char is_open[100] = { 0 }; int pass, door;\n", + " /* do the 100 passes */ for (pass = 0; pass < 100; ++pass) for (door = pass; door < 100; door += pass+1) is_open[door] = !is_open[door];\n", + " /* output the result */ for (door = 0; door < 100; ++door) printf(\"door #%d is\n", + "%s.\\n\", door+1, (is_open[door]? \"open\" : \"closed\"));\n", + " return 0;}\n" + ] + } + ], + "source": [ + "import re\n", + "print(re.sub(r'\\xa0', r'\\n',x.text))" + ] + }, + { + "cell_type": "code", + "execution_count": 205, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[#include <stdio.h>,\n", + "
,\n", + " '\\xa0',\n", + "
,\n", + " int,\n", + " ' main',\n", + " (,\n", + " ),\n", + "
,\n", + " {,\n", + "
,\n", + " ' ',\n", + " char,\n", + " ' is_open',\n", + " [,\n", + " 100,\n", + " ],\n", + " ' ',\n", + " =,\n", + " ' ',\n", + " {,\n", + " ' ',\n", + " 0,\n", + " ' ',\n", + " },\n", + " ;,\n", + "
,\n", + " ' ',\n", + " int,\n", + " ' pass',\n", + " ,,\n", + " ' door',\n", + " ;,\n", + "
,\n", + " '\\xa0',\n", + "
,\n", + " ' ',\n", + " /* do the 100 passes */,\n", + "
,\n", + " ' ',\n", + " for,\n", + " ' ',\n", + " (,\n", + " 'pass ',\n", + " =,\n", + " ' ',\n", + " 0,\n", + " ;,\n", + " ' pass ',\n", + " <,\n", + " ' ',\n", + " 100,\n", + " ;,\n", + " ' ',\n", + " ++,\n", + " 'pass',\n", + " ),\n", + "
,\n", + " ' ',\n", + " for,\n", + " ' ',\n", + " (,\n", + " 'door ',\n", + " =,\n", + " ' pass',\n", + " ;,\n", + " ' door ',\n", + " <,\n", + " ' ',\n", + " 100,\n", + " ;,\n", + " ' door ',\n", + " +=,\n", + " ' pass',\n", + " +,\n", + " 1,\n", + " ),\n", + "
,\n", + " ' is_open',\n", + " [,\n", + " 'door',\n", + " ],\n", + " ' ',\n", + " =,\n", + " ' ',\n", + " !,\n", + " 'is_open',\n", + " [,\n", + " 'door',\n", + " ],\n", + " ;,\n", + "
,\n", + " '\\xa0',\n", + "
,\n", + " ' ',\n", + " /* output the result */,\n", + "
,\n", + " ' ',\n", + " for,\n", + " ' ',\n", + " (,\n", + " 'door ',\n", + " =,\n", + " ' ',\n", + " 0,\n", + " ;,\n", + " ' door ',\n", + " <,\n", + " ' ',\n", + " 100,\n", + " ;,\n", + " ' ',\n", + " ++,\n", + " 'door',\n", + " ),\n", + "
,\n", + " ' ',\n", + " printf,\n", + " (,\n", + " \"door #%d is %s.\\n\",\n", + " ,,\n", + " ' door',\n", + " +,\n", + " 1,\n", + " ,,\n", + " ' ',\n", + " (,\n", + " 'is_open',\n", + " [,\n", + " 'door',\n", + " ],\n", + " ?,\n", + " ' ',\n", + " \"open\",\n", + " ' ',\n", + " :,\n", + " ' ',\n", + " \"closed\",\n", + " ),\n", + " ),\n", + " ;,\n", + "
,\n", + " '\\xa0',\n", + "
,\n", + " ' ',\n", + " return,\n", + " ' ',\n", + " 0,\n", + " ;,\n", + "
,\n", + " }]" + ] + }, + "execution_count": 205, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.contents" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "while x.br:\n", + " a_tag = x.br\n", + " new_tag = soup.new_tag(\"p\")\n", + " new_tag.string = \"\\n\"\n", + " a_tag.replace_with(new_tag)" + ] + }, + { + "cell_type": "code", + "execution_count": 207, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "
#include <stdio.h>

\n", + "

 

\n", + "

int main()

\n", + "

{

\n", + "

char is_open[100] = { 0 };

\n", + "

int pass, door;

\n", + "

 

\n", + "

/* do the 100 passes */

\n", + "

for (pass = 0; pass < 100; ++pass)

\n", + "

for (door = pass; door < 100; door += pass+1)

\n", + "

is_open[door] = !is_open[door];

\n", + "

 

\n", + "

/* output the result */

\n", + "

for (door = 0; door < 100; ++door)

\n", + "

printf(\"door #%d is %s.\\n\", door+1, (is_open[door]? \"open\" : \"closed\"));

\n", + "

 

\n", + "

return 0;

\n", + "

}
" + ] + }, + "execution_count": 207, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#include \n", + "\n", + "int main()\n", + "{\n", + " char is_open[100] = { 0 };\n", + " int pass, door;\n", + "\n", + " /* do the 100 passes */\n", + " for (pass = 0; pass < 100; ++pass)\n", + " for (door = pass; door < 100; door += pass+1)\n", + " is_open[door] = !is_open[door];\n", + "\n", + " /* output the result */\n", + " for (door = 0; door < 100; ++door)\n", + " printf(\"door #%d is%s.\\n\", door+1, (is_open[door]? \"open\" : \"closed\"));\n", + "\n", + " return 0;\n", + "}\n" + ] + } + ], + "source": [ + "print(re.sub(r'\\xa0', r'',x.text))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "#include \n", + " \n", + "int main()\n", + "{\n", + " char is_open[100] = { 0 };\n", + " int pass, door;\n", + " \n", + " /* do the 100 passes */\n", + " for (pass = 0; pass < 100; ++pass)\n", + " for (door = pass; door < 100; door += pass+1)\n", + " is_open[door] = !is_open[door];\n", + " \n", + " /* output the result */\n", + " for (door = 0; door < 100; ++door)\n", + " printf(\"door #%d is %s.\\n\", door+1, (is_open[door]? 
\"open\" : \"closed\"));\n", + " \n", + " return 0;\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 201, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "c\n", + "c\n", + "c\n", + "c\n", + "c\n", + "c\n", + "ocaml\n", + "ocaml\n" + ] + } + ], + "source": [ + "pres = soup.findAll('pre')\n", + "texts = []\n", + "langs = ['c', 'ocaml']\n", + "for pre in pres:\n", + " lang = pre.get('class', ['',''])[0]\n", + " if lang in langs:\n", + " print(lang)\n", + "# lang" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['_68000devpac', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['abap', 'highlighted_source'],\n", + " ['abap', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['actionscript', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['ada', 'highlighted_source'],\n", + " ['ada', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['algol68', 'highlighted_source'],\n", + " ['algol68', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['', ''],\n", + " ['applescript', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " 
['text', 'highlighted_source'],\n", + " ['autohotkey', 'highlighted_source'],\n", + " ['autohotkey', 'highlighted_source'],\n", + " ['autohotkey', 'highlighted_source'],\n", + " ['autoit', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['awk', 'highlighted_source'],\n", + " ['awk', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['dos', 'highlighted_source'],\n", + " ['dos', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['cpp', 'highlighted_source'],\n", + " ['cpp', 'highlighted_source'],\n", + " ['cpp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['clojure', 'highlighted_source'],\n", + " ['clojure', 'highlighted_source'],\n", + " ['clojure', 'highlighted_source'],\n", + " ['clojure', 'highlighted_source'],\n", + " ['clojure', 
'highlighted_source'],\n", + " ['cobol', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['coffeescript', 'highlighted_source'],\n", + " ['coffeescript', 'highlighted_source'],\n", + " ['coffeescript', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['cfm', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['', ''],\n", + " ['lisp', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['oberon2', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['d', 'highlighted_source'],\n", + " ['', ''],\n", + " ['d', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['dcl', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['delphi', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['e', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['objc', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['eiffel', 'highlighted_source'],\n", + " ['eiffel', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['lisp', 'highlighted_source'],\n", + " ['erlang', 'highlighted_source'],\n", + " ['erlang', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['euphoria', 'highlighted_source'],\n", + " ['fsharp', 'highlighted_source'],\n", + " ['fsharp', 'highlighted_source'],\n", + " 
['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['falcon', 'highlighted_source'],\n", + " ['falcon', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['fortran', 'highlighted_source'],\n", + " ['fortran', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['gml', 'highlighted_source'],\n", + " ['go', 'highlighted_source'],\n", + " ['', ''],\n", + " ['go', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['groovy', 'highlighted_source'],\n", + " ['groovy', 'highlighted_source'],\n", + " ['groovy', 'highlighted_source'],\n", + " ['visualfoxpro', 'highlighted_source'],\n", + " ['visualfoxpro', 'highlighted_source'],\n", + " ['', ''],\n", + " ['haskell', 'highlighted_source'],\n", + " ['haskell', 'highlighted_source'],\n", + " ['haskell', 'highlighted_source'],\n", + " ['haskell', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['hicest', 'highlighted_source'],\n", + " ['hicest', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['icon', 'highlighted_source'],\n", + " ['icon', 'highlighted_source'],\n", + " ['icon', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['io', 'highlighted_source'],\n", + " ['io', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['j', 
'highlighted_source'],\n", + " ['j', 'highlighted_source'],\n", + " ['j', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['', ''],\n", + " ['java', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['lb', 'highlighted_source'],\n", + " ['lb', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['lolcode', 'highlighted_source'],\n", + " ['lua', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", 
+ " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['mirc', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['modula2', 'highlighted_source'],\n", + " ['modula2', 'highlighted_source'],\n", + " ['modula3', 'highlighted_source'],\n", + " ['modula3', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['netrexx', 'highlighted_source'],\n", + " ['netrexx', 'highlighted_source'],\n", + " ['netrexx', 'highlighted_source'],\n", + " ['netrexx', 'highlighted_source'],\n", + " ['newlisp', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['objeck', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['ocaml', 'highlighted_source'],\n", + " ['ocaml', 'highlighted_source'],\n", + " ['octave', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['oorexx', 'highlighted_source'],\n", + " ['progress', 'highlighted_source'],\n", + " ['', ''],\n", + " ['oz', 'highlighted_source'],\n", + " ['', ''],\n", + " ['parigp', 'highlighted_source'],\n", + " ['parigp', 'highlighted_source'],\n", + " ['pascal', 'highlighted_source'],\n", + " ['pascal', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['perl6', 'highlighted_source'],\n", + " ['perl6', 'highlighted_source'],\n", + " ['perl6', 'highlighted_source'],\n", + " ['perl6', 
'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['php', 'highlighted_source'],\n", + " ['php', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['pike', 'highlighted_source'],\n", + " ['pike', 'highlighted_source'],\n", + " ['pike', 'highlighted_source'],\n", + " ['', ''],\n", + " ['pli', 'highlighted_source'],\n", + " ['plsql', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['prolog', 'highlighted_source'],\n", + " ['prolog', 'highlighted_source'],\n", + " ['prolog', 'highlighted_source'],\n", + " ['purebasic', 'highlighted_source'],\n", + " ['purebasic', 'highlighted_source'],\n", + " ['', ''],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['q', 'highlighted_source'],\n", + " ['q', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['vb', 'highlighted_source'],\n", + " ['', ''],\n", + " ['vb', 
'highlighted_source'],\n", + " ['rebol', 'highlighted_source'],\n", + " ['rebol', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['rexx', 'highlighted_source'],\n", + " ['rexx', 'highlighted_source'],\n", + " ['', ''],\n", + " ['rexx', 'highlighted_source'],\n", + " ['', ''],\n", + " ['rexx', 'highlighted_source'],\n", + " ['', ''],\n", + " ['ruby', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['sas', 'highlighted_source'],\n", + " ['scala', 'highlighted_source'],\n", + " ['scala', 'highlighted_source'],\n", + " ['scala', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['scheme', 'highlighted_source'],\n", + " ['scheme', 'highlighted_source'],\n", + " ['scheme', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['smalltalk', 'highlighted_source'],\n", + " ['smalltalk', 'highlighted_source'],\n", + " ['smalltalk', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['sql', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['tcl', 'highlighted_source'],\n", + " 
['tcl', 'highlighted_source'],\n", + " ['tcl', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['bash', 'highlighted_source'],\n", + " ['bash', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['vala', 'highlighted_source'],\n", + " ['', ''],\n", + " ['vala', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['vedit', 'highlighted_source'],\n", + " ['vedit', 'highlighted_source'],\n", + " ['', ''],\n", + " ['vhdl', 'highlighted_source'],\n", + " ['vhdl', 'highlighted_source'],\n", + " ['vbnet', 'highlighted_source'],\n", + " ['vbnet', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['xml', 'highlighted_source'],\n", + " ['xml', 'highlighted_source'],\n", + " ['xml', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['', ''],\n", + " ['', '']]" + ] + }, + "execution_count": 192, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[x.get('class', ['','']) for x in soup.findAll('pre')]\n", + "# y = [x for x in soup.findAll('pre')][0]\n", + "# y" + ] + }, + { + "cell_type": "code", + "execution_count": 188, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + 
"['text', 'highlighted_source']" + ] + }, + "execution_count": 188, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y.get('class')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/feature_testing_with_benchmark_dataset.ipynb b/feature_testing_with_benchmark_dataset.ipynb new file mode 100644 index 0000000..fd5a97d --- /dev/null +++ b/feature_testing_with_benchmark_dataset.ipynb @@ -0,0 +1,1211 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Initial Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from lang_classifier import *\n", + "from sklearn.pipeline import make_pipeline, make_union\n", + "from sklearn.metrics import classification_report, confusion_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def setup():\n", + " \"\"\"Load the training benchmark training data and split it for train/test\"\"\"\n", + " df = load_bench_data()\n", + " X = df.text\n", + " y = df.language\n", + " test_data = load_test_data()\n", + " args = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " # X_train, X_test, y_train, y_test\n", + " \n", + " return df, X, y, test_data, 
args" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df, X, y, test_data, args = setup() # Load and split the train/test data\n", + "X_train, X_test, y_train, y_test = args" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Inspect the test_data dataframe to make sure language and text are properly aligned" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
languagetextguess
item
0perluse warnings;\\nuse strict;\\n\\nmy $initial = jo...NaN
1clojure(defn cf-settings\\n \"Setup settings for campf...NaN
2clojure(ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...NaN
3clojure(extend-type String\\n Person\\n (first-name [...NaN
4clojure(require '[overtone.live :as overtone])\\n\\n(de...NaN
5pythonfrom pkgutil import iter_modules\\nfrom subproc...NaN
6pythonimport re\\nimport subprocess\\n\\ndef cmd_keymap...NaN
7pythonclass NoSuchService(Exception):\\n def __ini...NaN
8pythonfrom collections import namedtuple\\nimport fun...NaN
9javascriptfunction errorHandler(context) {\\n return fun...NaN
10javascriptvar _ = require('lodash'),\\n fs = require('...NaN
11javascript/* Riot v2.0.8, @license MIT, (c) 2015 Muut In...NaN
12javascriptvar r = riot.route = function(arg) {\\n //...NaN
13rubymodule ActiveJob\\n module Core\\n extend Ac...NaN
14rubyrequire 'formula'\\n\\nclass A52dec < Formula\\n ...NaN
15rubymodule Fluent\\n class Input\\n include Conf...NaN
16haskell{-# LANGUAGE ScopedTypeVariables, FlexibleInst...NaN
17haskellreverseDependencies :: ModuleGraph -> M.Map Mo...NaN
18haskell{- git-annex extra config files\\n -\\n - Copyri...NaN
19scheme(define subst-f\\n (lambda (new old l)\\n (c...NaN
20scheme(define add1\\n (lambda (n) (+ n 1)))NaN
21scheme(define-lib-primitive (length lst)\\n (if (nul...NaN
22java/**\\n * Interface to represent a persistence s...NaN
23java/*\\n * Copyright 2002-2008 the original author...NaN
24scalapackage com.github.pathikrit\\n\\nimport scala.a...NaN
25scala/* sbt -- Simple Build Tool\\n * Copyright 2010...NaN
26tclproc isaac::mix {a b c d e f g h} {\\n set a...NaN
27tclproc twitter::follow {nick uhost hand chan arg...NaN
28phpclass View\\n{\\n /**\\n * Data available ...NaN
29phppublic function formatLocalized($format)\\n...NaN
30phpclass Application extends App {\\n\\t/**\\n\\t * @...NaN
31ocamltype name = string\\n\\nlet compare_label label1...NaN
32ocamllet search_compiler_libs () =\\n prerr_endline...NaN
\n", + "
" + ], + "text/plain": [ + " language text guess\n", + "item \n", + "0 perl use warnings;\\nuse strict;\\n\\nmy $initial = jo... NaN\n", + "1 clojure (defn cf-settings\\n \"Setup settings for campf... NaN\n", + "2 clojure (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... NaN\n", + "3 clojure (extend-type String\\n Person\\n (first-name [... NaN\n", + "4 clojure (require '[overtone.live :as overtone])\\n\\n(de... NaN\n", + "5 python from pkgutil import iter_modules\\nfrom subproc... NaN\n", + "6 python import re\\nimport subprocess\\n\\ndef cmd_keymap... NaN\n", + "7 python class NoSuchService(Exception):\\n def __ini... NaN\n", + "8 python from collections import namedtuple\\nimport fun... NaN\n", + "9 javascript function errorHandler(context) {\\n return fun... NaN\n", + "10 javascript var _ = require('lodash'),\\n fs = require('... NaN\n", + "11 javascript /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... NaN\n", + "12 javascript var r = riot.route = function(arg) {\\n //... NaN\n", + "13 ruby module ActiveJob\\n module Core\\n extend Ac... NaN\n", + "14 ruby require 'formula'\\n\\nclass A52dec < Formula\\n ... NaN\n", + "15 ruby module Fluent\\n class Input\\n include Conf... NaN\n", + "16 haskell {-# LANGUAGE ScopedTypeVariables, FlexibleInst... NaN\n", + "17 haskell reverseDependencies :: ModuleGraph -> M.Map Mo... NaN\n", + "18 haskell {- git-annex extra config files\\n -\\n - Copyri... NaN\n", + "19 scheme (define subst-f\\n (lambda (new old l)\\n (c... NaN\n", + "20 scheme (define add1\\n (lambda (n) (+ n 1))) NaN\n", + "21 scheme (define-lib-primitive (length lst)\\n (if (nul... NaN\n", + "22 java /**\\n * Interface to represent a persistence s... NaN\n", + "23 java /*\\n * Copyright 2002-2008 the original author... NaN\n", + "24 scala package com.github.pathikrit\\n\\nimport scala.a... NaN\n", + "25 scala /* sbt -- Simple Build Tool\\n * Copyright 2010... NaN\n", + "26 tcl proc isaac::mix {a b c d e f g h} {\\n set a... 
NaN\n", + "27 tcl proc twitter::follow {nick uhost hand chan arg... NaN\n", + "28 php class View\\n{\\n /**\\n * Data available ... NaN\n", + "29 php public function formatLocalized($format)\\n... NaN\n", + "30 php class Application extends App {\\n\\t/**\\n\\t * @... NaN\n", + "31 ocaml type name = string\\n\\nlet compare_label label1... NaN\n", + "32 ocaml let search_compiler_libs () =\\n prerr_endline... NaN" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use a generic bag of words/naive bayes classifier pipeline as a baseline" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def assess_test_data(pipe):\n", + " test_data['guess'] = pd.DataFrame(pipe.predict(test_data['text']))\n", + " correct = test_data[test_data.language == test_data.guess]\n", + " print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data)))\n", + " print(test_data[['language', 'guess', 'text']])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.989, Test score: 0.949\n", + "Proportion of test data correctly labeled: 0.727\n", + " language guess \\\n", + "item \n", + "0 perl perl \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure clojure \n", + "4 clojure clojure \n", + "5 python python \n", + "6 python clojure \n", + "7 python ruby \n", + "8 python python \n", + "9 javascript javascript \n", + "10 javascript javascript \n", + "11 javascript clojure \n", + "12 javascript php \n", + "13 ruby ruby \n", + "14 ruby clojure \n", + "15 ruby ruby \n", + "16 haskell haskell \n", + "17 haskell haskell \n", + "18 haskell clojure \n", + "19 scheme scheme \n", + "20 
scheme scheme \n", + "21 scheme scheme \n", + "22 java java \n", + "23 java c \n", + "24 scala scala \n", + "25 scala scala \n", + "26 tcl tcl \n", + "27 tcl python \n", + "28 php clojure \n", + "29 php php \n", + "30 php php \n", + "31 ocaml ocaml \n", + "32 ocaml ocaml \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... \n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... \n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... 
\n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" + ] + } + ], + "source": [ + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)\n", + "c = classifier.predict(X)\n", + "#print('Guesses: ', c[0:5])\n", + "assess_test_data(spam_pipe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Inspect the baseline features" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['zin', 'zip', 'zipwith', 'zipwithindex', 'zipwithm', 'zipwithm_', 'ziv', 'ziv1', 'ziv2', 'zizi', 'zoo', 'zotov', 'zq', 'zr', 'zr1', 'zr2', 'zri', 'zrn', 'zrv', 'zrv1', 'zrv2', 'zrzi', 'zrzr', 'zs', 'zt', 'zu', 'zubach', 'zx', 'zy', 'zz']\n", + "Train score: 0.989, Test score: 0.949\n" + ] + } + ], + "source": [ + "cv = CountVectorizer()\n", + "ft = cv.fit_transform(X)\n", + "print(cv.get_feature_names()[-30:])\n", + "spam_pipe = Pipeline([('bag_of_words', cv),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Now try making a limited custom vocabulary to discriminate between languages" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['}', ')', 'var', 'fn', 'function', 'end', 'defn', '===', 'lambda']\n", + "Train score: 0.329, Test score: 0.436\n", + "Proportion of test data correctly labeled: 0.485\n", + " language guess 
\\\n", + "item \n", + "0 perl ruby \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure ruby \n", + "4 clojure clojure \n", + "5 python ruby \n", + "6 python ruby \n", + "7 python ruby \n", + "8 python python \n", + "9 javascript php \n", + "10 javascript javascript \n", + "11 javascript javascript \n", + "12 javascript javascript \n", + "13 ruby ruby \n", + "14 ruby ruby \n", + "15 ruby ruby \n", + "16 haskell ruby \n", + "17 haskell ruby \n", + "18 haskell ruby \n", + "19 scheme scheme \n", + "20 scheme scheme \n", + "21 scheme scheme \n", + "22 java ruby \n", + "23 java ruby \n", + "24 scala ruby \n", + "25 scala ruby \n", + "26 tcl ruby \n", + "27 tcl ruby \n", + "28 php php \n", + "29 php php \n", + "30 php php \n", + "31 ocaml haskell \n", + "32 ocaml ruby \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... 
\n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... \n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... \n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" + ] + } + ], + "source": [ + "cv = CountVectorizer(vocabulary=['}', ')', 'var', 'fn', 'function', 'end', 'defn',\n", + " '===', 'lambda']) #, '(define', 'elif'])\n", + "ft = cv.fit_transform(X)\n", + "print(cv.get_feature_names()[-30:]) \n", + "spam_pipe = Pipeline([('bag_of_words', cv),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)\n", + "assess_test_data(spam_pipe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vocabulary|Results\n", + "----------|-------\n", + "} | Train score: 0.122, Test score: 0.137\n", + "}, ) | Train score: 0.122, Test score: 0.137\n", + "}, ), var | Train score: 0.161, Test score: 0.179\n", + "}, ), var, fn | Train score: 0.195, Test score: 0.248\n", + "}, ), var, fn, function| Train score: 0.287, Test score: 0.308\n", + "}, ), var, fn, function, end | Train score: 0.278, Test score: 0.325\n", + "}, ), var, fn, function, end, defn | Train score: 0.302, Test score: 0.359\n", + "}, ), var, fn, function, end, defn, === | Train score: 0.300, Test score: 0.368\n", + "}, ), var, fn, function, end, defn, ===, lambda | Train score: 0.334, Test 
score: 0.427\n", + "}, ), var, fn, function, end, defn, ===, lambda | Proportion of test data correctly labeled: 0.485\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above didn't work nearly as well as the automatically-generated vocabulary with thousands of words. Adding new words quickly yielded diminishing returns. Since we want to get above 80% accuracy, it looks like we will need to add our own features." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define new features" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def longest_run_of_caps_feature(text):\n", + " \"\"\"Find the longest run of capital letters and return its length.\"\"\"\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if runs:\n", + " return len(runs[-1])\n", + " else:\n", + " return 0" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def percent_character_feature(char):\n", + " \"\"\"Return percentage of text that is a particular char compared to total text length.\"\"\"\n", + " def feature_fn(text):\n", + " chars = text.count(char)\n", + " return chars / len(text)\n", + " return feature_fn\n", + "\n", + "# def percent_characters_feature(char_list):\n", + "# \"\"\"\n", + "# Return percentage of text for each char/substring in char_list,\n", + "# compared to total text length.\n", + "# \"\"\"\n", + "# def feature_fn(text):\n", + "# hits = []\n", + "# for char in char_list:\n", + "# hits.append(text.count(char) / len(text))\n", + "# return hits\n", + "# return feature_fn\n", + "\n", + "def count_word_feature(word):\n", + " \"\"\"Return the number of non-overlapping occurrences of word in the text.\"\"\"\n", + " def feature_fn(text):\n", + " num_words = text.count(word)\n", + " return num_words\n", + " return 
feature_fn\n", + "\n", + "def longest_line_feature(text):\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", + " lens = [len(line) for line in lines]\n", + " return max(lens) # Includes newline character\n", + "\n", + "def longest_run_of_parens(text):\n", + " matches = re.findall(r'\\)*', text)\n", + " if not matches:\n", + " return 0\n", + " lens = [len(match) for match in matches]\n", + " return max(lens)\n", + "\n", + "def nested_dots(text):\n", + " matches = re.findall(r'\\.([^\\s]*\\.)*', text)\n", + " if not matches:\n", + " return 0\n", + " lens = [match.count('.') for match in matches]\n", + " return max(lens)\n", + "\n", + "def max_paren_depth(text):\n", + " max_depth = 0\n", + " depth = 0\n", + " for char in text:\n", + " if char == '(':\n", + " depth += 1\n", + " max_depth = depth if depth > max_depth else max_depth\n", + " if char == ')':\n", + " depth -= 1\n", + " return max_depth\n", + "\n", + "def max_curly_brace_depth(text):\n", + " max_depth = 0\n", + " depth = 0\n", + " for char in text:\n", + " if char == '{':\n", + " depth += 1\n", + " max_depth = depth if depth > max_depth else max_depth\n", + " if char == '}':\n", + " depth -= 1\n", + " return max_depth\n", + "\n", + "def percent_words_match_regex(regex):\n", + " \"\"\"Return the fraction of whitespace-separated words in the text that match regex.\"\"\"\n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " #words = re.findall(r'[^[\\s]]+\\b', text)\n", + " words = text.split()\n", + " for word in words:\n", + " if re.search(regex, word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches / len(words)\n", + " return feature_fn\n", + "\n", + "def count_endings_feature(regex): \n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", + "\n", + " words = text.split()\n", + " for word in words:\n", + " if 
re.search(regex + r'\\w*$', word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches #/ len(words)\n", + " return feature_fn\n", + "\n", + "def count_beginnings_feature(regex): \n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", + "\n", + " words = text.split()\n", + " for word in words:\n", + " if re.search(r'\\w*' + regex, word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches #/ len(words)\n", + " return feature_fn\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Look at the new features on a simple Python program" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature Ideas\n", + "- +Longest line\n", + "- bag of words with chars, ngrams, let\n", + "- +run of )\n", + "- % _, }, :\\n, \"\"\"\n", + "- key words: def, defn, var `__x__`, module, end, ->, ::, ;, /*, //, case, final, extends, public, protected, $word, @param, self, this\n", + "- + depth nested dots (or % nested dots)\n", + "- +() nest depth\n", + "- ignore/strip comments?\n", + "- Hyphenated or camel or underscored\n", + "- Indentation...\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1]])" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "txt = '''\n", + "# Test python program\n", + "class MyClass:\n", + " \"\"\"MyClass is a class to do something\"\"\"\n", + " def __init__(self, name='name'):\n", + " self.name = name\n", + " def longest_run_of_caps_feature(text):\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if len(runs) == 0:\n", + " return 0\n", + " longest = runs[-1]\n", + " 
return len(longest)\n", + " print('{}'.format(self.name))\n", + " $thing \n", + " @thing\n", + " :thing\n", + " end\n", + " end\n", + "end\n", + "\n", + "'''\n", + "featurizer = FunctionFeaturizer(\n", + "# longest_run_of_caps_feature,\n", + "# percent_character_feature('.'),\n", + "# longest_line_feature,\n", + "# longest_run_of_parens,\n", + "# nested_dots,\n", + " max_paren_depth,\n", + "# percent_words_match_regex(r'\\$\\w'),\n", + "# percent_words_match_regex(r'\\@\\w'),\n", + "# percent_words_match_regex(r':\\w'),\n", + "# percent_character_feature(r';'),\n", + " count_word_feature('}'),\n", + " count_word_feature(')'),\n", + " count_word_feature('var'),\n", + " count_word_feature('fn'),\n", + " count_word_feature('function'),\n", + "# count_word_feature('end'),\n", + " count_word_feature('defn'),\n", + " count_word_feature('==='),\n", + "# count_word_feature('lambda'),\n", + " count_word_feature(';'),\n", + " count_word_feature('public'),\n", + "# count_word_feature('val'),\n", + "# count_word_feature('=>'),\n", + " count_word_feature('set'),\n", + " count_word_feature('extends'),\n", + " count_word_feature('module'),\n", + " count_endings_feature(r'end'),\n", + " count_beginnings_feature(r'let'),\n", + " count_word_feature('->'), #.758\n", + " count_beginnings_feature(r'\\(define'), #.818 got scheme\n", + " count_beginnings_feature(r'\\{-'), #.848 less haskell\n", + " count_word_feature('object'), #\n", + " max_curly_brace_depth, #.879 got javascript\n", + "# # count_beginnings_feature(r'from'), #added ^ to start of regex, numbers dropped, so removed\n", + "# percent_character_feature(']'),\n", + "# # count_word_feature('.'),\n", + "# count_word_feature('proc'),\n", + "# count_beginnings_feature('public'),\n", + "# # count_endings_feature(r';'), #fixed java but broke haskell/scala/php/ocaml\n", + " \n", + " \n", + " \n", + " \n", + " )\n", + "featurizer.transform([txt])" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": { + 
"collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "count_endings_feature('end')(txt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Test the pipeline on the actual test data" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# pipe = Pipeline([('fnfeaturizer', featurizer),\n", + "# ('bayes', MultinomialNB())])\n", + "pipe = make_pipeline(featurizer, MultinomialNB())\n", + "#pd.DataFrame(args[0]).index.values/pd.DataFrame(args[2]).index.values #, args[2])" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "count_beginnings_feature('public')(test_data.loc[2].text)\n", + "# count_word_feature('proc')(test_data.loc[27].text)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.786, Test score: 0.803\n", + "Proportion of test data correctly labeled: 0.879\n", + " language guess \\\n", + "item \n", + "0 perl perl \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure python \n", + "4 clojure clojure \n", + "5 python ruby \n", + "6 python python \n", + "7 python python \n", + "8 python python \n", + "9 javascript javascript \n", + "10 javascript javascript \n", + "11 javascript javascript \n", + "12 javascript javascript \n", + "13 ruby ruby \n", + "14 ruby ruby \n", + "15 ruby ruby \n", + "16 haskell haskell \n", + "17 haskell haskell \n", + "18 haskell haskell \n", + "19 scheme scheme \n", + "20 scheme scheme \n", + "21 scheme scheme 
\n", + "22 java tcl \n", + "23 java java \n", + "24 scala scala \n", + "25 scala scala \n", + "26 tcl tcl \n", + "27 tcl tcl \n", + "28 php php \n", + "29 php php \n", + "30 php php \n", + "31 ocaml haskell \n", + "32 ocaml ocaml \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... \n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... \n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... 
\n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" + ] + } + ], + "source": [ + "pipe.fit(args[0], args[2]) # X_train, y_train\n", + "pipe.score(args[1], args[3])\n", + "classifier = assess_classifier(pipe, *args)\n", + "assess_test_data(pipe)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# print(confusion_matrix(classifier.predict(args[1]), args[3]))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion matrix, without normalization\n", + "[[ 2 0 0 1 0 3 0 0 1 2 0 0 0 0 0]\n", + " [ 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0]\n", + " [ 0 1 5 0 0 0 0 0 0 0 2 0 0 0 0]\n", + " [ 0 0 0 5 0 2 0 0 0 0 0 0 0 0 0]\n", + " [ 0 0 0 0 6 0 0 0 0 0 1 0 0 0 0]\n", + " [ 0 0 0 1 0 5 0 0 1 0 0 0 0 0 0]\n", + " [ 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]\n", + " [ 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0]\n", + " [ 1 0 0 0 0 0 0 0 4 0 0 0 0 0 0]\n", + " [ 1 0 0 2 0 0 0 0 1 10 0 0 0 0 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 6 0 1 0 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 0 14 1 0 0]\n", + " [ 0 0 0 0 1 0 0 0 0 0 0 0 12 0 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAewAAAG4CAYAAACHNdSBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xe8XFW9///XOxAklIQmCFLiRcBLDyDSUrDwQ1CwgCgo\ngrd4wcK1cC3wxRDgimIFQVEEgqICilIEKV5yAggCgQQQQUGRIr2EEkpI3r8/9j5hGE6bM2fOnH3m\n/eQxj7PL2p+9Zk44n1lrr722bBMREREj25h2VyAiIiL6l4QdERFRAUnYERERFZCEHRERUQFJ2BER\nERWQhB0REVEBSdgRbSZpnKQLJD0p6awm4uwn6ZKhrFu7SJos6fZ21yNiJFHuw44YGEn7Ap8FNgKe\nBuYCx9i+usm4HwE+CWxve3HTFR3hJC0G3mj7b+2uS0SVpIUdMQCSPgt8GzgaWB1YBzgR2GMIwq8H\n/KUTknUN9bpDWno4KxJRFUnYEf2QNAE4EjjY9m9sP2d7ke3f2v5CWeY1kr4j6f7y9W1Jy5T7pkm6\nT9JnJT0k6Z+SDij3HQn8P2AfSU9L+pik6ZJ+UnP+iZIWSxpTrh8g6S5JT0n6W9ny795+Zc1xO0i6\nvuxqv07S9jX7ZkmaIemqMs4lklbt5f131/9QSQ+X9X+PpN0k/UXSY5K+WFN+W0nXSHqiLHuCpLHl\nvtllsXnl+927Jv7/SHoA+HG57d7ymPXLc0wq19eS9IikKU39YiMqJgk7on/bA8sCv+6jzGHAtsAW\n5Wtb4PCa/WsA44G1gH8DTpQ0wfZXgP8FfmF7RdunAr1ep5K0PPBdYFfb48u6ze2h3CrAb4HvAKsA\n3wJ+K2nlmmIfAg6g6DFYBvh8H+9vDeA1wJrAEcApwH7AJGAycISk9cqyLwGHAKuW9XsbcDCA7e4k\nu3n5fs+pib8ysC7w8doT274L+ALwU0njgNOA02zPJqKDJGFH9G9V4NF+uqz3BWbYftT2oxQt8o/U\n7F9Y7l9k+2LgGYpr4VB0D9d2EffaXVxaDGwmaZzth2zf1kOZ3YE7bJ9pe7HtXwC383IXvimS3p22\nnwfOBrbs45wLKa7XLwLOovgS8B3bz5bnv637eNs32r6uPO8/gB8CUwfwnr5ie2FZn1ewfQpwJ3Ad\nRXI/rJ94EaNOEnZE/x4DVuvuku7FWsA/atbvKbctiVGX8BcAKzRaEdvPAvsA/wX8U9KFkjbqoeha\nZR1q/aOuTg/WLD/XT30e88sjVJ8rfz5Ud/zyAJI2LOv1gKT5wDEUX3r68ojtF/spcwqwCXCC7YX9\nlI0YdZKwI/p3DfAC8N4+yvwTmFizvm65bTCeAZarWX9d7U7bl9repdx+O/CjHmLcTzGYrdZ65fZW\n+z5Fi/uNtidQtIb7+1vT5+0qklag6N4/BTiyrms/oiMkYUf0w/Z8iuu2J0raU9JyksZKeqekr5XF\nfg4cLmk1SauV5X/SW8x+zAWmSFqnHPD2pe4dklYv67A8RTf1s8CiHmJcDGwo6UOSlpa0D/Am4MKa\nMv11vQ/WChS3vS2Q9CbgoLr9DwHrNxjzu8B1tv+T4tr8D5quZUTFJGFHDIDtb1Hcg3048DBFd/PB\nvDwQ7WjgBuDm8nVDuW1JiL7C1+63fTnFdeKbgeuBC2r2jwE+Q9FSfoxiwNdB9XFsPwa8C/gc8CjF\ngLJ32X68lzqZ/uvY13qtz1Nc03+K4vr1L+rKTwdmlqPI9+rj3AaQtCewCy+/z88CW0n6UB91iBh1\nMnFKREREBaSFHRERUQFJ2BERERWQhB0REVEBSdgREREVkEn2W0RSRvNFRAyQ7VbdZrjEUP1dHo66\n9iQJu4X+8uCzAyp3/HHH8OlDBzbT4jqrLtd/odLRM6Zz+BHTB1y+VXHnLxjYpFTHfXUGh37piAHH\nnbDc2AGVa7S+t9331IDLnvydr/Lx//5S/wWBjdceP+C4o/139
9RzA4vbyP8bMPD/P1r1+bYy9miO\nO27s8OW/Zbf8RFPHPz/3xCGqSeOSsCMionP0OcPwyJaEHRERnUNt6c0eEknYI8BbdpjckrhTpk6r\nVNwddurvgU6D06r6Amy93U4tiZvfXaFq/2+0MnbiDpEKt7Az01mLSPJAr2E3opFr2CPFQK+DNmqg\n17Ab1cg17EY0cg17pGjV726g17AbVcX/P6K4hj1cg86W3eYzTcV4/oZvt23QWXW/avRD0nRJn+un\nzJGS3jZcdYqIiBis0dwl3m/Xge2vNBJQ0tK2Xxp8lSIioq2GuEtc0qnA7sDDtjcrtx0F7EGRhx4D\nDrB9bw/H3k3xkJxFwELb2/Z1rlHTwpa0v6R5kuZKOqNu35aSri33nytppXL76ZLeXy7fLWmVcnkb\nSVeUy9Ml/UTSVRRPGFpN0i8lXVe+dhjmtxoREYMlNfd6tdOAXeu2fd32Fra3BH4D9NY4NDDN9qT+\nkjWMkoQtaRPgMGDn8gM6pNzV3co+AzjU9hbALbz84dU+1q+vFvmbgLfZ3g84Hvh2+eHuBZwyZG8k\nIiJaS2Oae9WxfSXwRN22p2tWV6B4xG2vNRpo1UdLl/hbgbO7n/Vr+wmV34QkjQcmlB8qwEzgnAZi\nGzjf9gvl+tuBf9XL37RWlLSc7QVNvoeIiBglJB0DfARYAGzXSzEDl0taBJxs+0d9xRwtCdsM/FtK\nb+Ve4uUeh2Xr9tUmYwFvsf1ifyc6/rhjliy/ZYfJvGXHKQOsYkTE6DW7axazu2a15+TDdB+27cOA\nwyR9Efg2cGAPxXa0/YCk1wKXSbq9pnH5KqMlYf8f8GtJ37L9ePe1aIrb1p6S9ISknWxfRfGNZ1YP\nMe4GtgF+B7y/Znv9b/dS4NPAN6C4Pm57bk+VamRKxYiITjFl6rRX3Kd9zFFHDt/JGxx0tmj+P1j8\n1D3NnPFnwEU97bD9QPnzEUm/BrYFRnfCtn1b2f3QVXYt3ESRgLuvS38U+IGk5YC76PmbzpHAjyU9\nRZHQa69t117f/jRwoqR5FJ9fF3DwkL6hiIhojQZb2EutNJGlVpq4ZH3RfVcN4BTawPZfy9U9KXJS\nfZnlgKVsPy1peWAXijzUq1GRsAFsn0ExuKynffOA7XvYtSrFkHvK1vdGPRx7ZN36Y8AHm61vRERU\nn6SfA1OB1STdSzGoeTdJG1HcrnUXcFBZdi3gR7Z3B14HnFuOh1oaONP2pX2da9Qk7EaV986NA/r/\nuhQREaPDEN+HbftDPWw+tZey/6S4ZxvbfwO2bORcHZuwbX+s3XWIiIhhlod/REREVECFH/6RhB0R\nEZ2jwgm7ujWPiIjoIGlhR0RE5xiTa9jRg1Y8m3e1fU8f8pgAj/7sgJbEhdY9t7pVqvjc6lZp1e+u\nav8mYhSpcJd4EnZERHSOCo8Sr+5XjYiIiA6SFnZERHSOdIlHRERUQIW7xJOwIyKic6SFHRERUQEV\nbmFX96vGEJJ0d/cztCVd3e76RERE1EsLu7Dkede2d2xnRSIiooUq3CXe0ppL2l/SPElzJc2UtJ6k\n/yu3XS5pnbLc6ZJOknSNpLskTSvL3ybptJp4z0j6uqRbJV0maTtJXeUx7y7LLCvpNEk3S7pR0rRy\n+wGSzpV0saS/SPpaL3V+pvy5pqTZkm6SdIukHWvq8K2yDpdLWq2Vn2FERAwhqblXG7UsYUvaBDgM\n2Nn2lsB/A98DTrO9BXAmcHzNISvZ3h74DHA+8HVgE2AzSZuXZZYDfm97U+BpYAbwVuC95TLAJ4BF\ntjcHPgTMlPSact8WwAeAzYB9JL2+h6p3t7b3BX5ne1J53LyaOlxf1qGL4mHlERFRBRrT3KuNWnn2\ntwJn234cwPYTwHbAz8r9PwV2KpcNXFAu3wo8aPtPtg38CZhY7nvR9iXl8i3AFbYXlcd0l9mxjI3t\nO4B/ABuW5/i97adtvwDcB
qzXR/2vAw6U9BVgM9vPlNsXA2f18B4iIiJappXXsA301H/QW5/Ci+XP\nxcALNdsX83I9F9ZtfxHA9mJJte+lt3PUxl1EH+/f9pWSJgPvAk6X9C3bP6krJmquf9c7esb0JctT\npk5jytRpvRWNiOgYs7tmMbtrVntOXuFR4q1M2P8H/LpMdI+Xo7D/AHyQomW6HzC7Bee9sox9haQN\ngXWB24GtGwkiaV3gftunSFoWmAT8hKJXYm+KVva+5fl6dPgR0wdT/4iIUa2+AXPMUUcO38krPOis\nZQnb9m2SjgG6JC0CbgQ+BZwm6VDgYeDA2kN6WaaP7T0dcxLwfUk3Ay8BH7W9UJL7iNtTzJ2Bz0ta\nSHG9fP9y+7PAtpIOBx4C9hlAzIiIGAkqnLBVXCaOgZL0tO0VB1DOzy0c+s+2io/XjIjoy7ixwnbL\n+6oledl3ndhUjOcv/MSw1LUnuQ+7cfmGExFRVbmG3Tlsj293HSIiYpAq3CWehB0REZ0jLeyIiIgK\nqHALu7o1j4iI6CBpYUdEROdIl3gMl7tO2a8lcX85776WxAXYa4u1WxY7ota9jy1oSdx1Vl2uJXFj\n+GmIE7akU4HdgYdtb1ZuO45ilswXgbuAA23P7+HYXYHvAEsBp9ju8aFU3dIlHhERHUNSU68enAbs\nWrftUmCT8kFXfwG+1EM9lqJ4INauwMbAhyT9a191T8KOiIgYJNtXAk/UbbvM9uJy9Y9AT92M2wJ3\n2r7b9kLgF8CefZ0rCTsiIjqHmnw17mPART1sfz1wb836feW2XuUadkREdIxGr2Evevh2Fj1y+2DP\ndRjFY6F/1sPuhmfNTMKOiIiO0WjCXnqNf2XpNV6+tLzwtvMGep4DgN2At/VS5H5gnZr1dSha2b1K\nl3hERMQQKkd/Hwrsafv5XordAGwgaaKkZSie/Hh+X3GTsCMiomMM9ShxST8H/gBsJOleSR8DTgBW\nAC6TdJOkk8qya0n6LYDtl4BPApcAtwFn2f5zX3XvqC7xsotia9ufanddIiJi+A31fdi2P9TD5lN7\nKftPinu2u9cvBi4e6Lk6KmHT5KMxJY2pGaofERFVU92JzkZPl7ik/SXNkzRX0hmS9pZ0S7k+q7sY\nsJakiyX9RdLXao4/SdL1km6VNL1m+92SjpU0B9hb0ixJ3ym7OW6R9OZhfaMRETFoLZg4ZdiMiha2\npE2Aw4DtbT8uaWWgC9jF9gOSap9hvWX5ehG4Q9Lxtu8HDrP9RDn7zOWSNrV9K0Wr/FHbW5fn+i9g\nnO1JkiZTdH1sNmxvNiIiOtKoSNjAW4GzbT8OUCbeq4GZks4Gzi3LGfi97acBJN0GrEcxvH4fSf9B\n8ZmsSTFV3K3lcWfVne/n5XmulDRe0njbT9VX6ugZ05csT5k6jSlTpw3BW42IqLbZXbOY3TWrLedu\ndyu5GaMlYZu6KxO2D5K0LcUF/jmSti7LvFBTbBGwtKQ3AJ8DtrE9X9JpwLI15Z4dwPlf5fAjpjf0\nJiIiOkF9A+aYo44ctnNXOWGPlmvY/0dxfXkVAEmrSFrf9nW2vwI8QnFTek+JVcCKFEn5KUlrAO/s\n53z7lOfZCXiyu8UeEREjW65ht5nt2yQdA3RJWgTcBIyXtAFFQr7c9jxJW/LqpG3bN0u6CbidYm7X\nq/o55fOSbqT4/D42pG8mIiKiB6MiYQPYPgM4o58yM4GZNevvrlk+sJdj3tDD5p/Y/swgqxoREe1S\n3R7x0ZOwIyIi+tPubu1mJGE3yPbO7a5DREQMTpUT9mgZdBYRETGqpYUdEREdo8ot7CTsiIjoHNXN\n10nYERHROdLCjmEzYbmxLYm71xZrtyQuwC/n3deSuK2q872PLWhJ3HVWXa4lceNl+YyjP1VO2Bl0\nFhERUQFpYUdERMeocgs7CTsiIjpGEnZEREQVVDdf5xp2REREFaSFHRERHaPKXeKVb2FLmij
pliZj\nHCDphAbKz5K0Vbl8d/dzuCMiYmTL87Crr/4Z2Y2Ub/TYiIhok3Yn3WZUvoVdWkrSDyXdKukSSctK\n+g9J10maK+mXksYBSNpb0i3l9lnl8Ut+g5J2l/QHSatK2qVcniPpbEnLt+PNRUREjJaEvQHwPdub\nAk8C7wd+ZXtb21sCfwb+rSz7/4Bdyu17lNsMIOm9wBeAd1Ik8cOAt9neGpgDfHaY3k9ERLSCmny1\n0WjpEv+77ZvL5TnARGAzSUcDE4AVgN+V+68GZko6Gzi33CbgrcA2wDtsPyPpXcDGwB/KLpRlgD80\nUqmjZ0xfsjxl6jSmTJ3W6PuKiBh1ZnfNYnbXrLacu8pd4qMlYb9Qs7wIGAecBuxp+xZJHwWmAdg+\nSNK2wO7AHElbU7Sw7wLeAGxEkfQBLrO972ArdfgR0wd7aETEqFXfgDnmqCOH7dxDnbAlnUqRTx62\nvVm5bRXgLGA94G7gA7af7OHYu4GnKPLWQtvb9nWu0dIl3pMVgAcljQU+3L1R0vq2r7P9FeARYJ1y\n1z+AvYAzJG0M/BHYUdL65XHLS9pgWN9BREQMqRaMEj8N2LVu2xcpGnwbAr8v13tiYJrtSf0laxg9\nCbunkdpHUCTdqyiuYXeX+bqkm8tbwa62Pa87hu07gP2AcygS/gHAzyXNo+gO36h1byEiIqrG9pXA\nE3Wb9wBmlsszgff0EWLATf7Kd4nbvhvYvGb9mzW7f9BD+ff3sG0m5Ydrey6wSbnr78CrvvXY3rlm\n+Q2DrHpERAyzYbqGvYbth8rlh4A1eiln4HJJi4CTbf+or6CVT9gREREDNsxjzmxbUm/zdexo+wFJ\nrwUuk3R72WLvURJ2RER0jEZb2M/dezPP39fwZJoPSXqd7QclrQk83FMh2w+UPx+R9GuKHt0k7IiI\niEaNW2dzxq2z5KorT177s4Ecdj7wUeBr5c/f1BeQtBywlO2ny0m5dgH6HC4/WgadRURE9GuoR4lL\n+jnloGRJ90o6EDgWeIekv1DM8XFsWXYtSb8tD30dcKWkuRQDpC+0fWlfdU8LOyIiOsZQjzmz/aFe\ndr29h7L/pLhnG9t/A7Zs5FxJ2BER0TEy01lEH/baYu2WxP3ulXe1JO4hk9dvSdxovfkLFrYk7oTl\nxrYkbkQjkrAjIqJjVLiBnYQdERGdI13iERERFVDhfJ2EHRERnWPMmOpm7NyHHRERUQFpYUdERMeo\ncpd4R7ewJV3d7jpERMTwacHzsIdNR7ewbe/Y7jpERMTwSQu7oiQ9I2l5SZdLmiPpZkl7lPuOlXRw\nTdnpkj7XW/mIiIhW6ugWNsXDw58D3ls+MWU14BqKJ638AvgOcFJZdm+Kp6k830v5iIgY4drdrd2M\nTk/YUPQyfFXSZGAxsJak1W3PlbR6+SzT1YEnbN8vaWwv5Xt83mlERIwcSdjVth+wGrCV7UWS/g4s\nW+47B9iL4jFovxhA+Vc4esb0JctTpk5jytRprah/RESlzO6axeyuWW05d4XzdRI2MAF4uEy+OwPr\n1ew7CzgFWBWYUm4b30f5Vzj8iOmtqXFERIXVN2COOerI9lWmQjo9YRs4E7hA0s3ADcCfl+y0b5O0\nAnCf7YfKzb2Wj4iIkS1d4hUkaVXgcduPATv0Vs725nXrfZaPiIiRq8L5ujMTtqS1gCuA49pdl4iI\nGD5pYVeM7X8CG7W7HhERMbwqnK87e+KUiIiIqujIFnZERHSmdIlHRERUQIXzdRJ2RER0jrSwI/ow\nf8HClsQ9ZPL6LYn7y3n3tSTuXlus3ZK4rdSq392E5cZWKm7ESJCEHRERHaPCDewk7IiI6BzpEo+I\niKiACufr3IcdERHRDEmHSLpF0q2SDumlzPGS/ippnqRJgzlPEnZERHQMSU29eoi3KfDvwJuBLYB3\nSVq/rsxuwBttbwD8J/D9wdQ9CTsiIjqG1NyrB28C/mj
7eduLgC7gfXVl9gBmAtj+I7CSpDUarfuI\nStiSrm53HaB4OIikc/opM0HSQcNVp4iIaN5Qt7CBW4HJklaRtBywO1B/D+frgXtr1u/roUy/RtSg\nM9s7trsOkpYuHw6ydz9FVwYOZpBdGxERMfyGepS47dslfQ24FHgWuAlY3NOp6w9t9FwjKmFLegZY\nAziPIiGOBQ63fb6kY4F7bJ9Ulp0OPA38oJfyywNnU3yzWQo4yvbZkt4MfAdYHngeeDuwF0UXxvLA\nGEkHAL+1vWm5/F5gfBnrp7ZnAMcC60u6CbjU9hda+uFERMSwe/LOG5l/1019lrF9KnAqgKT/Be6p\nK3I/sE7N+trltoaMqIRN8Y3jOeC9tp+WtBpwDXA+8AuKRHtSWXZvYBeKpNtT+V2B+23vDiBpvKRl\nyjgfsD1H0grl+QAmAZvZflLSRF757efNwCZl2esl/Rb4ArCJ7UGN9ouIiOHXaAN75Q22YuUNtlqy\nfs+lp/UQU6vbfljSuhQNvLfUFTkf+CTwC0nbAU/afqixmoy8hA3FdfWvSppM0a2wVvlhzJW0uqQ1\ngdWBJ2zfL2lsT+WBm4FvlC3zC21fJWkz4AHbcwBsPwMgyRSt5Cd7qdOltp8oy54L7AT8plUfQERE\ntEaLJk75paRVgYXAwbafkvRxANsn275I0m6S7qToNj9wMCcZiQl7P2A1YCvbiyT9HVi23HcORff1\n6yhayr2Wt/3X8l633YGjJf0e+HUf510wwPqJnq9PvMrRM6YvWZ4ydRpTpk4b4CkiIkav2V2zmN01\nqy3nbkW+tj2lh20n161/stnzjMSEPQF4uEy+OwPr1ew7CzgFWBXo/oDG91S+bIk/YftMSfOBj1Fc\nd15T0ja2b5C0IkWi7u9X+A5JK1N0v+9J8e3oGWDFvg46/IjpA33PEREdo74Bc8xRR7avMhUy0hK2\ngTOBCyTdDNwA/HnJTvu28rrzfTX9/72V3ww4TtJiim6K/7K9UNI+wAmSxlEk63eU560fsVe7fh3w\nK4qBAj+xfSMUt6FJugW4KIPOIiJGvswlPgTK/v/HbT8G7NBbOdub1633Vv4eimH29cffAGxft3lm\n+eouczdQe577bL+3h1j79VbPiIgYeSqcr0dGwpa0FnAFcFy769KDnlrfERFRQWMqnLFHRMIuJyrZ\nqN316IntV7S+IyIi2mFEJOyIiIjhUOEGdhJ2RER0jgw6i4iIqIAx1c3XSdgREdE50sKO6MOE5ca2\nuwoN2WuLhp96NyDzFyxsSdxWfr5V+91FjGZJ2BER0TEq3MBOwo6IiM6hfmeiHrmSsCMiomNUedDZ\nmHZXICIiIvqXFnZERHSMjBKPiIiogArn63SJN0vSLElbt7seERHRvzFSU6+21r2tZx8d8jSviIho\nuVGbsCV9VtIt5euQctv+kuZJmitpZrnt3ZKulXSjpMskrV5uny5ppqTZku6W9D5J35B0s6SLJeVy\nQkRExUjNvdppVCadsov6AGBbii8lf5R0PXAYsL3txyWtXBa/0vZ25XH/DvwP8Ply3xuAnYFNgGuB\n99r+vKRzgd2B84bpLUVExBAYlYPOJJ3Qx3G2/ekW1Geo7ASca/s5gDLBbgOcbftxANtPlGXXkXQ2\n8DpgGeBv5XYDF9teJOlWYIztS8p9twATh+WdRETEkKlwvu6zhT2Hl6/Ndr9Fl8sj/Zptdz27qe5n\nrROAb9i+UNJUYHrNvhcBbC+WVDsR9GJgqf4qcfSMl0NNmTqNKVOnDaDqERGj2+yuWczumtWWc7d7\n4Fgzek3Ytk+vXZe0vO1nW16joXElcLqkYym6xN8DfBw4TdK3urvEy1b2eOCf5XEH1MTo77fa72/9\n8COmN1rviIhRr74Bc8xRR7avMhXS76AzSTtIug24vVzfUtJJLa9ZE2zfBJwOXEdx7flHtv8AHAN0\nSZoLfLMsPh04R9I
NwCO83HtQP/q7vldhpPcyREREHTX5aifZfecdSdcBewHn2Z5UbvuT7U2GoX6V\nJcnPLUxOj5dV8fGaEcNh3Fhhu+X5UJI/OPOmpmL84qOThqWuPRnQKHHb99SNrHupNdWJiIhonSo/\n/GMgCfseSTsCSFoG+DTw55bWKiIiIl5hIBOnHAR8Ang9cD8wqVyPiIioFElNvXqIt5Gkm2pe8yV9\nuq7MtHJ7d5nDB1P3flvYth8B9h1M8IiIiJFkqO/qsn0HRUMWSWMoGra/7qFol+09mjnXQEaJry/p\nAkmPSnpE0nmS/qWZk0ZERLTDULew67wduMv2vT2dutm6D6RL/GfA2cCawFrAOcDPmz1xRETEKPNB\nipxZz8AO5bMsLpK08WCCD2TQ2TjbP6lZ/6mkQwdzsohO1qrbr7575V0tiQtwyOT1WxY7oh0aHSX+\n4G3X8+BtN/RbrhyU/W7gCz3svhFYx/YCSe8EfgNs2FhN+p5LfBWKJvzFkr7Ey63qfYCLGz1RRERE\nuzX68I81N9mWNTfZdsn6vHN/0FvRdwJzynFfr2D76ZrliyWdJGmV7mdbDFRfLewbeeVsXv9Z/uye\nS/yLjZwoIiKi3Vp4G/aH6OVysaQ1gIdtW9K2FJOWNZSsoe+5xCc2GiwiImIka8XDPyQtTzHg7D9q\ntn0cwPbJFLOFHiTpJWABxbXuhg1opjNJmwIbA8t2b7N9xmBOGBERMZqUD8ZarW7byTXLJwInNnue\nfhO2pOnAVGAT4LcU/fRXAUnYERFRKRV+uuaAbuvai6Kp/4DtA4EtgJVaWquIiIgWaPF92C01kC7x\n52wvkvSSpAnAw8A6La5XRETEkKtyC3sgCft6SSsDPwJuAJ4F/tDSWlVIecngadvf7K9sRETEYA1k\nLvGDy8UfSLoEGG97XmurVQ2SluaVt75FRMQI1opR4sOlr4lTtqaXZCRpK9s3tqxWw0jSROB3FL0H\nWwF/AvanGBX/TWAF4FHgANsPSpoF3ATsRKZojYiolArn6z5b2N+k79bjzkNcl3baEDjQ9jWSfgx8\nEngPsKftRyXtAxwD/BvFZzLW9psBJH2lXZWOiIjGtHvgWDP6mjhl2jDWo93utX1NufxT4DBgU+Cy\n8pe7FPDPmvJnDSTo0TOmL1meMnUaU6ZOG4KqRkRU2+yuWczumtXualTOgCZO6QC1PQkCngL+ZHuH\nXso/O5Cghx8xvclqRUSMPvUNmGOOOnLYzj2Qe5lHqirXfSitK2m7cnlf4Frgtd3bJI0d7OPQIiJi\n5KjyfdinQdhPAAAgAElEQVRJ2IU7gE9Iug2YABxPMWHM1yTNpRhktn0fx2ekeEREBYxRc692GsjU\npGOA/YA32J4haV3gdbava3nths9Ltj9St20exZSsr2B757r14evLiYiIprQ76TZjIC3skyhal/uW\n68+U20aTtJAjImJEG8igs7fYniTpJgDbj0sa2+J6DRvbdwObt7seERHReu2+Dt2MgSTsFyUt1b0i\n6bXA4tZVKSIiojWq3CU+kIR9AvBrYHVJ/0sxGOvwltYqIiKiBSrcwB7QXOI/lTQHeFu5aU/bf25t\ntSIiIqLWQEaJr0sxUcgF5SZLWtf2PS2tWURExBAblQ//qHERL4+iXhZ4A8V9y5u0qlKjxfwFC4c8\n5oTlRs14vxgih0xev2Wxr/zrIy2JO3mD17YkbkR/qjz5yEC6xDetXZe0FfCJltUoIiKiRSrcwG78\ny0b5WM23tKAuERER0YuBXMP+XM3qGIpnRt/fshpFRES0yGi/hr1CzfJLwIXAr1pTnYiIiNapcL7u\nO2GXE6aMt/25vspFRERUwaicOEXS0rZfkrSjJNnOfNsREVFpo7VL/DqK69VzgfMknQMsKPfZ9rmt\nrlw7Sbob2Mr24+2uS0RERF8Ju/tryLLAY8Bb6/aP6oRNce95db+KRUTEq7SigS1pJ
eAUivlJDHzM\n9rV1ZY4H3knR8D3A9k2NnqevhP1aSZ8Fbmk0aJVImgj8DriBokfhT8D+5e5PSXo3MBbY2/YdkqYD\n65ev1YCv2z5lmKsdERGD0KJr2N8FLrK9l6SlgeVrd0raDXij7Q0kvQX4PrBdoyfp6z7spYAVKUaJ\n9/QaTTYETrS9MfAUL08M84jtrSk+3M/XlN8U2JniOeFHSFpzOCsbERGDoyb/e1U8aQIw2fapALZf\nsj2/rtgewMxy/x+BlSSt0Wjd+2phP2j7yEYDVtS9tq8pl38KHFIud3f73wi8r1w2cJ7tF4AXJF0B\nbAucVx/0uK/OWLK8w05T2XHy1BZUPSKiWmZ3zWJ216x2V2OovAF4RNJpwBbAHOAQ2wtqyrweuLdm\n/T5gbeChRk40kPuwO0HtCHjx8vO+Xyh/LqLvz6rH54Mf+qUjmq9ZRMQoM2XqNKZMnbZk/Zijhq9t\n2IIu8aUpLqd+0vb1kr4DfBGoTwD1Z274zqu+ktDbGw1WYetK2q4cJLAvcBUwqZeyAvaU9FWKSwPT\ngC8MSy0jIqIpjSbsu+Zey9/m/rGvIvcB99m+vlz/JUXCrnU/sE7N+toMYsbQXhO27ccaDVZhdwCf\nkHQqxaCz7wOfqtlvXv42ZOBm4AqKQWczbD84jHWNiIhBUoPDxN84aXveOGn7JeuXn3HCK/bbflDS\nvZI2tP0Xisbun+rCnA98EviFpO2AJ2031B0O6RLv9pLtj9Rte0P3gu05vPK2tpttf3RYahYRESPd\np4AzJS0D3AV8TNLHAWyfbPsiSbtJuhN4FjhwMCdJwi40ei0hs75FRFRQK27rsj0PeHPd5pPrynyy\n2fN0fMK2fTeweQPlO2XkfETEqFPhmUmTsCMionOM1rnEIyIiRpUqP62rr5nOIiIiYoRICzsiIjpG\nhXvEk7Cj9e59bEH/hQZhnVWXa0nceNnmr1+pJXG3/sqlLYk758hdWhI3Ro8xFX4IYxJ2RER0jCq3\nsHMNOyIiogLSwo6IiI5R5VHiSdgREdExch92REREBVQ4X+cadkRERBV0ZMKW9OWa5YmSbmlnfSIi\nYniMkZp6tbXubT17+3yp3RWIiIjhJzX3aqdRkbDLVvLtkn4q6TZJ50h6p6Rf15R5h6RzJX0VGCfp\nJkk/oXhU5lKSfijpVkmXSFq2PGZLSddKmlceu1K5fZakYyX9UdIdknZqyxuPiIiGjGny1U7tPv9Q\n2hA40fbGwFPAJsCbJK1W7j8Q+LHtLwHP2Z5k+yOAgA2A79neFHgSeH95zBnAoba3AG4BvlJuN7CU\n7bcA/12zPSIiRjBJTb3aaTSNEr/X9jXl8k+BT1Mk3A9LOh3YDvhwL8f+3fbN5fIcYKKk8cAE21eW\n22cC59Qcc27580ZgYk9Bj/vqjCXLO+w0lR0nT23k/UREjEqzu2Yxu2tWu6tROaMpYbtmWeX66cAF\nwPPA2bYX93LsCzXLi4BleyhT/9Wq+5hF9PI5HvqlI/qucUREB5oydRpTpk5bsn7MUUcO27krfFfX\nqOoSX1fSduXyvsCVth8A/gkcDpxWU3ahpL6+rMj2U8ATNdenPwLMGuI6R0TEMMoo8ZHhDuATkm4D\nJgDfL7f/DLjH9h01ZX8I3Fwz6My8Uvf6R4HjJM0DNgdm0LP64yMiYgRSk692Gk1d4i+Vg8jq7QT8\nqHaD7S8CX6zZtHnNvm/WLM8Dtq8PaHvnmuVHgX8ZfLUjIiL6N5oS9qtauZLmAE8Dnxn+6kRExEjT\n7nupmzEqErbtu6lpJdds33r4axMRESNVu2/NasaoSNgREREDUeWBW1Wue0RERMdICzsiIjpGusQj\nIiIqoLrpupggpN11GJUk+bmF+WwjhtN3r7yrJXEPmbx+S+LOX7CwJXEBJiw3tmWxh9q4scJ2y3Op\nJJ8z959Nxdh7y7V6rKukpYAbgPtsv7tu3zTgP
OBv5aZf2T660XOnhR0RER2jhQO3DgFuA1bsZX+X\n7T2aOUEGnUVERDRB0trAbsAp9N7r3nQPQhJ2RER0jBY9XvPbwKFAbw+YMrCDpHmSLpK08WDqni7x\niIjoGI02c2+9/g/cesMfeo8nvQt42PZN5bXqntwIrGN7gaR3Ar8BNmywKknYERHRORq9q2uzbXdg\ns213WLJ+1g++WV9kB2APSbtRPJp5vKQzbO/fXcD20zXLF0s6SdIqth9vpC7pEo+IiBgk21+2vY7t\nNwAfBP6vNlkDSFpDZX+6pG0p7tBqKFlDWti9knQ6cIHtX7W7LhERMTTGtP5ObANI+jiA7ZOBvYCD\nJL0ELKBI7A3r6IRdfuOR7Z4GCuQm6oiIUaaVE53Z7gK6yuWTa7afCJzYbPyO6xKXNFHSHZJmArcA\nL9Xs20vSaTXF3y7p+rL87mWZLklb1BxzlaTNhu0NRETEoKnJ/9qp4xJ26Y3AibY3BZ6t2V7bqhaw\nnu03A7sDP5D0GuDHwAEAkjYEXmP7lmGpdUREdKxOTdj/sH1dP2UMnA1g+06KKeU2As4B3iVpaeBj\nwGm9RoiIiBFFau7VTp16Dbu3VvW4fo6z7eckXQa8B9gb2Kq3wkfPmL5kecrUaUyZOq3hikZEjDaz\nu2Yxu2tWW849DIPOWqZTE3athyS9CfgL8F5gfrldwN7lte5/KV93lPtOAS6kmBt2Pr04/Ijprapz\nRERl1TdgjjnqyGE7d7tbyc3o1IRd26r+IkXyfYTiSSvL15S5B7gOGA983PaLALZvlDSfdIdHRFRK\nEnaF2L4b2Lxm/VfAq+61tn1gbzEkrQWMsX1pK+oYERFRr1MHnQ2apP2Ba4Evt7suERHRmCrf1tVx\nLexm2T4DOKPd9YiIiMaNSZd4RETEyNfuVnIz0iUeERFRAWlhR0REx8go8YiIiAqocpd4EnZEjBqH\nTF6/JXFXfvMnWxL37q5vtyRu9K7Kg85yDTsiIqIC0sKOiIiOkS7xiIiICsigs4iIiAqocL5Owo6I\niM4xpsJN7Aw6i4iIqIAk7AGQNFHSLe2uR0RENEdNvtopXeIREdE52p11m9ARCVvS8sDZwOuBpYCj\ngL8D3wWWA14A3gasRvEkruXLQz9p+5q6WBP7KxMRESNTbusa+XYF7re9O4Ck8cBNwAdsz5G0AvAc\n8BDwDtsvSNoA+Bnw5rpYAykTERExpDolYd8MfEPSscCFwHzgAdtzAGw/AyBpGeB7krYAFgEb9hBr\nIGUiImIEqvAg8c5I2Lb/KmkSsDtwNHBFL0U/Q5HIPyJpKeD5QZYB4OgZ05csT5k6jSlTpw3uDURE\njCKzu2Yxu2tWW85d4XzdGQlb0prAE7bPlDQfOAh4naRtbN8gaUVgATAeuK88bH+K6931BlIGgMOP\nmD5E7yAiYvSob8Acc9SRw3fyCmfsjkjYwGbAcZIWAy9SJOwxwAmSxlEk67cDJwG/krQ/8DvgmZoY\nLn/2VSYiIjqIpGWBLuA1FJdMz7P9pR7KHQ+8kyLfHGD7pkbP1REJ2/alwKU97Nq+bv1OYIua9S+W\nx98NbF4u91gmIiJGvqEeJW77eUk7214gaWngKkk72b5qyTml3YA32t5A0luA7wPbNXqujkjYERER\n0JpBZ7YXlIvLUFwmfbyuyB7AzLLsHyWtJGkN2w81cp7MdBYRER2jFTOdSRojaS7Fbb9X2L6trsjr\ngXtr1u8D1m607mlhR0RE52iwhX3DNVcy59qr+ixjezGwpaQJwCWSptme1c+ZTYOSsCMiInqxzfaT\n2Wb7yUvWf/jdY3sta3u+pN8C2wCzanbdD6xTs752ua0h6RKPiIiOoSb/e1U8aTVJK5XL44B3UMyk\nWet8ituAkbQd8GSj168hLeyIiOggLRh0tiYwU9IYikbwT2z/XtLHAWyfbPsiSbtJuhN4FjhwMCeS\n3XA3egyAJ
D+3MJ9tK81fsLAlcScsN7YlcSPqrbbv6S2L/ejPDmhZ7KE2bqyw3fIpTSR57j+eairG\nluuNH5a69iRd4hERERWQLvGIiOgcmZo0IiJi5MvzsCMiIiqgyo/XzDXsiIiICkgLOyIiOkaFG9ij\nt4Ut6QBJJ7S7HhERMYK0YjLxYTKaW9i5CToiIl6hyoPOKtfClrS8pN9KmivpFkkfkPRmSX8ot10r\naYWy+FqSLpb0F0lfq4mxS1l+jqSzJS1fbr9b0v9KuknSDZK2knSppDu7Z60pyx0q6TpJ8yRNH95P\nICIiBktq7tVOVWxh7wrcb3t3AEnjKeZt/YDtOWWyfo6i82LL8vUicIek44EXgMOAt9l+TtIXgM8C\nR1G0yv9he5KkbwGnA9sD44BbgZMl7ULxIPJty6nozpM02faVw/UBRERE56liwr4Z+IakY4ELgfnA\nA7bnANh+Boop6IDf2366XL8NmAisDGwM/EHF16VlgD/UxD+//HkLsLztZ4FnJb1QPjptF2AXSd2T\nuy8PvBFIwo6IGOGq2yFewYRt+6+SJgG7A0cDV/RR/IWa5UW8/H4vs71vP8cspmiZU7PeffxXbf+w\nv7oePWP6kuUpU6cxZeq0/g6JiBj1ZnfNYnbXrPacvMIZu3IJW9KawBO2z5Q0HzgIeJ2kbWzfIGlF\nYAE9/1oMXAucKGl923eV16/Xsv3X+lP1cvwlwFGSzrT9rKTXAy/afqS+8OFHTB/0+4yIGK3qGzDH\nHHXksJ27yoPOKpewgc2A4yR1t4APohg8d0L5LNIFFM8jNT2MFLf9qKQDgJ9Lek25+TCgPmHXH+/y\n+Msk/StwTdml/jTwYeBVCTsiImKo5PGaLZLHa7ZeHq8ZVZfHaxaG8/Gatz/wbFMx3rTm8m17vGYV\nW9gRERGDUt0O8STsiIjoJBXO2JWbOCUiIqITpYUdEREdI6PEIyIiKqDd04s2Iwk7IiI6RoXzdRJ2\nRER0kApn7NyH3SK5Dzsi2um7V97VkriHTF5/yGMO533Ydz68oKkYb1x9udyHHRER0WoZdBYREVEB\nGXQWERFRARXO15k4JSIiYrAknSrpIUm39LJ/mqT5km4qX4cP9lxpYUdEROcY+ib2acAJwBl9lOmy\nvUezJ0rCjoiIjjHUg85sXylpYr+nHQLpEu+FpAmSDuqnzOmS3j9cdYqIiOZIzb0GwcAOkuZJukjS\nxoOtexJ271YGDu6njMtXRERET24E1rG9BUXX+W8GGyhd4r07Flhf0k3AZcBjwH7AYuAi218uy1V5\n0GFEREdp9A/2NVd1cc3Vswd9PttP1yxfLOkkSavYfrzRWEnYvfsCsIntSZLeCRwObGv7eUkrtblu\nERExCI12a+8weSo7TJ66ZP07Xz+mwfNpDeBh25a0LcUMow0na0jC7kvtr/XtwKm2nwew/eRAAhw9\nY/qS5SlTpzFl6rQhrF5ERDXN7prF7K5ZbTr70HaKSvo5MBVYTdK9wFeAsQC2Twb2Ag6S9BKwAPjg\noM+VucR7Vo76u8D2ZpK+Adxu+5S6MqcBF9r+VQ/HZy7xiGibzCX+apJ83xMvNBVj7ZVf07a5xDPo\nrHdPAyuWy5cDB0oaByBp5bbVKiIiOlISdi9sPwZcXc5e81bgfOCGchDa52qLtqN+ERHRODX5aqdc\nw+6D7f3qNn2tbv+Bw1idiIhoUh7+ERERUQFVfrxmusQjIiIqIC3siIjoHNVtYCdhR0RE56hwvk7C\njoiIzlHlQWe5hh0REVEBmemsRTLTWUTEwAznTGcPP7WwqRirjx/btpnO0iUeERGdo8Jd4knYERHR\nMSqcr5OwIyKic2TQWURERLRUWtgREdExqjw1aRJ2RER0jHSJR0REREslYUdERFRAusQjIqJjVLlL\nPAk7IiI6RgadRY+OnjF9yfKUqdOYMnVa2+oSETFSzO6axeyuWW05d5Vb2Jl
LvEUyl3hExMAM51zi\n859b1FSMCeOWylziERERrVbhBnYSdkREdJAKZ+wk7IiI6BgZdBYREVEBVR50lolTIiIiKiAJewRo\n1e0NidvauK2MnbjVjNvK2Ik7NNTkq8eY0q6Sbpf0V0lf6KXM8eX+eZImDabuSdgjQNX+h0nc1sdO\n3GrGbWXsxB0iQ5yxJS0FfA/YFdgY+JCkf60rsxvwRtsbAP8JfH8wVU/CjoiIjqEm/+vBtsCdtu+2\nvRD4BbBnXZk9gJkAtv8IrCRpjUbrnoQdERExeK8H7q1Zv6/c1l+ZtRs9UWY6axFJ+WAjIgZouGY6\nG4o4tXWV9H5gV9v/Ua5/GHiL7U/VlLkAONb21eX65cD/2L6xkfPmtq4WadfUdRER0bMW/V2+H1in\nZn0dihZ0X2XWLrc1JF3iERERg3cDsIGkiZKWAfYBzq8rcz6wP4Ck7YAnbT/U6InSwo6IiBgk2y9J\n+iRwCbAU8GPbf5b08XL/ybYvkrSbpDuBZ4EDB3OuXMOOiIiogLSw20jScrYXtLsew03SKn3tt/34\nEJ1ndWDZmrj3DEXcKpD0uT522/a3huAca1Lc0rIYuN72g83GbBVJm9m+pd31iGhGEnYbSNoBOAVY\nEVhH0pbAf9o+uMm4qwFfAXYCDFwJzLD9WJNV7o4/nuKP/dNNhrqRon69eUMzwSXtAXwTWAt4GFgP\n+DOwSTNxy9hfs/2F/rY1EG9r+vgsGh1FWmPFvuI2S9K/A0cAV5Sbvidphu0fNxl3J4p/wxN5+e+T\nbf9LM3GB70t6DXAacKbt+U3GW0LS9sDxFJNmLEPRLfqM7fFDEHsyxYQbp0l6LbCC7b8PMtb7Kf5N\n9DTwyrbPbaKqMQzSJd4Gkq4D9gLOsz2p3PYn200llPJWgS7gpxT/U+4LTLP99ibjvhk4Fej+A/Qk\n8G+2b2gmbqtIuhl4K3CZ7UmSdgY+YvtjQxD7pu7fWc22W2xvNsh4s+g7Ye88mLhl7KWBTw9Fa7qH\n2H8Btu/+MihpVeAa2xs2GfcO4L8pvtQt6t5u+9Fm4paxNwQ+BuwNXAecZvvSIYg7B/ggcDawDcXg\noo1sf7HJuNOBrctYG0p6PXC27R0HGe90+v63NqjrqjF80sJuE9v36JWPjXlpCMK+zvZRNetHS9pn\nCOKeChxs+0pY0go6Fdh8MMEkbdXX/iZald0W2n5U0hhJS9m+QtJ3mwko6SDgYGB9SbVdqysCVw82\nru1pzdSrn9gvSfoQMOQJG3gUeKZm/ZlyW7OetH3xEMR5Fdt/kXQ4xaje44EtJY0Bvmz7V03G/mv5\nb20RcJqkuUBTCRt4LzAJmFOe435JKzZRxwOarE+0WRJ2e9wjaUeA8jaAT1N02Tbr0vIP9Fnl+t5A\n0y0I4KXuZA1g+ypJzXzB+BZ9d9cOulVZeqL8w3YlcKakh3llchmMnwEXA8cCtd3fzwzFJYeyNbw7\nRVfwUhQ9JENxrfkqSd+j+DfxbPfGIfhSdBdwraTzyvU9gZvLa+fN1PsKSccB5wIvdG9str6StgAO\nAN4FXAa8y/aNktYCrgWaSdjPlt3t8yR9HXiQ3p8T0YgXbC/u/mIvafkhiImk/wW+bvvJcn1l4HO2\nDx+K+NE66RJvg/Ja8/HA2yn+x76UouuyqT/8kp4BlqMYBATFffbdf6Q92Gtqkr4DjAN+Xm7aB3ge\n+EkZuNk//kOq/MP2PMX734+iK//MIbyWvzXFOIHFwNVD8f4lXQw8B9zCy78/bB/ZZNxZ9PDlqJmu\n9jLu9O5Q3ZtqzzPYerewvl3Aj4FzbD9Xt29/22c0EXsi8BDF9evPUPx7O8n2nYOucBH3UOCNwC7A\nVym6839m+/gm4861vWXdtldd6omRJwl7mJUtqZm292t3XQaqVddZy8T6WWBd2/8haQOK63UXDqqi\nL8f9HPAL2w3PJDSA2EdQ9FycS5Gk9gR
+WXcpYjBxb7Y9qEsMMTBlK3gjin/Ld9h+sc1V6pekXSgS\nNsAlti8bgpg3A9vafr5cHwfc0OwYmmi9dIkPs/K64nqSXmP7hf6PGLiym32e7WckfYTi+td3bf+j\niZhjgO/bPqvfwo07jeL63A7l+j+BXwJNJWyK68qXSnqC4sk55wxmVqFefBjYvOaP3VeBeUBTCZui\nvv+f7UuarWAtSa8DjgFeb3tXSRtTDBZrdjT3RsDnefVo7rc2GXclilHiU8pNsyjudGhqVLek3YEf\nAH8rN/2LpI/bvqiJmH3dJuah+AJm+1JJf6T4jC1plSG47fFM4PeSTqX40nkgMOgehhg+aWG3gaSf\nAG+imK6u+z7spq9Xln9AtgA2A06n6ALc2/bUJuPOsb11MzH6ilvbHSdpnu0thij+FsAHKEbk32f7\nbUMQ8wrgfbafKNdXBn41BInqfRSj+8cAC8vNg76MURP3dxRfjA6zvbmkscBNtjdtMu7NFM/0rR3N\nbdtzmox7LsVlgZkUyeQjFF+Q3tdk3DuA3bu7qSWtD1xke6MmYk7sa7/tuwcbu4z/ceBIimv53ZdJ\nmr7FrbzOfgXFJTkDlwNvtf0/zcSN1ksLuz3uKl9jgBWou/7XhJfKQSrvAU60fYqkpm9lAi6T9Hle\nPXCp2W/6L5TdccCSP6JD2evwMMUAoMeA1w5RzKeAP0nqHsz3DuA6SSdQ/DH99CDjfgvYDrjV9uL+\nCjdgNdtnSfoigO2FTQ4Y7LbQ9veHIE699euS83RJ84Yg7lN115T/RvG7HLRmE/IAHApsOhS3tNV5\nR5mcl4zGLwf6JWGPcEnYbWB7eotCPy3pyxTdtpMlLQWMHYK4H6T4QvGJuu1NTXACTAd+B6wt6WfA\njhQjeZsi6WCKlvXqwDnAv9u+rdm4pV+Xr26zapab+dJ1D/CnIU7WAM+UgxyBJQ8eGHT3sopZ6gRc\nIOkTvHo0d7Nf4p6TNLnuFsJBzwaoYrIQgBskXURxrzQU4xCGZB4BtW7ilL9RDEQcEq26NTGGT7rE\n26DsVq03FNf/1qSYLOU621dKWhfY2fbMZuK2iorJNkTRsgT4I03M5FQT91iKQWdzm6zisJE0k+IL\n0MVA92CoobhMsjVFMtkU+BNFT8NetgfVapV0N30PQGx2lrotKa6nTig3PQF8tIn6ns7/3969B9tZ\nlXcc//4ILdcAQlWgchsqCDQgdwIjIqhcahhRAhawFgFxBEXLtNN2RCPE0Q5CW1TsABJAoZVQGaHc\nK0hCLgYItxAE6ii2hSqX0IYAw+3XP9Z6c/Y5OclJzrve8+bd5/nMnJm999n72SsnyVn7XetZzzN8\nJnt1bK52sRA1VzhlL9LW1jwG/5sY1SqOpE2BtzFwNLE6era01AmK0KyYsFsgaZ+eu+sDHyctZ/9l\nS0MakaQ/Jl1B9NbmrpWoImkucGSVUJQTomaWylZVwVrikmbanippEStOWLUTjFZ2TKrAsa4NgDOB\nw0lLwPOBi6qkuZpxz2DgeNs9pOTEIleESmVwsV1r2Xos9ORiLM/0H+7o1Cji3gfMYuCoX/VvYq38\nAB6aFxP2WkLSvbb3HeVr59g+KJ/DHm4yqZu4NA14P6kW903AkcA9to+tGfdPSJ/0jyIdt7kKOLHu\nlbFWUku8zgcBSVvbflrSdgxTFGMM9jNHRdJM0kTdW652U9tT19K41QfY7RlcQObcmnFnDHnIpMAl\nytXOIuUyXAY8Q8qb+FTd5Mk4Gx2Gij3sFmhwt6p1SMtoo55UnWsL29645tBW5lhS9vlC2ydLeifp\naEgttm9SqvR2Byn57mO2H68bF5gOTGZILfE6AfNkvS5whWsW8RhOXg34K9IqRpWIV3ubBNjN9q49\n9++UVGI/v6m4PyHVqr+fVPymlJsY+DC7Aans59OFYv8Z6f/xmaTCKe8ifeio65acKX4DZfMEQkfF\nhN2
O3m5VbwC/Bk4ZbTA1367yFdtvSnoj74P9DthmtMFyRnWvTUhZ82dKqpNpXSleSxyWn6F/S9Jm\nzmUdC7qalIX/EeB0UvLdswXiLpQ02fY8WJ50VuvoVcNx/9D24QXiDGL7ut77OcmxVKLVc8BreTtg\nWk72XK9A3BNIvyeG7oXXTfYMHRUTdgtsb184ZKPtKoF783njS0mZtcuAuTXi3c9Amz/33C+liVri\nlWXAI/lYV+8Z+rofMrbIx/C+YPtu4O68h1nXPsAcSf9J+hlvCzyes4Tr7L03FXeupN1tPzzK16+u\nnSh31O+nwGEM/BvbELiNgYJAo9LA74nQcbGHPYYkHWb7pxroSzvU86S94TeH+d5aQdIOwMTSv1Dz\nKsE2o80GHhJrI9JxmKqW+KYUqiUu6c/zzaHJYbUSgSTNt31A/iBwEWm5dqbtHWvG3X5V3x/t3nvp\nuD3HjCYA7wZ+xcAycK2kPqVqfW/QU1efVPv7r12zS1eOP1xt7hJJZ42U7g3dFVfYY+tg0qfxKQw/\nYbll0agAAAwjSURBVG8BfJmUwDIqSj1zt6Pn79b2rNHGW0lcSTq4blylGuVH55j3A8/mBLovjTLe\nnLyf/1tW/PlOl/QCcL7t79YY9nXkLYL8nhPoyUSv4etKZTnPBr5N2iYY1c+hV1PJcA3EnVKFZsWk\nvrpXFQYWu2Z1t1V4WdLezlXe8imQEtnyTZXuDR0VV9hrGUmXjzZzVdLfkTppLWagXCS2p6z0Re3G\nfdD2eyWdSrq6/qqkR2xPqhN3Fe+3BTDX9cpRzgc+aPulfH8iqSlDreXPkEj6ge1PjvTYKOJeSar+\nt6DWAIePvS+pZv0z+aEtgU/YrrWloYZL94buiSvsFmgVDQ5qHjM5hrRkVrSpSINxJ+RiL8eRVhag\n7F72ILafzxnjdaxfTdY55lJJG9aMWU0oX/TgGuUXlDh21DGDroJzZn6JOvYHACdJeorBLWdLdEjb\ngdRoZzvgY8B+9LRIraHp0r2hY9ZpewDj1OWkM6xTSZPVUtLyV12/JJVGLK2puOeSknN+aXtB/oX0\nZAPvs5ztukd5lilVDwOKLn/uUU3WAPn2XgXidoKkv5W0FJgkaWn1RTqRcEOBtzgc2BE4lLT8PoW0\nHVPCObnAy6bAB0hNUUrUWZ/G4NK9d5LqFoRxKpbEWzDcslaJpS6lTkd7kPbJexN2RlvKsDp+tTXw\n3lJxu2yY5c+tgOMLLH8+RCoj+0K+vzlwd1PbA2srSd+w/Tdtj2NN9GztfBN4xPbVpYqeKNWBr0r3\nznf5RiChQ2JJvB1FGxz0uCF/DVc7eTR6j1/d2HO7yKe8vNx3CisWC1lrl4Ft3ytpF1JlNgOP2359\nhJetjguAeZKuJf2Mp5L6WI83O+cKeLe4fCOUpvy3pEtIyaLfzNXaSq1erkeqp74usKukIkmkoZvi\nCrsFKtzgYEjs9UhnTAF+UWgy6Y1f8vjVdcBjpKNXXyN1GXtsbb5yl3QccKvt/5N0DmnvcrrthQVi\nH0g63wxwn+06Z907SdKHgJNJV5XXAjMKVb9rTD5+dQTwsO0nc17GJNu3j/DSkeI2kuwZuism7Bap\ncIMDSYcAVwJP5Ye2JX0QuLtm3J8x5PgVMOrjVz1xq6XEh23vLun3SOfQ968Tt0lVFnteFZkOfAv4\niu39asY9CziN1K5SwEeBS21fVHfMXZQTMz9BSkb8Dalozw9LfwBdm0l6gjTxR6JZAGJJfExJOrvn\nrnser4pv1GqlCFwIfLi6IpG0E2m/tW7y0mb5ivJU4Krq+FXNmDDQMvB/JU0iNU0oVX2qKdWVzkdI\nE+q/STqvQNxTgf1tLwOqFqHzSUVUxpW8b3tS/loIXEPqCvYp4JD2RjbmqmTPmLADEBP2WJtIg8eW\ngHV7lw9tP5GPxdTV1PGrS/MS+5dJTR82Br5SIG6TmtyvfGslt8cNS
dcD7yFtGU2xXSX3/YtS3+m+\n15Ps+TLwoKRxn+wZkpiwx5DtaQCSrgLO6jlzuzkp6aiu+yVdxkDLwxNJtb/rqo5fzSl8/OoHDLRS\nrEp7vrNA3CYdR9qvPN/2i/mDTIk+5jOAn+dM/2pJ/PICcbvmElIb14OAfSXNJvXZftV2ifPYXdBb\nW/9GyiWRho6LPewWNFh7eH3gDNIvO0jNLy6uuwe2sg8Ytk+uGfc2Blop9ibVlPjw0iildpjLS5La\n/k2BmHuTln4NzLb9QN2YXaOG+mx3kaSNGaYMbrVtEsafmLBb0LUztw1+wFjUYH3nRkg6mrQasjWp\nqMd2pMz23VodWJ+QtNiD+2wP+9h4EGVww1BR6awd1Znb8yRNB+YB59cNKmmKpAckLempFlUiA13q\n6bmdb08oEHeupBKlIcfSdGAy8ITtHUhtFX/e7pD6ykJJk6s7Ktdnu4tWKINLat0ZxqnYw26B7aty\nAs2hpOXPY2wvLhD6H0h1vxcVLjpRtKiHBrdSPFlSsVaKY+B1289JWkfSBNt3SfrHtgfVR5rqs91F\nTXUBCx0VE3ZLbD8KPFo47H8Bj5auENXAB4wuF35YkpcmZwNXS/od8NIIrwmr74i2B7AWOQu4VtKg\nLmAtjie0LPaw+0hePjwXuIuBM84lzneHLFe1epW0nXQiqW/11bafb3Vgoe9UVfUY3AXsnBJV9UI3\nxR52fzmPdLW3PulM88aks9+hnM8CW9p+3fYVti+KyTo0pKkuYKGjYkm8v2xl+0NtD6LPTQRul7SE\nVEVupu3ftjym0J+aqqoXOiqusPvLzZIOb3sQ/cz2tHyE6wxSa81ZuRJVCKVVVfWOB24qXFUvdFDs\nYfcRSS+Rjn28BlRNEmx7k/ZG1Z9yhbNjgT8FNh5n2cthDDTVBSx0V0zYIawBSZ8jlSd9BzAT+FGh\nI3khhLBKsYfdZ3Ihku3p+bu1/ePWBtR/tgW+aPvBtgcSQhhf4gq7j0iaAUwine9efha7bs3vsKIm\naomHEMKqxITdRyQtBnZz/KU2JmqJhxDaEhmH/eVeYNw1SRhjUUs8hNCK2MPuLzNINb//h+7U5u6a\nqCUeQmhFTNj95fvAScAievawQ1FRSzyE0IrYw+4jkubZnjzyM8No5bOxrzBQS3xTopZ4CGEMxITd\nRyRdDGwG3Mjg5h9xrKsmSXNsH5SL0wz9T2PgBeB8298d+9GFEMaDmLD7iKQr8s1Bf6lxrKt5krYA\n5treue2xhBD6U0zYIRQiaWvbT7c9jhBCf4pjXX1E0jaSrpf0bP76V0nvantc40VM1iGEJsWE3V9m\nADeQinpsTdrLntHqiEIIIRQRS+J9RNJDtvcY6bEQQgjdE1fY/eV5SZ+UNEHSupJOAp5re1AhhBDq\niyvsPiJpO+A7wAH5obnA56MxRQghdF9M2H1E0pWk1o9L8v3NgW/Z/nS7IwshhFBXLIn3lz2qyRrA\n9gvAXi2OJ4QQQiExYfcX5avq6s7mwIQWxxNCCKGQaP7RXy4gdeu6FhAwFfh6u0MKIYRQQuxh9xlJ\nuwGHksqT3ml7cctDCiGEUEBM2CGEEEIHxB52CCGE0AExYYcQQggdEBN2CCGE0AExYYfQIElvSnpA\n0iOSrpW0QY1YV0j6eL59qaRdVvHc90uaPIr3+HXv0cCRHh/ynJfW8L2mSTp7TccYwngVE3YIzXrZ\n9p62JwGvAZ/t/aakNTla6fyF7dNsP7aK534AOHBNB1vFX4PH1/Q5dZ4fwrgWE3YIY2c28Ef56ne2\npJ8AiyStI+l8SQskPSTpM5Cq4Ej6jqRfSLoDeEcVSNLPJO2dbx8h6X5JD0q6I9eUPx34Ur66P0jS\n2yVdl99jgaQD82u3kHS7pEWSLiWd31+l3HP9vvya04Z878L8+L9L+oP82I6SbsmvmSVp5zI/zhDG\nlyicEsIYyFfSRwE354f2BHaz/
VSeoF+0vZ+k9YB7JN1OKiu7E7ALsCWwGPh+fr0BS3o7cAnwvhxr\nM9svSvonYKntC/P7XwP8ve05krYFbgV2Bb4KzLI9XdJRwCmr8cf5tO0leXl/gaTrckncjYB7bf+F\npHNy7M/n8Z1u+z8k7Q9cDBw2yh9lCONWTNghNGsDSQ/k27OAy4GDgAW2n8qPfxiYJOnYfH8T4N3A\n+4BrnIolPCPpziGxRerMNquKZfvFId+vfBDYRVr+0ERJG+X3OCa/9mZJSxjZWZI+mm9vk8e6AHgL\n+FF+/IfAj/N7HAjM7Hnv31+N9wghDBETdgjNesX2nr0P5Ilr2ZDnnWn7jiHPO4qRl6hXdx9YwP62\nXxtmLCMug/c8/xDS1fEBtl+VdBew/krez6RttyVDfwYhhDUXe9ghtO824HNVApqknSRtSLoiPz7v\ncW9FSiTrZWA+cLCk7fNrq0zupcDEnufeDnyhuiNpj3xzFnBCfuxI4G0jjHUT0gT8qqT3MNB7HdLv\nk6n59gnAbNtLgV9Vqwd5X373Ed4jhDCMmLBDaNZwV8Ae8vhlpP3phZIeAb4HTLB9PfBk/t6VwNwV\nAtnPAZ8hLT8/CPxz/taNwDFV0hlpst4nJ7U9SkpKA/gaacJfRFoaf4rhVeO9FVhX0mLgG8C8nucs\nA/bLf4ZDgHPz4ycCp+TxLQKOHuHnE0IYRtQSDyGEEDogrrBDCCGEDogJO4QQQuiAmLBDCCGEDogJ\nO4QQQuiAmLBDCCGEDogJO4QQQuiAmLBDCCGEDogJO4QQQuiA/wed+GaSlnp66gAAAABJRU5ErkJg\ngg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n", + "\n", + "# Run classifier, using a model that is too regularized (C too low) to see\n", + "# the impact on the results\n", + "# classifier = svm.SVC(kernel='linear', C=0.01)\n", + "y_pred = classifier.fit(X_train, y_train).predict(X_test)\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "import matplotlib.pylab as pylab\n", + "pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session\n", + "\n", + "my_labels = classifier.classes_\n", + "\n", + "\n", + "def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):\n", + " fig, ax = plt.subplots()\n", + " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", + " plt.title(title)\n", + " plt.colorbar()\n", + "\n", + "\n", + " tick_marks = np.arange(len(classifier.classes_))\n", + " plt.xticks(tick_marks, my_labels, rotation=90)\n", + " plt.yticks(tick_marks, my_labels)\n", + " plt.tight_layout()\n", + " 
plt.ylabel('True label')\n", + " plt.xlabel('Predicted label')\n", + "\n", + " from matplotlib.ticker import MultipleLocator # from http://stackoverflow.com/a/19252430 comments\n", + " ax.xaxis.set_major_locator(MultipleLocator(1))\n", + " ax.yaxis.set_major_locator(MultipleLocator(1))\n", + "\n", + "\n", + "# Compute confusion matrix\n", + "cm = confusion_matrix(y_test, y_pred, labels=my_labels)\n", + "np.set_printoptions(precision=2)\n", + "print('Confusion matrix, without normalization')\n", + "print(cm)\n", + "plt.figure()\n", + "plot_confusion_matrix(cm)\n", + "\n", + "# Normalize the confusion matrix by row (i.e by the number of samples\n", + "# in each class)\n", + "cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", + "#print('Normalized confusion matrix')\n", + "#print(cm_normalized)\n", + "#plt.figure()\n", + "#plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix')\n", + "\n", + "#plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "####Something is incorrectly lining up in the confusion matrix plots\n", + "For instance the java/tcl confusion is not showing up" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "#Conclusions:\n", + "##Got up to 28/32 correct by tweaking features iteratively\n", + "This method is almost guaranteed to yield overfitting, however.\n", + "I think more data is needed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/initial_exploration.ipynb b/initial_exploration.ipynb new file mode 100644 index 0000000..290df7f --- /dev/null +++ b/initial_exploration.ipynb @@ -0,0 +1,1580 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer\n", + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB\n", + "from sklearn.pipeline import Pipeline\n", + "import pandas as pd\n", + "from os import walk\n", + "import re\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def extract_extension(string):\n", + " match = re.match('.*\\.(?P.*)$', string)\n", + " if match:\n", + " return match.groupdict()['ext']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
languagetext
0ats(*\\n** The Computer Language Benchmarks Game\\n...
1ats(*\\n** The Computer Language Benchmarks Game\\n...
2clojure;; The Computer Language Benchmarks Game\\n;; h...
3clojure;; The Computer Language Benchmarks Game\\n;; h...
4clojure;; The Computer Language Benchmarks Game\\n;; h...
5csharp/*\\n The Computer Language Benchmarks Ga...
6csharp/* The Computer Language Benchmarks Game\\n h...
7dart/* The Computer Language Benchmarks game\\n h...
8erlang% The Computer Language Benchmarks Game\\n% htt...
9erlang% The Computer Language Benchmarks Game\\n% htt...
10fpascal(*\\n The Computer Language Benchmarks Game\\n ...
11fsharp(*\\n The Computer Language Benchmarks Ga...
12fsharp// The Computer Language Benchmarks Game\\n// ...
13fsharp// The Computer Language Benchmarks Game\\n// ...
14gcc/* The Computer Language Benchmarks Game\\n * h...
15gcc/* \\n * The Computer Language Benchmarks Game ...
16gcc// The Computer Language Benchmarks Game\\n// h...
17gcc/* The Computer Language Benchmarks Game\\n * h...
18gcc/* The Computer Language Benchmarks Game\\n * h...
19gcc/* The Computer Language Benchmarks Game\\n * h...
20ghc--\\n-- The Computer Language Benchmarks Game\\n...
21ghc--\\n-- The Computer Language Benchmarks Game\\n...
22ghc--\\n-- The Computer Language Benchmarks Game\\n...
23gnat-- The Computer Language Benchmarks Game\\n-- h...
24gnat-- The Computer Language Benchmarks Game\\n-- h...
25gnat-- The Computer Language Benchmarks Game\\n-- ...
26gnat-- The Computer Language Benchmarks Game\\n-- ...
27go/* The Computer Language Benchmarks Game\\n * h...
28go/* The Computer Language Benchmarks Game\\n * h...
29go/* The Computer Language Benchmarks Game\\n * h...
.........
893go/* The Computer Language Benchmarks Game\\n h...
894gpp/*\\n* The Computer Language Benchmarks Game\\n*...
895gpp/*\\n * The Computer Language Benchmarks Game\\n...
896gpp/*\\n* The Computer Language Benchmarks Game\\n*...
897gpp/*\\n* The Computer Language Benchmarks Game\\n*...
898gpp/*\\n* The Computer Language Benchmarks Game\\n*...
899java/**\\n * The Computer Language Benchmarks Game\\...
900java/**\\n * The Computer Language Benchmarks Game\\...
901java/**\\n * The Computer Language Benchmarks Game\\...
902java/**\\n * The Computer Language Benchmarks Game\\...
903java/**\\n * The Computer Language Benchmarks Game\\...
904java/**\\n * The Computer Language Benchmarks Game\\...
905jruby# The Computer Language Benchmarks Game\\n# htt...
906jruby# The Computer Language Benchmarks Game\\n# htt...
907lua-- The Computer Language Benchmarks Game\\n-- h...
908ocaml(* The Computer Language Benchmarks Game\\n * h...
909ocaml(* The Computer Language Benchmarks Game\\n * h...
910ocaml(* The Computer Language Benchmarks Game\\n * h...
911oz% The Computer Language Benchmarks Game ...
912perl# The Computer Language Benchmarks Game\\n# htt...
913perl# The Computer Language Benchmarks Game\\n# htt...
914python3# The Computer Language Benchmarks Game\\n# htt...
915python3# The Computer Language Benchmarks Game\\n# htt...
916racket#lang racket/base\\n\\n;;; The Computer Language...
917sbcl;;; The Computer Language Benchmarks Game\\n;;;...
918sbcl;;; The Computer Language Benchmarks Game\\n;;;...
919scala/* The Computer Language Benchmarks Game\\n h...
920vw\"* The Computer Language Benchmarks Game\\n ...
921yarv# The Computer Language Benchmarks Game\\n# htt...
922yarv# The Computer Language Benchmarks Game\\n# htt...
\n", + "

923 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " language text\n", + "0 ats (*\\n** The Computer Language Benchmarks Game\\n...\n", + "1 ats (*\\n** The Computer Language Benchmarks Game\\n...\n", + "2 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "3 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "4 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "5 csharp /*\\n The Computer Language Benchmarks Ga...\n", + "6 csharp /* The Computer Language Benchmarks Game\\n h...\n", + "7 dart /* The Computer Language Benchmarks game\\n h...\n", + "8 erlang % The Computer Language Benchmarks Game\\n% htt...\n", + "9 erlang % The Computer Language Benchmarks Game\\n% htt...\n", + "10 fpascal (*\\n The Computer Language Benchmarks Game\\n ...\n", + "11 fsharp (*\\n The Computer Language Benchmarks Ga...\n", + "12 fsharp // The Computer Language Benchmarks Game\\n// ...\n", + "13 fsharp // The Computer Language Benchmarks Game\\n// ...\n", + "14 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "15 gcc /* \\n * The Computer Language Benchmarks Game ...\n", + "16 gcc // The Computer Language Benchmarks Game\\n// h...\n", + "17 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "18 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "19 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "20 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "21 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "22 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "23 gnat -- The Computer Language Benchmarks Game\\n-- h...\n", + "24 gnat -- The Computer Language Benchmarks Game\\n-- h...\n", + "25 gnat -- The Computer Language Benchmarks Game\\n-- ...\n", + "26 gnat -- The Computer Language Benchmarks Game\\n-- ...\n", + "27 go /* The Computer Language Benchmarks Game\\n * h...\n", + "28 go /* The Computer Language Benchmarks Game\\n * h...\n", + "29 go /* The Computer Language Benchmarks Game\\n 
* h...\n", + ".. ... ...\n", + "893 go /* The Computer Language Benchmarks Game\\n h...\n", + "894 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "895 gpp /*\\n * The Computer Language Benchmarks Game\\n...\n", + "896 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "897 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "898 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "899 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "900 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "901 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "902 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "903 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "904 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "905 jruby # The Computer Language Benchmarks Game\\n# htt...\n", + "906 jruby # The Computer Language Benchmarks Game\\n# htt...\n", + "907 lua -- The Computer Language Benchmarks Game\\n-- h...\n", + "908 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "909 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "910 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "911 oz % The Computer Language Benchmarks Game ...\n", + "912 perl # The Computer Language Benchmarks Game\\n# htt...\n", + "913 perl # The Computer Language Benchmarks Game\\n# htt...\n", + "914 python3 # The Computer Language Benchmarks Game\\n# htt...\n", + "915 python3 # The Computer Language Benchmarks Game\\n# htt...\n", + "916 racket #lang racket/base\\n\\n;;; The Computer Language...\n", + "917 sbcl ;;; The Computer Language Benchmarks Game\\n;;;...\n", + "918 sbcl ;;; The Computer Language Benchmarks Game\\n;;;...\n", + "919 scala /* The Computer Language Benchmarks Game\\n h...\n", + "920 vw \"* The Computer Language Benchmarks Game\\n ...\n", + "921 yarv # The Computer Language Benchmarks Game\\n# htt...\n", + "922 yarv # The Computer 
Language Benchmarks Game\\n# htt...\n", + "\n", + "[923 rows x 2 columns]" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def load_bench_data():\n", + " text = ''\n", + " df = pd.DataFrame(columns=['language', 'text'])\n", + " files = glob.glob('bench/*/*')\n", + "\n", + " for fn in files:\n", + " try:\n", + " with open(fn) as fh:\n", + " data = {'language': extract_extension(fn),\n", + " 'text': ''.join(fh.readlines())}\n", + " if data['language'] and data['text']:\n", + " df = df.append(data, ignore_index=True)\n", + " except (IsADirectoryError, UnicodeDecodeError):\n", + " pass\n", + " return df\n", + "\n", + "df = load_bench_data()\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
languagetext
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [language, text]\n", + "Index: []" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.language == None]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# walker = os.walk('./bench/binarytrees/')\n", + "# next(walker)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def test_classifier(pipe, *split_args):\n", + " #classifier = classifierType()\n", + " pipe.fit(split_args[0], split_args[2])\n", + "# predicted = classifier.predict(X_test)\n", + " train_score = pipe.score(split_args[0], split_args[2])\n", + " test_score = pipe.score(split_args[1], split_args[3])\n", + " print('Train score: {:.3f}, Test score: {:.3f}'.format(train_score, test_score))\n", + " return pipe" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = df.text\n", + "y = df.language\n", + "args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test\n" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.9132791327913279, Test score: 0.67\n" + ] + } + ], + "source": [ + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + "# ('tfidf', TfidfTransformer()),\n", + " ('bayes', MultinomialNB())])\n", + "# spam_pipe\n", + "classifier = test_classifier(spam_pipe, *args)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['gpp', 'gpp', 'gpp', ..., 'gpp', 'gpp', 'gpp'], \n", + " dtype='0.9), but low test scores (between 
~0.05 and ~0.5).\n", + "####With the larger dataset ( > 900 files), the score was consistently between 0.65 and 0.75" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.7859078590785907, Test score: 0.5837837837837838\n" + ] + } + ], + "source": [ + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('bayes', MultinomialNB())])\n", + "# spam_pipe\n", + "classifier = test_classifier(spam_pipe, *args)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "####Adding tfidf into the pipeline made it perform significantly worse" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n", + "Wall time: 5.96 µs\n", + "Train score: 0.9728997289972899, Test score: 0.8378378378378378\n" + ] + } + ], + "source": [ + "%time\n", + "#clf = RandomForestClassifier(n_estimators=100, )#random_state=0)\n", + "#visualize_tree(clf, X, y, boundaries=False);\n", + "\n", + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('RFC', RandomForestClassifier())])\n", + "classifier = test_classifier(spam_pipe, *args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "####Random Forest with 10 estimators (default) is consistently > 0.80, usually in the range 0.8 to 0.9. With n=10, it takes a few microseconds to run on the ~900 entry dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 3 µs, sys: 1 µs, total: 4 µs\n", + "Wall time: 8.11 µs\n", + "Train score: 0.975609756097561, Test score: 0.8540540540540541\n" + ] + } + ], + "source": [ + "%time\n", + "#visualize_tree(clf, X, y, boundaries=False);\n", + "\n", + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('RFC', RandomForestClassifier())])\n", + "spam_pipe.set_params(RFC__n_estimators=1000)\n", + "classifier = test_classifier(spam_pipe, *args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "####Not significantly better for n_estimators=1000, but it does take about 30 seconds to run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "C (.gcc, .c)\n", + "C#\n", + "Common Lisp (.sbcl)\n", + "Clojure\n", + "Haskell\n", + "Java\n", + "JavaScript\n", + "OCaml\n", + "Perl\n", + "PHP (.hack, .php)\n", + "Python\n", + "Ruby (.jruby, .yarv)\n", + "Scala\n", + "Scheme (.racket)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'c': 'C',\n", + " 'clojure': 'Clojure',\n", + " 'csharp': 'C#',\n", + " 'gcc': 'C',\n", + " 'hack': 'PHP',\n", + " 'haskell': 'Haskell',\n", + " 'java': 'Java',\n", + " 'javascript': 'JavaScript',\n", + " 'jruby': 'Ruby',\n", + " 'ocaml': 'OCaml',\n", + " 'perl': 'Perl',\n", + " 'php': 'PHP',\n", + " 'python3': 'Python',\n", + " 'racket': 'Scheme',\n", + " 'sbcl': 'Common Lisp',\n", + " 'scala': 'Scala',\n", + " 'yarv': 'Ruby'}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "extensions = {'C' : ['gcc', 'c'],\n", + " 'C#' : 'csharp',\n", + " 'Common Lisp' : 'sbcl',\n", + " 'Clojure' : 'clojure',\n", + " 
'Haskell' : 'haskell',\n", + " 'Java' : 'java',\n", + " 'JavaScript' : 'javascript',\n", + " 'OCaml' : 'ocaml',\n", + " 'Perl' : 'perl',\n", + " 'PHP' : ['hack','php'],\n", + " 'Python' : 'python3',\n", + " 'Ruby' : ['jruby', 'yarv'],\n", + " 'Scala' : 'scala',\n", + " 'Scheme' : 'racket',\n", + " \n", + "}\n", + "\n", + "ext_lookup = {}\n", + "for key, value in extensions.items():\n", + " \"\"\"Flip the dictionary around\"\"\"\n", + " if type(value) == type([]): #hasattr(value, '__iter__'):\n", + " for value2 in value:\n", + " ext_lookup[value2] = key\n", + " else:\n", + " ext_lookup[value] = key\n", + "ext_lookup" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['ats', 'clojure', 'csharp', 'dart', 'erlang', 'fpascal', 'fsharp',\n", + " 'gcc', 'ghc', 'gnat', 'go', 'gpp', 'hack', 'ifc', 'java',\n", + " 'javascript', 'jruby', 'lua', 'ocaml', 'oz', 'perl', 'php',\n", + " 'python3', 'racket', 'rust', 'sbcl', 'scala', 'vw', 'yarv', 'cint',\n", + " 'h', 'javasteady', 'parrot'], dtype=object)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.language.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# from sklearn.tree import DecisionTreeClassifier, export_graphviz\n", + "# classifier = test_classifier(DecisionTreeClassifier(criterion='entropy'), *args)\n", + "# export_graphviz(classifier, out_file='tree.dot') " + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
itemlanguagetextguess
01clojureNaNNaN
12clojureNaNNaN
23clojureNaNNaN
34clojureNaNNaN
45pythonNaNNaN
\n", + "
" + ], + "text/plain": [ + " item language text guess\n", + "0 1 clojure NaN NaN\n", + "1 2 clojure NaN NaN\n", + "2 3 clojure NaN NaN\n", + "3 4 clojure NaN NaN\n", + "4 5 python NaN NaN" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data = pd.read_csv('./test.csv', names=['item', 'language', 'text', 'guess'])\n", + "test_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
itemlanguagetextguess
01clojure(defn cf-settings\\n \"Setup settings for campf...NaN
12clojurevar _ = require('lodash'),\\n fs = require('...NaN
23clojure/* Riot v2.0.8, @license MIT, (c) 2015 Muut In...NaN
34clojurevar r = riot.route = function(arg) {\\n //...NaN
45pythonmodule ActiveJob\\n module Core\\n extend Ac...NaN
\n", + "
" + ], + "text/plain": [ + " item language text guess\n", + "0 1 clojure (defn cf-settings\\n \"Setup settings for campf... NaN\n", + "1 2 clojure var _ = require('lodash'),\\n fs = require('... NaN\n", + "2 3 clojure /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... NaN\n", + "3 4 clojure var r = riot.route = function(arg) {\\n //... NaN\n", + "4 5 python module ActiveJob\\n module Core\\n extend Ac... NaN" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_files = glob.glob('./test/*')\n", + "\n", + "for (idx, fn) in enumerate(test_files):\n", + "# try:\n", + " with open(fn) as fh:\n", + "# df.loc[extract_extension(fn)] = ''.join(fh.readlines())\n", + "# data = {'language': extract_extension(fn),\n", + "# 'text': ''.join(fh.readlines())}\n", + "# if data['language'] and data['text']:\n", + "# df = df.append(data, ignore_index = True)\n", + "# except (IsADirectoryError, UnicodeDecodeError):\n", + "# pass\n", + " #test_data['text'][idx] = ''.join(fh.readlines())\n", + " test_data.ix[idx, 'text'] = ''.join(fh.readlines())\n", + "test_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.9227642276422764, Test score: 0.7135135135135136\n" + ] + } + ], + "source": [ + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + "# ('tfidf', TfidfTransformer()),\n", + " ('bayes', MultinomialNB())])\n", + "# spam_pipe\n", + "classifier = test_classifier(spam_pipe, *args)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0625" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text']))\n", + "correct = 
test_data[test_data.language == test_data.guess]\n", + "len(correct)/len(test_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
itemlanguagetextguess
01clojure(defn cf-settings\\n \"Setup settings for campf...clojure
12clojurevar _ = require('lodash'),\\n fs = require('...javascript
23clojure/* Riot v2.0.8, @license MIT, (c) 2015 Muut In...clojure
34clojurevar r = riot.route = function(arg) {\\n //...lua
45pythonmodule ActiveJob\\n module Core\\n extend Ac...yarv
56pythonrequire 'formula'\\n\\nclass A52dec < Formula\\n ...clojure
67pythonmodule Fluent\\n class Input\\n include Conf...jruby
78python{-# LANGUAGE ScopedTypeVariables, FlexibleInst...ghc
89javascriptreverseDependencies :: ModuleGraph -> M.Map Mo...ghc
910javascript{- git-annex extra config files\\n -\\n - Copyri...clojure
1011javascript(define subst-f\\n (lambda (new old l)\\n (c...sbcl
1112javascript(ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...clojure
1213ruby(define add1\\n (lambda (n) (+ n 1)))racket
1314ruby(define-lib-primitive (length lst)\\n (if (nul...racket
1415ruby/**\\n * Interface to represent a persistence s...java
1516haskell/*\\n * Copyright 2002-2008 the original author...gnat
1617haskellpackage com.github.pathikrit\\n\\nimport scala.a...scala
1718haskell/* sbt -- Simple Build Tool\\n * Copyright 2010...scala
1819schemeproc isaac::mix {a b c d e f g h} {\\n set a...racket
1920schemeproc twitter::follow {nick uhost hand chan arg...python3
2021schemeclass View\\n{\\n /**\\n * Data available ...gpp
2122javapublic function formatLocalized($format)\\n...lua
2223java(extend-type String\\n Person\\n (first-name [...clojure
2324scalaclass Application extends App {\\n\\t/**\\n\\t * @...php
2425scalatype name = string\\n\\nlet compare_label label1...ocaml
2526tcllet search_compiler_libs () =\\n prerr_endline...ocaml
2627tcl(require '[overtone.live :as overtone])\\n\\n(de...clojure
2728phpfrom pkgutil import iter_modules\\nfrom subproc...python3
2829phpimport re\\nimport subprocess\\n\\ndef cmd_keymap...clojure
2930phpclass NoSuchService(Exception):\\n def __ini...yarv
3031ocamlfrom collections import namedtuple\\nimport fun...rust
3132ocamlfunction errorHandler(context) {\\n return fun...javascript
\n", + "
" + ], + "text/plain": [ + " item language text \\\n", + "0 1 clojure (defn cf-settings\\n \"Setup settings for campf... \n", + "1 2 clojure var _ = require('lodash'),\\n fs = require('... \n", + "2 3 clojure /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "3 4 clojure var r = riot.route = function(arg) {\\n //... \n", + "4 5 python module ActiveJob\\n module Core\\n extend Ac... \n", + "5 6 python require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "6 7 python module Fluent\\n class Input\\n include Conf... \n", + "7 8 python {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "8 9 javascript reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "9 10 javascript {- git-annex extra config files\\n -\\n - Copyri... \n", + "10 11 javascript (define subst-f\\n (lambda (new old l)\\n (c... \n", + "11 12 javascript (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "12 13 ruby (define add1\\n (lambda (n) (+ n 1))) \n", + "13 14 ruby (define-lib-primitive (length lst)\\n (if (nul... \n", + "14 15 ruby /**\\n * Interface to represent a persistence s... \n", + "15 16 haskell /*\\n * Copyright 2002-2008 the original author... \n", + "16 17 haskell package com.github.pathikrit\\n\\nimport scala.a... \n", + "17 18 haskell /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "18 19 scheme proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "19 20 scheme proc twitter::follow {nick uhost hand chan arg... \n", + "20 21 scheme class View\\n{\\n /**\\n * Data available ... \n", + "21 22 java public function formatLocalized($format)\\n... \n", + "22 23 java (extend-type String\\n Person\\n (first-name [... \n", + "23 24 scala class Application extends App {\\n\\t/**\\n\\t * @... \n", + "24 25 scala type name = string\\n\\nlet compare_label label1... \n", + "25 26 tcl let search_compiler_libs () =\\n prerr_endline... \n", + "26 27 tcl (require '[overtone.live :as overtone])\\n\\n(de... 
\n", + "27 28 php from pkgutil import iter_modules\\nfrom subproc... \n", + "28 29 php import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "29 30 php class NoSuchService(Exception):\\n def __ini... \n", + "30 31 ocaml from collections import namedtuple\\nimport fun... \n", + "31 32 ocaml function errorHandler(context) {\\n return fun... \n", + "\n", + " guess \n", + "0 clojure \n", + "1 javascript \n", + "2 clojure \n", + "3 lua \n", + "4 yarv \n", + "5 clojure \n", + "6 jruby \n", + "7 ghc \n", + "8 ghc \n", + "9 clojure \n", + "10 sbcl \n", + "11 clojure \n", + "12 racket \n", + "13 racket \n", + "14 java \n", + "15 gnat \n", + "16 scala \n", + "17 scala \n", + "18 racket \n", + "19 python3 \n", + "20 gpp \n", + "21 lua \n", + "22 clojure \n", + "23 php \n", + "24 ocaml \n", + "25 ocaml \n", + "26 clojure \n", + "27 python3 \n", + "28 clojure \n", + "29 yarv \n", + "30 rust \n", + "31 javascript " + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[4]" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def longest_run_of_capitol_letters_feature(text):\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if len(runs) == 0:\n", + " return [0]\n", + " longest = runs[-1]\n", + " return [len(longest)]\n", + "longest_run_of_capitol_letters_feature('ABCabddwAAAA absd AB sd A.AA.AAA')" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": false + }, + "outputs": [ + { 
+ "data": { + "text/plain": [ + "[0.5]" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def percent_periods_feature(text):\n", + " \"\"\"Return percentage of text that is periods compared to total text length.\"\"\"\n", + " periods = text.count(\".\")\n", + " return [periods / len(text)]\n", + "percent_periods_feature('. . . . ')" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[3, 0.13043478260869565]" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def feature_vector(text):\n", + " return longest_run_of_capitol_letters_feature(text) + percent_periods_feature(text)\n", + "feature_vector('AAH! feature_vector... ')" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "class CustomFeaturizer:\n", + " def __init__(self, *featurizers):\n", + " self.featurizers = featurizers\n", + " \n", + " def fit(self, X, y=None):\n", + " \"\"\"All SciKit-Learn compatible transformers and classifiers have the\n", + " same interface. 
`fit` always returns the same object.\"\"\"\n", + " return self\n", + " \n", + " def transform(self, X):\n", + " \"\"\"Given a list of original data, return a list of feature vectors.\"\"\"\n", + " fvs = []\n", + " for datum in X:\n", + " fv = np.array([f(datum) for f in self.featurizers])\n", + " fvs.append(fv.reshape(1, -1)[0])\n", + " return np.array(fvs)\n", + " \n", + "featurizer = CustomFeaturizer(longest_run_of_capitol_letters_feature,\n", + " percent_periods_feature)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 5.00000000e+00, 2.73597811e-03],\n", + " [ 1.10000000e+01, 2.07530388e-03],\n", + " [ 2.00000000e+00, 5.37190083e-03],\n", + " ..., \n", + " [ 1.00000000e+00, 1.06312292e-02],\n", + " [ 6.00000000e+00, 1.95865071e-02],\n", + " [ 6.00000000e+00, 2.19594595e-02]])" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer.transform(df.text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/lang_classifier.py b/lang_classifier.py new file mode 100644 index 0000000..0802789 --- /dev/null +++ b/lang_classifier.py @@ -0,0 +1,204 @@ +import re +import glob + +from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer +from sklearn.cross_validation import train_test_split +from sklearn.naive_bayes import MultinomialNB +from sklearn.pipeline 
import Pipeline +from sklearn.ensemble import RandomForestClassifier +from sklearn.base import TransformerMixin +import pandas as pd +import numpy as np +import pickle +import os.path +import collections + +# TODO: Future Ideas: +# use n-grams? +# inverse_transform or otherwise make an best-case exemplar +# web scraping +# need an example of tcl + +def make_extension_dict(): + """ + Returns a dictionary for translating benchmark file extensions + to the name of the programming language + """ + extensions = {'c': ['gcc', 'c'], + 'csharp': 'csharp', + 'commonlisp': 'sbcl', + 'clojure': 'clojure', + 'haskell': 'ghc', + 'java': 'java', + 'javascript': 'javascript', + 'ocaml': 'ocaml', + 'perl': 'perl', + 'php': ['hack', 'php'], + 'python': 'python3', + 'ruby': ['jruby', 'yarv'], + 'scala': 'scala', + 'scheme': 'racket', + 'tcl': 'tcl', + } + + ext_lookup = {} + for key, value in extensions.items(): + """Flip the dictionary around""" + if type(value) == type([]): # hasattr(value, '__iter__'): + for value2 in value: + ext_lookup[value2] = key + else: + ext_lookup[value] = key + return ext_lookup + + +def extract_extension(string): + match = re.match('.*\.(?P.*)$', string) + if match: + return match.groupdict()['ext'] + + +def unpickle(name, reload=False): + if os.path.isfile(name) and not reload: + df = pickle.load(open("bench.data", "rb")) + return df + else: + return None + + +def load_bench_data(reload=False): + df = unpickle('bench.data', reload=reload) + # if os.path.isfile("bench.data") and not reload: + # df = pickle.load( open( "bench.data", "rb" ) ) + # return df + if df is not None: + return df + df = pd.DataFrame(columns=['language', 'text']) + files = glob.glob('bench/*/*') + exts = make_extension_dict() + for fn in files: + try: + with open(fn) as fh: + data = {'language': exts.get(extract_extension(fn), None), + 'text': ''.join(fh.readlines())} + if data['language'] and data['text']: + df = df.append(data, ignore_index=True) + except (IsADirectoryError, 
UnicodeDecodeError): + pass + pickle.dump(df, open("bench.data", "wb")) + return df + + +def load_test_data(): + test_data = pd.read_csv('./test.csv', + names=['item', 'language', 'text', 'guess']) + test_data = test_data.set_index('item') + test_files = glob.glob('./test/*') + + for filename in test_files: + # try: + with open(filename) as fh: + # df.loc[extract_extension(fn)] = ''.join(fh.readlines()) + # data = {'language': extract_extension(fn), + # 'text': ''.join(fh.readlines())} + # if data['language'] and data['text']: + # df = df.append(data, ignore_index = True) + # except (IsADirectoryError, UnicodeDecodeError): + # pass + # test_data['text'][idx] = ''.join(fh.readlines()) + num = re.match('.*/(?P\d+)$', filename).groupdict()['num'] + # FIXME: Do this with os module instead of regex + test_data.ix[int(num), 'text'] = ''.join(fh.readlines()) + return test_data + + +def assess_classifier(pipe, *split_args): + # print(split_args[0])#, len(split_args[2])) + pipe.fit(split_args[0], split_args[2]) + train_score = pipe.score(split_args[0], split_args[2]) + test_score = pipe.score(split_args[1], split_args[3]) + print('Train score: {:.3f}, Test score: {:.3f}'.format(train_score, + test_score)) + return pipe + + +def longest_run_of_caps_feature(text): + """Find the longest run of capitol letters and return their length.""" + runs = sorted(re.findall(r"[A-Z]+", text), key=len) + if runs: + return len(runs[-1]) + else: + return 0 + + +def percent_character_feature(char): + """Return percentage of text that is a particular char compared to total text length.""" + + def feature_fn(text): + periods = text.count(char) + return periods / len(text) + + return feature_fn + + + +class FunctionFeaturizer(TransformerMixin): + def __init__(self, *featurizers): + self.featurizers = featurizers + + def fit(self, X, y=None): + """All SciKit-Learn compatible transformers and classifiers have the + same interface. 
`fit` always returns the same object.""" + return self + + def flatten(self, x): + if isinstance(x, collections.Iterable): + return [a for i in x for a in self.flatten(i)] + else: + return [x] + + def transform(self, X): + """Given a list of original data, return a list of feature vectors.""" + fvs = [] + for datum in X: + fv = [f(datum) for f in self.featurizers] + # if type(fv) is type([1, 2, 3]): # FIXME: Is there a cleaner way? + # fvs.extend(fv) + # else: + # fvs.append(fv) + # fvs = self.flatten(fvs) # fvs = [item for sublist in fvs for item in sublist] + # print('fvs ==> ', fvs) + fvs.append(fv) + return np.array(fvs) + + +if __name__ == '__main__': + df = load_bench_data(reload=True) + X = df.text + y = df.language + test_data = load_test_data() + + args = train_test_split(X, y, + test_size=0.2, ) # random_state=0) # X_train, X_test, y_train, y_test + + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('bayes', MultinomialNB())]) + print(spam_pipe) + classifier = assess_classifier(spam_pipe, *args) + classifier.predict(args[1]) + + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('tfidf', TfidfTransformer()), + ('RFC', RandomForestClassifier())]) + spam_pipe.set_params(RFC__n_estimators=1000) + print(spam_pipe) + classifier = assess_classifier(spam_pipe, *args) + + test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text'])) + correct = test_data[test_data.language == test_data.guess] + print('Proportion of test data correctly labeled: {:.3f}'.format( + len(correct) / len(test_data))) + print(test_data) + + featurizer = FunctionFeaturizer(longest_run_of_caps_feature, + percent_character_feature('.')) diff --git a/requirements.txt b/requirements.txt index 473a3b2..c00e8ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,7 @@ scipy pandas numpy matplotlib +pytest +ipython[notebook] +beautifulsoup4 +requests diff --git a/test.csv b/test.csv index 7d007aa..1fccfd0 100644 --- a/test.csv +++ b/test.csv @@ -1,3 
+1,4 @@ +0,perl 1,clojure 2,clojure 3,clojure diff --git a/test/0 b/test/0 new file mode 100644 index 0000000..4e22c3d --- /dev/null +++ b/test/0 @@ -0,0 +1,100 @@ +use warnings; +use strict; + +my $initial = join ",", qw(abc def ghi); +my %reverse = qw(X O O X); + +# In list context, returns best move, +# In scalar context, returns the score of best move. +my %cache; +sub best_move { + my ($b, $me) = @_; + if( exists $cache{$b,$me,wantarray} ) { + return $cache{$b,$me,wantarray}; + } elsif( my $s = score( $b, $me ) ) { + return $cache{$b,$me,wantarray} = (wantarray ? undef : $s); + } + my $him = $reverse{$me}; + my ($best, @best) = (-999); + for my $m (moves($b)) { + (my $with_m = $b) =~ s/$m/$me/ or die; + # The || operator supplies scalar context to best_move(...) + my $s = -(score($with_m, $him) || best_move($with_m, $him)); + if( $s > $best ) { + ($best, @best) = ($s, $m); + } elsif( $s == $best ) { + push @best, $m; + } + } + $cache{$b,$me,wantarray} = wantarray ? $best[rand @best] : $best; +} + +my $winner = q[([XOxo])(?:\1\1|...\1...\1|..\1..\1|....\1....\1)]; +sub score { + my ($b, $me) = @_; + $b =~ m/$winner/o or return 0; + return $1 eq $me ? 
+1 : -1; +} + +sub moves { + my ($b) = @_; + $b =~ /([^xoXO,\n])/g; +} + +sub print_board { + my ($b) = @_; + $b =~ s/\B/|/g; + $b =~ s/,/\n-+-+-\n/g; + print $b, "\n"; +} + +sub prompt { + my ($b, $color) = @_; + my @moves = moves($b); + unless( @moves ) { + return; + } + while( 1 ) { + print "Place your $color on one of [@moves]: "; + defined(my $m = <>) or return; + chomp($m); + return $m if grep $m eq $_, @moves; + } +} + +my @players = ( + { whose => "your", name => "You", + verb => "You place", get_move => \&prompt }, + { whose => "the computer's", name => "Computer", + verb => "The computer places", get_move => \&best_move }, +); +my $whose_turn = int rand 2; + +my $color = "X"; +my $b = $initial; + +while( 1 ) { + my $p = $players[$whose_turn]; + print_board($b); + print "It is $p->{whose} turn.\n"; + # The parens around $m supply list context to the right side + # or the = operator, which causes sub best_move to return the + # best move, rather than the score of the best move. + my ( $m ) = $p->{get_move}->($b, $color); + if( $m ) { + print "$p->{verb} an $color at $m\n"; + $b =~ s/$m/$color/; + my $s = score($b, $color) or next; + print_board($b); + print "$p->{name} ", $s > 0 ? 
"won!\n" : "lost!\n"; + } else { + print "$p->{name} cannot move.\n"; + } + print "Game over.\nNew Game...\n"; + ($b, $color, $whose_turn) = ($initial, "X", int rand 2); + redo; +} continue { + $color = $reverse{$color}; + $whose_turn = !$whose_turn; +} + diff --git a/test_lang_classifier.py b/test_lang_classifier.py new file mode 100644 index 0000000..9c6b4f0 --- /dev/null +++ b/test_lang_classifier.py @@ -0,0 +1,109 @@ +from lang_classifier import * + + +def test_make_extension_dict(): + ext_lookup = make_extension_dict() + assert ext_lookup['php'] == 'php' + # assert ext_lookup['NONExISTNTANT!'] is None + + +def test_extract_extension(): + assert extract_extension('abc/def/ghi.jkl') == 'jkl' + assert extract_extension('abc/def/ghi') == None + + +def test_load_bench_data(): + df = load_bench_data() # reload=True) + print(df.head(5)) + assert df['language'][2] == 'clojure' + + +def test_bench_data_only_contains_desired_languages(): + df = load_bench_data(reload=True) + + langs = ['clojure', 'python', 'javascript', 'ruby', 'haskell', 'scheme', + 'java', 'scala', + 'tcl', # in reqs + tests, but no examples in bench + 'c', 'csharp', 'commonlisp', 'perl', # in reqs + bench, no tests + 'php', 'ocaml'] + training = df['language'].unique() + for lang in langs: + assert lang in training # We have examples for each required language + for lang in training: + assert lang in langs # We don't train for any non-required languages + + +def test_load_test_data(): + test_data = load_test_data() + assert test_data['language'][1] == 'clojure' + assert test_data['text'][2][:16] == '(ns my-cli.core)' + +def setup(): + df = load_bench_data() + X = df.text + y = df.language + test_data = load_test_data() + args = train_test_split(X, y, test_size=0.2, random_state=0) + # X_train, X_test, y_train, y_test + + return df, X, y, test_data, args + +def test_assess_classifier(): + df, X, y, test_data, args = setup() + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('bayes', 
MultinomialNB())]) + classifier = assess_classifier(spam_pipe, *args) + c = classifier.predict(X) + assert len(c) == 585 # 923 total + assert c[3] == 'csharp' + +def test_longest_run_of_caps_feature(): + assert longest_run_of_caps_feature( + 'ABCabddwAAAA absd AB sd A.AA.AAA') == 4 + +def test_percent_periods_feature(): + assert percent_character_feature('.')('. . . . ') == 0.5 + +def test_featurizer(): + featurizer = FunctionFeaturizer(longest_run_of_caps_feature, + percent_character_feature('.')) + np.testing.assert_equal(featurizer.transform(['AAH! feature....']) + # , np.array([[ 3. , 0.25]])) + , np.array([ 3. , 0.25])) + + + +""" + df = load_bench_data() + X = df.text + y = df.language + test_data = load_bench_data() + + args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test + + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('bayes', MultinomialNB())]) + print(spam_pipe) + classifier = test_classifier(spam_pipe, *args) + classifier.predict(args[1].iloc[2]) + + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('tfidf', TfidfTransformer()), + ('RFC', RandomForestClassifier())]) + spam_pipe.set_params(RFC__n_estimators=1000) + print(spam_pipe) + classifier = test_classifier(spam_pipe, *args) + + + test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text'])) + correct = test_data[test_data.language == test_data.guess] + print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct)/len(test_data))) + + longest_run_of_capitol_letters_feature('ABCabddwAAAA absd AB sd A.AA.AAA') + percent_periods_feature('. . . . ') + feature_vector('AAH! feature_vector... ') + + featurizer = CustomFeaturizer(longest_run_of_capitol_letters_feature, + percent_periods_feature) + +"""