From d7756e818b836cbb2656ee8f39107723f58032d4 Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Wed, 3 Jun 2015 23:17:42 -0400 Subject: [PATCH 01/12] Add initial exploration, achieving 0.8 to 0.9 scores on the "bench" dataset --- .gitignore | 2 + initial_exploration.ipynb | 785 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 787 insertions(+) create mode 100644 initial_exploration.ipynb diff --git a/.gitignore b/.gitignore index f00dbf2..8143a15 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,5 @@ docs/_build/ # PyBuilder target/ +benchmarksgame-scm-latest.tar.gz +bench diff --git a/initial_exploration.ipynb b/initial_exploration.ipynb new file mode 100644 index 0000000..d41dda1 --- /dev/null +++ b/initial_exploration.ipynb @@ -0,0 +1,785 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer\n", + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB\n", + "from sklearn.pipeline import Pipeline\n", + "import pandas as pd\n", + "from os import walk\n", + "import re\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def extract_extension(string):\n", + " match = re.match('.*\\.(?P.*)$', string)\n", + " if match:\n", + " return match.groupdict()['ext']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
languagetext
0ats(*\\n** The Computer Language Benchmarks Game\\n...
1ats(*\\n** The Computer Language Benchmarks Game\\n...
2clojure;; The Computer Language Benchmarks Game\\n;; h...
3clojure;; The Computer Language Benchmarks Game\\n;; h...
4clojure;; The Computer Language Benchmarks Game\\n;; h...
5csharp/*\\n The Computer Language Benchmarks Ga...
6csharp/* The Computer Language Benchmarks Game\\n h...
7dart/* The Computer Language Benchmarks game\\n h...
8erlang% The Computer Language Benchmarks Game\\n% htt...
9erlang% The Computer Language Benchmarks Game\\n% htt...
10fpascal(*\\n The Computer Language Benchmarks Game\\n ...
11fsharp(*\\n The Computer Language Benchmarks Ga...
12fsharp// The Computer Language Benchmarks Game\\n// ...
13fsharp// The Computer Language Benchmarks Game\\n// ...
14gcc/* The Computer Language Benchmarks Game\\n * h...
15gcc/* \\n * The Computer Language Benchmarks Game ...
16gcc// The Computer Language Benchmarks Game\\n// h...
17gcc/* The Computer Language Benchmarks Game\\n * h...
18gcc/* The Computer Language Benchmarks Game\\n * h...
19gcc/* The Computer Language Benchmarks Game\\n * h...
20ghc--\\n-- The Computer Language Benchmarks Game\\n...
21ghc--\\n-- The Computer Language Benchmarks Game\\n...
22ghc--\\n-- The Computer Language Benchmarks Game\\n...
23gnat-- The Computer Language Benchmarks Game\\n-- h...
24gnat-- The Computer Language Benchmarks Game\\n-- h...
25gnat-- The Computer Language Benchmarks Game\\n-- ...
26gnat-- The Computer Language Benchmarks Game\\n-- ...
27go/* The Computer Language Benchmarks Game\\n * h...
28go/* The Computer Language Benchmarks Game\\n * h...
29go/* The Computer Language Benchmarks Game\\n * h...
.........
893go/* The Computer Language Benchmarks Game\\n h...
894gpp/*\\n* The Computer Language Benchmarks Game\\n*...
895gpp/*\\n * The Computer Language Benchmarks Game\\n...
896gpp/*\\n* The Computer Language Benchmarks Game\\n*...
897gpp/*\\n* The Computer Language Benchmarks Game\\n*...
898gpp/*\\n* The Computer Language Benchmarks Game\\n*...
899java/**\\n * The Computer Language Benchmarks Game\\...
900java/**\\n * The Computer Language Benchmarks Game\\...
901java/**\\n * The Computer Language Benchmarks Game\\...
902java/**\\n * The Computer Language Benchmarks Game\\...
903java/**\\n * The Computer Language Benchmarks Game\\...
904java/**\\n * The Computer Language Benchmarks Game\\...
905jruby# The Computer Language Benchmarks Game\\n# htt...
906jruby# The Computer Language Benchmarks Game\\n# htt...
907lua-- The Computer Language Benchmarks Game\\n-- h...
908ocaml(* The Computer Language Benchmarks Game\\n * h...
909ocaml(* The Computer Language Benchmarks Game\\n * h...
910ocaml(* The Computer Language Benchmarks Game\\n * h...
911oz% The Computer Language Benchmarks Game ...
912perl# The Computer Language Benchmarks Game\\n# htt...
913perl# The Computer Language Benchmarks Game\\n# htt...
914python3# The Computer Language Benchmarks Game\\n# htt...
915python3# The Computer Language Benchmarks Game\\n# htt...
916racket#lang racket/base\\n\\n;;; The Computer Language...
917sbcl;;; The Computer Language Benchmarks Game\\n;;;...
918sbcl;;; The Computer Language Benchmarks Game\\n;;;...
919scala/* The Computer Language Benchmarks Game\\n h...
920vw\"* The Computer Language Benchmarks Game\\n ...
921yarv# The Computer Language Benchmarks Game\\n# htt...
922yarv# The Computer Language Benchmarks Game\\n# htt...
\n", + "

923 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " language text\n", + "0 ats (*\\n** The Computer Language Benchmarks Game\\n...\n", + "1 ats (*\\n** The Computer Language Benchmarks Game\\n...\n", + "2 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "3 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "4 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "5 csharp /*\\n The Computer Language Benchmarks Ga...\n", + "6 csharp /* The Computer Language Benchmarks Game\\n h...\n", + "7 dart /* The Computer Language Benchmarks game\\n h...\n", + "8 erlang % The Computer Language Benchmarks Game\\n% htt...\n", + "9 erlang % The Computer Language Benchmarks Game\\n% htt...\n", + "10 fpascal (*\\n The Computer Language Benchmarks Game\\n ...\n", + "11 fsharp (*\\n The Computer Language Benchmarks Ga...\n", + "12 fsharp // The Computer Language Benchmarks Game\\n// ...\n", + "13 fsharp // The Computer Language Benchmarks Game\\n// ...\n", + "14 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "15 gcc /* \\n * The Computer Language Benchmarks Game ...\n", + "16 gcc // The Computer Language Benchmarks Game\\n// h...\n", + "17 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "18 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "19 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "20 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "21 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "22 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "23 gnat -- The Computer Language Benchmarks Game\\n-- h...\n", + "24 gnat -- The Computer Language Benchmarks Game\\n-- h...\n", + "25 gnat -- The Computer Language Benchmarks Game\\n-- ...\n", + "26 gnat -- The Computer Language Benchmarks Game\\n-- ...\n", + "27 go /* The Computer Language Benchmarks Game\\n * h...\n", + "28 go /* The Computer Language Benchmarks Game\\n * h...\n", + "29 go /* The Computer Language Benchmarks Game\\n 
* h...\n", + ".. ... ...\n", + "893 go /* The Computer Language Benchmarks Game\\n h...\n", + "894 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "895 gpp /*\\n * The Computer Language Benchmarks Game\\n...\n", + "896 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "897 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "898 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "899 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "900 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "901 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "902 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "903 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "904 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "905 jruby # The Computer Language Benchmarks Game\\n# htt...\n", + "906 jruby # The Computer Language Benchmarks Game\\n# htt...\n", + "907 lua -- The Computer Language Benchmarks Game\\n-- h...\n", + "908 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "909 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "910 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "911 oz % The Computer Language Benchmarks Game ...\n", + "912 perl # The Computer Language Benchmarks Game\\n# htt...\n", + "913 perl # The Computer Language Benchmarks Game\\n# htt...\n", + "914 python3 # The Computer Language Benchmarks Game\\n# htt...\n", + "915 python3 # The Computer Language Benchmarks Game\\n# htt...\n", + "916 racket #lang racket/base\\n\\n;;; The Computer Language...\n", + "917 sbcl ;;; The Computer Language Benchmarks Game\\n;;;...\n", + "918 sbcl ;;; The Computer Language Benchmarks Game\\n;;;...\n", + "919 scala /* The Computer Language Benchmarks Game\\n h...\n", + "920 vw \"* The Computer Language Benchmarks Game\\n ...\n", + "921 yarv # The Computer Language Benchmarks Game\\n# htt...\n", + "922 yarv # The Computer 
Language Benchmarks Game\\n# htt...\n", + "\n", + "[923 rows x 2 columns]" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = ''\n", + "df = pd.DataFrame(columns=['language', 'text'])\n", + "#files = glob.glob('bench/binarytrees/*')\n", + "files = glob.glob('bench/*/*')\n", + "# ['./bench/binarytrees/binarytrees.clojure',\n", + "# './bench/binarytrees/binarytrees.csharp']\n", + "\n", + "for fn in files:\n", + " try:\n", + " with open(fn) as fh:\n", + " # df.loc[extract_extension(fn)] = ''.join(fh.readlines())\n", + " data = {'language': extract_extension(fn),\n", + " 'text': ''.join(fh.readlines())}\n", + " if data['language'] and data['text']:\n", + " df = df.append(data, ignore_index = True)\n", + " except (IsADirectoryError, UnicodeDecodeError):\n", + " pass\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
languagetext
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [language, text]\n", + "Index: []" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.language == None]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# walker = os.walk('./bench/binarytrees/')\n", + "# next(walker)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def test_classifier(pipe, *split_args):\n", + " #classifier = classifierType()\n", + " pipe.fit(split_args[0], split_args[2])\n", + "# predicted = classifier.predict(X_test)\n", + " train_score = pipe.score(split_args[0], split_args[2])\n", + " test_score = pipe.score(split_args[1], split_args[3])\n", + " print('Train score: {}, Test score: {}'.format(train_score, test_score))\n", + " return pipe" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = df.text\n", + "y = df.language\n", + "args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.9186991869918699, Test score: 0.7243243243243244\n" + ] + } + ], + "source": [ + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + "# ('tfidf', TfidfTransformer()),\n", + " ('bayes', MultinomialNB())])\n", + "# spam_pipe\n", + "classifier = test_classifier(spam_pipe, *args)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['yarv', 'yarv', 'yarv', ..., 'yarv', 'yarv', 'yarv'], \n", + " dtype='0.9), but low test 
scores (between ~0.05 and ~0.5).\n", + "####With the larger dataset ( > 900 files), the score was consistently between 0.65 and 0.75" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.7953929539295393, Test score: 0.5783783783783784\n" + ] + } + ], + "source": [ + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + "# ('tfidf', TfidfTransformer()),\n", + " ('bayes', MultinomialNB())])\n", + "# spam_pipe\n", + "classifier = test_classifier(spam_pipe, *args)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "####Adding tfidf into the pipeline made it perform significantly worse" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2 µs, sys: 0 ns, total: 2 µs\n", + "Wall time: 5.01 µs\n", + "Train score: 0.9688346883468835, Test score: 0.8594594594594595\n" + ] + } + ], + "source": [ + "%time\n", + "#clf = RandomForestClassifier(n_estimators=100, )#random_state=0)\n", + "#visualize_tree(clf, X, y, boundaries=False);\n", + "\n", + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('RFC', RandomForestClassifier())])\n", + "classifier = test_classifier(spam_pipe, *args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "####Random Forest with 10 estimators (default) is consistently > 0.80, usually in the range 0.8 to 0.9. With n=10, it takes a few microseconds to run on the ~900 entry dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2 µs, sys: 1 µs, total: 3 µs\n", + "Wall time: 5.01 µs\n", + "Train score: 0.9728997289972899, Test score: 0.8756756756756757\n" + ] + } + ], + "source": [ + "%time\n", + "#visualize_tree(clf, X, y, boundaries=False);\n", + "\n", + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('RFC', RandomForestClassifier())])\n", + "spam_pipe.set_params(RFC__n_estimators=1000)\n", + "classifier = test_classifier(spam_pipe, *args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "####Not significantly better for n_estimators=1000, but it does take about 30 seconds to run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# from sklearn.tree import DecisionTreeClassifier, export_graphviz\n", + "# classifier = test_classifier(DecisionTreeClassifier(criterion='entropy'), *args)\n", + "# export_graphviz(classifier, out_file='tree.dot') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 1f47202538efe2e3ad898b7f498dd940e8be85a7 Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Fri, 5 Jun 2015 12:03:17 -0400 Subject: [PATCH 02/12] Add initial featurizer for testing --- initial_exploration.ipynb | 902 ++++++++++++++++++++++++++++++-------- 1 file changed, 729 insertions(+), 173 deletions(-) diff --git a/initial_exploration.ipynb b/initial_exploration.ipynb index d41dda1..d01435e 100644 --- a/initial_exploration.ipynb +++ b/initial_exploration.ipynb @@ -3,9 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer\n", @@ -21,9 +19,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def extract_extension(string):\n", @@ -35,18 +31,16 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "" + ] }, { "cell_type": "code", - "execution_count": 50, - "metadata": { - "collapsed": false - }, + "execution_count": 3, + "metadata": {}, "outputs": [ { "data": { @@ -370,77 +364,10 @@ "\n", "

923 rows × 2 columns

\n", "" - ], - "text/plain": [ - " language text\n", - "0 ats (*\\n** The Computer Language Benchmarks Game\\n...\n", - "1 ats (*\\n** The Computer Language Benchmarks Game\\n...\n", - "2 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", - "3 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", - "4 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", - "5 csharp /*\\n The Computer Language Benchmarks Ga...\n", - "6 csharp /* The Computer Language Benchmarks Game\\n h...\n", - "7 dart /* The Computer Language Benchmarks game\\n h...\n", - "8 erlang % The Computer Language Benchmarks Game\\n% htt...\n", - "9 erlang % The Computer Language Benchmarks Game\\n% htt...\n", - "10 fpascal (*\\n The Computer Language Benchmarks Game\\n ...\n", - "11 fsharp (*\\n The Computer Language Benchmarks Ga...\n", - "12 fsharp // The Computer Language Benchmarks Game\\n// ...\n", - "13 fsharp // The Computer Language Benchmarks Game\\n// ...\n", - "14 gcc /* The Computer Language Benchmarks Game\\n * h...\n", - "15 gcc /* \\n * The Computer Language Benchmarks Game ...\n", - "16 gcc // The Computer Language Benchmarks Game\\n// h...\n", - "17 gcc /* The Computer Language Benchmarks Game\\n * h...\n", - "18 gcc /* The Computer Language Benchmarks Game\\n * h...\n", - "19 gcc /* The Computer Language Benchmarks Game\\n * h...\n", - "20 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", - "21 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", - "22 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", - "23 gnat -- The Computer Language Benchmarks Game\\n-- h...\n", - "24 gnat -- The Computer Language Benchmarks Game\\n-- h...\n", - "25 gnat -- The Computer Language Benchmarks Game\\n-- ...\n", - "26 gnat -- The Computer Language Benchmarks Game\\n-- ...\n", - "27 go /* The Computer Language Benchmarks Game\\n * h...\n", - "28 go /* The Computer Language Benchmarks Game\\n * h...\n", - "29 go /* The Computer Language Benchmarks 
Game\\n * h...\n", - ".. ... ...\n", - "893 go /* The Computer Language Benchmarks Game\\n h...\n", - "894 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", - "895 gpp /*\\n * The Computer Language Benchmarks Game\\n...\n", - "896 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", - "897 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", - "898 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", - "899 java /**\\n * The Computer Language Benchmarks Game\\...\n", - "900 java /**\\n * The Computer Language Benchmarks Game\\...\n", - "901 java /**\\n * The Computer Language Benchmarks Game\\...\n", - "902 java /**\\n * The Computer Language Benchmarks Game\\...\n", - "903 java /**\\n * The Computer Language Benchmarks Game\\...\n", - "904 java /**\\n * The Computer Language Benchmarks Game\\...\n", - "905 jruby # The Computer Language Benchmarks Game\\n# htt...\n", - "906 jruby # The Computer Language Benchmarks Game\\n# htt...\n", - "907 lua -- The Computer Language Benchmarks Game\\n-- h...\n", - "908 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", - "909 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", - "910 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", - "911 oz % The Computer Language Benchmarks Game ...\n", - "912 perl # The Computer Language Benchmarks Game\\n# htt...\n", - "913 perl # The Computer Language Benchmarks Game\\n# htt...\n", - "914 python3 # The Computer Language Benchmarks Game\\n# htt...\n", - "915 python3 # The Computer Language Benchmarks Game\\n# htt...\n", - "916 racket #lang racket/base\\n\\n;;; The Computer Language...\n", - "917 sbcl ;;; The Computer Language Benchmarks Game\\n;;;...\n", - "918 sbcl ;;; The Computer Language Benchmarks Game\\n;;;...\n", - "919 scala /* The Computer Language Benchmarks Game\\n h...\n", - "920 vw \"* The Computer Language Benchmarks Game\\n ...\n", - "921 yarv # The Computer Language Benchmarks Game\\n# htt...\n", - "922 yarv # The 
Computer Language Benchmarks Game\\n# htt...\n", - "\n", - "[923 rows x 2 columns]" ] }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "metadata": {} } ], "source": [ @@ -466,10 +393,8 @@ }, { "cell_type": "code", - "execution_count": 54, - "metadata": { - "collapsed": false - }, + "execution_count": 4, + "metadata": {}, "outputs": [ { "data": { @@ -487,16 +412,10 @@ " \n", "\n", "" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [language, text]\n", - "Index: []" ] }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "metadata": {} } ], "source": [ @@ -505,10 +424,8 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "collapsed": false - }, + "execution_count": 5, + "metadata": {}, "outputs": [], "source": [ "# walker = os.walk('./bench/binarytrees/')\n", @@ -517,10 +434,8 @@ }, { "cell_type": "code", - "execution_count": 55, - "metadata": { - "collapsed": false - }, + "execution_count": 6, + "metadata": {}, "outputs": [], "source": [ "def test_classifier(pipe, *split_args):\n", @@ -535,29 +450,25 @@ }, { "cell_type": "code", - "execution_count": 104, - "metadata": { - "collapsed": false - }, + "execution_count": 7, + "metadata": {}, "outputs": [], "source": [ "X = df.text\n", "y = df.language\n", - "args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test\n" + "args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test" ] }, { "cell_type": "code", - "execution_count": 105, - "metadata": { - "collapsed": false - }, + "execution_count": 8, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Train score: 0.9186991869918699, Test score: 0.7243243243243244\n" + "Train score: 0.9227642276422764, Test score: 0.7135135135135136\n" ] } ], @@ -566,26 +477,24 @@ "# ('tfidf', TfidfTransformer()),\n", 
" ('bayes', MultinomialNB())])\n", "# spam_pipe\n", - "classifier = test_classifier(spam_pipe, *args)\n" + "classifier = test_classifier(spam_pipe, *args)" ] }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": false - }, + "execution_count": 9, + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['yarv', 'yarv', 'yarv', ..., 'yarv', 'yarv', 'yarv'], \n", - " dtype='\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
itemlanguagetextguess
01clojureNaNNaN
12clojureNaNNaN
23clojureNaNNaN
34clojureNaNNaN
45pythonNaNNaN
\n", + "" + ] + }, + "output_type": "execute_result", + "metadata": {} + } + ], + "source": [ + "test_data = pd.read_csv('./test.csv', names=['item', 'language', 'text', 'guess'])\n", + "test_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
itemlanguagetextguess
01clojure(defn cf-settings\\n \"Setup settings for campf...NaN
12clojurevar _ = require('lodash'),\\n fs = require('...NaN
23clojure/* Riot v2.0.8, @license MIT, (c) 2015 Muut In...NaN
34clojurevar r = riot.route = function(arg) {\\n //...NaN
45pythonmodule ActiveJob\\n module Core\\n extend Ac...NaN
\n", + "
" + ] + }, + "output_type": "execute_result", + "metadata": {} + } + ], + "source": [ + "test_files = glob.glob('./test/*')\n", + "\n", + "for (idx, fn) in enumerate(test_files):\n", + "# try:\n", + " with open(fn) as fh:\n", + "# df.loc[extract_extension(fn)] = ''.join(fh.readlines())\n", + "# data = {'language': extract_extension(fn),\n", + "# 'text': ''.join(fh.readlines())}\n", + "# if data['language'] and data['text']:\n", + "# df = df.append(data, ignore_index = True)\n", + "# except (IsADirectoryError, UnicodeDecodeError):\n", + "# pass\n", + " #test_data['text'][idx] = ''.join(fh.readlines())\n", + " test_data.ix[idx, 'text'] = ''.join(fh.readlines())\n", + "test_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.9227642276422764, Test score: 0.7135135135135136\n" + ] + } + ], + "source": [ + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + "# ('tfidf', TfidfTransformer()),\n", + " ('bayes', MultinomialNB())])\n", + "# spam_pipe\n", + "classifier = test_classifier(spam_pipe, *args)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0625" + ] + }, + "execution_count": 61, + "output_type": "execute_result", + "metadata": {} + } + ], + "source": [ + "test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text']))\n", + "correct = test_data[test_data.language == test_data.guess]\n", + "len(correct)/len(test_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
itemlanguagetextguess
01clojure(defn cf-settings\\n \"Setup settings for campf...clojure
12clojurevar _ = require('lodash'),\\n fs = require('...javascript
23clojure/* Riot v2.0.8, @license MIT, (c) 2015 Muut In...clojure
34clojurevar r = riot.route = function(arg) {\\n //...lua
45pythonmodule ActiveJob\\n module Core\\n extend Ac...yarv
56pythonrequire 'formula'\\n\\nclass A52dec < Formula\\n ...clojure
67pythonmodule Fluent\\n class Input\\n include Conf...jruby
78python{-# LANGUAGE ScopedTypeVariables, FlexibleInst...ghc
89javascriptreverseDependencies :: ModuleGraph -> M.Map Mo...ghc
910javascript{- git-annex extra config files\\n -\\n - Copyri...clojure
1011javascript(define subst-f\\n (lambda (new old l)\\n (c...sbcl
1112javascript(ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...clojure
1213ruby(define add1\\n (lambda (n) (+ n 1)))racket
1314ruby(define-lib-primitive (length lst)\\n (if (nul...racket
1415ruby/**\\n * Interface to represent a persistence s...java
1516haskell/*\\n * Copyright 2002-2008 the original author...gnat
1617haskellpackage com.github.pathikrit\\n\\nimport scala.a...scala
1718haskell/* sbt -- Simple Build Tool\\n * Copyright 2010...scala
1819schemeproc isaac::mix {a b c d e f g h} {\\n set a...racket
1920schemeproc twitter::follow {nick uhost hand chan arg...python3
2021schemeclass View\\n{\\n /**\\n * Data available ...gpp
2122javapublic function formatLocalized($format)\\n...lua
2223java(extend-type String\\n Person\\n (first-name [...clojure
2324scalaclass Application extends App {\\n\\t/**\\n\\t * @...php
2425scalatype name = string\\n\\nlet compare_label label1...ocaml
2526tcllet search_compiler_libs () =\\n prerr_endline...ocaml
2627tcl(require '[overtone.live :as overtone])\\n\\n(de...clojure
2728phpfrom pkgutil import iter_modules\\nfrom subproc...python3
2829phpimport re\\nimport subprocess\\n\\ndef cmd_keymap...clojure
2930phpclass NoSuchService(Exception):\\n def __ini...yarv
3031ocamlfrom collections import namedtuple\\nimport fun...rust
3132ocamlfunction errorHandler(context) {\\n return fun...javascript
\n", + "
" + ] + }, + "output_type": "execute_result", + "metadata": {} + } + ], + "source": [ + "test_data" + ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "" + ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "" + ] }, { "cell_type": "code", - "execution_count": 74, - "metadata": { - "collapsed": false - }, + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[4]" + ] + }, + "execution_count": 76, + "output_type": "execute_result", + "metadata": {} + } + ], + "source": [ + "def longest_run_of_capitol_letters_feature(text):\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if len(runs) == 0:\n", + " return [0]\n", + " longest = runs[-1]\n", + " return [len(longest)]\n", + "longest_run_of_capitol_letters_feature('ABCabddwAAAA absd AB sd A.AA.AAA')" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.5]" + ] + }, + "execution_count": 77, + "output_type": "execute_result", + "metadata": {} + } + ], + "source": [ + "def percent_periods_feature(text):\n", + " \"\"\"Return percentage of text that is periods compared to total text length.\"\"\"\n", + " periods = text.count(\".\")\n", + " return [periods / len(text)]\n", + "percent_periods_feature('. . . . ')" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[3, 0.13043478260869565]" + ] + }, + "execution_count": 80, + "output_type": "execute_result", + "metadata": {} + } + ], + "source": [ + "def feature_vector(text):\n", + " return longest_run_of_capitol_letters_feature(text) + percent_periods_feature(text)\n", + "feature_vector('AAH! feature_vector... 
')" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, "outputs": [], "source": [ - "# from sklearn.tree import DecisionTreeClassifier, export_graphviz\n", - "# classifier = test_classifier(DecisionTreeClassifier(criterion='entropy'), *args)\n", - "# export_graphviz(classifier, out_file='tree.dot') " + "import numpy as np\n", + "\n", + "class CustomFeaturizer:\n", + " def __init__(self, *featurizers):\n", + " self.featurizers = featurizers\n", + " \n", + " def fit(self, X, y=None):\n", + " \"\"\"All SciKit-Learn compatible transformers and classifiers have the\n", + " same interface. `fit` always returns the same object.\"\"\"\n", + " return self\n", + " \n", + " def transform(self, X):\n", + " \"\"\"Given a list of original data, return a list of feature vectors.\"\"\"\n", + " fvs = []\n", + " for datum in X:\n", + " fv = np.array([f(datum) for f in self.featurizers])\n", + " fvs.append(fv.reshape(1, -1)[0])\n", + " return np.array(fvs)\n", + " \n", + "featurizer = CustomFeaturizer(longest_run_of_capitol_letters_feature,\n", + " percent_periods_feature)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "" + ] } ], "metadata": { @@ -770,7 +1326,7 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 3.0 }, "file_extension": ".py", "mimetype": "text/x-python", @@ -782,4 +1338,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file From 1cf138892f36cac031ff83febb94fb814a84096d Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Fri, 5 Jun 2015 13:47:36 -0400 Subject: [PATCH 03/12] Move functions into .py file --- initial_exploration.ipynb | 62 ++++++++++----- initial_exploration.py | 158 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 202 insertions(+), 18 deletions(-) create mode 100644 initial_exploration.py diff --git a/initial_exploration.ipynb 
b/initial_exploration.ipynb index d01435e..c56ce60 100644 --- a/initial_exploration.ipynb +++ b/initial_exploration.ipynb @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 84, "metadata": {}, "outputs": [ { @@ -371,29 +371,29 @@ } ], "source": [ - "text = ''\n", - "df = pd.DataFrame(columns=['language', 'text'])\n", - "#files = glob.glob('bench/binarytrees/*')\n", - "files = glob.glob('bench/*/*')\n", - "# ['./bench/binarytrees/binarytrees.clojure',\n", - "# './bench/binarytrees/binarytrees.csharp']\n", + "def load_bench_data():\n", + " text = ''\n", + " df = pd.DataFrame(columns=['language', 'text'])\n", + " files = glob.glob('bench/*/*')\n", "\n", - "for fn in files:\n", - " try:\n", - " with open(fn) as fh:\n", - " # df.loc[extract_extension(fn)] = ''.join(fh.readlines())\n", - " data = {'language': extract_extension(fn),\n", - " 'text': ''.join(fh.readlines())}\n", - " if data['language'] and data['text']:\n", - " df = df.append(data, ignore_index = True)\n", - " except (IsADirectoryError, UnicodeDecodeError):\n", - " pass\n", + " for fn in files:\n", + " try:\n", + " with open(fn) as fh:\n", + " data = {'language': extract_extension(fn),\n", + " 'text': ''.join(fh.readlines())}\n", + " if data['language'] and data['text']:\n", + " df = df.append(data, ignore_index=True)\n", + " except (IsADirectoryError, UnicodeDecodeError):\n", + " pass\n", + " return df\n", + "\n", + "df = load_bench_data()\n", "df" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 86, "metadata": {}, "outputs": [ { @@ -1307,6 +1307,32 @@ " percent_periods_feature)" ] }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 5.00000000e+00, 2.73597811e-03],\n", + " [ 1.10000000e+01, 2.07530388e-03],\n", + " [ 2.00000000e+00, 5.37190083e-03],\n", + " ..., \n", + " [ 1.00000000e+00, 1.06312292e-02],\n", + " [ 6.00000000e+00, 1.95865071e-02],\n", + " [ 
6.00000000e+00, 2.19594595e-02]])" + ] + }, + "execution_count": 82, + "output_type": "execute_result", + "metadata": {} + } + ], + "source": [ + "featurizer.transform(df.text)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/initial_exploration.py b/initial_exploration.py new file mode 100644 index 0000000..1c51b7c --- /dev/null +++ b/initial_exploration.py @@ -0,0 +1,158 @@ +import re +import glob + +from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer +from sklearn.cross_validation import train_test_split +from sklearn.naive_bayes import MultinomialNB +from sklearn.pipeline import Pipeline +from sklearn.ensemble import RandomForestClassifier +import pandas as pd +import numpy as np +import pickle +import os.path + + +def make_extension_dict(): + extensions = {'C' : ['gcc', 'c'], + 'C#' : 'csharp', + 'Common Lisp' : 'sbcl', + 'Clojure' : 'clojure', + 'Haskell' : 'haskell', + 'Java' : 'java', + 'JavaScript' : 'javascript', + 'OCaml' : 'ocaml', + 'Perl' : 'perl', + 'PHP' : ['hack','php'], + 'Python' : 'python3', + 'Ruby' : ['jruby', 'yarv'], + 'Scala' : 'scala', + 'Scheme' : 'racket', + } + ext_lookup = {} + for key, value in extensions.items(): + """Flip the dictionary around""" + if type(value) == type([]): #hasattr(value, '__iter__'): + for value2 in value: + ext_lookup[value2] = key + else: + ext_lookup[value] = key + return ext_lookup + + +def extract_extension(string): + match = re.match('.*\.(?P.*)$', string) + if match: + return match.groupdict()['ext'] + + +def load_bench_data(reload=False): + if os.path.isfile("bench.data") and not reload: + df = pickle.load( open( "bench.data", "rb" ) ) + return df + df = pd.DataFrame(columns=['language', 'text']) + files = glob.glob('bench/*/*') + + for fn in files: + try: + with open(fn) as fh: + data = {'language': extract_extension(fn), + 'text': ''.join(fh.readlines())} + if data['language'] and data['text']: + df = df.append(data, ignore_index=True) + except 
(IsADirectoryError, UnicodeDecodeError): + pass + pickle.dump( df, open( "bench.data", "wb" ) ) + return df + + +def load_test_data(): + test_data = pd.read_csv('./test.csv', names=['item', 'language', 'text', 'guess']) + test_files = glob.glob('./test/*') + + for (idx, fn) in enumerate(test_files): + # try: + with open(fn) as fh: + # df.loc[extract_extension(fn)] = ''.join(fh.readlines()) + # data = {'language': extract_extension(fn), + # 'text': ''.join(fh.readlines())} + # if data['language'] and data['text']: + # df = df.append(data, ignore_index = True) + # except (IsADirectoryError, UnicodeDecodeError): + # pass + #test_data['text'][idx] = ''.join(fh.readlines()) + test_data.ix[idx, 'text'] = ''.join(fh.readlines()) + return test_data + +def test_classifier(pipe, *split_args): + pipe.fit(split_args[0], split_args[2]) + train_score = pipe.score(split_args[0], split_args[2]) + test_score = pipe.score(split_args[1], split_args[3]) + print('Train score: {:.3f}, Test score: {:.3f}'.format(train_score, test_score)) + return pipe + +def longest_run_of_capitol_letters_feature(text): + runs = sorted(re.findall(r"[A-Z]+", text), key=len) + if len(runs) == 0: + return [0] + longest = runs[-1] + return [len(longest)] + +def percent_periods_feature(text): + """Return percentage of text that is periods compared to total text length.""" + periods = text.count(".") + return [periods / len(text)] + +def feature_vector(text): + return longest_run_of_capitol_letters_feature(text) + percent_periods_feature(text) + + +class CustomFeaturizer: + def __init__(self, *featurizers): + self.featurizers = featurizers + + def fit(self, X, y=None): + """All SciKit-Learn compatible transformers and classifiers have the + same interface. 
`fit` always returns the same object.""" + return self + + def transform(self, X): + """Given a list of original data, return a list of feature vectors.""" + fvs = [] + for datum in X: + fv = np.array([f(datum) for f in self.featurizers]) + fvs.append(fv.reshape(1, -1)[0]) + return np.array(fvs) + + +if __name__ == '__main__': + df = load_bench_data() + X = df.text + y = df.language + test_data = load_bench_data() + + args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test + + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('bayes', MultinomialNB())]) + classifier = test_classifier(spam_pipe, *args) + classifier.predict(args[1].iloc[2]) + + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('tfidf', TfidfTransformer()), + ('RFC', RandomForestClassifier())]) + spam_pipe.set_params(RFC__n_estimators=1000) + classifier = test_classifier(spam_pipe, *args) + + + test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text'])) + correct = test_data[test_data.language == test_data.guess] + print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct)/len(test_data))) + + longest_run_of_capitol_letters_feature('ABCabddwAAAA absd AB sd A.AA.AAA') + percent_periods_feature('. . . . ') + feature_vector('AAH! feature_vector... 
') + + featurizer = CustomFeaturizer(longest_run_of_capitol_letters_feature, + percent_periods_feature) + + From e2dbfa262c6439a7ee117820be98cda65d961bfb Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Sat, 6 Jun 2015 06:20:24 -0400 Subject: [PATCH 04/12] Clean up functions further and add tests; 4/5 passing --- initial_exploration.py => lang_classifier.py | 97 ++++++++++++-------- 1 file changed, 58 insertions(+), 39 deletions(-) rename initial_exploration.py => lang_classifier.py (64%) diff --git a/initial_exploration.py b/lang_classifier.py similarity index 64% rename from initial_exploration.py rename to lang_classifier.py index 1c51b7c..8b85c8a 100644 --- a/initial_exploration.py +++ b/lang_classifier.py @@ -11,27 +11,32 @@ import pickle import os.path +# TODO: Future Ideas: +# use n-grams? +# inverse_transform or otherwise make an best-case exemplar +# web scraping def make_extension_dict(): - extensions = {'C' : ['gcc', 'c'], - 'C#' : 'csharp', - 'Common Lisp' : 'sbcl', - 'Clojure' : 'clojure', - 'Haskell' : 'haskell', - 'Java' : 'java', - 'JavaScript' : 'javascript', - 'OCaml' : 'ocaml', - 'Perl' : 'perl', - 'PHP' : ['hack','php'], - 'Python' : 'python3', - 'Ruby' : ['jruby', 'yarv'], - 'Scala' : 'scala', - 'Scheme' : 'racket', + extensions = {'C': ['gcc', 'c'], + 'C#': 'csharp', + 'Common Lisp': 'sbcl', + 'Clojure': 'clojure', + 'Haskell': 'haskell', + 'Java': 'java', + 'JavaScript': 'javascript', + 'OCaml': 'ocaml', + 'Perl': 'perl', + 'PHP': ['hack', 'php'], + 'Python': 'python3', + 'Ruby': ['jruby', 'yarv'], + 'Scala': 'scala', + 'Scheme': 'racket', } + ext_lookup = {} for key, value in extensions.items(): """Flip the dictionary around""" - if type(value) == type([]): #hasattr(value, '__iter__'): + if type(value) == type([]): # hasattr(value, '__iter__'): for value2 in value: ext_lookup[value2] = key else: @@ -46,8 +51,11 @@ def extract_extension(string): def load_bench_data(reload=False): - if os.path.isfile("bench.data") and not reload: - df = 
pickle.load( open( "bench.data", "rb" ) ) + df = unpickle('bench.data', reload=reload) + # if os.path.isfile("bench.data") and not reload: + # df = pickle.load( open( "bench.data", "rb" ) ) + # return df + if df is not None: return df df = pd.DataFrame(columns=['language', 'text']) files = glob.glob('bench/*/*') @@ -61,7 +69,7 @@ def load_bench_data(reload=False): df = df.append(data, ignore_index=True) except (IsADirectoryError, UnicodeDecodeError): pass - pickle.dump( df, open( "bench.data", "wb" ) ) + pickle.dump(df, open("bench.data", "wb")) return df @@ -70,26 +78,28 @@ def load_test_data(): test_files = glob.glob('./test/*') for (idx, fn) in enumerate(test_files): - # try: + # try: with open(fn) as fh: - # df.loc[extract_extension(fn)] = ''.join(fh.readlines()) - # data = {'language': extract_extension(fn), - # 'text': ''.join(fh.readlines())} - # if data['language'] and data['text']: - # df = df.append(data, ignore_index = True) - # except (IsADirectoryError, UnicodeDecodeError): - # pass - #test_data['text'][idx] = ''.join(fh.readlines()) + # df.loc[extract_extension(fn)] = ''.join(fh.readlines()) + # data = {'language': extract_extension(fn), + # 'text': ''.join(fh.readlines())} + # if data['language'] and data['text']: + # df = df.append(data, ignore_index = True) + # except (IsADirectoryError, UnicodeDecodeError): + # pass + # test_data['text'][idx] = ''.join(fh.readlines()) test_data.ix[idx, 'text'] = ''.join(fh.readlines()) return test_data -def test_classifier(pipe, *split_args): + +def assess_classifier(pipe, *split_args): pipe.fit(split_args[0], split_args[2]) train_score = pipe.score(split_args[0], split_args[2]) test_score = pipe.score(split_args[1], split_args[3]) print('Train score: {:.3f}, Test score: {:.3f}'.format(train_score, test_score)) return pipe + def longest_run_of_capitol_letters_feature(text): runs = sorted(re.findall(r"[A-Z]+", text), key=len) if len(runs) == 0: @@ -97,24 +107,34 @@ def longest_run_of_capitol_letters_feature(text): 
longest = runs[-1] return [len(longest)] + def percent_periods_feature(text): """Return percentage of text that is periods compared to total text length.""" periods = text.count(".") return [periods / len(text)] + def feature_vector(text): return longest_run_of_capitol_letters_feature(text) + percent_periods_feature(text) +def unpickle(name, reload=False): + if os.path.isfile(name) and not reload: + df = pickle.load(open("bench.data", "rb")) + return df + else: + return None + + class CustomFeaturizer: def __init__(self, *featurizers): self.featurizers = featurizers - + def fit(self, X, y=None): """All SciKit-Learn compatible transformers and classifiers have the same interface. `fit` always returns the same object.""" return self - + def transform(self, X): """Given a list of original data, return a list of feature vectors.""" fvs = [] @@ -130,23 +150,24 @@ def transform(self, X): y = df.language test_data = load_bench_data() - args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test + args = train_test_split(X, y, test_size=0.2, ) # random_state=0) # X_train, X_test, y_train, y_test spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), ('bayes', MultinomialNB())]) - classifier = test_classifier(spam_pipe, *args) + print(spam_pipe) + classifier = assess_classifier(spam_pipe, *args) classifier.predict(args[1].iloc[2]) spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), - ('tfidf', TfidfTransformer()), - ('RFC', RandomForestClassifier())]) + ('tfidf', TfidfTransformer()), + ('RFC', RandomForestClassifier())]) spam_pipe.set_params(RFC__n_estimators=1000) - classifier = test_classifier(spam_pipe, *args) - + print(spam_pipe) + classifier = assess_classifier(spam_pipe, *args) test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text'])) correct = test_data[test_data.language == test_data.guess] - print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct)/len(test_data))) + 
print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data))) longest_run_of_capitol_letters_feature('ABCabddwAAAA absd AB sd A.AA.AAA') percent_periods_feature('. . . . ') @@ -154,5 +175,3 @@ def transform(self, X): featurizer = CustomFeaturizer(longest_run_of_capitol_letters_feature, percent_periods_feature) - - From db4476b7bedd428c61f730946927951178c16f07 Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Sat, 6 Jun 2015 08:27:52 -0400 Subject: [PATCH 05/12] Add more tests, ready to begin building features in ipynb; 8/8 passing --- initial_exploration.ipynb | 381 +++++++++++++++++++++++++++++--------- lang_classifier.py | 14 +- requirements.txt | 2 + test_lang_classifier.py | 89 +++++++++ 4 files changed, 392 insertions(+), 94 deletions(-) create mode 100644 test_lang_classifier.py diff --git a/initial_exploration.ipynb b/initial_exploration.ipynb index c56ce60..290df7f 100644 --- a/initial_exploration.ipynb +++ b/initial_exploration.ipynb @@ -3,7 +3,9 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer\n", @@ -19,7 +21,9 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def extract_extension(string):\n", @@ -31,16 +35,18 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [], - "source": [ - "" - ] + "source": [] }, { "cell_type": "code", "execution_count": 84, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -364,10 +370,77 @@ "\n", "

923 rows × 2 columns

\n", "" + ], + "text/plain": [ + " language text\n", + "0 ats (*\\n** The Computer Language Benchmarks Game\\n...\n", + "1 ats (*\\n** The Computer Language Benchmarks Game\\n...\n", + "2 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "3 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "4 clojure ;; The Computer Language Benchmarks Game\\n;; h...\n", + "5 csharp /*\\n The Computer Language Benchmarks Ga...\n", + "6 csharp /* The Computer Language Benchmarks Game\\n h...\n", + "7 dart /* The Computer Language Benchmarks game\\n h...\n", + "8 erlang % The Computer Language Benchmarks Game\\n% htt...\n", + "9 erlang % The Computer Language Benchmarks Game\\n% htt...\n", + "10 fpascal (*\\n The Computer Language Benchmarks Game\\n ...\n", + "11 fsharp (*\\n The Computer Language Benchmarks Ga...\n", + "12 fsharp // The Computer Language Benchmarks Game\\n// ...\n", + "13 fsharp // The Computer Language Benchmarks Game\\n// ...\n", + "14 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "15 gcc /* \\n * The Computer Language Benchmarks Game ...\n", + "16 gcc // The Computer Language Benchmarks Game\\n// h...\n", + "17 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "18 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "19 gcc /* The Computer Language Benchmarks Game\\n * h...\n", + "20 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "21 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "22 ghc --\\n-- The Computer Language Benchmarks Game\\n...\n", + "23 gnat -- The Computer Language Benchmarks Game\\n-- h...\n", + "24 gnat -- The Computer Language Benchmarks Game\\n-- h...\n", + "25 gnat -- The Computer Language Benchmarks Game\\n-- ...\n", + "26 gnat -- The Computer Language Benchmarks Game\\n-- ...\n", + "27 go /* The Computer Language Benchmarks Game\\n * h...\n", + "28 go /* The Computer Language Benchmarks Game\\n * h...\n", + "29 go /* The Computer Language Benchmarks 
Game\\n * h...\n", + ".. ... ...\n", + "893 go /* The Computer Language Benchmarks Game\\n h...\n", + "894 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "895 gpp /*\\n * The Computer Language Benchmarks Game\\n...\n", + "896 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "897 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "898 gpp /*\\n* The Computer Language Benchmarks Game\\n*...\n", + "899 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "900 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "901 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "902 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "903 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "904 java /**\\n * The Computer Language Benchmarks Game\\...\n", + "905 jruby # The Computer Language Benchmarks Game\\n# htt...\n", + "906 jruby # The Computer Language Benchmarks Game\\n# htt...\n", + "907 lua -- The Computer Language Benchmarks Game\\n-- h...\n", + "908 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "909 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "910 ocaml (* The Computer Language Benchmarks Game\\n * h...\n", + "911 oz % The Computer Language Benchmarks Game ...\n", + "912 perl # The Computer Language Benchmarks Game\\n# htt...\n", + "913 perl # The Computer Language Benchmarks Game\\n# htt...\n", + "914 python3 # The Computer Language Benchmarks Game\\n# htt...\n", + "915 python3 # The Computer Language Benchmarks Game\\n# htt...\n", + "916 racket #lang racket/base\\n\\n;;; The Computer Language...\n", + "917 sbcl ;;; The Computer Language Benchmarks Game\\n;;;...\n", + "918 sbcl ;;; The Computer Language Benchmarks Game\\n;;;...\n", + "919 scala /* The Computer Language Benchmarks Game\\n h...\n", + "920 vw \"* The Computer Language Benchmarks Game\\n ...\n", + "921 yarv # The Computer Language Benchmarks Game\\n# htt...\n", + "922 yarv # The 
Computer Language Benchmarks Game\\n# htt...\n", + "\n", + "[923 rows x 2 columns]" ] }, - "output_type": "execute_result", - "metadata": {} + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -394,7 +467,9 @@ { "cell_type": "code", "execution_count": 86, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -412,10 +487,16 @@ " \n", "\n", "" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [language, text]\n", + "Index: []" ] }, - "output_type": "execute_result", - "metadata": {} + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -425,7 +506,9 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# walker = os.walk('./bench/binarytrees/')\n", @@ -434,8 +517,10 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 95, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "def test_classifier(pipe, *split_args):\n", @@ -444,31 +529,35 @@ "# predicted = classifier.predict(X_test)\n", " train_score = pipe.score(split_args[0], split_args[2])\n", " test_score = pipe.score(split_args[1], split_args[3])\n", - " print('Train score: {}, Test score: {}'.format(train_score, test_score))\n", + " print('Train score: {:.3f}, Test score: {:.3f}'.format(train_score, test_score))\n", " return pipe" ] }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 93, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "X = df.text\n", "y = df.language\n", - "args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test" + "args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test\n" ] }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, + "execution_count": 94, + "metadata": { + 
"collapsed": false + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Train score: 0.9227642276422764, Test score: 0.7135135135135136\n" + "Train score: 0.9132791327913279, Test score: 0.67\n" ] } ], @@ -477,13 +566,15 @@ "# ('tfidf', TfidfTransformer()),\n", " ('bayes', MultinomialNB())])\n", "# spam_pipe\n", - "classifier = test_classifier(spam_pipe, *args)" + "classifier = test_classifier(spam_pipe, *args)\n" ] }, { "cell_type": "code", "execution_count": 9, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -493,8 +584,8 @@ ] }, "execution_count": 9, - "output_type": "execute_result", - "metadata": {} + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -512,7 +603,9 @@ { "cell_type": "code", "execution_count": 10, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -527,7 +620,7 @@ " ('tfidf', TfidfTransformer()),\n", " ('bayes', MultinomialNB())])\n", "# spam_pipe\n", - "classifier = test_classifier(spam_pipe, *args)" + "classifier = test_classifier(spam_pipe, *args)\n" ] }, { @@ -540,7 +633,9 @@ { "cell_type": "code", "execution_count": 11, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier" @@ -549,7 +644,9 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -582,7 +679,9 @@ { "cell_type": "code", "execution_count": 13, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -635,7 +734,9 @@ { "cell_type": "code", "execution_count": 41, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -660,8 +761,8 @@ ] }, "execution_count": 41, - "output_type": "execute_result", - "metadata": {} + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -696,7 +797,9 @@ { 
"cell_type": "code", "execution_count": 15, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -709,8 +812,8 @@ ] }, "execution_count": 15, - "output_type": "execute_result", - "metadata": {} + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -720,27 +823,31 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [], - "source": [ - "" - ] + "source": [] }, { "cell_type": "code", "execution_count": 16, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# from sklearn.tree import DecisionTreeClassifier, export_graphviz\n", "# classifier = test_classifier(DecisionTreeClassifier(criterion='entropy'), *args)\n", - "# export_graphviz(classifier, out_file='tree.dot')" + "# export_graphviz(classifier, out_file='tree.dot') " ] }, { "cell_type": "code", "execution_count": 53, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -795,10 +902,19 @@ " \n", "\n", "" + ], + "text/plain": [ + " item language text guess\n", + "0 1 clojure NaN NaN\n", + "1 2 clojure NaN NaN\n", + "2 3 clojure NaN NaN\n", + "3 4 clojure NaN NaN\n", + "4 5 python NaN NaN" ] }, - "output_type": "execute_result", - "metadata": {} + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -809,7 +925,9 @@ { "cell_type": "code", "execution_count": 54, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -864,10 +982,19 @@ " \n", "\n", "" + ], + "text/plain": [ + " item language text guess\n", + "0 1 clojure (defn cf-settings\\n \"Setup settings for campf... NaN\n", + "1 2 clojure var _ = require('lodash'),\\n fs = require('... NaN\n", + "2 3 clojure /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... NaN\n", + "3 4 clojure var r = riot.route = function(arg) {\\n //... NaN\n", + "4 5 python module ActiveJob\\n module Core\\n extend Ac... 
NaN" ] }, - "output_type": "execute_result", - "metadata": {} + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -891,7 +1018,9 @@ { "cell_type": "code", "execution_count": 55, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -906,13 +1035,15 @@ "# ('tfidf', TfidfTransformer()),\n", " ('bayes', MultinomialNB())])\n", "# spam_pipe\n", - "classifier = test_classifier(spam_pipe, *args)" + "classifier = test_classifier(spam_pipe, *args)\n" ] }, { "cell_type": "code", "execution_count": 61, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -921,8 +1052,8 @@ ] }, "execution_count": 61, - "output_type": "execute_result", - "metadata": {} + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -934,7 +1065,9 @@ { "cell_type": "code", "execution_count": 62, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -1178,10 +1311,80 @@ " \n", "\n", "" + ], + "text/plain": [ + " item language text \\\n", + "0 1 clojure (defn cf-settings\\n \"Setup settings for campf... \n", + "1 2 clojure var _ = require('lodash'),\\n fs = require('... \n", + "2 3 clojure /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "3 4 clojure var r = riot.route = function(arg) {\\n //... \n", + "4 5 python module ActiveJob\\n module Core\\n extend Ac... \n", + "5 6 python require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "6 7 python module Fluent\\n class Input\\n include Conf... \n", + "7 8 python {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "8 9 javascript reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "9 10 javascript {- git-annex extra config files\\n -\\n - Copyri... \n", + "10 11 javascript (define subst-f\\n (lambda (new old l)\\n (c... \n", + "11 12 javascript (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... 
\n", + "12 13 ruby (define add1\\n (lambda (n) (+ n 1))) \n", + "13 14 ruby (define-lib-primitive (length lst)\\n (if (nul... \n", + "14 15 ruby /**\\n * Interface to represent a persistence s... \n", + "15 16 haskell /*\\n * Copyright 2002-2008 the original author... \n", + "16 17 haskell package com.github.pathikrit\\n\\nimport scala.a... \n", + "17 18 haskell /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "18 19 scheme proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "19 20 scheme proc twitter::follow {nick uhost hand chan arg... \n", + "20 21 scheme class View\\n{\\n /**\\n * Data available ... \n", + "21 22 java public function formatLocalized($format)\\n... \n", + "22 23 java (extend-type String\\n Person\\n (first-name [... \n", + "23 24 scala class Application extends App {\\n\\t/**\\n\\t * @... \n", + "24 25 scala type name = string\\n\\nlet compare_label label1... \n", + "25 26 tcl let search_compiler_libs () =\\n prerr_endline... \n", + "26 27 tcl (require '[overtone.live :as overtone])\\n\\n(de... \n", + "27 28 php from pkgutil import iter_modules\\nfrom subproc... \n", + "28 29 php import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "29 30 php class NoSuchService(Exception):\\n def __ini... \n", + "30 31 ocaml from collections import namedtuple\\nimport fun... \n", + "31 32 ocaml function errorHandler(context) {\\n return fun... 
\n", + "\n", + " guess \n", + "0 clojure \n", + "1 javascript \n", + "2 clojure \n", + "3 lua \n", + "4 yarv \n", + "5 clojure \n", + "6 jruby \n", + "7 ghc \n", + "8 ghc \n", + "9 clojure \n", + "10 sbcl \n", + "11 clojure \n", + "12 racket \n", + "13 racket \n", + "14 java \n", + "15 gnat \n", + "16 scala \n", + "17 scala \n", + "18 racket \n", + "19 python3 \n", + "20 gpp \n", + "21 lua \n", + "22 clojure \n", + "23 php \n", + "24 ocaml \n", + "25 ocaml \n", + "26 clojure \n", + "27 python3 \n", + "28 clojure \n", + "29 yarv \n", + "30 rust \n", + "31 javascript " ] }, - "output_type": "execute_result", - "metadata": {} + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1191,25 +1394,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], - "source": [ - "" - ] + "source": [] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], - "source": [ - "" - ] + "source": [] }, { "cell_type": "code", "execution_count": 76, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -1218,8 +1423,8 @@ ] }, "execution_count": 76, - "output_type": "execute_result", - "metadata": {} + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1235,7 +1440,9 @@ { "cell_type": "code", "execution_count": 77, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -1244,8 +1451,8 @@ ] }, "execution_count": 77, - "output_type": "execute_result", - "metadata": {} + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1259,7 +1466,9 @@ { "cell_type": "code", "execution_count": 80, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -1268,8 +1477,8 @@ ] }, "execution_count": 80, - "output_type": "execute_result", - "metadata": {} + "metadata": {}, + "output_type": 
"execute_result" } ], "source": [ @@ -1281,7 +1490,9 @@ { "cell_type": "code", "execution_count": 81, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import numpy as np\n", @@ -1310,7 +1521,9 @@ { "cell_type": "code", "execution_count": 82, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -1325,8 +1538,8 @@ ] }, "execution_count": 82, - "output_type": "execute_result", - "metadata": {} + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1336,11 +1549,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], - "source": [ - "" - ] + "source": [] } ], "metadata": { @@ -1352,7 +1565,7 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3.0 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", @@ -1364,4 +1577,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/lang_classifier.py b/lang_classifier.py index 8b85c8a..1602698 100644 --- a/lang_classifier.py +++ b/lang_classifier.py @@ -100,7 +100,7 @@ def assess_classifier(pipe, *split_args): return pipe -def longest_run_of_capitol_letters_feature(text): +def longest_run_of_caps_feature(text): runs = sorted(re.findall(r"[A-Z]+", text), key=len) if len(runs) == 0: return [0] @@ -114,10 +114,6 @@ def percent_periods_feature(text): return [periods / len(text)] -def feature_vector(text): - return longest_run_of_capitol_letters_feature(text) + percent_periods_feature(text) - - def unpickle(name, reload=False): if os.path.isfile(name) and not reload: df = pickle.load(open("bench.data", "rb")) @@ -148,7 +144,7 @@ def transform(self, X): df = load_bench_data() X = df.text y = df.language - test_data = load_bench_data() + test_data = load_test_data() args = train_test_split(X, y, test_size=0.2, ) # random_state=0) # X_train, X_test, y_train, y_test @@ -156,7 +152,7 @@ def transform(self, 
X): ('bayes', MultinomialNB())]) print(spam_pipe) classifier = assess_classifier(spam_pipe, *args) - classifier.predict(args[1].iloc[2]) + classifier.predict(args[1]) spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), ('tfidf', TfidfTransformer()), @@ -168,10 +164,8 @@ def transform(self, X): test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text'])) correct = test_data[test_data.language == test_data.guess] print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data))) + print(test_data) - longest_run_of_capitol_letters_feature('ABCabddwAAAA absd AB sd A.AA.AAA') - percent_periods_feature('. . . . ') - feature_vector('AAH! feature_vector... ') featurizer = CustomFeaturizer(longest_run_of_capitol_letters_feature, percent_periods_feature) diff --git a/requirements.txt b/requirements.txt index 473a3b2..2cc845f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,5 @@ scipy pandas numpy matplotlib +pytest +ipython[notebook] diff --git a/test_lang_classifier.py b/test_lang_classifier.py new file mode 100644 index 0000000..ca2f390 --- /dev/null +++ b/test_lang_classifier.py @@ -0,0 +1,89 @@ +from lang_classifier import * + + +def test_make_extension_dict(): + ext_lookup = make_extension_dict() + assert ext_lookup['php'] == 'PHP' + # assert ext_lookup['NONExISTNTANT!'] is None + + +def test_extract_extension(): + assert extract_extension('abc/def/ghi.jkl') == 'jkl' + assert extract_extension('abc/def/ghi') == None + + +def test_load_bench_data(): + df = load_bench_data() # reload=True) + print(df.head(5)) + assert df['language'][2] == 'clojure' + + +def test_load_test_data(): + test_data = load_test_data() + assert test_data['language'][1] == 'clojure' + +def setup(): + df = load_bench_data() + X = df.text + y = df.language + test_data = load_test_data() + args = train_test_split(X, y, test_size=0.2, random_state=0) + # X_train, X_test, y_train, y_test + + return df, X, y, test_data, args + +def 
test_assess_classifier(): + df, X, y, test_data, args = setup() + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('bayes', MultinomialNB())]) + classifier = assess_classifier(spam_pipe, *args) + c = classifier.predict(X) + assert len(c) == 923 + assert c[3] == 'clojure' + +def test_longest_run_of_caps_feature(): + assert longest_run_of_caps_feature( + 'ABCabddwAAAA absd AB sd A.AA.AAA') == [4] + +def test_percent_periods_feature(): + assert percent_periods_feature('. . . . ') == [0.5] + +def test_featurizer(): + featurizer = CustomFeaturizer(longest_run_of_caps_feature, + percent_periods_feature) + np.testing.assert_equal(featurizer.transform(['AAH! feature....']) + , np.array([[ 3. , 0.25]])) +""" + df = load_bench_data() + X = df.text + y = df.language + test_data = load_bench_data() + + args = train_test_split(X, y, test_size=0.2, )#random_state=0) # X_train, X_test, y_train, y_test + + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('bayes', MultinomialNB())]) + print(spam_pipe) + classifier = test_classifier(spam_pipe, *args) + classifier.predict(args[1].iloc[2]) + + spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('tfidf', TfidfTransformer()), + ('RFC', RandomForestClassifier())]) + spam_pipe.set_params(RFC__n_estimators=1000) + print(spam_pipe) + classifier = test_classifier(spam_pipe, *args) + + + test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text'])) + correct = test_data[test_data.language == test_data.guess] + print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct)/len(test_data))) + + longest_run_of_capitol_letters_feature('ABCabddwAAAA absd AB sd A.AA.AAA') + percent_periods_feature('. . . . ') + feature_vector('AAH! feature_vector... 
') + + featurizer = CustomFeaturizer(longest_run_of_capitol_letters_feature, + percent_periods_feature) + +""" From 724a9f6aead112fe5a303e5851f39e767a4832dd Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Sat, 6 Jun 2015 15:53:23 -0400 Subject: [PATCH 06/12] Fixed issues with validating % correct of test data, now baseline is ~9% accurate; 9/9 tests passing --- feature_testing.ipynb | 225 ++++++++++++++++++++++++++++++++++++++++ lang_classifier.py | 42 ++++---- test_lang_classifier.py | 24 ++++- 3 files changed, 270 insertions(+), 21 deletions(-) create mode 100644 feature_testing.ipynb diff --git a/feature_testing.ipynb b/feature_testing.ipynb new file mode 100644 index 0000000..d11d594 --- /dev/null +++ b/feature_testing.ipynb @@ -0,0 +1,225 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Initial Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from lang_classifier import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def setup():\n", + " \"\"\"Load the training benchmark training data and split it for train/test\"\"\"\n", + " df = load_bench_data()\n", + " X = df.text\n", + " y = df.language\n", + " test_data = load_test_data()\n", + " args = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " # X_train, X_test, y_train, y_test\n", + " \n", + " return df, X, y, test_data, args" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df, X, y, test_data, args = setup() # Load and split the train/test data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.991, Test score: 0.949\n", + "Proportion of test data 
correctly labeled: 0.094\n", + " language guess text\n", + "0 clojure clojure (defn cf-settings\\n \"Setup settings for campf...\n", + "1 clojure javascript var _ = require('lodash'),\\n fs = require('...\n", + "2 clojure clojure /* Riot v2.0.8, @license MIT, (c) 2015 Muut In...\n", + "3 clojure php var r = riot.route = function(arg) {\\n //...\n", + "4 python ruby module ActiveJob\\n module Core\\n extend Ac...\n", + "5 python clojure require 'formula'\\n\\nclass A52dec < Formula\\n ...\n", + "6 python ruby module Fluent\\n class Input\\n include Conf...\n", + "7 python haskell {-# LANGUAGE ScopedTypeVariables, FlexibleInst...\n", + "8 javascript haskell reverseDependencies :: ModuleGraph -> M.Map Mo...\n", + "9 javascript clojure {- git-annex extra config files\\n -\\n - Copyri...\n", + "10 javascript scheme (define subst-f\\n (lambda (new old l)\\n (c...\n", + "11 javascript clojure (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...\n", + "12 ruby scheme (define add1\\n (lambda (n) (+ n 1)))\n", + "13 ruby scheme (define-lib-primitive (length lst)\\n (if (nul...\n", + "14 ruby java /**\\n * Interface to represent a persistence s...\n", + "15 haskell c /*\\n * Copyright 2002-2008 the original author...\n", + "16 haskell scala package com.github.pathikrit\\n\\nimport scala.a...\n", + "17 haskell scala /* sbt -- Simple Build Tool\\n * Copyright 2010...\n", + "18 scheme scheme proc isaac::mix {a b c d e f g h} {\\n set a...\n", + "19 scheme python proc twitter::follow {nick uhost hand chan arg...\n", + "20 scheme c class View\\n{\\n /**\\n * Data available ...\n", + "21 java php public function formatLocalized($format)\\n...\n", + "22 java clojure (extend-type String\\n Person\\n (first-name [...\n", + "23 scala php class Application extends App {\\n\\t/**\\n\\t * @...\n", + "24 scala ocaml type name = string\\n\\nlet compare_label label1...\n", + "25 tcl ocaml let search_compiler_libs () =\\n prerr_endline...\n", + "26 tcl clojure (require '[overtone.live :as 
overtone])\\n\\n(de...\n", + "27 php python from pkgutil import iter_modules\\nfrom subproc...\n", + "28 php clojure import re\\nimport subprocess\\n\\ndef cmd_keymap...\n", + "29 php ruby class NoSuchService(Exception):\\n def __ini...\n", + "30 ocaml python from collections import namedtuple\\nimport fun...\n", + "31 ocaml javascript function errorHandler(context) {\\n return fun...\n" + ] + } + ], + "source": [ + "# Use a generic bag of words/naive bayes classifier pipeline as a baseline\n", + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)\n", + "c = classifier.predict(X)\n", + "#print('Guesses: ', c[0:5])\n", + "test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text']))\n", + "correct = test_data[test_data.language == test_data.guess]\n", + "print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data)))\n", + "print(test_data[['language', 'guess', 'text']])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def longest_run_of_caps_feature(text):\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if len(runs) == 0:\n", + " return [0]\n", + " longest = runs[-1]\n", + " return [len(longest)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1. 
, 0.00550964]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "txt = '''\n", + "# Test python program\n", + "class MyClass:\n", + " \"\"\"MyClass is a class to do something\"\"\"\n", + " def __init__(self, name='name'):\n", + " self.name = name\n", + " def longest_run_of_caps_feature(text):\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if len(runs) == 0:\n", + " return [0]\n", + " longest = runs[-1]\n", + " return [len(longest)] \n", + "'''\n", + "featurizer = CustomFeaturizer(longest_run_of_caps_feature,\n", + " percent_periods_feature)\n", + "featurizer.transform([txt])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Feature Ideas\n", + "- Longest line\n", + "- bag of words with chars, ngrams, let\n", + "- run of )\n", + "- % _, }, :\\n, \"\"\"\n", + "- key words: def, defn, var `__x__`, module, end, ->, ::, ;, /*, //, case, final, extends, public, protected, $word, @param, self, this\n", + "- % nested dots\n", + "- () nest depth\n", + "- ignore/strip comments?\n", + "-Hyphenated or camel or underscored\n", + "-Indentation...\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/lang_classifier.py b/lang_classifier.py index 1602698..18daeb0 100644 --- a/lang_classifier.py +++ b/lang_classifier.py @@ -15,22 +15,28 @@ # use n-grams? 
# inverse_transform or otherwise make an best-case exemplar # web scraping +# need an example of tcl def make_extension_dict(): - extensions = {'C': ['gcc', 'c'], - 'C#': 'csharp', - 'Common Lisp': 'sbcl', - 'Clojure': 'clojure', - 'Haskell': 'haskell', - 'Java': 'java', - 'JavaScript': 'javascript', - 'OCaml': 'ocaml', - 'Perl': 'perl', - 'PHP': ['hack', 'php'], - 'Python': 'python3', - 'Ruby': ['jruby', 'yarv'], - 'Scala': 'scala', - 'Scheme': 'racket', + """ + Returns a dictionary for translating benchmark file extensions + to the name of the programming language + """ + extensions = {'c': ['gcc', 'c'], + 'csharp': 'csharp', + 'commonlisp': 'sbcl', + 'clojure': 'clojure', + 'haskell': 'ghc', + 'java': 'java', + 'javascript': 'javascript', + 'ocaml': 'ocaml', + 'perl': 'perl', + 'php': ['hack', 'php'], + 'python': 'python3', + 'ruby': ['jruby', 'yarv'], + 'scala': 'scala', + 'scheme': 'racket', + 'tcl': 'tcl', } ext_lookup = {} @@ -59,11 +65,11 @@ def load_bench_data(reload=False): return df df = pd.DataFrame(columns=['language', 'text']) files = glob.glob('bench/*/*') - + exts = make_extension_dict() for fn in files: try: with open(fn) as fh: - data = {'language': extract_extension(fn), + data = {'language': exts.get(extract_extension(fn), None), 'text': ''.join(fh.readlines())} if data['language'] and data['text']: df = df.append(data, ignore_index=True) @@ -141,7 +147,7 @@ def transform(self, X): if __name__ == '__main__': - df = load_bench_data() + df = load_bench_data(reload=True) X = df.text y = df.language test_data = load_test_data() @@ -167,5 +173,5 @@ def transform(self, X): print(test_data) - featurizer = CustomFeaturizer(longest_run_of_capitol_letters_feature, + featurizer = CustomFeaturizer(longest_run_of_caps_feature, percent_periods_feature) diff --git a/test_lang_classifier.py b/test_lang_classifier.py index ca2f390..c18ce73 100644 --- a/test_lang_classifier.py +++ b/test_lang_classifier.py @@ -3,7 +3,7 @@ def test_make_extension_dict(): 
ext_lookup = make_extension_dict() - assert ext_lookup['php'] == 'PHP' + assert ext_lookup['php'] == 'php' # assert ext_lookup['NONExISTNTANT!'] is None @@ -18,6 +18,21 @@ def test_load_bench_data(): assert df['language'][2] == 'clojure' +def test_bench_data_only_contains_desired_languages(): + df = load_bench_data(reload=True) + + langs = ['clojure', 'python', 'javascript', 'ruby', 'haskell', 'scheme', + 'java', 'scala', + #'tcl', # in reqs + tests, but no examples in bench + 'c', 'csharp', 'commonlisp', 'perl', # in reqs + bench, no tests + 'php', 'ocaml'] + training = df['language'].unique() + for lang in langs: + assert lang in training # We have examples for each required language + for lang in training: + assert lang in langs # We don't train for any non-required languages + + def test_load_test_data(): test_data = load_test_data() assert test_data['language'][1] == 'clojure' @@ -38,8 +53,8 @@ def test_assess_classifier(): ('bayes', MultinomialNB())]) classifier = assess_classifier(spam_pipe, *args) c = classifier.predict(X) - assert len(c) == 923 - assert c[3] == 'clojure' + assert len(c) == 584 # 923 total + assert c[3] == 'csharp' def test_longest_run_of_caps_feature(): assert longest_run_of_caps_feature( @@ -53,6 +68,9 @@ def test_featurizer(): percent_periods_feature) np.testing.assert_equal(featurizer.transform(['AAH! feature....']) , np.array([[ 3. 
, 0.25]])) + + + """ df = load_bench_data() X = df.text From 483da2b0b881fbbc7160ee301a64c3c7e435a542 Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Sat, 6 Jun 2015 21:54:44 -0400 Subject: [PATCH 07/12] Fixed issues with alignment of test data dataframe, now baseline is ~70% accurate; 6/9 tests passing --- feature_testing.ipynb | 541 +++++++++++++++++++++++++++++++++++++--- lang_classifier.py | 9 +- test.csv | 1 + test/0 | 100 ++++++++ test_lang_classifier.py | 1 + 5 files changed, 608 insertions(+), 44 deletions(-) create mode 100644 test/0 diff --git a/feature_testing.ipynb b/feature_testing.ipynb index d11d594..3ffe37d 100644 --- a/feature_testing.ipynb +++ b/feature_testing.ipynb @@ -55,46 +55,360 @@ "metadata": { "collapsed": false }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
languagetextguess
item
0perluse warnings;\\nuse strict;\\n\\nmy $initial = jo...NaN
1clojure(defn cf-settings\\n \"Setup settings for campf...NaN
2clojure(ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...NaN
3clojure(extend-type String\\n Person\\n (first-name [...NaN
4clojure(require '[overtone.live :as overtone])\\n\\n(de...NaN
5pythonfrom pkgutil import iter_modules\\nfrom subproc...NaN
6pythonimport re\\nimport subprocess\\n\\ndef cmd_keymap...NaN
7pythonclass NoSuchService(Exception):\\n def __ini...NaN
8pythonfrom collections import namedtuple\\nimport fun...NaN
9javascriptfunction errorHandler(context) {\\n return fun...NaN
10javascriptvar _ = require('lodash'),\\n fs = require('...NaN
11javascript/* Riot v2.0.8, @license MIT, (c) 2015 Muut In...NaN
12javascriptvar r = riot.route = function(arg) {\\n //...NaN
13rubymodule ActiveJob\\n module Core\\n extend Ac...NaN
14rubyrequire 'formula'\\n\\nclass A52dec < Formula\\n ...NaN
15rubymodule Fluent\\n class Input\\n include Conf...NaN
16haskell{-# LANGUAGE ScopedTypeVariables, FlexibleInst...NaN
17haskellreverseDependencies :: ModuleGraph -> M.Map Mo...NaN
18haskell{- git-annex extra config files\\n -\\n - Copyri...NaN
19scheme(define subst-f\\n (lambda (new old l)\\n (c...NaN
20scheme(define add1\\n (lambda (n) (+ n 1)))NaN
21scheme(define-lib-primitive (length lst)\\n (if (nul...NaN
22java/**\\n * Interface to represent a persistence s...NaN
23java/*\\n * Copyright 2002-2008 the original author...NaN
24scalapackage com.github.pathikrit\\n\\nimport scala.a...NaN
25scala/* sbt -- Simple Build Tool\\n * Copyright 2010...NaN
26tclproc isaac::mix {a b c d e f g h} {\\n set a...NaN
27tclproc twitter::follow {nick uhost hand chan arg...NaN
28phpclass View\\n{\\n /**\\n * Data available ...NaN
29phppublic function formatLocalized($format)\\n...NaN
30phpclass Application extends App {\\n\\t/**\\n\\t * @...NaN
31ocamltype name = string\\n\\nlet compare_label label1...NaN
32ocamllet search_compiler_libs () =\\n prerr_endline...NaN
\n", + "
" + ], + "text/plain": [ + " language text guess\n", + "item \n", + "0 perl use warnings;\\nuse strict;\\n\\nmy $initial = jo... NaN\n", + "1 clojure (defn cf-settings\\n \"Setup settings for campf... NaN\n", + "2 clojure (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... NaN\n", + "3 clojure (extend-type String\\n Person\\n (first-name [... NaN\n", + "4 clojure (require '[overtone.live :as overtone])\\n\\n(de... NaN\n", + "5 python from pkgutil import iter_modules\\nfrom subproc... NaN\n", + "6 python import re\\nimport subprocess\\n\\ndef cmd_keymap... NaN\n", + "7 python class NoSuchService(Exception):\\n def __ini... NaN\n", + "8 python from collections import namedtuple\\nimport fun... NaN\n", + "9 javascript function errorHandler(context) {\\n return fun... NaN\n", + "10 javascript var _ = require('lodash'),\\n fs = require('... NaN\n", + "11 javascript /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... NaN\n", + "12 javascript var r = riot.route = function(arg) {\\n //... NaN\n", + "13 ruby module ActiveJob\\n module Core\\n extend Ac... NaN\n", + "14 ruby require 'formula'\\n\\nclass A52dec < Formula\\n ... NaN\n", + "15 ruby module Fluent\\n class Input\\n include Conf... NaN\n", + "16 haskell {-# LANGUAGE ScopedTypeVariables, FlexibleInst... NaN\n", + "17 haskell reverseDependencies :: ModuleGraph -> M.Map Mo... NaN\n", + "18 haskell {- git-annex extra config files\\n -\\n - Copyri... NaN\n", + "19 scheme (define subst-f\\n (lambda (new old l)\\n (c... NaN\n", + "20 scheme (define add1\\n (lambda (n) (+ n 1))) NaN\n", + "21 scheme (define-lib-primitive (length lst)\\n (if (nul... NaN\n", + "22 java /**\\n * Interface to represent a persistence s... NaN\n", + "23 java /*\\n * Copyright 2002-2008 the original author... NaN\n", + "24 scala package com.github.pathikrit\\n\\nimport scala.a... NaN\n", + "25 scala /* sbt -- Simple Build Tool\\n * Copyright 2010... NaN\n", + "26 tcl proc isaac::mix {a b c d e f g h} {\\n set a... 
NaN\n", + "27 tcl proc twitter::follow {nick uhost hand chan arg... NaN\n", + "28 php class View\\n{\\n /**\\n * Data available ... NaN\n", + "29 php public function formatLocalized($format)\\n... NaN\n", + "30 php class Application extends App {\\n\\t/**\\n\\t * @... NaN\n", + "31 ocaml type name = string\\n\\nlet compare_label label1... NaN\n", + "32 ocaml let search_compiler_libs () =\\n prerr_endline... NaN" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train score: 0.991, Test score: 0.949\n", - "Proportion of test data correctly labeled: 0.094\n", - " language guess text\n", - "0 clojure clojure (defn cf-settings\\n \"Setup settings for campf...\n", - "1 clojure javascript var _ = require('lodash'),\\n fs = require('...\n", - "2 clojure clojure /* Riot v2.0.8, @license MIT, (c) 2015 Muut In...\n", - "3 clojure php var r = riot.route = function(arg) {\\n //...\n", - "4 python ruby module ActiveJob\\n module Core\\n extend Ac...\n", - "5 python clojure require 'formula'\\n\\nclass A52dec < Formula\\n ...\n", - "6 python ruby module Fluent\\n class Input\\n include Conf...\n", - "7 python haskell {-# LANGUAGE ScopedTypeVariables, FlexibleInst...\n", - "8 javascript haskell reverseDependencies :: ModuleGraph -> M.Map Mo...\n", - "9 javascript clojure {- git-annex extra config files\\n -\\n - Copyri...\n", - "10 javascript scheme (define subst-f\\n (lambda (new old l)\\n (c...\n", - "11 javascript clojure (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...\n", - "12 ruby scheme (define add1\\n (lambda (n) (+ n 1)))\n", - "13 ruby scheme (define-lib-primitive (length lst)\\n (if (nul...\n", - "14 ruby java /**\\n * Interface to represent a persistence s...\n", - "15 haskell c /*\\n * Copyright 2002-2008 the original 
author...\n", - "16 haskell scala package com.github.pathikrit\\n\\nimport scala.a...\n", - "17 haskell scala /* sbt -- Simple Build Tool\\n * Copyright 2010...\n", - "18 scheme scheme proc isaac::mix {a b c d e f g h} {\\n set a...\n", - "19 scheme python proc twitter::follow {nick uhost hand chan arg...\n", - "20 scheme c class View\\n{\\n /**\\n * Data available ...\n", - "21 java php public function formatLocalized($format)\\n...\n", - "22 java clojure (extend-type String\\n Person\\n (first-name [...\n", - "23 scala php class Application extends App {\\n\\t/**\\n\\t * @...\n", - "24 scala ocaml type name = string\\n\\nlet compare_label label1...\n", - "25 tcl ocaml let search_compiler_libs () =\\n prerr_endline...\n", - "26 tcl clojure (require '[overtone.live :as overtone])\\n\\n(de...\n", - "27 php python from pkgutil import iter_modules\\nfrom subproc...\n", - "28 php clojure import re\\nimport subprocess\\n\\ndef cmd_keymap...\n", - "29 php ruby class NoSuchService(Exception):\\n def __ini...\n", - "30 ocaml python from collections import namedtuple\\nimport fun...\n", - "31 ocaml javascript function errorHandler(context) {\\n return fun...\n" + "Proportion of test data correctly labeled: 0.697\n", + " language guess \\\n", + "item \n", + "0 perl perl \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure clojure \n", + "4 clojure clojure \n", + "5 python python \n", + "6 python clojure \n", + "7 python ruby \n", + "8 python python \n", + "9 javascript javascript \n", + "10 javascript javascript \n", + "11 javascript clojure \n", + "12 javascript php \n", + "13 ruby ruby \n", + "14 ruby clojure \n", + "15 ruby ruby \n", + "16 haskell haskell \n", + "17 haskell haskell \n", + "18 haskell clojure \n", + "19 scheme scheme \n", + "20 scheme scheme \n", + "21 scheme scheme \n", + "22 java java \n", + "23 java c \n", + "24 scala scala \n", + "25 scala scala \n", + "26 tcl scheme \n", + "27 tcl python \n", + "28 php c \n", + "29 php php \n", + "30 
php php \n", + "31 ocaml ocaml \n", + "32 ocaml ocaml \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... \n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... \n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... \n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... 
\n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" ] } ], @@ -105,15 +419,160 @@ "classifier = assess_classifier(spam_pipe, *args)\n", "c = classifier.predict(X)\n", "#print('Guesses: ', c[0:5])\n", - "test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text']))\n", - "correct = test_data[test_data.language == test_data.guess]\n", - "print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data)))\n", - "print(test_data[['language', 'guess', 'text']])" + "def assess_test_data():\n", + " test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text']))\n", + " correct = test_data[test_data.language == test_data.guess]\n", + " print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data)))\n", + " print(test_data[['language', 'guess', 'text']])\n", + "assess_test_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['zin', 'zip', 'zipwith', 'zipwithindex', 'zipwithm', 'zipwithm_', 'ziv', 'ziv1', 'ziv2', 'zizi', 'zoo', 'zotov', 'zq', 'zr', 'zr1', 'zr2', 'zri', 'zrn', 'zrv', 'zrv1', 'zrv2', 'zrzi', 'zrzr', 'zs', 'zt', 'zu', 'zubach', 'zx', 'zy', 'zz']\n", + "Train score: 0.991, Test score: 0.949\n" + ] + } + ], + "source": [ + "cv = CountVectorizer()\n", + "ft = cv.fit_transform(X)\n", + "print(cv.get_feature_names()[-30:])\n", + "spam_pipe = Pipeline([('bag_of_words', cv),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['}', ')', 'var', 'fn', 'function', 'end', 'defn', '===', 'lambda']\n", + "Train score: 0.334, Test score: 
0.427\n", + "Proportion of test data correctly labeled: 0.485\n", + " language guess \\\n", + "item \n", + "0 perl ruby \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure ruby \n", + "4 clojure clojure \n", + "5 python ruby \n", + "6 python ruby \n", + "7 python ruby \n", + "8 python python \n", + "9 javascript php \n", + "10 javascript javascript \n", + "11 javascript javascript \n", + "12 javascript javascript \n", + "13 ruby ruby \n", + "14 ruby ruby \n", + "15 ruby ruby \n", + "16 haskell ruby \n", + "17 haskell ruby \n", + "18 haskell ruby \n", + "19 scheme scheme \n", + "20 scheme scheme \n", + "21 scheme scheme \n", + "22 java ruby \n", + "23 java ruby \n", + "24 scala ruby \n", + "25 scala ruby \n", + "26 tcl ruby \n", + "27 tcl ruby \n", + "28 php php \n", + "29 php php \n", + "30 php php \n", + "31 ocaml haskell \n", + "32 ocaml ruby \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... 
\n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... \n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... \n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... \n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" + ] + } + ], + "source": [ + "cv = CountVectorizer(vocabulary=['}', ')', 'var', 'fn', 'function', 'end', 'defn',\n", + " '===', 'lambda']) #, '(define', 'elif'])\n", + "ft = cv.fit_transform(X)\n", + "print(cv.get_feature_names()[-30:]) \n", + "spam_pipe = Pipeline([('bag_of_words', cv),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)\n", + "assess_test_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vocabulary|Results\n", + "----------|-------\n", + "} | Train score: 0.122, Test score: 0.137\n", + "}, ) | Train score: 0.122, Test score: 0.137\n", + "}, ), var | Train score: 0.161, Test score: 0.179\n", + "}, ), var, fn | Train score: 0.195, Test score: 0.248\n", + "}, ), var, fn, function| Train score: 0.287, Test score: 0.308\n", + "}, ), var, fn, function, end | Train score: 0.278, Test score: 0.325\n", + "}, ), var, fn, function, end, defn | Train score: 0.302, Test score: 0.359\n", + "}, ), var, fn, function, end, defn, === | 
Train score: 0.300, Test score: 0.368\n", + "}, ), var, fn, function, end, defn, ===, lambda | Train score: 0.334, Test score: 0.427\n", + "}, ), var, fn, function, end, defn, ===, lambda | Proportion of test data correctly labeled: 0.485\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": { "collapsed": true }, @@ -138,7 +597,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -149,7 +608,7 @@ "array([[ 1. , 0.00550964]])" ] }, - "execution_count": 6, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } diff --git a/lang_classifier.py b/lang_classifier.py index 18daeb0..089af56 100644 --- a/lang_classifier.py +++ b/lang_classifier.py @@ -81,11 +81,12 @@ def load_bench_data(reload=False): def load_test_data(): test_data = pd.read_csv('./test.csv', names=['item', 'language', 'text', 'guess']) + test_data = test_data.set_index('item') test_files = glob.glob('./test/*') - for (idx, fn) in enumerate(test_files): + for filename in test_files: # try: - with open(fn) as fh: + with open(filename) as fh: # df.loc[extract_extension(fn)] = ''.join(fh.readlines()) # data = {'language': extract_extension(fn), # 'text': ''.join(fh.readlines())} @@ -94,7 +95,9 @@ def load_test_data(): # except (IsADirectoryError, UnicodeDecodeError): # pass # test_data['text'][idx] = ''.join(fh.readlines()) - test_data.ix[idx, 'text'] = ''.join(fh.readlines()) + num = re.match('.*/(?P\d+)$', filename).groupdict()['num'] + # FIXME: Do this with os module instead of regex + test_data.ix[int(num), 'text'] = ''.join(fh.readlines()) return test_data diff --git a/test.csv b/test.csv index 7d007aa..1fccfd0 100644 --- a/test.csv +++ b/test.csv @@ -1,3 +1,4 @@ +0,perl 1,clojure 2,clojure 3,clojure diff --git a/test/0 b/test/0 new file mode 100644 index 0000000..4e22c3d --- /dev/null +++ b/test/0 @@ -0,0 +1,100 @@ +use warnings; +use strict; + +my $initial = join ",", qw(abc def 
ghi); +my %reverse = qw(X O O X); + +# In list context, returns best move, +# In scalar context, returns the score of best move. +my %cache; +sub best_move { + my ($b, $me) = @_; + if( exists $cache{$b,$me,wantarray} ) { + return $cache{$b,$me,wantarray}; + } elsif( my $s = score( $b, $me ) ) { + return $cache{$b,$me,wantarray} = (wantarray ? undef : $s); + } + my $him = $reverse{$me}; + my ($best, @best) = (-999); + for my $m (moves($b)) { + (my $with_m = $b) =~ s/$m/$me/ or die; + # The || operator supplies scalar context to best_move(...) + my $s = -(score($with_m, $him) || best_move($with_m, $him)); + if( $s > $best ) { + ($best, @best) = ($s, $m); + } elsif( $s == $best ) { + push @best, $m; + } + } + $cache{$b,$me,wantarray} = wantarray ? $best[rand @best] : $best; +} + +my $winner = q[([XOxo])(?:\1\1|...\1...\1|..\1..\1|....\1....\1)]; +sub score { + my ($b, $me) = @_; + $b =~ m/$winner/o or return 0; + return $1 eq $me ? +1 : -1; +} + +sub moves { + my ($b) = @_; + $b =~ /([^xoXO,\n])/g; +} + +sub print_board { + my ($b) = @_; + $b =~ s/\B/|/g; + $b =~ s/,/\n-+-+-\n/g; + print $b, "\n"; +} + +sub prompt { + my ($b, $color) = @_; + my @moves = moves($b); + unless( @moves ) { + return; + } + while( 1 ) { + print "Place your $color on one of [@moves]: "; + defined(my $m = <>) or return; + chomp($m); + return $m if grep $m eq $_, @moves; + } +} + +my @players = ( + { whose => "your", name => "You", + verb => "You place", get_move => \&prompt }, + { whose => "the computer's", name => "Computer", + verb => "The computer places", get_move => \&best_move }, +); +my $whose_turn = int rand 2; + +my $color = "X"; +my $b = $initial; + +while( 1 ) { + my $p = $players[$whose_turn]; + print_board($b); + print "It is $p->{whose} turn.\n"; + # The parens around $m supply list context to the right side + # or the = operator, which causes sub best_move to return the + # best move, rather than the score of the best move. 
+ my ( $m ) = $p->{get_move}->($b, $color); + if( $m ) { + print "$p->{verb} an $color at $m\n"; + $b =~ s/$m/$color/; + my $s = score($b, $color) or next; + print_board($b); + print "$p->{name} ", $s > 0 ? "won!\n" : "lost!\n"; + } else { + print "$p->{name} cannot move.\n"; + } + print "Game over.\nNew Game...\n"; + ($b, $color, $whose_turn) = ($initial, "X", int rand 2); + redo; +} continue { + $color = $reverse{$color}; + $whose_turn = !$whose_turn; +} + diff --git a/test_lang_classifier.py b/test_lang_classifier.py index c18ce73..1ec0268 100644 --- a/test_lang_classifier.py +++ b/test_lang_classifier.py @@ -36,6 +36,7 @@ def test_bench_data_only_contains_desired_languages(): def test_load_test_data(): test_data = load_test_data() assert test_data['language'][1] == 'clojure' + assert test_data['text'][2] == 'abcdefg' def setup(): df = load_bench_data() From 6607e3fc154d4e66d34c2299e211cfaff83bea89 Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Sat, 6 Jun 2015 22:33:16 -0400 Subject: [PATCH 08/12] Update tests; 9/9 tests passing --- test_lang_classifier.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_lang_classifier.py b/test_lang_classifier.py index 1ec0268..65c71ab 100644 --- a/test_lang_classifier.py +++ b/test_lang_classifier.py @@ -23,7 +23,7 @@ def test_bench_data_only_contains_desired_languages(): langs = ['clojure', 'python', 'javascript', 'ruby', 'haskell', 'scheme', 'java', 'scala', - #'tcl', # in reqs + tests, but no examples in bench + 'tcl', # in reqs + tests, but no examples in bench 'c', 'csharp', 'commonlisp', 'perl', # in reqs + bench, no tests 'php', 'ocaml'] training = df['language'].unique() @@ -36,7 +36,7 @@ def test_bench_data_only_contains_desired_languages(): def test_load_test_data(): test_data = load_test_data() assert test_data['language'][1] == 'clojure' - assert test_data['text'][2] == 'abcdefg' + assert test_data['text'][2][:16] == '(ns my-cli.core)' def setup(): df = load_bench_data() @@ -54,7 
+54,7 @@ def test_assess_classifier(): ('bayes', MultinomialNB())]) classifier = assess_classifier(spam_pipe, *args) c = classifier.predict(X) - assert len(c) == 584 # 923 total + assert len(c) == 585 # 923 total assert c[3] == 'csharp' def test_longest_run_of_caps_feature(): From b4a3d40e33f5f374d61037b42ea913f6571bc4eb Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Sun, 7 Jun 2015 07:40:39 -0400 Subject: [PATCH 09/12] Add first custom features to ipython notebook to build up feature vector --- feature_testing.ipynb | 255 +++++++++++++++++++++++++++++++++------- lang_classifier.py | 72 ++++++++---- test_lang_classifier.py | 11 +- 3 files changed, 269 insertions(+), 69 deletions(-) diff --git a/feature_testing.ipynb b/feature_testing.ipynb index 3ffe37d..68a938e 100644 --- a/feature_testing.ipynb +++ b/feature_testing.ipynb @@ -15,7 +15,8 @@ }, "outputs": [], "source": [ - "from lang_classifier import *" + "from lang_classifier import *\n", + "from sklearn.pipeline import make_pipeline, make_union" ] }, { @@ -42,13 +43,20 @@ "cell_type": "code", "execution_count": 3, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], "source": [ "df, X, y, test_data, args = setup() # Load and split the train/test data" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "####Inspect the test_data dataframe to make sure language and text are properly aligned" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -325,9 +333,31 @@ "test_data" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use a generic bag of words/naive bayes classifier pipeline as a baseline" + ] + }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def assess_test_data(pipe):\n", + " test_data['guess'] = pd.DataFrame(pipe.predict(test_data['text']))\n", + " correct = test_data[test_data.language == test_data.guess]\n", + " print('Proportion of 
test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data)))\n", + " print(test_data[['language', 'guess', 'text']])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": { "collapsed": false }, @@ -336,8 +366,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Train score: 0.991, Test score: 0.949\n", - "Proportion of test data correctly labeled: 0.697\n", + "Train score: 0.989, Test score: 0.949\n", + "Proportion of test data correctly labeled: 0.727\n", " language guess \\\n", "item \n", "0 perl perl \n", @@ -366,9 +396,9 @@ "23 java c \n", "24 scala scala \n", "25 scala scala \n", - "26 tcl scheme \n", + "26 tcl tcl \n", "27 tcl python \n", - "28 php c \n", + "28 php clojure \n", "29 php php \n", "30 php php \n", "31 ocaml ocaml \n", @@ -413,23 +443,24 @@ } ], "source": [ - "# Use a generic bag of words/naive bayes classifier pipeline as a baseline\n", "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", " ('bayes', MultinomialNB())])\n", "classifier = assess_classifier(spam_pipe, *args)\n", "c = classifier.predict(X)\n", "#print('Guesses: ', c[0:5])\n", - "def assess_test_data():\n", - " test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text']))\n", - " correct = test_data[test_data.language == test_data.guess]\n", - " print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data)))\n", - " print(test_data[['language', 'guess', 'text']])\n", - "assess_test_data()" + "assess_test_data(spam_pipe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Inspect the baseline features" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": { "collapsed": false }, @@ -439,7 +470,7 @@ "output_type": "stream", "text": [ "['zin', 'zip', 'zipwith', 'zipwithindex', 'zipwithm', 'zipwithm_', 'ziv', 'ziv1', 'ziv2', 'zizi', 'zoo', 'zotov', 'zq', 'zr', 'zr1', 'zr2', 'zri', 'zrn', 'zrv', 'zrv1', 'zrv2', 'zrzi', 'zrzr', 
'zs', 'zt', 'zu', 'zubach', 'zx', 'zy', 'zz']\n", - "Train score: 0.991, Test score: 0.949\n" + "Train score: 0.989, Test score: 0.949\n" ] } ], @@ -452,9 +483,16 @@ "classifier = assess_classifier(spam_pipe, *args)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Now try making a limited custom vocabulary to discriminate between languages" + ] + }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": { "collapsed": false, "scrolled": true @@ -465,7 +503,7 @@ "output_type": "stream", "text": [ "['}', ')', 'var', 'fn', 'function', 'end', 'defn', '===', 'lambda']\n", - "Train score: 0.334, Test score: 0.427\n", + "Train score: 0.329, Test score: 0.436\n", "Proportion of test data correctly labeled: 0.485\n", " language guess \\\n", "item \n", @@ -549,7 +587,7 @@ "spam_pipe = Pipeline([('bag_of_words', cv),\n", " ('bayes', MultinomialNB())])\n", "classifier = assess_classifier(spam_pipe, *args)\n", - "assess_test_data()" + "assess_test_data(spam_pipe)" ] }, { @@ -570,30 +608,76 @@ "}, ), var, fn, function, end, defn, ===, lambda | Proportion of test data correctly labeled: 0.485\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above didn't work nearly as well as the automatically-generated vocabulary with thousands of words. Adding new words quickly yielded diminishing returns. Since we want to get above 80% accuracy, it looks like we will need to add our own features." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Define new features" + ] + }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def longest_run_of_caps_feature(text):\n", + " \"\"\"Find the longest run of capitol letters and return their length.\"\"\"\n", " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", - " if len(runs) == 0:\n", - " return [0]\n", - " longest = runs[-1]\n", - " return [len(longest)]" + " if runs:\n", + " return len(runs[-1])\n", + " else:\n", + " return 0" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], - "source": [] + "source": [ + "def percent_character_feature(char):\n", + " \"\"\"Return percentage of text that is a particular char compared to total text length.\"\"\"\n", + " def feature_fn(text):\n", + " periods = text.count(char)\n", + " return periods / len(text)\n", + " return feature_fn\n", + "\n", + "# def percent_characters_feature(char_list):\n", + "# \"\"\"\n", + "# Return percentage of text for each char/substring in char_list,\n", + "# compared to total text length.\n", + "# \"\"\"\n", + "# def feature_fn(text):\n", + "# hits = []\n", + "# for char in char_list:\n", + "# hits.append(text.count(char) / len(text))\n", + "# return hits\n", + "# return feature_fn\n", + "\n", + "def longest_line_feature(text):\n", + " lines = re.findall('.*?\\n', text)\n", + " lens = [len(line) for line in lines]\n", + " return max(lens) # Includes newline character" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Look at the new features on a simple python program" + ] }, { "cell_type": "code", @@ -603,14 +687,15 @@ }, "outputs": [ { - "data": { - "text/plain": [ - "array([[ 1. 
, 0.00550964]])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'percent_characters_feature' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m '''\n\u001b[1;32m 15\u001b[0m featurizer = FunctionFeaturizer(longest_run_of_caps_feature,\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mpercent_characters_feature\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'.'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m')'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'=>'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m longest_line_feature)\n\u001b[1;32m 18\u001b[0m \u001b[0mfeaturizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtxt\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'percent_characters_feature' is not defined" + ] } ], "source": [ @@ -618,20 +703,101 @@ "# Test python program\n", "class MyClass:\n", " \"\"\"MyClass is a class to do something\"\"\"\n", - " def __init__(self, name='name'):\n", + " def __init__(self, name='name'):i\n", " self.name = name\n", " def longest_run_of_caps_feature(text):\n", " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", " if len(runs) == 0:\n", - " return [0]\n", + " return 0\n", " longest = runs[-1]\n", - " return [len(longest)] \n", + " return len(longest)\n", + "\n", "'''\n", - "featurizer = CustomFeaturizer(longest_run_of_caps_feature,\n", - " percent_periods_feature)\n", + "featurizer = FunctionFeaturizer(longest_run_of_caps_feature,\n", + " 
percent_character_feature(['.',')','=>]),\n", + " longest_line_feature)\n", "featurizer.transform([txt])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Test the pipeline on the actual test data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# pipe = Pipeline([('fnfeaturizer', featurizer),\n", + "# ('bayes', MultinomialNB())])\n", + "pipe = make_pipeline(featurizer, MultinomialNB())\n", + "#pd.DataFrame(args[0]).index.values/pd.DataFrame(args[2]).index.values #, args[2])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.150, Test score: 0.103\n", + "Proportion of test data correctly labeled: 0.030\n", + " language guess text\n", + "item \n", + "0 perl scheme use warnings;\\nuse strict;\\n\\nmy $initial = jo...\n", + "1 clojure tcl (defn cf-settings\\n \"Setup settings for campf...\n", + "2 clojure ruby (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...\n", + "3 clojure scheme (extend-type String\\n Person\\n (first-name [...\n", + "4 clojure scheme (require '[overtone.live :as overtone])\\n\\n(de...\n", + "5 python scheme from pkgutil import iter_modules\\nfrom subproc...\n", + "6 python java import re\\nimport subprocess\\n\\ndef cmd_keymap...\n", + "7 python scheme class NoSuchService(Exception):\\n def __ini...\n", + "8 python ruby from collections import namedtuple\\nimport fun...\n", + "9 javascript java function errorHandler(context) {\\n return fun...\n", + "10 javascript scheme var _ = require('lodash'),\\n fs = require('...\n", + "11 javascript scheme /* Riot v2.0.8, @license MIT, (c) 2015 Muut In...\n", + "12 javascript java var r = riot.route = function(arg) {\\n //...\n", + "13 ruby scheme module ActiveJob\\n module Core\\n extend Ac...\n", + "14 ruby scheme require 'formula'\\n\\nclass 
A52dec < Formula\\n ...\n", + "15 ruby scheme module Fluent\\n class Input\\n include Conf...\n", + "16 haskell c {-# LANGUAGE ScopedTypeVariables, FlexibleInst...\n", + "17 haskell scheme reverseDependencies :: ModuleGraph -> M.Map Mo...\n", + "18 haskell scheme {- git-annex extra config files\\n -\\n - Copyri...\n", + "19 scheme scheme (define subst-f\\n (lambda (new old l)\\n (c...\n", + "20 scheme ruby (define add1\\n (lambda (n) (+ n 1)))\n", + "21 scheme java (define-lib-primitive (length lst)\\n (if (nul...\n", + "22 java scheme /**\\n * Interface to represent a persistence s...\n", + "23 java ruby /*\\n * Copyright 2002-2008 the original author...\n", + "24 scala tcl package com.github.pathikrit\\n\\nimport scala.a...\n", + "25 scala scheme /* sbt -- Simple Build Tool\\n * Copyright 2010...\n", + "26 tcl scheme proc isaac::mix {a b c d e f g h} {\\n set a...\n", + "27 tcl ruby proc twitter::follow {nick uhost hand chan arg...\n", + "28 php scheme class View\\n{\\n /**\\n * Data available ...\n", + "29 php scheme public function formatLocalized($format)\\n...\n", + "30 php scheme class Application extends App {\\n\\t/**\\n\\t * @...\n", + "31 ocaml scheme type name = string\\n\\nlet compare_label label1...\n", + "32 ocaml scheme let search_compiler_libs () =\\n prerr_endline...\n" + ] + } + ], + "source": [ + "pipe.fit(args[0], args[2]) # X_train, y_train\n", + "pipe.score(args[1], args[3])\n", + "classifier = assess_classifier(pipe, *args)\n", + "assess_test_data(pipe)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -650,6 +816,15 @@ "\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/lang_classifier.py b/lang_classifier.py index 089af56..0802789 100644 --- a/lang_classifier.py +++ b/lang_classifier.py @@ -6,10 +6,12 @@ from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import 
Pipeline from sklearn.ensemble import RandomForestClassifier +from sklearn.base import TransformerMixin import pandas as pd import numpy as np import pickle import os.path +import collections # TODO: Future Ideas: # use n-grams? @@ -56,6 +58,14 @@ def extract_extension(string): return match.groupdict()['ext'] +def unpickle(name, reload=False): + if os.path.isfile(name) and not reload: + df = pickle.load(open("bench.data", "rb")) + return df + else: + return None + + def load_bench_data(reload=False): df = unpickle('bench.data', reload=reload) # if os.path.isfile("bench.data") and not reload: @@ -80,7 +90,8 @@ def load_bench_data(reload=False): def load_test_data(): - test_data = pd.read_csv('./test.csv', names=['item', 'language', 'text', 'guess']) + test_data = pd.read_csv('./test.csv', + names=['item', 'language', 'text', 'guess']) test_data = test_data.set_index('item') test_files = glob.glob('./test/*') @@ -102,36 +113,36 @@ def load_test_data(): def assess_classifier(pipe, *split_args): + # print(split_args[0])#, len(split_args[2])) pipe.fit(split_args[0], split_args[2]) train_score = pipe.score(split_args[0], split_args[2]) test_score = pipe.score(split_args[1], split_args[3]) - print('Train score: {:.3f}, Test score: {:.3f}'.format(train_score, test_score)) + print('Train score: {:.3f}, Test score: {:.3f}'.format(train_score, + test_score)) return pipe def longest_run_of_caps_feature(text): + """Find the longest run of capitol letters and return their length.""" runs = sorted(re.findall(r"[A-Z]+", text), key=len) - if len(runs) == 0: - return [0] - longest = runs[-1] - return [len(longest)] + if runs: + return len(runs[-1]) + else: + return 0 -def percent_periods_feature(text): - """Return percentage of text that is periods compared to total text length.""" - periods = text.count(".") - return [periods / len(text)] +def percent_character_feature(char): + """Return percentage of text that is a particular char compared to total text length.""" + def 
feature_fn(text): + periods = text.count(char) + return periods / len(text) -def unpickle(name, reload=False): - if os.path.isfile(name) and not reload: - df = pickle.load(open("bench.data", "rb")) - return df - else: - return None + return feature_fn -class CustomFeaturizer: + +class FunctionFeaturizer(TransformerMixin): def __init__(self, *featurizers): self.featurizers = featurizers @@ -140,12 +151,24 @@ def fit(self, X, y=None): same interface. `fit` always returns the same object.""" return self + def flatten(self, x): + if isinstance(x, collections.Iterable): + return [a for i in x for a in self.flatten(i)] + else: + return [x] + def transform(self, X): """Given a list of original data, return a list of feature vectors.""" fvs = [] for datum in X: - fv = np.array([f(datum) for f in self.featurizers]) - fvs.append(fv.reshape(1, -1)[0]) + fv = [f(datum) for f in self.featurizers] + # if type(fv) is type([1, 2, 3]): # FIXME: Is there a cleaner way? + # fvs.extend(fv) + # else: + # fvs.append(fv) + # fvs = self.flatten(fvs) # fvs = [item for sublist in fvs for item in sublist] + # print('fvs ==> ', fvs) + fvs.append(fv) return np.array(fvs) @@ -155,7 +178,8 @@ def transform(self, X): y = df.language test_data = load_test_data() - args = train_test_split(X, y, test_size=0.2, ) # random_state=0) # X_train, X_test, y_train, y_test + args = train_test_split(X, y, + test_size=0.2, ) # random_state=0) # X_train, X_test, y_train, y_test spam_pipe = Pipeline([('bag_of_words', CountVectorizer()), ('bayes', MultinomialNB())]) @@ -172,9 +196,9 @@ def transform(self, X): test_data['guess'] = pd.DataFrame(spam_pipe.predict(test_data['text'])) correct = test_data[test_data.language == test_data.guess] - print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data))) + print('Proportion of test data correctly labeled: {:.3f}'.format( + len(correct) / len(test_data))) print(test_data) - - featurizer = CustomFeaturizer(longest_run_of_caps_feature, 
- percent_periods_feature) + featurizer = FunctionFeaturizer(longest_run_of_caps_feature, + percent_character_feature('.')) diff --git a/test_lang_classifier.py b/test_lang_classifier.py index 65c71ab..9c6b4f0 100644 --- a/test_lang_classifier.py +++ b/test_lang_classifier.py @@ -59,16 +59,17 @@ def test_assess_classifier(): def test_longest_run_of_caps_feature(): assert longest_run_of_caps_feature( - 'ABCabddwAAAA absd AB sd A.AA.AAA') == [4] + 'ABCabddwAAAA absd AB sd A.AA.AAA') == 4 def test_percent_periods_feature(): - assert percent_periods_feature('. . . . ') == [0.5] + assert percent_character_feature('.')('. . . . ') == 0.5 def test_featurizer(): - featurizer = CustomFeaturizer(longest_run_of_caps_feature, - percent_periods_feature) + featurizer = FunctionFeaturizer(longest_run_of_caps_feature, + percent_character_feature('.')) np.testing.assert_equal(featurizer.transform(['AAH! feature....']) - , np.array([[ 3. , 0.25]])) + # , np.array([[ 3. , 0.25]])) + , np.array([ 3. , 0.25])) From d1c963c4c37a189dc19b883ecfcdc7975aa7d418 Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Sun, 7 Jun 2015 15:00:19 -0400 Subject: [PATCH 10/12] Add notebook testing features - 89% of tests identified correctly --- feature_testing.ipynb | 469 +++++++++++++++++++++++++++++++++++------- 1 file changed, 394 insertions(+), 75 deletions(-) diff --git a/feature_testing.ipynb b/feature_testing.ipynb index 68a938e..d13bfb9 100644 --- a/feature_testing.ipynb +++ b/feature_testing.ipynb @@ -16,7 +16,8 @@ "outputs": [], "source": [ "from lang_classifier import *\n", - "from sklearn.pipeline import make_pipeline, make_union" + "from sklearn.pipeline import make_pipeline, make_union\n", + "from sklearn.metrics import classification_report, confusion_matrix" ] }, { @@ -47,7 +48,8 @@ }, "outputs": [], "source": [ - "df, X, y, test_data, args = setup() # Load and split the train/test data" + "df, X, y, test_data, args = setup() # Load and split the train/test data\n", + "X_train, X_test, 
y_train, y_test = args" ] }, { @@ -641,7 +643,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 73, "metadata": { "collapsed": false }, @@ -650,8 +652,8 @@ "def percent_character_feature(char):\n", " \"\"\"Return percentage of text that is a particular char compared to total text length.\"\"\"\n", " def feature_fn(text):\n", - " periods = text.count(char)\n", - " return periods / len(text)\n", + " chars = text.count(char)\n", + " return chars / len(text)\n", " return feature_fn\n", "\n", "# def percent_characters_feature(char_list):\n", @@ -666,10 +668,102 @@ "# return hits\n", "# return feature_fn\n", "\n", + "def count_word_feature(word):\n", + " \"\"\"Return percentage of text that is a particular char compared to total text length.\"\"\"\n", + " def feature_fn(text):\n", + " num_words = text.count(word)\n", + " return num_words\n", + " return feature_fn\n", + "\n", "def longest_line_feature(text):\n", - " lines = re.findall('.*?\\n', text)\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", " lens = [len(line) for line in lines]\n", - " return max(lens) # Includes newline character" + " return max(lens) # Includes newline character\n", + "\n", + "def longest_run_of_parens(text):\n", + " matches = re.findall(r'\\)*', text)\n", + " if not matches:\n", + " return 0\n", + " lens = [len(match) for match in matches]\n", + " return max(lens)\n", + "\n", + "def nested_dots(text):\n", + " matches = re.findall(r'\\.([^\\s]*\\.)*', text)\n", + " if not matches:\n", + " return 0\n", + " lens = [match.count('.') for match in matches]\n", + " return max(lens)\n", + "\n", + "def max_paren_depth(text):\n", + " max_depth = 0\n", + " depth = 0\n", + " for char in text:\n", + " if char == '(':\n", + " depth += 1\n", + " max_depth = depth if depth > max_depth else max_depth\n", + " if char == ')':\n", + " depth -= 1\n", + " return max_depth\n", + "\n", + "def max_curly_brace_depth(text):\n", + " max_depth = 
0\n", + " depth = 0\n", + " for char in text:\n", + " if char == '{':\n", + " depth += 1\n", + " max_depth = depth if depth > max_depth else max_depth\n", + " if char == '}':\n", + " depth -= 1\n", + " return max_depth\n", + "\n", + "def percent_words_match_regex(regex):\n", + " \"\"\"Return percentage of text that is a matches regex compared to total number words\"\"\"\n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " #words = re.findall(r'[^[\\s]]+\\b', text)\n", + " words = text.split()\n", + " for word in words:\n", + " if re.search(regex, word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches / len(words)\n", + " return feature_fn\n", + "\n", + "def count_endings_feature(regex): \n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", + "\n", + " words = text.split()\n", + " for word in words:\n", + " if re.search(regex + r'\\w*$', word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches #/ len(words)\n", + " return feature_fn\n", + "\n", + "def count_beginnings_feature(regex): \n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", + "\n", + " words = text.split()\n", + " for word in words:\n", + " if re.search(r'\\w*' + regex, word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches #/ len(words)\n", + " return feature_fn\n", + "\n" ] }, { @@ -679,23 +773,44 @@ "###Look at the new features on a simple python program" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Feature Ideas\n", + "- +Longest line\n", + "- bag of words with chars, ngrams, let\n", + "- +run of )\n", + "- % _, }, :\\n, \"\"\"\n", + "- key words: def, defn, var `__x__`, module, end, ->, ::, ;, /*, //, case, final, extends, public, 
protected, $word, @param, self, this\n", + "- + depth nested dots (or % nested dots)\n", + "- +() nest depth\n", + "- ignore/strip comments?\n", + "-Hyphenated or camel or underscored\n", + "-Indentation...\n", + "\n" + ] + }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 137, "metadata": { "collapsed": false }, "outputs": [ { - "ename": "NameError", - "evalue": "name 'percent_characters_feature' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m '''\n\u001b[1;32m 15\u001b[0m featurizer = FunctionFeaturizer(longest_run_of_caps_feature,\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mpercent_characters_feature\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'.'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m')'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'=>'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m longest_line_feature)\n\u001b[1;32m 18\u001b[0m \u001b[0mfeaturizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtxt\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'percent_characters_feature' is not defined" - ] + "data": { + "text/plain": [ + "array([[ 2.00e+00, 1.00e+00, 8.00e+00, 0.00e+00, 0.00e+00,\n", + " 0.00e+00, 0.00e+00, 0.00e+00, 0.00e+00, 0.00e+00,\n", + " 0.00e+00, 0.00e+00, 0.00e+00, 3.00e+00, 0.00e+00,\n", + " 0.00e+00, 0.00e+00, 0.00e+00, 0.00e+00, 1.00e+00,\n", + " 4.35e-03]])" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -703,7 +818,7 @@ "# Test python 
program\n", "class MyClass:\n", " \"\"\"MyClass is a class to do something\"\"\"\n", - " def __init__(self, name='name'):i\n", + " def __init__(self, name='name'):\n", " self.name = name\n", " def longest_run_of_caps_feature(text):\n", " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", @@ -711,14 +826,84 @@ " return 0\n", " longest = runs[-1]\n", " return len(longest)\n", + " print('{}'.format(self.name))\n", + " $thing \n", + " @thing\n", + " :thing\n", + " end\n", + " end\n", + "end\n", "\n", "'''\n", - "featurizer = FunctionFeaturizer(longest_run_of_caps_feature,\n", - " percent_character_feature(['.',')','=>]),\n", - " longest_line_feature)\n", + "featurizer = FunctionFeaturizer(\n", + "# longest_run_of_caps_feature,\n", + "# percent_character_feature('.'),\n", + "# longest_line_feature,\n", + "# longest_run_of_parens,\n", + "# nested_dots,\n", + " max_paren_depth,\n", + "# percent_words_match_regex(r'\\$\\w'),\n", + "# percent_words_match_regex(r'\\@\\w'),\n", + "# percent_words_match_regex(r':\\w'),\n", + "# percent_character_feature(r';'),\n", + " count_word_feature('}'),\n", + " count_word_feature(')'),\n", + " count_word_feature('var'),\n", + " count_word_feature('fn'),\n", + " count_word_feature('function'),\n", + "# count_word_feature('end'),\n", + " count_word_feature('defn'),\n", + " count_word_feature('==='),\n", + "# count_word_feature('lambda'),\n", + " count_word_feature(';'),\n", + " count_word_feature('public'),\n", + "# count_word_feature('val'),\n", + "# count_word_feature('=>'),\n", + " count_word_feature('set'),\n", + " count_word_feature('extends'),\n", + " count_word_feature('module'),\n", + " count_endings_feature(r'end'),\n", + " count_beginnings_feature(r'let'),\n", + " count_word_feature('->'), #.758\n", + " count_beginnings_feature(r'\\(define'), #.818 got scheme\n", + " count_beginnings_feature(r'\\{-'), #.848 less haskell\n", + " count_word_feature('object'), #\n", + " max_curly_brace_depth, #.879 got javascript\n", + "# 
count_beginnings_feature(r'from'), #added ^ to start of regex, numbers dropped, so removed\n", + " percent_character_feature(']'),\n", + "# count_word_feature('.'),\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " )\n", "featurizer.transform([txt])" ] }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "count_endings_feature('end')(txt)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -728,7 +913,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 139, "metadata": { "collapsed": false }, @@ -742,7 +927,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 140, "metadata": { "collapsed": false }, @@ -751,43 +936,79 @@ "name": "stdout", "output_type": "stream", "text": [ - "Train score: 0.150, Test score: 0.103\n", - "Proportion of test data correctly labeled: 0.030\n", - " language guess text\n", - "item \n", - "0 perl scheme use warnings;\\nuse strict;\\n\\nmy $initial = jo...\n", - "1 clojure tcl (defn cf-settings\\n \"Setup settings for campf...\n", - "2 clojure ruby (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...\n", - "3 clojure scheme (extend-type String\\n Person\\n (first-name [...\n", - "4 clojure scheme (require '[overtone.live :as overtone])\\n\\n(de...\n", - "5 python scheme from pkgutil import iter_modules\\nfrom subproc...\n", - "6 python java import re\\nimport subprocess\\n\\ndef cmd_keymap...\n", - "7 python scheme class NoSuchService(Exception):\\n def __ini...\n", - "8 python ruby from collections import namedtuple\\nimport fun...\n", - "9 javascript java function errorHandler(context) {\\n return fun...\n", - "10 javascript scheme var _ = require('lodash'),\\n fs = require('...\n", - "11 javascript scheme /* Riot v2.0.8, @license MIT, (c) 2015 Muut In...\n", - "12 
javascript java var r = riot.route = function(arg) {\\n //...\n", - "13 ruby scheme module ActiveJob\\n module Core\\n extend Ac...\n", - "14 ruby scheme require 'formula'\\n\\nclass A52dec < Formula\\n ...\n", - "15 ruby scheme module Fluent\\n class Input\\n include Conf...\n", - "16 haskell c {-# LANGUAGE ScopedTypeVariables, FlexibleInst...\n", - "17 haskell scheme reverseDependencies :: ModuleGraph -> M.Map Mo...\n", - "18 haskell scheme {- git-annex extra config files\\n -\\n - Copyri...\n", - "19 scheme scheme (define subst-f\\n (lambda (new old l)\\n (c...\n", - "20 scheme ruby (define add1\\n (lambda (n) (+ n 1)))\n", - "21 scheme java (define-lib-primitive (length lst)\\n (if (nul...\n", - "22 java scheme /**\\n * Interface to represent a persistence s...\n", - "23 java ruby /*\\n * Copyright 2002-2008 the original author...\n", - "24 scala tcl package com.github.pathikrit\\n\\nimport scala.a...\n", - "25 scala scheme /* sbt -- Simple Build Tool\\n * Copyright 2010...\n", - "26 tcl scheme proc isaac::mix {a b c d e f g h} {\\n set a...\n", - "27 tcl ruby proc twitter::follow {nick uhost hand chan arg...\n", - "28 php scheme class View\\n{\\n /**\\n * Data available ...\n", - "29 php scheme public function formatLocalized($format)\\n...\n", - "30 php scheme class Application extends App {\\n\\t/**\\n\\t * @...\n", - "31 ocaml scheme type name = string\\n\\nlet compare_label label1...\n", - "32 ocaml scheme let search_compiler_libs () =\\n prerr_endline...\n" + "Train score: 0.786, Test score: 0.803\n", + "Proportion of test data correctly labeled: 0.879\n", + " language guess \\\n", + "item \n", + "0 perl perl \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure python \n", + "4 clojure clojure \n", + "5 python ruby \n", + "6 python python \n", + "7 python python \n", + "8 python python \n", + "9 javascript javascript \n", + "10 javascript javascript \n", + "11 javascript javascript \n", + "12 javascript javascript \n", + "13 ruby ruby 
\n", + "14 ruby ruby \n", + "15 ruby ruby \n", + "16 haskell haskell \n", + "17 haskell haskell \n", + "18 haskell haskell \n", + "19 scheme scheme \n", + "20 scheme scheme \n", + "21 scheme scheme \n", + "22 java tcl \n", + "23 java java \n", + "24 scala scala \n", + "25 scala scala \n", + "26 tcl tcl \n", + "27 tcl tcl \n", + "28 php php \n", + "29 php php \n", + "30 php php \n", + "31 ocaml haskell \n", + "32 ocaml ocaml \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... \n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... 
\n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... \n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" ] } ], @@ -799,28 +1020,126 @@ ] }, { - "cell_type": "markdown", - "metadata": {}, + "cell_type": "code", + "execution_count": 128, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ - "##Feature Ideas\n", - "- Longest line\n", - "- bag of words with chars, ngrams, let\n", - "- run of )\n", - "- % _, }, :\\n, \"\"\"\n", - "- key words: def, defn, var `__x__`, module, end, ->, ::, ;, /*, //, case, final, extends, public, protected, $word, @param, self, this\n", - "- % nested dots\n", - "- () nest depth\n", - "- ignore/strip comments?\n", - "-Hyphenated or camel or underscored\n", - "-Indentation...\n", - "\n" + "# print(confusion_matrix(classifier.predict(args[1]), args[3]))" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion matrix, without normalization\n", + "[[ 3 0 0 1 0 1 1 0 3 0 0 0 0 0 0]\n", + " [ 0 11 0 0 0 0 0 0 0 0 1 0 0 0 0]\n", + " [ 0 1 7 0 0 0 0 0 0 0 0 0 0 0 0]\n", + " [ 0 0 0 5 0 2 0 0 0 0 0 0 0 0 0]\n", + " [ 0 0 0 0 6 0 0 0 0 0 1 0 0 0 0]\n", + " [ 0 0 0 1 0 5 0 0 1 0 0 0 0 0 0]\n", + " [ 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]\n", + " [ 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0]\n", + " [ 1 0 0 0 0 0 0 0 4 0 0 0 0 0 0]\n", + " [ 1 0 0 2 0 1 0 0 1 9 0 0 0 0 0]\n", + " [ 0 0 1 0 0 0 0 0 0 0 5 0 1 0 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 0 15 0 0 0]\n", + " [ 0 0 0 0 1 0 0 0 0 0 0 0 12 0 0]\n", + " [ 0 0 0 0 0 0 0 
0 0 0 0 0 0 6 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAewAAAG4CAYAAACHNdSBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xe8XVWd/vHPEwgSSigiSiiGQcCh904SLPwQEMaCWEYE\nR8cBGWdsoyMMhKaoYwNRGZEqFlAcFUVAhySAQCghCSJNQWnSOwFC8vz+2Psmh8Pt5+x77r7nefM6\nr+yy9nevc+7lfs9ae+21ZZuIiIgY3cZ1ugIRERExsCTsiIiIGkjCjoiIqIEk7IiIiBpIwo6IiKiB\nJOyIiIgaSMKO6DBJEyT9UtLjkn7cQpz3Sbq4nXXrFEm7S7ql0/WIGE2U+7AjBkfSe4FPABsDTwE3\nAifYvrLFuO8HDgd2tr245YqOcpIWA6+z/edO1yWiTtLCjhgESZ8AvgYcD6wJrAucAuzXhvCvBW7r\nhmTdQH3ukJYdyYpE1EUSdsQAJK0CHAMcZvt/bS+wvcj2r2x/pizzCklfl3Rv+fqapOXKfdMk3SPp\nE5IekHSfpIPLfccA/wUcKOkpSR+UNF3SOQ3nnyxpsaRx5frBkv4k6UlJfy5b/j3bL284bhdJ15Zd\n7bMl7dywb4akYyVdUca5WNIr+3j/PfX/tKQHy/r/g6S9Jd0m6RFJn20ov4OkqyQ9VpY9WdL4ct+s\nstjc8v0e0BD/PyTdD3yv3HZ3ecwG5Tm2LtcnSXpI0pSWfrARNZOEHTGwnYHlgZ/1U+YIYAdgy/K1\nA3Bkw/5XAxOBScA/AadIWsX20cDngR/ZXtn26UCf16kkrQh8A9jL9sSybjf2Um514FfA14HVga8C\nv5K0WkOx9wAHU/QYLAd8qp/392rgFcBawFHAacD7gK2B3YGjJL22LPsi8G/AK8v6vRE4DMB2T5Ld\nony/5zfEXw1YD/hI44lt/wn4DPB9SROAM4AzbM8iooskYUcM7JXAwwN0Wb8XONb2w7YfpmiRv79h\n/8Jy/yLbFwFPU1wLh6J7uLGLuM/u4tJiYHNJE2w/YPvmXsrsA9xq+1zbi23/CLiFpV34pkh6d9h+\nDjgP2Kqfcy6kuF6/CPgxxZeAr9t+pjz/zT3H277B9uzyvH8B/geYOoj3dLTthWV9XsL2acAdwGyK\n5H7EAPEixpwk7IiBPQKs0dMl3YdJwF8a1v9ablsSoynhPwusNNSK2H4GOBD4F+A+SRdK2riXopPK\nOjT6S1Od/tawvGCA+jzipSNUF5T/PtB0/IoAkjYq63W/pCeAEyi+9PTnIdsvDFDmNGBT4GTbCwco\nGzHmJGFHDOwq4Hngbf2UuQ+Y3LC+XrltOJ4GVmhYf03jTtuX2N6z3H4L8N1eYtxLMZit0WvL7VX7\nNkWL+3W2V6FoDQ/0t6bf21UkrUTRvX8acExT135EV0jCjhiA7ScortueIml/SStIGi/pLZK+WBb7\nIXCkpDUkrVGWP6evmAO4EZgiad1ywNt/9uyQtGZZhxUpuqmfARb1EuMiYCNJ75G0rKQDgdcDFzaU\nGajrfbhWorjt7VlJrwcObdr/ALDBEGN+A5ht+58prs1/p+VaRtRMEnbEINj+KsU92EcCD1J0Nx/G\n0oFoxwPXAfPK13XltiUh+gvfuN/2bymuE88DrgV+2bB/HPBxipbyIxQDvg5tjmP7EWBf4JPAwxQD\nyva1/WgfdTID17G/9Uaforim/yTF9esfNZWfDpxVjiJ/Zz/nNoCk/YE9Wfo+PwFsI+k9/dQhYszJ\n
xCkRERHDJOl0ikGeD9revNw2HfgQ8FBZ7D9t/6aXY/eiuNSzDHCa7S82l3lJ+STsiIiI4ZG0O8W4\nk7MbEvbRwFNlz1xfxy0D3Aq8iaLH7FrgPbb/2Ncx6RKPiIgYJtuXA4/1smugMSI7AHfYvqu86+FH\nwP79HZCEHRER0X7/KmmupO9JWrWX/WsDdzes31Nu61MSdkRERHt9G1ifYjKh+4Gv9FJmyNejM8l+\nRSRlcEBExCDZruo2wyXa9Xd5oLrafrDhnKdR3OnR7F6Khwj1WJeild2nJOwKzfnLk4Mq952vfZ5/\n+fjnBlX29ZNWHvT5jz92OkceNX3Q5auK++SCwU1K9eXPH8unP3fUoONOnDB+UOWqqi8Mvc5VxB3s\n5wBD/yxuue+pQZUbyu8wDP73eLT8Do+G2GM57oTxlefqJZbf6qMtHf/cjacMWEbSWrbvL1ffBszv\npdh1wIaSJlNMsnQgxfz+fUrCjoiI7tHvDMPDCCf9kGKu/DXKJ8wdDUyTtBVFt/edlA+0kTQJ+K7t\nfWy/KOlw4GKK27q+198IcUjCjoiIbqL2tuZt99YqPr2PsvdR3LPds34RxayEg5KEPQpst9PulcSd\nMnVareLusvtAD3QanqrqC9XVuW6fRX6Hq4+duG3S5hb2SMrEKRWR5MFewx6KoVzDHi2Gck14KIZy\n7XYoqqpvVar6HGDw17CHqo6/x1GdCeM1YoPOlt/u4y3FeO66r41IXXtT368aA5A0XdInByhzjKQ3\njlSdIiIihmssd4kP2HVg++ihBJS0rO0Xh1+liIjoqBp3ide35k0kHVTOKnOjpLOb9m0l6epy/wU9\ns85IOlPSO8rluyStXi5vJ+mycnm6pHMkXUHxhKE1JP1E0uzytcsIv9WIiBguqbVXB42JFrakTYEj\ngJ1tP1o+3P5jLG1lnw181Pblko6hGHb/cV76WL/+WuSvB3az/bykHwBfs32lpPWA3wCbtP9dRURE\n29W4hT0mEjbwBuC8nmf92n5M5TchSROBVcoJ2gHOAs4fQmwDv7D9fLn+JuDvtfSb1sqSVrD9bIvv\nISIiok9jJWGbgZ+M0qOvci+y9BLB8k37GpOxgB1tvzDQib7ztc8vWd5up93Zbudqbn2JiKiTWTNn\nMGvmjM6cvMPd2q0YKwn7/4CfSfpq2SW+erldtp+U9Jik3WxfAbwfmNFLjLuA7Si6uN/RsL35p3sJ\nRXf7f0Nxfdz2jb1VaihTNUZEdIspU6e95D7tE447ZuROni7xzrJ9s6QTgJmSFgFzKBJwz3XpDwDf\nkbQC8CfgkF7CHAN8T9KTFAm98dp24/XtjwGnSJpL8fnNBA5r6xuKiIhqpIXdebbPphhc1tu+ucDO\nvex6JfBIWeYKYONejj2maf0R4N2t1jciImIoxkzCHipJpwMTgCs6XZeIiBgh6RKvH9sf7HQdIiJi\nhKVLPCIiogbSwo6IiKiBGifs+tY8IiKii6SFHRER3WNcrmFHL6p45u8Wn/tN22MCzPv8XpXEhWqf\n11yFutW3SnV7bnXdnr0eHVDjLvEk7IiI6B41HiVe368aERERXSQt7IiI6B7pEo+IiKiBGneJJ2FH\nRET3SAs7IiKiBmrcwq7vV402knRXzzO0JV3Z6fpEREQ9SDpd0gOS5jdsO07SXEk3SvqdpHX7OPYu\nSfMkzZE0e6BzJWEXljzv2vaunaxIRERUSONae73cGUDzRBZfsr2l7a2A/wWO7qM2BqbZ3tr2DgNV\nvdKELemghm8ZZ0l6raT/K7f9tudbh6QzJX1L0lWS/iRpWln+ZklnNMR7WtKXJN0k6VJJO0maWR7z\n1rLM8pLOKL+13CBpWrn9YEkXSLpI0m2SvthHnZ8u/11L0qzym898Sbs21OGrZR1+K2mNKj/DiIho\nI6m1VxPblwOPNW17qmF1JeDh/mo02KpXlrAlbQocAexRfsv4d+
CbwBm2twTOBU5qOGRV2zsDHwd+\nAXwJ2BTYXNIWZZkVgN/Z3gx4CjgWeAPwtnIZ4KPAIttbAO8BzpL0inLflsC7gM2BAyWt3UvVe1rb\n7wV+Y3vr8ri5DXW4tqzDTPr+5hQREaNN+1vYvZ9GOkHSX4EPACf2UczAbyVdJ+nDA8WssoX9BuA8\n248C2H4M2An4Qbn/+8Bu5bKBX5bLNwF/s/0H2wb+AEwu971g++JyeT5wme1F5TE9ZXYtY2P7VuAv\nwEblOX5n+ynbzwM3A6/tp/6zgUMkHQ1sbvvpcvti4Me9vIeIiAgAbB9hez3gTOBrfRTbtWwUvgX4\nqKTd+4tZ5Shx03tTv6/m/wvlv4uB5xu2L2ZpPRc2bX8BwPZiSY3vpa9zNMZdRD/v3/bl5Ye3L3Cm\npK/aPqepmGi4/t3s+GOnL1meMnUaU6ZO66toRETXmDVzBrNmzujMyYc4SnzRI7ez+JE7WjnjD4Bf\n97bD9v3lvw9J+hmwA3B5X4GqTNj/B/ysTHSPlqOwfw+8m6Jl+j5gVgXnvbyMfZmkjYD1gFuAbYcS\nRNJ6wL22T5O0PLA1cA5Fr8QBFK3s99LPh3vkUdOHU/+IiDGtuQFzwnHHjNzJh3gf9jJrbMwya2y8\nZH3RHQM/gEnShrZvL1f3B+b0UmYFYBnbT0laEdgT6PeDqCxh275Z0gnATEmLgBuAfwXOkPRp4EHg\nkMZD+limn+29HfMt4NuS5gEvAh+wvVCS+4nbW8w9gE9JWkhxvfygcvszwA6SjgQeAA4cRMyIiBgN\n2jxxiqQfAlOBNSTdTTGuaW9JG1P05P4JOLQsOwn4ru19gNcAF6ho8S8LnGv7kn7PVVwmjsGS9JTt\nAZ85KMkLFrb/s63j4zUjRkoer1lPE8YL25XPaCLJy+97SksxnrvwoyNS195kprOhyzeciIi6qvFM\nZ0nYQ2R7YqfrEBERw5S5xCMiImogLeyIiIgaqHELu741j4iI6CJpYUdERPdIl3iMlCv+642VxD3m\nklsriQtw9J4bD1woog1y+1UMREnYERERo1+dE3auYUdERNRAWtgREdE96tvATsKOiIjuUecu8STs\niIjoGnVO2LmGHRERUQNpYUdERNdIC7smJB0s6eRO1yMiIjpDUkuvTuq2FnZLj8aUNM724nZVJiIi\nRlh9G9hjp4Ut6SBJcyXdKOlsSQdIml+uz+gpBkySdJGk2yR9seH4b0m6VtJNkqY3bL9L0omSrgcO\nkDRD0tclzSnjbz+ibzQiIoYtLewOk7QpcASws+1HJa0GzAT2tH2/pMZnWG9Vvl4AbpV0ku17gSNs\nPyZpGeC3kjazfRNFq/xh29uW5/oXYILtrSXtDpwObD5ibzYiIrrSmEjYwBuA82w/ClAm3iuBsySd\nB1xQljPwO9tPAUi6GXgtcC9woKQPU3wmawGbADeVx/246Xw/LM9zuaSJkibafrK5UscfO33J8pSp\n05gydVob3mpERL3NmjmDWTNndOTcnW4lt2KsJGzTdGXC9qGSdgD2Aa6XtG1Z5vmGYouAZSWtD3wS\n2M72E5LOAJZvKPfMIM7/MkceNX1IbyIiohs0N2BOOO6YETt3nRP2WLmG/X8U15dXB5C0uqQNbM+2\nfTTwELAuvSdWAStTJOUnJb0aeMsA5zuwPM9uwOM9LfaIiBjdcg27w2zfLOkEYKakRcAcYKKkDSkS\n8m9tz5W0FS9P2rY9T9Ic4BbgbuCKAU75nKQbKD6/D7b1zURERPRiTCRsANtnA2cPUOYs4KyG9bc2\nLB/SxzHr97L5HNsfH2ZVIyKiU+rbIz52EnZERMRAOt2t3Yok7CGyvUen6xAREcNT54Q9VgadRURE\njDhJp0t6QNL8hm1flvTHcjKvCySt0sexe0m6RdLtkj4z0LmSsCMiomtUMEr8DGCvpm2XAJva3hK4\nDfjPXuqxDPDN8thNgPdI+v
v+6p6EHRER3UMtvprYvhx4rGnbpQ3PnbgGWKeXmuwA3GH7LtsLgR8B\n+/dX9VzDjoiIrtGBa9gfpJwds8naFLcR97gH2LG/QEnYNTNxwvhK4h6958aVxAW48Kb7Kom772aT\nKol776MLKom79uoTKokbEYM31IT9wn1/4IX7/zDccx0BvGD7B73sHvLTI5OwIyIi+rDcpE1ZbtKm\nS9afmXP+oI6TdDCwN/DGPorcSzEDZ491KVrZfUrCjoiIrjESXeKS9gI+DUy1/Vwfxa4DNpQ0GbiP\nYsrr9/QXN4POIiKia7R7lLikHwK/BzaWdLekDwInAysBl0qaI+lbZdlJkn4FYPtF4HDgYuBm4Me2\n/9hf3dPCjoiI7tHmBrbt3lrFp/dR9j6KJ0j2rF8EXDTYc6WFHRERUQNpYUdERNfI1KQdJGly45Rw\nw4xxsKSTh1B+hqRtyuW7ep7DHRERo1ueh11/Q70fzn0sR0TEKNbppNuK2rewS8tI+h9JN0m6WNLy\nkj4sabakGyX9RNIEAEkHSJpfbp9RHr/kJyhpH0m/l/RKSXuWy9dLOk/Sip14cxEREWMlYW8IfNP2\nZsDjwDuAn9rewfZWwB+BfyrL/hewZ7l9v3KbASS9DfgM8BaKJH4E8Ebb2wLXA58YofcTERFVaPNc\n4iNprHSJ32l7Xrl8PTAZ2FzS8cAqFPfD/abcfyVwlqTzgAvKbQLeAGwHvNn205L2pXiCyu/LLpTl\nKO61G7Tjj52+ZHnK1GlMmTptqO8rImLMmTVzBrNmzujIuevcJT5WEvbzDcuLgAkUjzzb3/Z8SR8A\npgHYPlTSDhT3wl0vaVuKFvafgPWBjSmSPsCltt873EodedT04R4aETFmNTdgTjjumBE7d50T9ljp\nEu/NSsDfJI0H/rFno6QNbM+2fTTwEEvncv0L8E7gbEmbUDwSbVdJG5THrShpwxF9BxER0VZ1HiU+\nVhJ2byO1j6JIuldQXMPuKfMlSfPKW8GutD23J4btW4H3AedTJPyDgR9Kmks59Vx1byEiIqJvte8S\nt30XsEXD+lcadn+nl/Lv6GXbWcBZ5fKNQM+jWe6keMh4c/k9GpbXH2bVIyJihHW6ldyK2ifsiIiI\nQatvvk7CjoiI7lHnFvZYuYYdERExpqWFHRERXaPOLewk7IiI6Bo1ztdJ2BER0T3Swo7ox76bTaok\n7mnX3FlJ3A/tmDv16urJBQsriTtxwvhK4kYMRRJ2RER0jRo3sJOwIyKie6RLPCIiogZqnK+TsCMi\nonuMG1ffjJ2JUyIiImogLeyIiOgade4S7+oWtqQrO12HiIgYOe1+Hrak0yU9UD6yuWfb6pIulXSb\npEskrdpHXe4qH/c8R9Lsgere1Qnb9q6drkNERIwcqbVXL84A9mra9lngUtsbAb8r13tjYJrtrW2/\n7FHOzbo6YUt6WtKKkn4r6frym85+5b4TJR3WUHa6pE/2VT4iIrqP7cuBx5o27wecVS6fBfxDPyEG\n3Unf1Qmb4tvNAuBttrcF3gB8pdz3I+BdDWUPKLc910f5iIgY5drdJd6HV9t+oFx+AHh1H+UM/FbS\ndZI+PFDQDDorvrR8QdLuwGJgkqQ1bd8oaU1JawFrAo/ZvlfS+D7KP9i5txAREYMx0hOn2LYk97F7\nV9v3S3oVcKmkW8oWe6+SsOF9wBrANrYXSboTWL7cdz7wTuA1FK3rgcq/xPHHTl+yPGXqNKZMnVZF\n/SMiamXWzBnMmjmjI+cear5++q4befquuUM9zQOSXmP7b2Wjr9cGne37y38fkvQzYAcgCbsfqwAP\nlsl3D+C1Dft+DJwGvBKYUm6b2E/5lzjyqOnV1DgiosaaGzAnHHdM5yozgJUmb8VKk7dasv7AzHMG\nc9gvgA8AXyz//d/mApJWAJax/ZSkFYE9gX4/iG5P2AbOBX4paR5wHfDHJTvtmyWtBNzTcD2i
z/IR\nETG6tbtLXNIPganAGpLuBo4CTgTOk/RPwF2U46EkTQK+a3sfip7bC8r6LAuca/uS/s7VtQlb0iuB\nR20/AuzSVznbWzSt91s+IiJGr3Zfwrb9nj52vamXsvcB+5TLfwa2ai7Tn65M2OW3nMuAL3e6LhER\nMXLytK6aKb/lbNzpekRExMiqcb7u+vuwIyIiaqErW9gREdGd0iUeERFRAzXO10nYERHRPdLCjujH\nkwsWVhL3QzuuX0ncC2+6r5K4+242qZK4VarqZzdxwvhaxY0YDZKwIyKia9S4gZ2EHRER3SNd4hER\nETVQ43yd+7AjIiLqIC3siIjoGukSj4iIqIEa5+vR1SUu6cpO1wGKh4NIOn+AMqtIOnSk6hQREa2T\n1NKrk0ZVwra9a6frIGlZ2/fZPmCAoqsBh41EnSIioj2SsNtE0tOSVpT0W0nXS5onab9y34mSDmso\nO13SJ/spv6KkX0m6UdJ8ST0PEN9e0pXl9qslrSTpYEm/kPQ74FJJr5V0U1n+YEk/l3SZpNskHVVW\n4URgA0lzJH1xRD+oiIjoOqPtGraBBcDbbD8laQ3gKuAXwI+ArwPfKsseAOwJPNdH+b2Ae23vAyBp\noqTlyjjvsn29pJXK8wFsDWxu+3FJk8u69Nge2LQse62kXwGfATa1vXUFn0NERFQg17DbaxzwBUlz\ngUuBSZLWtH0jsKaktSRtCTxm+96+ygPzgDeXLfPdbD9J8Qzs+21fD2D7aduLKJLzJbYf76NOl9h+\nzPZzwAXAbtW9/YiIqEqdu8RHWwsb4H3AGsA2thdJuhNYvtx3PvBO4DUULeU+y9u+XdLWwD7A8WV3\n98/6Oe+zg6yfgMWDKXj8sdOXLE+ZOo0pU6cN8hQREWPXrJkzmDVzRkfOXecW9mhM2KsAD5bJdw/g\ntQ37fgycBrwSmFJum9hbeUlrUbTCz5X0BPBBiuvOa0nazvZ1klamSNQD/QjfLGk1iu73/YFDgKeB\nlfs76Mijpg/2PUdEdI3mBswJxx3TucrUyGhL2AbOBX4paR5wHfDHJTvtm8vrzvfYfqDc3Ff5zYEv\nS1oMLAT+xfZCSQcCJ0uaQJGs31yet/GaNU3rs4GfAusA59i+AYrb0CTNB35t+zPt+QgiIqIqne7W\nbsWoSdiSXgk8avsRYJe+ytneomm9r/J/BS7p5fjrgJ2bNp9VvnrK3AU0nuce22/rJdb7+qpnRESM\nPjXO16MjYUuaBFwGfLnTdelFb63viIiooXE1ztijImHbvo9iBPeoY/slre+IiIhOGI23dUVERFRC\nau3Ve0z9WzlB102S/q2PMidJul3S3PIOpiFLwo6IiK7R7vuwJW0GfIhigq0tgX0lbdBUZm/gdbY3\nBP4Z+PZw6p6EHRERXWOcWnv14vXANbafKyfimgm8vanMfpSXVm1fA6wq6dVDrvtQD4iIiKirCmY6\nuwnYXdLqklagmKxrnaYyawN3N6zf00uZAY2KQWcxtk2cML7TVRiSfTebVEncJxcsrCRulZ9v3X52\nESPN9i3lA6AuAZ4B5tD7bJjN2X7Idx8lYUdERNcY6l1dj9x6PY/cdkO/ZWyfDpxexNfnKeYBaXQv\nsG7D+jrltiFJwo6IiK6hAWeifqk1Nt6ONTbebsn6Hb867eUxiwdUPShpPeBtwI5NRX4BHA78SNJO\nwOMNs3UOWhJ2RER0jT4GjrXqJ+VsnQuBw2w/KekjALZPtf1rSXtLuoOi2/yQ4ZwkCTsiIqIFtqf0\nsu3UpvXDWz1PEnZERHSNPPwjIiKiBmqcr3MfdqskzZC0bafrERERAxsntfTqaN07evaxIU/zioiI\nyo3ZhC3pE+Vk7PN7JmOXdFA58fqNks4qt71V0tWSbpB0qaQ1y+3TJZ0laZakuyS9XdJ/S5on6SJJ\nuZwQEVEzVTz8Y6SMyaRTdlEfDOxA8aXkGknXAkcAO9t+
VNJqZfHLbe9UHvch4D+AT5X71gf2ADYF\nrgbeZvtTki6gmH7u5yP0liIiog3G5KAzSSf3c5xtf6yC+rTLbsAFthcAlAl2O+A8248C2H6sLLuu\npPOA1wDLAX8utxu4yPYiSTcB42xfXO6bD0wekXcSERFtU+N83W8L+3qWXpvteYsul0f7NdueevZQ\n07+NTgb+2/aFkqYC0xv2vQBge7GkxomgFwPLDFSJ449dGmrK1GlMmTptEFWPiBjbZs2cwayZMzpy\n7k4PHGtFnwnb9pmN65JWtP1M5TVqj8uBMyWdSNEl/g/AR4AzJH21p0u8bGVPBO4rjzu4IcZAP9UB\nf+pHHjV9qPWOiBjzmhswJxx3TOcqUyMDDjqTtIukm4FbyvWtJH2r8pq1wPYc4ExgNsW15+/a/j1w\nAjBT0o3AV8ri04HzJV0HPMTS3oPm0d/NvQqjvZchIiKaqMVXJ8nuP+9Img28E/i57a3LbX+wvekI\n1K+2JHnBwuT0WKqOj9eMGAkTxgvbledDSX73WXNaivGjD2w9InXtzaBGidv+a9PIuherqU5ERER1\nKnr4x4gYTML+q6RdASQtB3wM+GOltYqIiIiXGEzCPhT4BrA2xQO3LwE+WmWlIiIiqjAm78PuYfsh\n4L0jUJeIiIhK1ThfD2qU+AaSfinpYUkPSfq5pL8bicpFRES0k6SWXp00mLnEfwCcB6wFTALOB35Y\nZaUiIiLipQZzDXuC7XMa1r8v6dNVVShirKrq9qvTrrmzkrgAH9px/cpiR3TCmBwlLml1ivvEL5L0\nnyxtVR8IXDQCdYuIiGirTndrt6K/FvYNvHQ2r38u/+2ZS/yzVVUqIiKiCvVN1/3PJT55BOsRERFR\nuTH58I9GkjYDNgGW79lm++yqKhUREREvNWDCljQdmApsCvwKeAtwBZCEHRERtVLjBvagbut6J/Am\n4H7bhwBbAqtWWquIiIgK1Pk+7MF0iS+wvUjSi5JWAR4E1q24XhEREW031lvY10paDfgucB0wB/h9\npbWqEUnTJX2y0/WIiIiRJ2ljSXMaXk9I+lhTmWnl9p4yRw7nXIOZS/ywcvE7ki4GJtqeO5yTjTWS\nluWlt75FRMQo1u5R4rZvBbYGkDSO4iFZP+ul6Ezb+7Vyrv4mTtmWPpKRpG1s39DKiUcLSZOB31D0\nHmwD/AE4iGJU/FeAlYCHgYNt/03SDIpeht3IFK0REbVScZf4m4A/2b67t1O3Gry/FvZX6L/1uEer\nJx9FNgIOsX2VpO8BhwP/AOxv+2FJBwInAP9E8ZmMt709gKSjO1XpiIgYmooHjr2b4vkbzQzsImku\nRQv8U7ZvHmrw/iZOmTbUYDV2t+2ryuXvA0cAmwGXlj/cZYD7Gsr/eDBBjz92+pLlKVOnMWXqtDZU\nNSKi3mbNnMGsmTM6XY1Bufem2dx70+wBy0laDngr8Jledt8ArGv7WUlvAf6XoqE4JLK7+xJs2SU+\no2dmN0l8SDHIAAAgAElEQVRvoGhhv8b2Lr2Uvwz4ZM8lgbKF/bTtrzSV84KF3f3ZxsjIwz+i7iaM\nF7YrH78tyYdfMOSG7Ut88+2b9FpXSfsDh9reaxD1uBPY1vajQzn3YEaJd4P1JO1ULr8XuBp4Vc82\nSeMlbdKx2kVERFtUeB/2e+hjXJOkV6s8WNIOFI3lISVrGOTUpF3gVuCjkk6nGHR2EnAxcFJ57/my\nwNeAvr6apSkdEVEDVTxeU9KKFAPOPtyw7SMAtk+lmIDsUEkvAs9SXOsessFMTToOeB+wvu1jJa1H\n0V08cKd+fbxo+/1N2+ZSTMn6Erb3aFo/psqKRURE+1SRsG0/A6zRtO3UhuVTgFNaPc9gusS/BexM\n0VUM8HS5bSxJCzkiIka1wXSJ72h7a0lzAGw/Kml8xfUaMbbvArbodD0iIqJ6nZ4PvBWDSdgvSFqm\nZ0XSq4DF1VUpIiKi
GlV0iY+UwSTskymmWVtT0ucpLp4Pax7UiIiITqpxA3tQc4l/X9L1wBvLTfvb\n/mO11YqIiIhGgxklvh7wDPDLcpMlrWf7r5XWLCIios3a/fCPkTSYLvFfs3QU9fLA+hT3LW9aVaXG\niicXLGx7zIkTxsx4v2iTKmcju+ZPQ57bYVB23GD1SuJGDKTOs4UNpkt8s8Z1SdsAH62sRhERERWp\ncQN76F82yjm0d6ygLhEREdGHwVzD/mTD6jiKZ0bfW1mNIiIiKjLWr2Gv1LD8InAh8NNqqhMREVGd\nGufr/hN2OWHKRNuf7K9cREREHYzJiVMkLWv7RUm7SpK7/cHZERFRe2O1S3w2xfXqG4GfSzqf4rFg\nALZ9QdWV6yRJdwHbDOeZpREREe3WX8Lu+RqyPPAI8Iam/WM6YVPce17fr2IREfEyNW5g95uwXyXp\nE8D8kapMJ0iaDPwGuI6iR+EPwEHl7n+V9FZgPHCA7VslTQc2KF9rAF+yfdoIVzsiIoahztew+7sP\nexlgZYpR4r29xpKNgFNsbwI8ydKJYR6yvS3wbeBTDeU3A/ageE74UZLWGsnKRkTE8KjF/zqpvxb2\n32wfM2I16ay7bV9VLn8f+Ldyuafb/wbg7eWygZ/bfh54XtJlwA7Az5uDfvnzxy5Z3mX3qey6+9QK\nqh4RUS+zZs5g1swZna5G7QzmPuxu0DgCXix93vfz5b+L6P+z6vX54J/+3FGt1ywiYoyZMnUaU6ZO\nW7J+wnEj1zYcq13ibxqxWnTeepJ2KpffC1zRT1kB+0t6haRXAtOAayuuX0REtME4tfbqaN372mH7\nkZGsSIfdCnxU0s3AKhTXrBuZpa1wA/OAy4CrgGNt/22kKhoREcMnqaVXJ6VLvPCi7fc3bVvyzELb\n1/PS29rm2f7AiNQsIiKCJOweQ53FLbO+RUTUUKe7tVvR9Qnb9l3AFkMo3y0j5yMixpw6T5wy5Odh\nR0RE1NU4qaVXbyStKuknkv4o6eaGQcyNZU6SdLukuZK2Hk7du76FHRER3aOiLvFvAL+2/U5JywIr\nNu6UtDfwOtsbStqRYmDzy5L6QNLCjoiIGCZJqwC72z4dwPaLtp9oKrYfcFa5/xpgVUmvHuq5krAj\nIqJrSK29erE+8JCkMyTdIOm7klZoKrM2cHfD+j3AOkOte7rEo3L3Prqgkrhrrz6hkrhPLlhYSdyJ\nE8ZXErdKfz9p5UrivuO02ZXE/emHdqgkbowd44Y4H/htN1zN7XOu7q/IshQPjjrc9rWSvg58Fmie\n6rL5xEO+2ygJOyIiusZQR4lvvO1ObLzt0svNF53xjeYi9wD32O6Z8fInFAm70b3Aug3r65TbhiRd\n4hEREcNUznR5t6SNyk1vonhMc6NfUD62uRxB/rjtB4Z6rrSwIyKia1Q0SvxfgXMlLQf8CfigpI8A\n2D7V9q8l7S3pDuAZ4JDhnCQJOyIiukZf91K3wvZcYPumzac2lTm81fMkYUdERNfITGcRERFRqa5M\n2JI+17A8WdL8TtYnIiJGRhVTk45Y3Tt69s75z05XICIiRl4FE6eMmDGRsMtW8i2Svl9OvH6+pLdI\n+llDmTdLukDSF4AJkuZIOofi5vVlJP2PpJskXSxp+fKYrSRdXU7WfoGkVcvtMySdKOkaSbdK2q0j\nbzwiIoZkXIuvTur0+dtpI+AU25sATwKbAq+XtEa5/xDge7b/E1hge2vb76eYfWZD4Ju2NwMeB95R\nHnM28GnbWwLzgaPL7QaWsb0j8O8N2yMiYhST1NKrk8bSKPG7bV9VLn8f+BhFwv1HSWdSPBnlH/s4\n9k7b88rl64HJkiYCq9i+vNx+FnB+wzEXlP/eAEzuLeiXP3/skuVddp/KrrtPHcr7iYgYk2bNnMGs\nmTM6XY3aGUsJu3FeVpXrZwK/BJ4DzrO9uI9jn29YXgQs30uZ5q9WPccsoo/P8dOfa5
5KNiIipkyd\nxpSp05asn3DcMSN27hrf1TWmusTXa3ho+HuBy23fD9wHHAmc0VB2YfnM0r7I9pPAYw3Xp98PzGhz\nnSMiYgRllPjocCvwUUk3A6tQPCAc4AfAX23f2lD2f4B5DYPOmp+a0rP+AeDLkuYCWwDH0rshP3Ul\nIiJGnlp8ddJY6hJ/sRxE1mw34LuNG2x/lpc+TWWLhn1faVieC+zcHND2Hg3LDwN/N/xqR0REDGws\nJeyXtXIlXQ88BXx85KsTERGjTafvpW7FmEjYtu+ioZXcsH3bka9NRESMVp2+NasVYyJhR0REDEad\nB27Vue4RERFdIy3siIjoGukSj4iIqIH6pusk7EpNnDC+01UYtCcXLKws9tqrT6gsdhXq9HOrWlWf\nxU8/tEMlcS+86b5K4u672aRK4lb5/11+j3uXFnZEREQN1HngVp3rHhER0TXSwo6IiK6RLvGIiIga\nqG+6TsKOiIguUuMGdq5hR0RE1EESdh8knSnpHZ2uR0REtM841NKrL5KWkTRH0i972TdN0hPl/jmS\njhxO3bu6S1zF6APZXtzL7jzjOiJijKmwS/zfgJuBlfvYP9P2fq2coOta2JImS7pV0lnAfODFhn3v\nlHRGQ/E3Sbq2LL9PWWampC0bjrlC0uYj9gYiImLY1OJ/vcaU1gH2Bk6j73FtLX9V6LqEXXodcIrt\nzYBnGrY3tqoFvNb29sA+wHckvQL4HnAwgKSNgFfYnj8itY6IiNHoa8Cngd56a6HILbtImivp15I2\nGc5JujVh/8X27AHKGDgPwPYdwJ+BjYHzgX0lLQt8EDijzwgRETGqSK29Xh5P+wIP2p5D363oG4B1\nbW8JnAz873Dq3q3XsPtqVQ806bVtL5B0KfAPwAHANn0VPv7Y6UuWp0ydxpSp04Zc0YiIsWbWzBnM\nmjmjI+fub+BYb+bNvpJ5117ZX5FdgP0k7Q0sD0yUdLbtg3oK2H6qYfkiSd+StLrtR4dSF9ndNbZK\n0mTgl7Y3L9dvB94K3EbRen7C9gclnQm8CtgX+DtgBrCB7RckbQNcSDGI4D19nMcLFtbns81DCGIs\nyMM/lqrT/3cTxgvbld8hLcm/+cODLcXYa9M1+6yrpKnAp2y/tWn7qyla4Za0A3Ce7clDPXe3trAb\nM+lnKZLvQ8B1wIoNZf4KzAYmAh+x/QKA7RskPUG6wyMiamUEJk5xcR59BMD2qcA7gUMlvQg8C7x7\nOIG7roXdDpImAZfZ3rifMmlhl+r0TT/qLS3sper0/91ItrAvvrm1Fvb/26TvFnbVunXQ2bBJOgi4\nGvhcp+sSERFDU8VtXSOlW7vEh8322cDZna5HREQM3bgazyWehB0REV2j063kVqRLPCIiogbSwo6I\niK5R58drJmFHRETXqHOXeBJ2APW6BSSiL1XdfrXa9odXEvexa79ZSdzoW50HneUadkRERA2khR0R\nEV0jXeIRERE1kEFnERERNVDjfJ2EHRER3WNcjZvYGXQWERFRA0nYgyBpsqT5na5HRES0Ri2+Oild\n4hER0T06nXVb0BUJW9KKwHnA2sAywHHAncA3gBWA54E3AmtQPIlrxfLQw21f1RRr8kBlIiJidMpt\nXaPfXsC9tvcBkDQRmAO8y/b1klYCFgAPAG+2/bykDYEfANs3xRpMmYiIiLbqloQ9D/hvSScCFwJP\nAPfbvh7A9tMAkpYDvilpS2ARsFEvsQZTJiIiRqEaDxLvjoRt+3ZJWwP7AMcDl/VR9OMUifz9kpYB\nnhtmGQCOP3b6kuUpU6cxZeq04b2BiIgxZNbMGcyaOaMj565xvu6OhC1pLeAx2+dKegI4FHiNpO1s\nXydpZeBZYCJwT3nYQRTXu5sNpgwARx41vU3vICJi7GhuwJxw3DEjd/IaZ+yuSNjA5sCXJS0GXqBI\n2OOAkyVNoEjWbwK+BfxU0kHAb4CnG2K4/Le/Mh
EREZWQ7YFLxZBJ8oKF+WwjxoI8XrNaE8YL25W3\nfSX52j8/0VKM7f9ulRGpa2+6pYUdERGRQWcRERF1UON8nalJIyKii7R5blJJy0u6RtKNkm6W9IVe\nTyudJOl2SXPLu5aGLC3siIiIYbL9nKQ9bD8raVngCkm72b6ip4ykvYHX2d5Q0o7At4GdhnquJOyI\niOgaVUxNavvZcnE5ilt9H20qsh9wVln2GkmrSnq17QeGcp50iUdERNeQWnv1HlPjJN1IMXX1ZbZv\nbiqyNnB3w/o9wDpDrXta2FFbTy5YWEnciRPGVxI36quq268mH/qTSuIC3PXtd1YWu86qGHRmezGw\nlaRVgIslTbM9Y4BTD/m+3yTsiIiIPlx71eVcd/Xlgypr+wlJvwK2A2Y07LoXWLdhfZ1y25AkYUdE\nRPcYYhN7+112Z/tddl+yfurXT3xpOGkN4EXbj5czZ74ZaJ5r9RfA4cCPJO0EPD7U69eQhB0REV2k\ngkFnawFnSRpHMS7sHNu/k/QRANun2v61pL0l3QE8AxwynBMlYUdERNdo90xntucD2/Sy/dSm9Zbn\nt80o8YiIiBpICzsiIrpGpiYdhSQdLOnkTtcjIiJGkTZPTTqSxnILO8+2jIiIl6hiprORUrsWtqQV\nJf2qnGh9vqR3Sdpe0u/LbVdLWqksPknSRZJuk/TFhhh7luWvl3SepBXL7XdJ+rykOZKuk7SNpEsk\n3dEz4q8s92lJs8tJ3KeP7CcQERHDVcVMZyOlji3svYB7be8DIGkiMAd4l+3ry2S9gKLzYqvy9QJw\nq6STgOeBI4A32l4g6TPAJ4DjKFrlf7G9taSvAmcCOwMTgJuAUyXtSTGJ+w7lMP6fS9rd9uDurI+I\niBiGOibsecB/SzoRuBB4Arjf9vUAtp8GkGTgd7afKtdvBiYDqwGbAL9X8XVpOeD3DfF/Uf47H1jR\n9jPAM5KeL6ed2xPYU9KcstyKwOuAJOyIiFGuvh3iNUzYtm8vnyW6D3A8cFk/xZ9vWF7E0vd7qe33\nDnDMYoqWOQ3rPcd/wfb/DFTX44+dvmR5ytRpTJk6baBDIiLGvFkzZzBr5ozOnLzGGbt2CVvSWsBj\nts+V9ARwKPAaSdvZvk7SysCz9P5jMXA1cIqkDWz/qbx+Pcn27c2n6uP4i4HjJJ1r+xlJawMv2H6o\nufCRR00f9vuMiBirmhswJxzXPJNndeo86Kx2CRvYHPiypJ4W8KEUg+dOLudxfZZiLlfTy0hx2w9L\nOhj4oaRXlJuPAJoTdvPxLo+/VNLfA1eVXepPAf8IvCxhR0REtIvs3P1UBUlesDCfbZXyeM2ouzxe\nszBhvLBdedNXkm+5/5mWYrx+rRVHpK69qWMLOyIiYljq2yGehB0REd2kxhm7dhOnREREdKO0sCMi\nomtklHhEREQNdHp60VYkYUdERNeocb5Owo6IiC5S44yd+7ArkvuwI6KTTrvmzkrifmjH9dsecyTv\nw77jwWdbivG6NVfIfdgRERFVy6CziIiIGsigs4iIiBqocb7OxCkRERF1kBZ2RER0jxo3sZOwIyKi\na9R50Fm6xPsgaRVJhw5Q5kxJ7xipOkVERGuk1l4vj6fTJT0gaX7v59M0SU9ImlO+jhxu3ZOw+7Ya\ncNgAZVy+IiKiO50B7DVAmZm2ty5fxw/3ROkS79uJwAaS5gCXAo8A7wMWA7+2/bmyXH37VyIiuky7\n/2DbvlzS5JE4bRJ23z4DbGp7a0lvAY4EdrD9nKRVO1y3iIgYhg7ch21gF0lzgXuBT9m+eTiBkrD7\n1vhjfRNwuu3nAGw/PpgAxx87fcnylKnTmDJ1WhurFxFRT7NmzmDWzBkdOvvQMvZVV8zkqitmtXLC\nG4B1bT9bNv7+F9hoOIEyl3gfyi6OX9reXNJ/A7fYPq2pzBnAhbZ/2svxmUs8Ijomc4m/nCTf89jz\nLcVYZ7VXvK
yujfliEHW4E9jW9qNDPXcGnfXtKWDlcvm3wCGSJgBIWq1jtYqIiNqQ9Gqp6IiXtANF\nQ3nIyRrSJd4n249IurIcqn8R8AvgOkkvAL+iuKYNGSUeEVEb7W7GS/ohMBVYQ9LdwNHAeADbpwLv\nBA6V9CLwLPDuYZ8rXeLVSJd4RHRSusRfTpLve7y1LvFJq768S3ykpIUdERFdIzOdRURERKXSwo6I\niO5R3wZ2EnZERHSPGufrJOyIiOgeHZjprG1yDTsiIqIGcltXRXJbV0TE4IzkbV0PPrmwpRhrThyf\n27oiIiIqV+Mu8STsiIjoGjXO10nYERHRPTLoLCIiIiqVFnZERHSNOk9NmoQdERFdI13iERERUakk\n7IiIiBpIl3hERHSNOneJJ2FHRETXyKCz6NXxx05fsjxl6jSmTJ3WsbpERIwWs2bOYNbMGR05d51b\n2JlLvCKZSzwiYnBGci7xJxYsainGKhOWyVziERERVatxAzsJOyIiukiNM3YSdkREdI0MOouIiKiB\nOg86y8QpERERNZCEPQpUdXtD4lYbt8rYiVvPuFXGTtz2UIuvXmNKe0m6RdLtkj7TR5mTyv1zJW09\nnLonYY8CdfsfJnGrj5249YxbZezEbZM2Z2xJywDfBPYCNgHeI+nvm8rsDbzO9obAPwPfHk7Vk7Aj\nIqJrqMX/erEDcIftu2wvBH4E7N9UZj/gLADb1wCrSnr1UOuehB0RETF8awN3N6zfU24bqMw6Qz1R\nZjqriKR8sBERgzRSM521I05jXSW9A9jL9ofL9X8EdrT9rw1lfgmcaPvKcv23wH/YvmEo581tXRXp\n1NR1ERHRu4r+Lt8LrNuwvi5FC7q/MuuU24YkXeIRERHDdx2woaTJkpYDDgR+0VTmF8BBAJJ2Ah63\n/cBQT5QWdkRExDDZflHS4cDFwDLA92z/UdJHyv2n2v61pL0l3QE8AxwynHPlGnZEREQNpIXdQZJW\nsP1sp+sx0iSt3t9+24+26TxrAss3xP1rO+LWgaRP9rPbtr/ahnOsRXFLy2LgWtt/azVmVSRtbnt+\np+sR0Yok7A6QtAtwGrAysK6krYB/tn1Yi3HXAI4GdgMMXA4ca/uRFqvcE38ixR/7p1oMdQNF/fqy\nfivBJe0HfAWYBDwIvBb4I7BpK3HL2F+0/ZmBtg0h3rb081kMdRRpg5X7i9sqSR8CjgIuKzd9U9Kx\ntr/XYtzdKH6HJ7P075Nt/10rcYFvS3oFcAZwru0nWoy3hKSdgZMoJs1YjqJb9GnbE9sQe3eKCTfO\nkPQqYCXbdw4z1jsofid6G3hl2xe0UNUYAekS7wBJs4F3Aj+3vXW57Q+2W0oo5a0CM4HvU/xP+V5g\nmu03tRh3e+B0oOcP0OPAP9m+rpW4VZE0D3gDcKntrSXtAbzf9gfbEHtOz8+sYdt825sPM94M+k/Y\newwnbhl7WeBj7WhN9xL7NmDnni+Dkl4JXGV7oxbj3gr8O8WXukU9220/3ErcMvZGwAeBA4DZwBm2\nL2lD3OuBdwPnAdtRDC7a2PZnW4w7Hdi2jLWRpLWB82zvOsx4Z9L/79qwrqvGyEkLu0Ns/1UvfWzM\ni20I+xrbxzWsHy/pwDbEPR04zPblsKQVdDqwxXCCSdqmv/0ttCp7LLT9sKRxkpaxfZmkb7QSUNKh\nwGHABpIau1ZXBq4cblzb01qp1wCxX5T0HqDtCRt4GHi6Yf3pclurHrd9URvivIzt2yQdSTGq9yRg\nK0njgM/Z/mmLsW8vf9cWAWdIuhFoKWEDbwO2Bq4vz3GvpJVbqOPBLdYnOiwJuzP+KmlXgPI2gI9R\ndNm26pLyD/SPy/UDgJZbEMCLPckawPYVklr5gvFV+u+uHXarsvRY+YftcuBcSQ/y0uQyHD8ALgJO\nBBq7v59uxyWHsjW8D0VX8DIUPSTtuNZ8haRvUvxOPNOzsQ1fiv4EXC3p5+X6
/sC88tp5K/W+TNKX\ngQuA53s2tlpfSVsCBwP7ApcC+9q+QdIk4GqglYT9TNndPlfSl4C/0fdzIobieduLe77YS1qxDTGR\n9HngS7YfL9dXAz5p+8h2xI/qpEu8A8przScBb6L4H/sSiq7Llv7wS3oaWIFiEBAU99n3/JH2cK+p\nSfo6MAH4YbnpQOA54JwycKt//Nuq/MP2HMX7fx9FV/65bbyWvy3FOIHFwJXteP+SLgIWAPNZ+vPD\n9jEtxp1BL1+OWulqL+NO7wnVs6nxPMOtd4X1nQl8Dzjf9oKmfQfZPruF2JOBByiuX3+c4vftW7bv\nGHaFi7ifBl4H7Al8gaI7/we2T2ox7o22t2ra9rJLPTH6JGGPsLIldZbt93W6LoNV1XXWMrF+AljP\n9oclbUhxve7CYVV0adxPAj+yPeSZhAYR+yiKnosLKJLU/sBPmi5FDCfuPNvDusQQg1O2gjem+F2+\n1fYLHa7SgCTtSZGwAS62fWkbYs4DdrD9XLk+Abiu1TE0Ub10iY+w8rriayW9wvbzAx8xeGU3+1zb\nT0t6P8X1r2/Y/ksLMccB37b94wELD90ZFNfndinX7wN+ArSUsCmuK18i6TGKJ+ecP5xZhfrwj8AW\nDX/svgDMBVpK2BT1/X+2L261go0kvQY4AVjb9l6SNqEYLNbqaO6NgU/x8tHcb2gx7qoUo8SnlJtm\nUNzp0NKobkn7AN8B/lxu+jtJH7H96xZi9nebmNvxBcz2JZKuofiMLWn1Ntz2eC7wO0mnU3zpPAQY\ndg9DjJy0sDtA0jnA6ymmq+u5D7vl65XlH5Atgc2BMym6AA+wPbXFuNfb3raVGP3FbeyOkzTX9pZt\nir8l8C6KEfn32H5jG2JeBrzd9mPl+mrAT9uQqN5OMbp/HLCw3DzsyxgNcX9D8cXoCNtbSBoPzLG9\nWYtx51E807dxNLdtX99i3AsoLgucRZFM3k/xBentLca9Fdinp5ta0gbAr21v3ELMyf3tt33XcGOX\n8T8CHENxLb/nMknLt7iV19kvo7gkZ+C3wBts/0crcaN6aWF3xp/K1zhgJZqu/7XgxXKQyj8Ap9g+\nTVLLtzIBl0r6FC8fuNTqN/3ny+44YMkf0Xb2OjxIMQDoEeBVbYr5JPAHST2D+d4MzJZ0MsUf048N\nM+5XgZ2Am2wvHqjwEKxh+8eSPgtge2GLAwZ7LLT97TbEabZBU3KeLmluG+I+2XRN+c8UP8thazUh\nD8Kngc3acUtbkzeXyXnJaPxyoF8S9iiXhN0BtqdXFPopSZ+j6LbdXdIywPg2xH03xReKjzZtb2mC\nE2A68BtgHUk/AHalGMnbEkmHUbSs1wTOBz5k++ZW45Z+Vr56zGhYbuVL11//f3vnHmxXWZ7x30No\nuYYgVAUqt6GCgOF+CwzIReVSw4gSsIClCIgjKFqmnbYjGiGOdBDaomIHkBAUWgmVEcq9giTkQoAQ\nIAkCtYJtocoltCHAcHv6x/etnH12TnKSvdY6K2uf9zdzZvZee693fdnnZL/rey/PCyyq2FkDvJqL\nHIHlgwd6Di8rqdQJuEXS2axYzV32Ju51SQd3tRD2rAaoJBYC8JCk20i90pDqECrREVB9win/QSpE\nrIS6WhODkSNC4g2Qw6rdVJH/25IkljLP9kxJ2wCH2Z5Wxm5dKIltiLSzBHiAEkpOHXYvIhWdLSi5\nxBFD0jTSDdDtQFEMVUWaZG+SM/kwsIgUaTjedk+7VknPsOoCxLIqdXuQ8qnj8qElwKkl1nsNQ1ey\nF21zpcVCVJ9wyl6k1NYcBv9N9BTFkTQOeA8DrYlF69nSqjoognoJh90AkvbpeLo+8GlSOPsvGlrS\nsEj6MGkH0anNXapQRdJs4OiioCgXRE2vqlpVFWqJS5pue5KkhazosEoXGK2sTaqCtq4NgHOAI0kh\n4LnAZUXRXEm7ZzPQ3nY/qTixkh2hkgwu
tkuFrUeCjlqM5ZX+Q7VO9WD3IWAGA61+xd/EWnkDHtRP\nOOy1BEkP2t63x3Nn2T4o92EP5UzKFi5NBj5C0uK+FTgauN/28SXt/jHpTv8YUrvNtcDJZXfGWomW\neJkbAUlb2X5O0rYMIYoxAvnMnpA0neSoO+Vqx9metJbaLW5gt2OwgMwFJe1O7TpkkuEq5GpnkGoZ\nrgKeJ9VNnFq2eDJ6o4NuIofdABo8rWodUhitZ6fqrC1se+OSS1sZx5Oqz+fbPk3S+0mtIaWwfauS\n0tvdpOK7T9l+sqxdYAowgS4t8TIGs7NeF7jGJUU8hiJHA/6SFMUoCvFKp0mAXW3v0vH8HklV5PPr\nsvszklb9wyTxm6q4lYGb2Q1Isp/PVWT7T0n/j88hCad8gHTTUZbbc6X4zVRbJxC0lHDYzdA5rept\n4Bng9F6Nqf5xla/bfkfS2zkP9jtg616N5YrqTjYhVc2fI6lMpXVB5VrisLyH/l1JmzrLOlbIdaQq\n/E8AZ5GK716owO58SRNsz4HlRWelWq9qtvuHto+swM4gbN/Y+TwXOVZVaPUi8GZOB0zOxZ7rVWD3\nJNL3RHcuvGyxZ9BSwmE3gO3tKjZZ67hK4MHcb3wlqbJ2GTC7hL2HGRjz547nVVGHlnjBMuDx3NbV\n2UNf9iZj89yG92Xb9wH35RxmWfYBZkn6T9JnvA3wZK4SLpN7r8vubEm72X6sx/NXlx2prtXv58AR\nDEodD0sAAAzPSURBVPyNbQjcyYAgUE/U8D0RtJzIYY8gko6w/XMNzKXt5iVSbvidIV5bK5C0PTC2\n6i/UHCXYutdq4C5bG5HaYQot8XFUpCUu6c/yw+7isFKFQJLm2j4g3whcRgrXTre9Q0m7263q9V5z\n71Xb7WgzGgN8EPg1A2HgUkV9Smp9b9Ohq0/S/v4rl5zSle0Ppc1dRdFZLdK9QXuJHfbIcgjpbnwi\nQzvszYGvkQpYekJpZu62dPxubc/o1d5K7ErSIWXtKmmUH5ttPgy8kAvovtqjvVk5n/9bVvx8p0h6\nGbjY9vdLLPtGcoogX3MMHZXoJfiWkiznecB3SWmCnj6HTuoqhqvB7sTCNCsW9ZXdVRhY7JLqbqvg\nNUl7O6u85S6QKqrl65LuDVpK7LDXMiRd3WvlqqS/JU3SWsyAXCS2J670pGbtLrC9h6QzSLvrb0h6\n3Pb4MnZXcb3NgdkuJ0c5F/io7Vfz87GkoQylwp9BQtKPbH92uGM92J1GUv+bV2qBQ9vel6RZ/3w+\ntAXwGdulUhqqWbo3aB+xw24ArWLAQck2k+NIIbNKh4rUaHdMFns5gRRZgGpz2YOw/VKuGC/D+oWz\nzjaXStqwpM3CoXzFgzXKL6mi7ahlDNoF58r8KnTsDwBOkfQsg0fOVjEhbXvSoJ1tgU8B+9ExIrUE\ndUv3Bi1jnaYXMEq5mtTDOonkrJaSwl9l+RVJGrFq6rJ7Aak451e25+UvpKdruM5ybJdt5VmmpB4G\nVBr+3L1w1gD58V4V2G0Fkv5G0lJgvKSlxQ+pI+HmCi5xJLADcDgp/D6RlI6pgvOzwMs44DDSUJQq\ndNYnM1i69x6SbkEwSomQeAMMFdaqItSlNOlod1KevLNgp1cpw6L9aitgj6rstpkhwp9bAidWEP58\nlCQj+3J+vhlwX13pgbUVSd+2/ddNr2NN6EjtXAQ8bvu6qkRPlHTgC+neua5+EEjQIiIk3gyVDjjo\n4Ob8M5R2ci90tl/d0vG4kru8HO47nRXFQtbaMLDtByXtTFJmM/Ck7beGOW11uASYI+kG0mc8iTTH\nerSxU1bAu93VD0Kpi/+WdAWpWPSirNZWVfRyPZKe+rrALpIqKSIN2knssBtAFQ846LK9HqnHFOCX\nFTmTTvtVtl/dCDxBar36JmnK2BNr885d0gnAHbb/T9L5pNzlFNvzK7B9IKm/GeAh22V63VuJpI8B\np5F2
lTcAUytSv6uN3H51FPCY7adzXcZ423cNc+pwdmsp9gzaSzjsBlHFAw4kHQpMA57Nh7Yh3Qjc\nV9LuL+hqvwJ6br/qsFuEEh+zvZuk3yP1oe9fxm6dFFXsOSoyBfgO8HXb+5W0ey5wJmlcpYBPAlfa\nvqzsmttILsz8DKkY8Tck0Z4fV30DujYj6SmS449CswCIkPiIIum8jqfuOF6Ib5QapQhcCny82JFI\n2pGUby1bvLRp3lGeAVxbtF+VtAkDIwP/V9J40tCEqtSn6qLY6XyC5FD/VdKFFdg9A9jf9jKgGBE6\nlySiMqrIedtT8s984HrSVLBTgUObW9mIUxR7hsMOgHDYI81YamxbAtbtDB/afiq3xZSlrvarK3OI\n/WukoQ8bA1+vwG6d1JmvfHclj0cNkm4CPkRKGU20XRT3/bPS3Om+p6PY8zVggaRRX+wZJMJhjyC2\nJwNIuhY4t6PndjNS0VFZHpZ0FQMjD08maX+XpWi/mlVx+9WPGBilWEh7vr8Cu3VyAilfebHtV/KN\nTBVzzKcCD+RK/yIkfnUFdtvGFaQxrgcB+0qaSZqz/YbtKvqx20Cntv4tVFdEGrScyGE3QI3aw+sD\nZ5O+7CANv7i8bA5sZTcYtk8rafdOBkYpdhbVVHHzUitK4zCXS5La/k0FNvcmhX4NzLT9SFmbbUM1\nzdluI5I2ZggZ3CJtEow+wmE3QNt6bmu8wVhYo75zLUg6lhQN2Yok6rEtqbJ910YX1idIWuzBc7aH\nPDYaCBncoJtQOmuGouf2QklTgDnAxWWNSpoo6RFJSzrUoqqoQJc6Zm7nx2MqsDtbUhXSkCPJFGAC\n8JTt7UljFR9odkl9xXxJE4onqm7OdhtZQQaXNLozGKVEDrsBbF+bC2gOJ4U/j7O9uALTf0/S/V5Y\nsehEpaIeGjxK8TRJlY1SHAHesv2ipHUkjbF9r6R/aHpRfURdc7bbSF1TwIKWEg67IWwvAhZVbPa/\ngEVVK0TVcIPRZuGHJTk0ORO4TtLvgFeHOSdYfY5qegFrEecCN0gaNAWswfUEDRM57D4ihw8vAO5l\noMe5iv7uIJNVrd4gpZNOJs2tvs72S40uLOg7ClU9Bk8BO78KVb2gnUQOu7+4kLTbW5/U07wxqfc7\nqI4vAFvYfsv2NbYvC2cd1ERdU8CClhIh8f5iS9sfa3oRfc5Y4C5JS0gqctNt/7bhNQX9SV2qekFL\niR12f3GbpCObXkQ/Y3tybuE6mzRac0ZWogqCqilU9U4Ebq1YVS9oIZHD7iMkvUpq+3gTKIYk2PYm\nza2qP8kKZ8cDfwJsPMqql4MRoK4pYEF7CYcdBGuApC+S5EnfB0wHflJRS14QBMEqiRx2n5GFSLaj\n43dr+6eNLaj/2Ab4iu0FTS8kCILRReyw+whJU4HxpP7u5b3YZTW/gxWpQ0s8CIJgVYTD7iMkLQZ2\ndfxSayO0xIMgaIqoOOwvHgRG3ZCEESa0xIMgaITIYfcXU0ma3/9De7S520ZoiQdB0AjhsPuLHwKn\nAAvpyGEHlRJa4kEQNELksPsISXNsTxj+nUGv5N7Y1xnQEh9HaIkHQTAChMPuIyRdDmwK3MLg4R/R\n1lUSSbNsH5TFabr/0xh4GbjY9vdHfnVBEIwGwmH3EZKuyQ8H/VKjrat+JG0OzLa9U9NrCYKgPwmH\nHQQVIWkr2881vY4gCPqTaOvqIyRtLekmSS/kn3+R9IGm1zVaCGcdBEGdhMPuL6YCN5NEPbYi5bKn\nNrqiIAiCoBIiJN5HSHrU9u7DHQuCIAjaR+yw+4uXJH1W0hhJ60o6BXix6UUFQRAE5Ykddh8haVvg\ne8AB+dBs4EsxmCIIgqD9hMPuIyRNI41+XJKfbwZ8x/bnml1ZEARBUJYIifcXuxfOGsD2y8BeDa4n\nCIIgqIhw2P2F8q66eLIZMKbB9QRBEAQVEcM/+otLSNO6bgAETAK+1e
ySgiAIgiqIHHafIWlX4HCS\nPOk9thc3vKQgCIKgAsJhB0EQBEELiBx2EARBELSAcNhBEARB0ALCYQdBEARBCwiHHQQ1IukdSY9I\nelzSDZI2KGHrGkmfzo+vlLTzKt77EUkTerjGM52tgcMd73rPq2t4rcmSzlvTNQbBaCUcdhDUy2u2\n97Q9HngT+ELni5LWpLXS+QfbZ9p+YhXvPQw4cE0XW9hfg+Nr+p4y7w+CUU047CAYOWYCf5R3vzMl\n/QxYKGkdSRdLmifpUUmfh6SCI+l7kn4p6W7gfYUhSb+QtHd+fJSkhyUtkHR31pQ/C/hq3t0fJOm9\nkm7M15gn6cB87uaS7pK0UNKVpP79VZJnrj+Uzzmz67VL8/F/k/QH+dgOkm7P58yQtFM1H2cQjC5C\nOCUIRoC8kz4GuC0f2hPY1faz2UG/Yns/SesB90u6iyQruyOwM7AFsBj4YT7fgCW9F7gCODjb2tT2\nK5L+EVhq+9J8/euBv7M9S9I2wB3ALsA3gBm2p0g6Bjh9Nf45n7O9JIf350m6MUvibgQ8aPvPJZ2f\nbX8pr+8s2/8uaX/gcuCIHj/KIBi1hMMOgnrZQNIj+fEM4GrgIGCe7Wfz8Y8D4yUdn59vAnwQOBi4\n3kks4XlJ93TZFmky24zClu1Xul4v+Ciws7T80FhJG+VrHJfPvU3SEobnXEmfzI+3zmudB7wL/CQf\n/zHw03yNA4HpHdf+/dW4RhAEXYTDDoJ6ed32np0HsuNa1vW+c2zf3fW+Yxg+RL26eWAB+9t+c4i1\nDBsG73j/oaTd8QG235B0L7D+Sq5nUtptSfdnEATBmhM57CBonjuBLxYFaJJ2lLQhaUd+Ys5xb0kq\nJOvEwFzgEEnb5XOLSu6lwNiO994FfLl4Imn3/HAGcFI+djTwnmHWugnJAb8h6UMMzF6H9H0yKT8+\nCZhpeynw6yJ6kPPyuw1zjSAIhiAcdhDUy1A7YHcdv4qUn54v6XHgB8AY2zcBT+fXpgGzVzBkvwh8\nnhR+XgD8U37pFuC4ouiM5Kz3yUVti0hFaQDfJDn8haTQ+LMMTbHeO4B1JS0Gvg3M6XjPMmC//G84\nFLggHz8ZOD2vbyFw7DCfTxAEQxBa4kEQBEHQAmKHHQRBEAQtIBx2EARBELSAcNhBEARB0ALCYQdB\nEARBCwiHHQRBEAQtIBx2EARBELSAcNhBEARB0ALCYQdBEARBC/h/sAeZuBshiqwAAAAASUVORK5C\nYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n", + "\n", + "# Run classifier, using a model that is too regularized (C too low) to see\n", + "# the impact on the results\n", + "# classifier = svm.SVC(kernel='linear', C=0.01)\n", + "y_pred = classifier.fit(X_train, y_train).predict(X_test)\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "import matplotlib.pylab as pylab\n", + "pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session\n", + "\n", + "my_labels = classifier.classes_\n", + "\n", + 
"\n", + "def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):\n", + " fig, ax = plt.subplots()\n", + " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", + " plt.title(title)\n", + " plt.colorbar()\n", + "\n", + "\n", + " tick_marks = np.arange(len(classifier.classes_))\n", + " plt.xticks(tick_marks, my_labels, rotation=90)\n", + " plt.yticks(tick_marks, my_labels)\n", + " plt.tight_layout()\n", + " plt.ylabel('True label')\n", + " plt.xlabel('Predicted label')\n", + "\n", + " from matplotlib.ticker import MultipleLocator # from http://stackoverflow.com/a/19252430 comments\n", + " ax.xaxis.set_major_locator(MultipleLocator(1))\n", + " ax.yaxis.set_major_locator(MultipleLocator(1))\n", + "\n", + "\n", + "# Compute confusion matrix\n", + "cm = confusion_matrix(y_test, y_pred, labels=my_labels)\n", + "np.set_printoptions(precision=2)\n", + "print('Confusion matrix, without normalization')\n", + "print(cm)\n", + "plt.figure()\n", + "plot_confusion_matrix(cm)\n", + "\n", + "# Normalize the confusion matrix by row (i.e by the number of samples\n", + "# in each class)\n", + "cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", + "#print('Normalized confusion matrix')\n", + "#print(cm_normalized)\n", + "#plt.figure()\n", + "#plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix')\n", + "\n", + "#plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], "source": [] From e2c76492fea5b2fec1838f84dbf3661ddc958316 Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Sun, 7 Jun 2015 15:41:20 -0400 Subject: [PATCH 11/12] Tweak features and add conclusions to IPython notebook; more data needed --- feature_testing_with_benchmark_dataset.ipynb | 1211 ++++++++++++++++++ 1 file changed, 1211 insertions(+) create mode 100644 feature_testing_with_benchmark_dataset.ipynb diff --git a/feature_testing_with_benchmark_dataset.ipynb 
b/feature_testing_with_benchmark_dataset.ipynb new file mode 100644 index 0000000..fd5a97d --- /dev/null +++ b/feature_testing_with_benchmark_dataset.ipynb @@ -0,0 +1,1211 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Initial Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from lang_classifier import *\n", + "from sklearn.pipeline import make_pipeline, make_union\n", + "from sklearn.metrics import classification_report, confusion_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def setup():\n", + " \"\"\"Load the benchmark training data and split it for train/test\"\"\"\n", + " df = load_bench_data()\n", + " X = df.text\n", + " y = df.language\n", + " test_data = load_test_data()\n", + " args = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " # X_train, X_test, y_train, y_test\n", + " \n", + " return df, X, y, test_data, args" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df, X, y, test_data, args = setup() # Load and split the train/test data\n", + "X_train, X_test, y_train, y_test = args" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Inspect the test_data dataframe to make sure language and text are properly aligned" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
languagetextguess
item
0perluse warnings;\\nuse strict;\\n\\nmy $initial = jo...NaN
1clojure(defn cf-settings\\n \"Setup settings for campf...NaN
2clojure(ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...NaN
3clojure(extend-type String\\n Person\\n (first-name [...NaN
4clojure(require '[overtone.live :as overtone])\\n\\n(de...NaN
5pythonfrom pkgutil import iter_modules\\nfrom subproc...NaN
6pythonimport re\\nimport subprocess\\n\\ndef cmd_keymap...NaN
7pythonclass NoSuchService(Exception):\\n def __ini...NaN
8pythonfrom collections import namedtuple\\nimport fun...NaN
9javascriptfunction errorHandler(context) {\\n return fun...NaN
10javascriptvar _ = require('lodash'),\\n fs = require('...NaN
11javascript/* Riot v2.0.8, @license MIT, (c) 2015 Muut In...NaN
12javascriptvar r = riot.route = function(arg) {\\n //...NaN
13rubymodule ActiveJob\\n module Core\\n extend Ac...NaN
14rubyrequire 'formula'\\n\\nclass A52dec < Formula\\n ...NaN
15rubymodule Fluent\\n class Input\\n include Conf...NaN
16haskell{-# LANGUAGE ScopedTypeVariables, FlexibleInst...NaN
17haskellreverseDependencies :: ModuleGraph -> M.Map Mo...NaN
18haskell{- git-annex extra config files\\n -\\n - Copyri...NaN
19scheme(define subst-f\\n (lambda (new old l)\\n (c...NaN
20scheme(define add1\\n (lambda (n) (+ n 1)))NaN
21scheme(define-lib-primitive (length lst)\\n (if (nul...NaN
22java/**\\n * Interface to represent a persistence s...NaN
23java/*\\n * Copyright 2002-2008 the original author...NaN
24scalapackage com.github.pathikrit\\n\\nimport scala.a...NaN
25scala/* sbt -- Simple Build Tool\\n * Copyright 2010...NaN
26tclproc isaac::mix {a b c d e f g h} {\\n set a...NaN
27tclproc twitter::follow {nick uhost hand chan arg...NaN
28phpclass View\\n{\\n /**\\n * Data available ...NaN
29phppublic function formatLocalized($format)\\n...NaN
30phpclass Application extends App {\\n\\t/**\\n\\t * @...NaN
31ocamltype name = string\\n\\nlet compare_label label1...NaN
32ocamllet search_compiler_libs () =\\n prerr_endline...NaN
\n", + "
" + ], + "text/plain": [ + " language text guess\n", + "item \n", + "0 perl use warnings;\\nuse strict;\\n\\nmy $initial = jo... NaN\n", + "1 clojure (defn cf-settings\\n \"Setup settings for campf... NaN\n", + "2 clojure (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... NaN\n", + "3 clojure (extend-type String\\n Person\\n (first-name [... NaN\n", + "4 clojure (require '[overtone.live :as overtone])\\n\\n(de... NaN\n", + "5 python from pkgutil import iter_modules\\nfrom subproc... NaN\n", + "6 python import re\\nimport subprocess\\n\\ndef cmd_keymap... NaN\n", + "7 python class NoSuchService(Exception):\\n def __ini... NaN\n", + "8 python from collections import namedtuple\\nimport fun... NaN\n", + "9 javascript function errorHandler(context) {\\n return fun... NaN\n", + "10 javascript var _ = require('lodash'),\\n fs = require('... NaN\n", + "11 javascript /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... NaN\n", + "12 javascript var r = riot.route = function(arg) {\\n //... NaN\n", + "13 ruby module ActiveJob\\n module Core\\n extend Ac... NaN\n", + "14 ruby require 'formula'\\n\\nclass A52dec < Formula\\n ... NaN\n", + "15 ruby module Fluent\\n class Input\\n include Conf... NaN\n", + "16 haskell {-# LANGUAGE ScopedTypeVariables, FlexibleInst... NaN\n", + "17 haskell reverseDependencies :: ModuleGraph -> M.Map Mo... NaN\n", + "18 haskell {- git-annex extra config files\\n -\\n - Copyri... NaN\n", + "19 scheme (define subst-f\\n (lambda (new old l)\\n (c... NaN\n", + "20 scheme (define add1\\n (lambda (n) (+ n 1))) NaN\n", + "21 scheme (define-lib-primitive (length lst)\\n (if (nul... NaN\n", + "22 java /**\\n * Interface to represent a persistence s... NaN\n", + "23 java /*\\n * Copyright 2002-2008 the original author... NaN\n", + "24 scala package com.github.pathikrit\\n\\nimport scala.a... NaN\n", + "25 scala /* sbt -- Simple Build Tool\\n * Copyright 2010... NaN\n", + "26 tcl proc isaac::mix {a b c d e f g h} {\\n set a... 
NaN\n", + "27 tcl proc twitter::follow {nick uhost hand chan arg... NaN\n", + "28 php class View\\n{\\n /**\\n * Data available ... NaN\n", + "29 php public function formatLocalized($format)\\n... NaN\n", + "30 php class Application extends App {\\n\\t/**\\n\\t * @... NaN\n", + "31 ocaml type name = string\\n\\nlet compare_label label1... NaN\n", + "32 ocaml let search_compiler_libs () =\\n prerr_endline... NaN" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use a generic bag of words/naive bayes classifier pipeline as a baseline" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def assess_test_data(pipe):\n", + " test_data['guess'] = pd.DataFrame(pipe.predict(test_data['text']))\n", + " correct = test_data[test_data.language == test_data.guess]\n", + " print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data)))\n", + " print(test_data[['language', 'guess', 'text']])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.989, Test score: 0.949\n", + "Proportion of test data correctly labeled: 0.727\n", + " language guess \\\n", + "item \n", + "0 perl perl \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure clojure \n", + "4 clojure clojure \n", + "5 python python \n", + "6 python clojure \n", + "7 python ruby \n", + "8 python python \n", + "9 javascript javascript \n", + "10 javascript javascript \n", + "11 javascript clojure \n", + "12 javascript php \n", + "13 ruby ruby \n", + "14 ruby clojure \n", + "15 ruby ruby \n", + "16 haskell haskell \n", + "17 haskell haskell \n", + "18 haskell clojure \n", + "19 scheme scheme \n", + "20 
scheme scheme \n", + "21 scheme scheme \n", + "22 java java \n", + "23 java c \n", + "24 scala scala \n", + "25 scala scala \n", + "26 tcl tcl \n", + "27 tcl python \n", + "28 php clojure \n", + "29 php php \n", + "30 php php \n", + "31 ocaml ocaml \n", + "32 ocaml ocaml \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... \n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... \n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... 
\n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" + ] + } + ], + "source": [ + "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)\n", + "c = classifier.predict(X)\n", + "#print('Guesses: ', c[0:5])\n", + "assess_test_data(spam_pipe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Inspect the baseline features" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['zin', 'zip', 'zipwith', 'zipwithindex', 'zipwithm', 'zipwithm_', 'ziv', 'ziv1', 'ziv2', 'zizi', 'zoo', 'zotov', 'zq', 'zr', 'zr1', 'zr2', 'zri', 'zrn', 'zrv', 'zrv1', 'zrv2', 'zrzi', 'zrzr', 'zs', 'zt', 'zu', 'zubach', 'zx', 'zy', 'zz']\n", + "Train score: 0.989, Test score: 0.949\n" + ] + } + ], + "source": [ + "cv = CountVectorizer()\n", + "ft = cv.fit_transform(X)\n", + "print(cv.get_feature_names()[-30:])\n", + "spam_pipe = Pipeline([('bag_of_words', cv),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Now try making a limited custom vocabulary to discriminate between languages" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['}', ')', 'var', 'fn', 'function', 'end', 'defn', '===', 'lambda']\n", + "Train score: 0.329, Test score: 0.436\n", + "Proportion of test data correctly labeled: 0.485\n", + " language guess 
\\\n", + "item \n", + "0 perl ruby \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure ruby \n", + "4 clojure clojure \n", + "5 python ruby \n", + "6 python ruby \n", + "7 python ruby \n", + "8 python python \n", + "9 javascript php \n", + "10 javascript javascript \n", + "11 javascript javascript \n", + "12 javascript javascript \n", + "13 ruby ruby \n", + "14 ruby ruby \n", + "15 ruby ruby \n", + "16 haskell ruby \n", + "17 haskell ruby \n", + "18 haskell ruby \n", + "19 scheme scheme \n", + "20 scheme scheme \n", + "21 scheme scheme \n", + "22 java ruby \n", + "23 java ruby \n", + "24 scala ruby \n", + "25 scala ruby \n", + "26 tcl ruby \n", + "27 tcl ruby \n", + "28 php php \n", + "29 php php \n", + "30 php php \n", + "31 ocaml haskell \n", + "32 ocaml ruby \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... 
\n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... \n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... \n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" + ] + } + ], + "source": [ + "cv = CountVectorizer(vocabulary=['}', ')', 'var', 'fn', 'function', 'end', 'defn',\n", + " '===', 'lambda']) #, '(define', 'elif'])\n", + "ft = cv.fit_transform(X)\n", + "print(cv.get_feature_names()[-30:]) \n", + "spam_pipe = Pipeline([('bag_of_words', cv),\n", + " ('bayes', MultinomialNB())])\n", + "classifier = assess_classifier(spam_pipe, *args)\n", + "assess_test_data(spam_pipe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vocabulary|Results\n", + "----------|-------\n", + "} | Train score: 0.122, Test score: 0.137\n", + "}, ) | Train score: 0.122, Test score: 0.137\n", + "}, ), var | Train score: 0.161, Test score: 0.179\n", + "}, ), var, fn | Train score: 0.195, Test score: 0.248\n", + "}, ), var, fn, function| Train score: 0.287, Test score: 0.308\n", + "}, ), var, fn, function, end | Train score: 0.278, Test score: 0.325\n", + "}, ), var, fn, function, end, defn | Train score: 0.302, Test score: 0.359\n", + "}, ), var, fn, function, end, defn, === | Train score: 0.300, Test score: 0.368\n", + "}, ), var, fn, function, end, defn, ===, lambda | Train score: 0.334, Test 
score: 0.427\n", + "}, ), var, fn, function, end, defn, ===, lambda | Proportion of test data correctly labeled: 0.485\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above didn't work nearly as well as the automatically-generated vocabulary with thousands of words. Adding new words quickly yielded diminishing returns. Since we want to get above 80% accuracy, it looks like we will need to add our own features." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define new features" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def longest_run_of_caps_feature(text):\n", + " \"\"\"Find the longest run of capital letters and return its length.\"\"\"\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if runs:\n", + " return len(runs[-1])\n", + " else:\n", + " return 0" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def percent_character_feature(char):\n", + " \"\"\"Return percentage of text that is a particular char compared to total text length.\"\"\"\n", + " def feature_fn(text):\n", + " chars = text.count(char)\n", + " return chars / len(text)\n", + " return feature_fn\n", + "\n", + "# def percent_characters_feature(char_list):\n", + "# \"\"\"\n", + "# Return percentage of text for each char/substring in char_list,\n", + "# compared to total text length.\n", + "# \"\"\"\n", + "# def feature_fn(text):\n", + "# hits = []\n", + "# for char in char_list:\n", + "# hits.append(text.count(char) / len(text))\n", + "# return hits\n", + "# return feature_fn\n", + "\n", + "def count_word_feature(word):\n", + " \"\"\"Return the number of times word occurs in the text.\"\"\"\n", + " def feature_fn(text):\n", + " num_words = text.count(word)\n", + " return num_words\n", + " return
feature_fn\n", + "\n", + "def longest_line_feature(text):\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", + " lens = [len(line) for line in lines]\n", + " return max(lens) # Includes newline character\n", + "\n", + "def longest_run_of_parens(text):\n", + " matches = re.findall(r'\\)*', text)\n", + " if not matches:\n", + " return 0\n", + " lens = [len(match) for match in matches]\n", + " return max(lens)\n", + "\n", + "def nested_dots(text):\n", + " matches = re.findall(r'\\.([^\\s]*\\.)*', text)\n", + " if not matches:\n", + " return 0\n", + " lens = [match.count('.') for match in matches]\n", + " return max(lens)\n", + "\n", + "def max_paren_depth(text):\n", + " max_depth = 0\n", + " depth = 0\n", + " for char in text:\n", + " if char == '(':\n", + " depth += 1\n", + " max_depth = depth if depth > max_depth else max_depth\n", + " if char == ')':\n", + " depth -= 1\n", + " return max_depth\n", + "\n", + "def max_curly_brace_depth(text):\n", + " max_depth = 0\n", + " depth = 0\n", + " for char in text:\n", + " if char == '{':\n", + " depth += 1\n", + " max_depth = depth if depth > max_depth else max_depth\n", + " if char == '}':\n", + " depth -= 1\n", + " return max_depth\n", + "\n", + "def percent_words_match_regex(regex):\n", + " \"\"\"Return the proportion of whitespace-separated words that match regex.\"\"\"\n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " #words = re.findall(r'[^[\\s]]+\\b', text)\n", + " words = text.split()\n", + " for word in words:\n", + " if re.search(regex, word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches / len(words)\n", + " return feature_fn\n", + "\n", + "def count_endings_feature(regex): \n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", + "\n", + " words = text.split()\n", + " for word in words:\n", + " if
re.search(regex + r'\\w*$', word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches #/ len(words)\n", + " return feature_fn\n", + "\n", + "def count_beginnings_feature(regex): \n", + " def feature_fn(text):\n", + " num_matches = 0\n", + " lines = re.findall(r'.*?\\n', text)\n", + " if not lines:\n", + " return len(text)\n", + "\n", + " words = text.split()\n", + " for word in words:\n", + " if re.search(r'\\w*' + regex, word):\n", + " num_matches +=1\n", + " if len(words) == 0:\n", + " return 0\n", + " return num_matches #/ len(words)\n", + " return feature_fn\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Look at the new features on a simple Python program" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature Ideas\n", + "- +Longest line\n", + "- bag of words with chars, ngrams, let\n", + "- +run of )\n", + "- % _, }, :\\n, \"\"\"\n", + "- key words: def, defn, var `__x__`, module, end, ->, ::, ;, /*, //, case, final, extends, public, protected, $word, @param, self, this\n", + "- + depth nested dots (or % nested dots)\n", + "- +() nest depth\n", + "- ignore/strip comments?\n", + "- Hyphenated or camel or underscored\n", + "- Indentation...\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1]])" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "txt = '''\n", + "# Test python program\n", + "class MyClass:\n", + " \"\"\"MyClass is a class to do something\"\"\"\n", + " def __init__(self, name='name'):\n", + " self.name = name\n", + " def longest_run_of_caps_feature(text):\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if len(runs) == 0:\n", + " return 0\n", + " longest = runs[-1]\n", + "
return len(longest)\n", + " print('{}'.format(self.name))\n", + " $thing \n", + " @thing\n", + " :thing\n", + " end\n", + " end\n", + "end\n", + "\n", + "'''\n", + "featurizer = FunctionFeaturizer(\n", + "# longest_run_of_caps_feature,\n", + "# percent_character_feature('.'),\n", + "# longest_line_feature,\n", + "# longest_run_of_parens,\n", + "# nested_dots,\n", + " max_paren_depth,\n", + "# percent_words_match_regex(r'\\$\\w'),\n", + "# percent_words_match_regex(r'\\@\\w'),\n", + "# percent_words_match_regex(r':\\w'),\n", + "# percent_character_feature(r';'),\n", + " count_word_feature('}'),\n", + " count_word_feature(')'),\n", + " count_word_feature('var'),\n", + " count_word_feature('fn'),\n", + " count_word_feature('function'),\n", + "# count_word_feature('end'),\n", + " count_word_feature('defn'),\n", + " count_word_feature('==='),\n", + "# count_word_feature('lambda'),\n", + " count_word_feature(';'),\n", + " count_word_feature('public'),\n", + "# count_word_feature('val'),\n", + "# count_word_feature('=>'),\n", + " count_word_feature('set'),\n", + " count_word_feature('extends'),\n", + " count_word_feature('module'),\n", + " count_endings_feature(r'end'),\n", + " count_beginnings_feature(r'let'),\n", + " count_word_feature('->'), #.758\n", + " count_beginnings_feature(r'\\(define'), #.818 got scheme\n", + " count_beginnings_feature(r'\\{-'), #.848 less haskell\n", + " count_word_feature('object'), #\n", + " max_curly_brace_depth, #.879 got javascript\n", + "# # count_beginnings_feature(r'from'), #added ^ to start of regex, numbers dropped, so removed\n", + "# percent_character_feature(']'),\n", + "# # count_word_feature('.'),\n", + "# count_word_feature('proc'),\n", + "# count_beginnings_feature('public'),\n", + "# # count_endings_feature(r';'), #fixed java but broke haskell/scala/php/ocaml\n", + " \n", + " \n", + " \n", + " \n", + " )\n", + "featurizer.transform([txt])" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": { + 
"collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "count_endings_feature('end')(txt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Test the pipeline on the actual test data" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# pipe = Pipeline([('fnfeaturizer', featurizer),\n", + "# ('bayes', MultinomialNB())])\n", + "pipe = make_pipeline(featurizer, MultinomialNB())\n", + "#pd.DataFrame(args[0]).index.values/pd.DataFrame(args[2]).index.values #, args[2])" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "count_beginnings_feature('public')(test_data.loc[2].text)\n", + "# count_word_feature('proc')(test_data.loc[27].text)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train score: 0.786, Test score: 0.803\n", + "Proportion of test data correctly labeled: 0.879\n", + " language guess \\\n", + "item \n", + "0 perl perl \n", + "1 clojure clojure \n", + "2 clojure clojure \n", + "3 clojure python \n", + "4 clojure clojure \n", + "5 python ruby \n", + "6 python python \n", + "7 python python \n", + "8 python python \n", + "9 javascript javascript \n", + "10 javascript javascript \n", + "11 javascript javascript \n", + "12 javascript javascript \n", + "13 ruby ruby \n", + "14 ruby ruby \n", + "15 ruby ruby \n", + "16 haskell haskell \n", + "17 haskell haskell \n", + "18 haskell haskell \n", + "19 scheme scheme \n", + "20 scheme scheme \n", + "21 scheme scheme 
\n", + "22 java tcl \n", + "23 java java \n", + "24 scala scala \n", + "25 scala scala \n", + "26 tcl tcl \n", + "27 tcl tcl \n", + "28 php php \n", + "29 php php \n", + "30 php php \n", + "31 ocaml haskell \n", + "32 ocaml ocaml \n", + "\n", + " text \n", + "item \n", + "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", + "1 (defn cf-settings\\n \"Setup settings for campf... \n", + "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", + "3 (extend-type String\\n Person\\n (first-name [... \n", + "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", + "5 from pkgutil import iter_modules\\nfrom subproc... \n", + "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", + "7 class NoSuchService(Exception):\\n def __ini... \n", + "8 from collections import namedtuple\\nimport fun... \n", + "9 function errorHandler(context) {\\n return fun... \n", + "10 var _ = require('lodash'),\\n fs = require('... \n", + "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", + "12 var r = riot.route = function(arg) {\\n //... \n", + "13 module ActiveJob\\n module Core\\n extend Ac... \n", + "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", + "15 module Fluent\\n class Input\\n include Conf... \n", + "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", + "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", + "18 {- git-annex extra config files\\n -\\n - Copyri... \n", + "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", + "20 (define add1\\n (lambda (n) (+ n 1))) \n", + "21 (define-lib-primitive (length lst)\\n (if (nul... \n", + "22 /**\\n * Interface to represent a persistence s... \n", + "23 /*\\n * Copyright 2002-2008 the original author... \n", + "24 package com.github.pathikrit\\n\\nimport scala.a... \n", + "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", + "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", + "27 proc twitter::follow {nick uhost hand chan arg... 
\n", + "28 class View\\n{\\n /**\\n * Data available ... \n", + "29 public function formatLocalized($format)\\n... \n", + "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", + "31 type name = string\\n\\nlet compare_label label1... \n", + "32 let search_compiler_libs () =\\n prerr_endline... \n" + ] + } + ], + "source": [ + "pipe.fit(args[0], args[2]) # X_train, y_train\n", + "pipe.score(args[1], args[3])\n", + "classifier = assess_classifier(pipe, *args)\n", + "assess_test_data(pipe)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# print(confusion_matrix(classifier.predict(args[1]), args[3]))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion matrix, without normalization\n", + "[[ 2 0 0 1 0 3 0 0 1 2 0 0 0 0 0]\n", + " [ 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0]\n", + " [ 0 1 5 0 0 0 0 0 0 0 2 0 0 0 0]\n", + " [ 0 0 0 5 0 2 0 0 0 0 0 0 0 0 0]\n", + " [ 0 0 0 0 6 0 0 0 0 0 1 0 0 0 0]\n", + " [ 0 0 0 1 0 5 0 0 1 0 0 0 0 0 0]\n", + " [ 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]\n", + " [ 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0]\n", + " [ 1 0 0 0 0 0 0 0 4 0 0 0 0 0 0]\n", + " [ 1 0 0 2 0 0 0 0 1 10 0 0 0 0 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 6 0 1 0 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 0 14 1 0 0]\n", + " [ 0 0 0 0 1 0 0 0 0 0 0 0 12 0 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAewAAAG4CAYAAACHNdSBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xe8XFW9///XOxAklIQmCFLiRcBLDyDSUrDwQ1CwgCgo\ngrd4wcK1cC3wxRDgimIFQVEEgqICilIEKV5yAggCgQQQQUGRIr2EEkpI3r8/9j5hGE6bM2fOnH3m\n/eQxj7PL2p+9Zk44n1lrr722bBMREREj25h2VyAiIiL6l4QdERFRAUnYERERFZCEHRERUQFJ2BER\nERWQhB0REVEBSdgRbSZpnKQLJD0p6awm4uwn6ZKhrFu7SJos6fZ21yNiJFHuw44YGEn7Ap8FNgKe\nBuYCx9i+usm4HwE+CWxve3HTFR3hJC0G3mj7b+2uS0SVpIUdMQCSPgt8GzgaWB1YBzgR2GMIwq8H\n/KUTknUN9bpDWno4KxJRFUnYEf2QNAE4EjjY9m9sP2d7ke3f2v5CWeY1kr4j6f7y9W1Jy5T7pkm6\nT9JnJT0k6Z+SDij3HQn8P2AfSU9L+pik6ZJ+UnP+iZIWSxpTrh8g6S5JT0n6W9ny795+Zc1xO0i6\nvuxqv07S9jX7ZkmaIemqMs4lklbt5f131/9QSQ+X9X+PpN0k/UXSY5K+WFN+W0nXSHqiLHuCpLHl\nvtllsXnl+927Jv7/SHoA+HG57d7ymPXLc0wq19eS9IikKU39YiMqJgk7on/bA8sCv+6jzGHAtsAW\n5Wtb4PCa/WsA44G1gH8DTpQ0wfZXgP8FfmF7RdunAr1ep5K0PPBdYFfb48u6ze2h3CrAb4HvAKsA\n3wJ+K2nlmmIfAg6g6DFYBvh8H+9vDeA1wJrAEcApwH7AJGAycISk9cqyLwGHAKuW9XsbcDCA7e4k\nu3n5fs+pib8ysC7w8doT274L+ALwU0njgNOA02zPJqKDJGFH9G9V4NF+uqz3BWbYftT2oxQt8o/U\n7F9Y7l9k+2LgGYpr4VB0D9d2EffaXVxaDGwmaZzth2zf1kOZ3YE7bJ9pe7HtXwC383IXvimS3p22\nnwfOBrbs45wLKa7XLwLOovgS8B3bz5bnv637eNs32r6uPO8/gB8CUwfwnr5ie2FZn1ewfQpwJ3Ad\nRXI/rJ94EaNOEnZE/x4DVuvuku7FWsA/atbvKbctiVGX8BcAKzRaEdvPAvsA/wX8U9KFkjbqoeha\nZR1q/aOuTg/WLD/XT30e88sjVJ8rfz5Ud/zyAJI2LOv1gKT5wDEUX3r68ojtF/spcwqwCXCC7YX9\nlI0YdZKwI/p3DfAC8N4+yvwTmFizvm65bTCeAZarWX9d7U7bl9repdx+O/CjHmLcTzGYrdZ65fZW\n+z5Fi/uNtidQtIb7+1vT5+0qklag6N4/BTiyrms/oiMkYUf0w/Z8iuu2J0raU9JyksZKeqekr5XF\nfg4cLmk1SauV5X/SW8x+zAWmSFqnHPD2pe4dklYv67A8RTf1s8CiHmJcDGwo6UOSlpa0D/Am4MKa\nMv11vQ/WChS3vS2Q9CbgoLr9DwHrNxjzu8B1tv+T4tr8D5quZUTFJGFHDIDtb1Hcg3048DBFd/PB\nvDwQ7WjgBuDm8nVDuW1JiL7C1+63fTnFdeKbgeuBC2r2jwE+Q9FSfoxiwNdB9XFsPwa8C/gc8CjF\ngLJ32X68lzqZ/uvY13qtz1Nc03+K4vr1L+rKTwdmlqPI9+rj3AaQtCewCy+/z88CW0n6UB91iBh1\nMnFKREREBaSFHRERUQFJ2BERERWQhB0REVEBSdgREREVkEn2W0RSRvNFRAyQ7VbdZrjEUP1dHo66\n9iQJu4X+8uCzAyp3/HHH8OlDBzbT4jqrLtd/odLRM6Zz+BHTB1y+VXHnLxjYpFTHfXUGh37piAHH\nnbDc2AGVa7S+t9331IDLnvydr/Lx//5S/wWBjdceP+C4o/139
9RzA4vbyP8bMPD/P1r1+bYy9miO\nO27s8OW/Zbf8RFPHPz/3xCGqSeOSsCMionP0OcPwyJaEHRERnUNt6c0eEknYI8BbdpjckrhTpk6r\nVNwddurvgU6D06r6Amy93U4tiZvfXaFq/2+0MnbiDpEKt7Az01mLSPJAr2E3opFr2CPFQK+DNmqg\n17Ab1cg17EY0cg17pGjV726g17AbVcX/P6K4hj1cg86W3eYzTcV4/oZvt23QWXW/avRD0nRJn+un\nzJGS3jZcdYqIiBis0dwl3m/Xge2vNBJQ0tK2Xxp8lSIioq2GuEtc0qnA7sDDtjcrtx0F7EGRhx4D\nDrB9bw/H3k3xkJxFwELb2/Z1rlHTwpa0v6R5kuZKOqNu35aSri33nytppXL76ZLeXy7fLWmVcnkb\nSVeUy9Ml/UTSVRRPGFpN0i8lXVe+dhjmtxoREYMlNfd6tdOAXeu2fd32Fra3BH4D9NY4NDDN9qT+\nkjWMkoQtaRPgMGDn8gM6pNzV3co+AzjU9hbALbz84dU+1q+vFvmbgLfZ3g84Hvh2+eHuBZwyZG8k\nIiJaS2Oae9WxfSXwRN22p2tWV6B4xG2vNRpo1UdLl/hbgbO7n/Vr+wmV34QkjQcmlB8qwEzgnAZi\nGzjf9gvl+tuBf9XL37RWlLSc7QVNvoeIiBglJB0DfARYAGzXSzEDl0taBJxs+0d9xRwtCdsM/FtK\nb+Ve4uUeh2Xr9tUmYwFvsf1ifyc6/rhjliy/ZYfJvGXHKQOsYkTE6DW7axazu2a15+TDdB+27cOA\nwyR9Efg2cGAPxXa0/YCk1wKXSbq9pnH5KqMlYf8f8GtJ37L9ePe1aIrb1p6S9ISknWxfRfGNZ1YP\nMe4GtgF+B7y/Znv9b/dS4NPAN6C4Pm57bk+VamRKxYiITjFl6rRX3Kd9zFFHDt/JGxx0tmj+P1j8\n1D3NnPFnwEU97bD9QPnzEUm/BrYFRnfCtn1b2f3QVXYt3ESRgLuvS38U+IGk5YC76PmbzpHAjyU9\nRZHQa69t117f/jRwoqR5FJ9fF3DwkL6hiIhojQZb2EutNJGlVpq4ZH3RfVcN4BTawPZfy9U9KXJS\nfZnlgKVsPy1peWAXijzUq1GRsAFsn0ExuKynffOA7XvYtSrFkHvK1vdGPRx7ZN36Y8AHm61vRERU\nn6SfA1OB1STdSzGoeTdJG1HcrnUXcFBZdi3gR7Z3B14HnFuOh1oaONP2pX2da9Qk7EaV986NA/r/\nuhQREaPDEN+HbftDPWw+tZey/6S4ZxvbfwO2bORcHZuwbX+s3XWIiIhhlod/REREVECFH/6RhB0R\nEZ2jwgm7ujWPiIjoIGlhR0RE5xiTa9jRg1Y8m3e1fU8f8pgAj/7sgJbEhdY9t7pVqvjc6lZp1e+u\nav8mYhSpcJd4EnZERHSOCo8Sr+5XjYiIiA6SFnZERHSOdIlHRERUQIW7xJOwIyKic6SFHRERUQEV\nbmFX96vGEJJ0d/cztCVd3e76RERE1EsLu7Dkede2d2xnRSIiooUq3CXe0ppL2l/SPElzJc2UtJ6k\n/yu3XS5pnbLc6ZJOknSNpLskTSvL3ybptJp4z0j6uqRbJV0maTtJXeUx7y7LLCvpNEk3S7pR0rRy\n+wGSzpV0saS/SPpaL3V+pvy5pqTZkm6SdIukHWvq8K2yDpdLWq2Vn2FERAwhqblXG7UsYUvaBDgM\n2Nn2lsB/A98DTrO9BXAmcHzNISvZ3h74DHA+8HVgE2AzSZuXZZYDfm97U+BpYAbwVuC95TLAJ4BF\ntjcHPgTMlPSact8WwAeAzYB9JL2+h6p3t7b3BX5ne1J53LyaOlxf1qGL4mHlERFRBRrT3KuNWnn2\ntwJn234cwPYTwHbAz8r9PwV2KpcNXFAu3wo8aPtPtg38CZhY7nvR9iXl8i3AFbYXlcd0l9mxjI3t\nO4B/ABuW5/i97adtvwDcB
qzXR/2vAw6U9BVgM9vPlNsXA2f18B4iIiJappXXsA301H/QW5/Ci+XP\nxcALNdsX83I9F9ZtfxHA9mJJte+lt3PUxl1EH+/f9pWSJgPvAk6X9C3bP6krJmquf9c7esb0JctT\npk5jytRpvRWNiOgYs7tmMbtrVntOXuFR4q1M2P8H/LpMdI+Xo7D/AHyQomW6HzC7Bee9sox9haQN\ngXWB24GtGwkiaV3gftunSFoWmAT8hKJXYm+KVva+5fl6dPgR0wdT/4iIUa2+AXPMUUcO38krPOis\nZQnb9m2SjgG6JC0CbgQ+BZwm6VDgYeDA2kN6WaaP7T0dcxLwfUk3Ay8BH7W9UJL7iNtTzJ2Bz0ta\nSHG9fP9y+7PAtpIOBx4C9hlAzIiIGAkqnLBVXCaOgZL0tO0VB1DOzy0c+s+2io/XjIjoy7ixwnbL\n+6oledl3ndhUjOcv/MSw1LUnuQ+7cfmGExFRVbmG3Tlsj293HSIiYpAq3CWehB0REZ0jLeyIiIgK\nqHALu7o1j4iI6CBpYUdEROdIl3gMl7tO2a8lcX85776WxAXYa4u1WxY7ota9jy1oSdx1Vl2uJXFj\n+GmIE7akU4HdgYdtb1ZuO45ilswXgbuAA23P7+HYXYHvAEsBp9ju8aFU3dIlHhERHUNSU68enAbs\nWrftUmCT8kFXfwG+1EM9lqJ4INauwMbAhyT9a191T8KOiIgYJNtXAk/UbbvM9uJy9Y9AT92M2wJ3\n2r7b9kLgF8CefZ0rCTsiIjqHmnw17mPART1sfz1wb836feW2XuUadkREdIxGr2Evevh2Fj1y+2DP\ndRjFY6F/1sPuhmfNTMKOiIiO0WjCXnqNf2XpNV6+tLzwtvMGep4DgN2At/VS5H5gnZr1dSha2b1K\nl3hERMQQKkd/Hwrsafv5XordAGwgaaKkZSie/Hh+X3GTsCMiomMM9ShxST8H/gBsJOleSR8DTgBW\nAC6TdJOkk8qya0n6LYDtl4BPApcAtwFn2f5zX3XvqC7xsotia9ufanddIiJi+A31fdi2P9TD5lN7\nKftPinu2u9cvBi4e6Lk6KmHT5KMxJY2pGaofERFVU92JzkZPl7ik/SXNkzRX0hmS9pZ0S7k+q7sY\nsJakiyX9RdLXao4/SdL1km6VNL1m+92SjpU0B9hb0ixJ3ym7OW6R9OZhfaMRETFoLZg4ZdiMiha2\npE2Aw4DtbT8uaWWgC9jF9gOSap9hvWX5ehG4Q9Lxtu8HDrP9RDn7zOWSNrV9K0Wr/FHbW5fn+i9g\nnO1JkiZTdH1sNmxvNiIiOtKoSNjAW4GzbT8OUCbeq4GZks4Gzi3LGfi97acBJN0GrEcxvH4fSf9B\n8ZmsSTFV3K3lcWfVne/n5XmulDRe0njbT9VX6ugZ05csT5k6jSlTpw3BW42IqLbZXbOY3TWrLedu\ndyu5GaMlYZu6KxO2D5K0LcUF/jmSti7LvFBTbBGwtKQ3AJ8DtrE9X9JpwLI15Z4dwPlf5fAjpjf0\nJiIiOkF9A+aYo44ctnNXOWGPlmvY/0dxfXkVAEmrSFrf9nW2vwI8QnFTek+JVcCKFEn5KUlrAO/s\n53z7lOfZCXiyu8UeEREjW65ht5nt2yQdA3RJWgTcBIyXtAFFQr7c9jxJW/LqpG3bN0u6CbidYm7X\nq/o55fOSbqT4/D42pG8mIiKiB6MiYQPYPgM4o58yM4GZNevvrlk+sJdj3tDD5p/Y/swgqxoREe1S\n3R7x0ZOwIyIi+tPubu1mJGE3yPbO7a5DREQMTpUT9mgZdBYRETGqpYUdEREdo8ot7CTsiIjoHNXN\n10nYERHROdLCjmEzYbmxLYm71xZrtyQuwC/n3deSuK2q872PLWhJ3HVWXa4lceNl+YyjP1VO2Bl0\nFhERUQFpYUdERMeocgs7CTsiIjpGEnZEREQVVDdf5xp2REREFaSFHRERHaPKXeKVb2FLmij
pliZj\nHCDphAbKz5K0Vbl8d/dzuCMiYmTL87Crr/4Z2Y2Ub/TYiIhok3Yn3WZUvoVdWkrSDyXdKukSSctK\n+g9J10maK+mXksYBSNpb0i3l9lnl8Ut+g5J2l/QHSatK2qVcniPpbEnLt+PNRUREjJaEvQHwPdub\nAk8C7wd+ZXtb21sCfwb+rSz7/4Bdyu17lNsMIOm9wBeAd1Ik8cOAt9neGpgDfHaY3k9ERLSCmny1\n0WjpEv+77ZvL5TnARGAzSUcDE4AVgN+V+68GZko6Gzi33CbgrcA2wDtsPyPpXcDGwB/KLpRlgD80\nUqmjZ0xfsjxl6jSmTJ3W6PuKiBh1ZnfNYnbXrLacu8pd4qMlYb9Qs7wIGAecBuxp+xZJHwWmAdg+\nSNK2wO7AHElbU7Sw7wLeAGxEkfQBLrO972ArdfgR0wd7aETEqFXfgDnmqCOH7dxDnbAlnUqRTx62\nvVm5bRXgLGA94G7gA7af7OHYu4GnKPLWQtvb9nWu0dIl3pMVgAcljQU+3L1R0vq2r7P9FeARYJ1y\n1z+AvYAzJG0M/BHYUdL65XHLS9pgWN9BREQMqRaMEj8N2LVu2xcpGnwbAr8v13tiYJrtSf0laxg9\nCbunkdpHUCTdqyiuYXeX+bqkm8tbwa62Pa87hu07gP2AcygS/gHAzyXNo+gO36h1byEiIqrG9pXA\nE3Wb9wBmlsszgff0EWLATf7Kd4nbvhvYvGb9mzW7f9BD+ff3sG0m5Ydrey6wSbnr78CrvvXY3rlm\n+Q2DrHpERAyzYbqGvYbth8rlh4A1eiln4HJJi4CTbf+or6CVT9gREREDNsxjzmxbUm/zdexo+wFJ\nrwUuk3R72WLvURJ2RER0jEZb2M/dezPP39fwZJoPSXqd7QclrQk83FMh2w+UPx+R9GuKHt0k7IiI\niEaNW2dzxq2z5KorT177s4Ecdj7wUeBr5c/f1BeQtBywlO2ny0m5dgH6HC4/WgadRURE9GuoR4lL\n+jnloGRJ90o6EDgWeIekv1DM8XFsWXYtSb8tD30dcKWkuRQDpC+0fWlfdU8LOyIiOsZQjzmz/aFe\ndr29h7L/pLhnG9t/A7Zs5FxJ2BER0TEy01lEH/baYu2WxP3ulXe1JO4hk9dvSdxovfkLFrYk7oTl\nxrYkbkQjkrAjIqJjVLiBnYQdERGdI13iERERFVDhfJ2EHRERnWPMmOpm7NyHHRERUQFpYUdERMeo\ncpd4R7ewJV3d7jpERMTwacHzsIdNR7ewbe/Y7jpERMTwSQu7oiQ9I2l5SZdLmiPpZkl7lPuOlXRw\nTdnpkj7XW/mIiIhW6ugWNsXDw58D3ls+MWU14BqKJ638AvgOcFJZdm+Kp6k830v5iIgY4drdrd2M\nTk/YUPQyfFXSZGAxsJak1W3PlbR6+SzT1YEnbN8vaWwv5Xt83mlERIwcSdjVth+wGrCV7UWS/g4s\nW+47B9iL4jFovxhA+Vc4esb0JctTpk5jytRprah/RESlzO6axeyuWW05d4XzdRI2MAF4uEy+OwPr\n1ew7CzgFWBWYUm4b30f5Vzj8iOmtqXFERIXVN2COOerI9lWmQjo9YRs4E7hA0s3ADcCfl+y0b5O0\nAnCf7YfKzb2Wj4iIkS1d4hUkaVXgcduPATv0Vs725nXrfZaPiIiRq8L5ujMTtqS1gCuA49pdl4iI\nGD5pYVeM7X8CG7W7HhERMbwqnK87e+KUiIiIqujIFnZERHSmdIlHRERUQIXzdRJ2RER0jrSwI/ow\nf8HClsQ9ZPL6LYn7y3n3tSTuXlus3ZK4rdSq392E5cZWKm7ESJCEHRERHaPCDewk7IiI6BzpEo+I\niKiACufr3IcdERHRDEmHSLpF0q2SDumlzPGS/ippnqRJgzlPEnZERHQMSU29eoi3KfDvwJuBLYB3\nSVq/rsxuwBttbwD8J/D9wdQ9CTsiIjqG1NyrB28C/mj
7eduLgC7gfXVl9gBmAtj+I7CSpDUarfuI\nStiSrm53HaB4OIikc/opM0HSQcNVp4iIaN5Qt7CBW4HJklaRtBywO1B/D+frgXtr1u/roUy/RtSg\nM9s7trsOkpYuHw6ydz9FVwYOZpBdGxERMfyGepS47dslfQ24FHgWuAlY3NOp6w9t9FwjKmFLegZY\nAziPIiGOBQ63fb6kY4F7bJ9Ulp0OPA38oJfyywNnU3yzWQo4yvbZkt4MfAdYHngeeDuwF0UXxvLA\nGEkHAL+1vWm5/F5gfBnrp7ZnAMcC60u6CbjU9hda+uFERMSwe/LOG5l/1019lrF9KnAqgKT/Be6p\nK3I/sE7N+trltoaMqIRN8Y3jOeC9tp+WtBpwDXA+8AuKRHtSWXZvYBeKpNtT+V2B+23vDiBpvKRl\nyjgfsD1H0grl+QAmAZvZflLSRF757efNwCZl2esl/Rb4ArCJ7UGN9ouIiOHXaAN75Q22YuUNtlqy\nfs+lp/UQU6vbfljSuhQNvLfUFTkf+CTwC0nbAU/afqixmoy8hA3FdfWvSppM0a2wVvlhzJW0uqQ1\ngdWBJ2zfL2lsT+WBm4FvlC3zC21fJWkz4AHbcwBsPwMgyRSt5Cd7qdOltp8oy54L7AT8plUfQERE\ntEaLJk75paRVgYXAwbafkvRxANsn275I0m6S7qToNj9wMCcZiQl7P2A1YCvbiyT9HVi23HcORff1\n6yhayr2Wt/3X8l633YGjJf0e+HUf510wwPqJnq9PvMrRM6YvWZ4ydRpTpk4b4CkiIkav2V2zmN01\nqy3nbkW+tj2lh20n161/stnzjMSEPQF4uEy+OwPr1ew7CzgFWBXo/oDG91S+bIk/YftMSfOBj1Fc\nd15T0ja2b5C0IkWi7u9X+A5JK1N0v+9J8e3oGWDFvg46/IjpA33PEREdo74Bc8xRR7avMhUy0hK2\ngTOBCyTdDNwA/HnJTvu28rrzfTX9/72V3ww4TtJiim6K/7K9UNI+wAmSxlEk63eU560fsVe7fh3w\nK4qBAj+xfSMUt6FJugW4KIPOIiJGvswlPgTK/v/HbT8G7NBbOdub1633Vv4eimH29cffAGxft3lm\n+eouczdQe577bL+3h1j79VbPiIgYeSqcr0dGwpa0FnAFcFy769KDnlrfERFRQWMqnLFHRMIuJyrZ\nqN316IntV7S+IyIi2mFEJOyIiIjhUOEGdhJ2RER0jgw6i4iIqIAx1c3XSdgREdE50sKO6MOE5ca2\nuwoN2WuLhp96NyDzFyxsSdxWfr5V+91FjGZJ2BER0TEq3MBOwo6IiM6hfmeiHrmSsCMiomNUedDZ\nmHZXICIiIvqXFnZERHSMjBKPiIiogArn63SJN0vSLElbt7seERHRvzFSU6+21r2tZx8d8jSviIho\nuVGbsCV9VtIt5euQctv+kuZJmitpZrnt3ZKulXSjpMskrV5uny5ppqTZku6W9D5J35B0s6SLJeVy\nQkRExUjNvdppVCadsov6AGBbii8lf5R0PXAYsL3txyWtXBa/0vZ25XH/DvwP8Ply3xuAnYFNgGuB\n99r+vKRzgd2B84bpLUVExBAYlYPOJJ3Qx3G2/ekW1Geo7ASca/s5gDLBbgOcbftxANtPlGXXkXQ2\n8DpgGeBv5XYDF9teJOlWYIztS8p9twATh+WdRETEkKlwvu6zhT2Hl6/Ndr9Fl8sj/Zptdz27qe5n\nrROAb9i+UNJUYHrNvhcBbC+WVDsR9GJgqf4qcfSMl0NNmTqNKVOnDaDqERGj2+yuWczumtWWc7d7\n4Fgzek3Ytk+vXZe0vO1nW16joXElcLqkYym6xN8DfBw4TdK3urvEy1b2eOCf5XEH1MTo77fa72/9\n8COmN1rviIhRr74Bc8xRR7avMhXS76AzSTtIug24vVzfUtJJLa9ZE2zfBJwOXEdx7flHtv8AHAN0\nSZoLfLMsPh04R9I
NwCO83HtQP/q7vldhpPcyREREHTX5aifZfecdSdcBewHn2Z5UbvuT7U2GoX6V\nJcnPLUxOj5dV8fGaEcNh3Fhhu+X5UJI/OPOmpmL84qOThqWuPRnQKHHb99SNrHupNdWJiIhonSo/\n/GMgCfseSTsCSFoG+DTw55bWKiIiIl5hIBOnHAR8Ang9cD8wqVyPiIioFElNvXqIt5Gkm2pe8yV9\nuq7MtHJ7d5nDB1P3flvYth8B9h1M8IiIiJFkqO/qsn0HRUMWSWMoGra/7qFol+09mjnXQEaJry/p\nAkmPSnpE0nmS/qWZk0ZERLTDULew67wduMv2vT2dutm6D6RL/GfA2cCawFrAOcDPmz1xRETEKPNB\nipxZz8AO5bMsLpK08WCCD2TQ2TjbP6lZ/6mkQwdzsohO1qrbr7575V0tiQtwyOT1WxY7oh0aHSX+\n4G3X8+BtN/RbrhyU/W7gCz3svhFYx/YCSe8EfgNs2FhN+p5LfBWKJvzFkr7Ey63qfYCLGz1RRERE\nuzX68I81N9mWNTfZdsn6vHN/0FvRdwJzynFfr2D76ZrliyWdJGmV7mdbDFRfLewbeeVsXv9Z/uye\nS/yLjZwoIiKi3Vp4G/aH6OVysaQ1gIdtW9K2FJOWNZSsoe+5xCc2GiwiImIka8XDPyQtTzHg7D9q\ntn0cwPbJFLOFHiTpJWABxbXuhg1opjNJmwIbA8t2b7N9xmBOGBERMZqUD8ZarW7byTXLJwInNnue\nfhO2pOnAVGAT4LcU/fRXAUnYERFRKRV+uuaAbuvai6Kp/4DtA4EtgJVaWquIiIgWaPF92C01kC7x\n52wvkvSSpAnAw8A6La5XRETEkKtyC3sgCft6SSsDPwJuAJ4F/tDSWlVIecngadvf7K9sRETEYA1k\nLvGDy8UfSLoEGG97XmurVQ2SluaVt75FRMQI1opR4sOlr4lTtqaXZCRpK9s3tqxWw0jSROB3FL0H\nWwF/AvanGBX/TWAF4FHgANsPSpoF3ATsRKZojYiolArn6z5b2N+k79bjzkNcl3baEDjQ9jWSfgx8\nEngPsKftRyXtAxwD/BvFZzLW9psBJH2lXZWOiIjGtHvgWDP6mjhl2jDWo93utX1NufxT4DBgU+Cy\n8pe7FPDPmvJnDSTo0TOmL1meMnUaU6ZOG4KqRkRU2+yuWczumtXualTOgCZO6QC1PQkCngL+ZHuH\nXso/O5Cghx8xvclqRUSMPvUNmGOOOnLYzj2Qe5lHqirXfSitK2m7cnlf4Frgtd3bJI0d7OPQIiJi\n5KjyfdinQdhPAAAgAElEQVRJ2IU7gE9Iug2YABxPMWHM1yTNpRhktn0fx2ekeEREBYxRc692GsjU\npGOA/YA32J4haV3gdbava3nths9Ltj9St20exZSsr2B757r14evLiYiIprQ76TZjIC3skyhal/uW\n68+U20aTtJAjImJEG8igs7fYniTpJgDbj0sa2+J6DRvbdwObt7seERHReu2+Dt2MgSTsFyUt1b0i\n6bXA4tZVKSIiojWq3CU+kIR9AvBrYHVJ/0sxGOvwltYqIiKiBSrcwB7QXOI/lTQHeFu5aU/bf25t\ntSIiIqLWQEaJr0sxUcgF5SZLWtf2PS2tWURExBAblQ//qHERL4+iXhZ4A8V9y5u0qlKjxfwFC4c8\n5oTlRs14vxgih0xev2Wxr/zrIy2JO3mD17YkbkR/qjz5yEC6xDetXZe0FfCJltUoIiKiRSrcwG78\ny0b5WM23tKAuERER0YuBXMP+XM3qGIpnRt/fshpFRES0yGi/hr1CzfJLwIXAr1pTnYiIiNapcL7u\nO2GXE6aMt/25vspFRERUwaicOEXS0rZfkrSjJNnOfNsREVFpo7VL/DqK69VzgfMknQMsKPfZ9rmt\nrlw7Sbob2Mr24+2uS0RERF8Ju/tryLLAY8Bb6/aP6oRNce95db+KRUTEq7SigS1pJ
eAUivlJDHzM\n9rV1ZY4H3knR8D3A9k2NnqevhP1aSZ8Fbmk0aJVImgj8DriBokfhT8D+5e5PSXo3MBbY2/YdkqYD\n65ev1YCv2z5lmKsdERGD0KJr2N8FLrK9l6SlgeVrd0raDXij7Q0kvQX4PrBdoyfp6z7spYAVKUaJ\n9/QaTTYETrS9MfAUL08M84jtrSk+3M/XlN8U2JniOeFHSFpzOCsbERGDoyb/e1U8aQIw2fapALZf\nsj2/rtgewMxy/x+BlSSt0Wjd+2phP2j7yEYDVtS9tq8pl38KHFIud3f73wi8r1w2cJ7tF4AXJF0B\nbAucVx/0uK/OWLK8w05T2XHy1BZUPSKiWmZ3zWJ216x2V2OovAF4RNJpwBbAHOAQ2wtqyrweuLdm\n/T5gbeChRk40kPuwO0HtCHjx8vO+Xyh/LqLvz6rH54Mf+qUjmq9ZRMQoM2XqNKZMnbZk/Zijhq9t\n2IIu8aUpLqd+0vb1kr4DfBGoTwD1Z274zqu+ktDbGw1WYetK2q4cJLAvcBUwqZeyAvaU9FWKSwPT\ngC8MSy0jIqIpjSbsu+Zey9/m/rGvIvcB99m+vlz/JUXCrnU/sE7N+toMYsbQXhO27ccaDVZhdwCf\nkHQqxaCz7wOfqtlvXv42ZOBm4AqKQWczbD84jHWNiIhBUoPDxN84aXveOGn7JeuXn3HCK/bbflDS\nvZI2tP0Xisbun+rCnA98EviFpO2AJ2031B0O6RLv9pLtj9Rte0P3gu05vPK2tpttf3RYahYRESPd\np4AzJS0D3AV8TNLHAWyfbPsiSbtJuhN4FjhwMCdJwi40ei0hs75FRFRQK27rsj0PeHPd5pPrynyy\n2fN0fMK2fTeweQPlO2XkfETEqFPhmUmTsCMionOM1rnEIyIiRpUqP62rr5nOIiIiYoRICzsiIjpG\nhXvEk7Cj9e59bEH/hQZhnVWXa0nceNnmr1+pJXG3/sqlLYk758hdWhI3Ro8xFX4IYxJ2RER0jCq3\nsHMNOyIiogLSwo6IiI5R5VHiSdgREdExch92REREBVQ4X+cadkRERBV0ZMKW9OWa5YmSbmlnfSIi\nYniMkZp6tbXubT17+3yp3RWIiIjhJzX3aqdRkbDLVvLtkn4q6TZJ50h6p6Rf15R5h6RzJX0VGCfp\nJkk/oXhU5lKSfijpVkmXSFq2PGZLSddKmlceu1K5fZakYyX9UdIdknZqyxuPiIiGjGny1U7tPv9Q\n2hA40fbGwFPAJsCbJK1W7j8Q+LHtLwHP2Z5k+yOAgA2A79neFHgSeH95zBnAoba3AG4BvlJuN7CU\n7bcA/12zPSIiRjBJTb3aaTSNEr/X9jXl8k+BT1Mk3A9LOh3YDvhwL8f+3fbN5fIcYKKk8cAE21eW\n22cC59Qcc27580ZgYk9Bj/vqjCXLO+w0lR0nT23k/UREjEqzu2Yxu2tWu6tROaMpYbtmWeX66cAF\nwPPA2bYX93LsCzXLi4BleyhT/9Wq+5hF9PI5HvqlI/qucUREB5oydRpTpk5bsn7MUUcO27krfFfX\nqOoSX1fSduXyvsCVth8A/gkcDpxWU3ahpL6+rMj2U8ATNdenPwLMGuI6R0TEMMoo8ZHhDuATkm4D\nJgDfL7f/DLjH9h01ZX8I3Fwz6My8Uvf6R4HjJM0DNgdm0LP64yMiYgRSk692Gk1d4i+Vg8jq7QT8\nqHaD7S8CX6zZtHnNvm/WLM8Dtq8PaHvnmuVHgX8ZfLUjIiL6N5oS9qtauZLmAE8Dnxn+6kRExEjT\n7nupmzEqErbtu6lpJdds33r4axMRESNVu2/NasaoSNgREREDUeWBW1Wue0RERMdICzsiIjpGusQj\nIiIqoLrpupggpN11GJUk+bmF+WwjhtN3r7yrJXEPmbx+S+LOX7CwJXEBJiw3tmWxh9q4scJ2y3Op\nJJ8z959Nxdh7y7V6rKukpYAbgPtsv7tu3zTgP
OBv5aZf2T660XOnhR0RER2jhQO3DgFuA1bsZX+X\n7T2aOUEGnUVERDRB0trAbsAp9N7r3nQPQhJ2RER0jBY9XvPbwKFAbw+YMrCDpHmSLpK08WDqni7x\niIjoGI02c2+9/g/cesMfeo8nvQt42PZN5bXqntwIrGN7gaR3Ar8BNmywKknYERHRORq9q2uzbXdg\ns213WLJ+1g++WV9kB2APSbtRPJp5vKQzbO/fXcD20zXLF0s6SdIqth9vpC7pEo+IiBgk21+2vY7t\nNwAfBP6vNlkDSFpDZX+6pG0p7tBqKFlDWti9knQ6cIHtX7W7LhERMTTGtP5ObANI+jiA7ZOBvYCD\nJL0ELKBI7A3r6IRdfuOR7Z4GCuQm6oiIUaaVE53Z7gK6yuWTa7afCJzYbPyO6xKXNFHSHZJmArcA\nL9Xs20vSaTXF3y7p+rL87mWZLklb1BxzlaTNhu0NRETEoKnJ/9qp4xJ26Y3AibY3BZ6t2V7bqhaw\nnu03A7sDP5D0GuDHwAEAkjYEXmP7lmGpdUREdKxOTdj/sH1dP2UMnA1g+06KKeU2As4B3iVpaeBj\nwGm9RoiIiBFFau7VTp16Dbu3VvW4fo6z7eckXQa8B9gb2Kq3wkfPmL5kecrUaUyZOq3hikZEjDaz\nu2Yxu2tWW849DIPOWqZTE3athyS9CfgL8F5gfrldwN7lte5/KV93lPtOAS6kmBt2Pr04/Ijprapz\nRERl1TdgjjnqyGE7d7tbyc3o1IRd26r+IkXyfYTiSSvL15S5B7gOGA983PaLALZvlDSfdIdHRFRK\nEnaF2L4b2Lxm/VfAq+61tn1gbzEkrQWMsX1pK+oYERFRr1MHnQ2apP2Ba4Evt7suERHRmCrf1tVx\nLexm2T4DOKPd9YiIiMaNSZd4RETEyNfuVnIz0iUeERFRAWlhR0REx8go8YiIiAqocpd4EnZEjBqH\nTF6/JXFXfvMnWxL37q5vtyRu9K7Kg85yDTsiIqIC0sKOiIiOkS7xiIiICsigs4iIiAqocL5Owo6I\niM4xpsJN7Aw6i4iIqIAk7AGQNFHSLe2uR0RENEdNvtopXeIREdE52p11m9ARCVvS8sDZwOuBpYCj\ngL8D3wWWA14A3gasRvEkruXLQz9p+5q6WBP7KxMRESNTbusa+XYF7re9O4Ck8cBNwAdsz5G0AvAc\n8BDwDtsvSNoA+Bnw5rpYAykTERExpDolYd8MfEPSscCFwHzgAdtzAGw/AyBpGeB7krYAFgEb9hBr\nIGUiImIEqvAg8c5I2Lb/KmkSsDtwNHBFL0U/Q5HIPyJpKeD5QZYB4OgZ05csT5k6jSlTpw3uDURE\njCKzu2Yxu2tWW85d4XzdGQlb0prAE7bPlDQfOAh4naRtbN8gaUVgATAeuK88bH+K6931BlIGgMOP\nmD5E7yAiYvSob8Acc9SRw3fyCmfsjkjYwGbAcZIWAy9SJOwxwAmSxlEk67cDJwG/krQ/8DvgmZoY\nLn/2VSYiIjqIpGWBLuA1FJdMz7P9pR7KHQ+8kyLfHGD7pkbP1REJ2/alwKU97Nq+bv1OYIua9S+W\nx98NbF4u91gmIiJGvqEeJW77eUk7214gaWngKkk72b5qyTml3YA32t5A0luA7wPbNXqujkjYERER\n0JpBZ7YXlIvLUFwmfbyuyB7AzLLsHyWtJGkN2w81cp7MdBYRER2jFTOdSRojaS7Fbb9X2L6trsjr\ngXtr1u8D1m607mlhR0RE52iwhX3DNVcy59qr+ixjezGwpaQJwCWSptme1c+ZTYOSsCMiInqxzfaT\n2Wb7yUvWf/jdY3sta3u+pN8C2wCzanbdD6xTs752ua0h6RKPiIiOoSb/e1U8aTVJK5XL44B3UMyk\nWet8ituAkbQd8GSj168hLeyIiOggLRh0tiYwU9IYikbwT2z/XtLHAWyfbPsiSbtJuhN4FjhwMCeS\n3XA3egyAJ
D+3MJ9tK81fsLAlcScsN7YlcSPqrbbv6S2L/ejPDmhZ7KE2bqyw3fIpTSR57j+eairG\nluuNH5a69iRd4hERERWQLvGIiOgcmZo0IiJi5MvzsCMiIiqgyo/XzDXsiIiICkgLOyIiOkaFG9ij\nt4Ut6QBJJ7S7HhERMYK0YjLxYTKaW9i5CToiIl6hyoPOKtfClrS8pN9KmivpFkkfkPRmSX8ot10r\naYWy+FqSLpb0F0lfq4mxS1l+jqSzJS1fbr9b0v9KuknSDZK2knSppDu7Z60pyx0q6TpJ8yRNH95P\nICIiBktq7tVOVWxh7wrcb3t3AEnjKeZt/YDtOWWyfo6i82LL8vUicIek44EXgMOAt9l+TtIXgM8C\nR1G0yv9he5KkbwGnA9sD44BbgZMl7ULxIPJty6nozpM02faVw/UBRERE56liwr4Z+IakY4ELgfnA\nA7bnANh+Boop6IDf2366XL8NmAisDGwM/EHF16VlgD/UxD+//HkLsLztZ4FnJb1QPjptF2AXSd2T\nuy8PvBFIwo6IGOGq2yFewYRt+6+SJgG7A0cDV/RR/IWa5UW8/H4vs71vP8cspmiZU7PeffxXbf+w\nv7oePWP6kuUpU6cxZeq0/g6JiBj1ZnfNYnbXrPacvMIZu3IJW9KawBO2z5Q0HzgIeJ2kbWzfIGlF\nYAE9/1oMXAucKGl923eV16/Xsv3X+lP1cvwlwFGSzrT9rKTXAy/afqS+8OFHTB/0+4yIGK3qGzDH\nHHXksJ27yoPOKpewgc2A4yR1t4APohg8d0L5LNIFFM8jNT2MFLf9qKQDgJ9Lek25+TCgPmHXH+/y\n+Msk/StwTdml/jTwYeBVCTsiImKo5PGaLZLHa7ZeHq8ZVZfHaxaG8/Gatz/wbFMx3rTm8m17vGYV\nW9gRERGDUt0O8STsiIjoJBXO2JWbOCUiIqITpYUdEREdI6PEIyIiKqDd04s2Iwk7IiI6RoXzdRJ2\nRER0kApn7NyH3SK5Dzsi2um7V97VkriHTF5/yGMO533Ydz68oKkYb1x9udyHHRER0WoZdBYREVEB\nGXQWERFRARXO15k4JSIiYrAknSrpIUm39LJ/mqT5km4qX4cP9lxpYUdEROcY+ib2acAJwBl9lOmy\nvUezJ0rCjoiIjjHUg85sXylpYr+nHQLpEu+FpAmSDuqnzOmS3j9cdYqIiOZIzb0GwcAOkuZJukjS\nxoOtexJ271YGDu6njMtXRERET24E1rG9BUXX+W8GGyhd4r07Flhf0k3AZcBjwH7AYuAi218uy1V5\n0GFEREdp9A/2NVd1cc3Vswd9PttP1yxfLOkkSavYfrzRWEnYvfsCsIntSZLeCRwObGv7eUkrtblu\nERExCI12a+8weSo7TJ66ZP07Xz+mwfNpDeBh25a0LcUMow0na0jC7kvtr/XtwKm2nwew/eRAAhw9\nY/qS5SlTpzFl6rQhrF5ERDXN7prF7K5ZbTr70HaKSvo5MBVYTdK9wFeAsQC2Twb2Ag6S9BKwAPjg\noM+VucR7Vo76u8D2ZpK+Adxu+5S6MqcBF9r+VQ/HZy7xiGibzCX+apJ83xMvNBVj7ZVf07a5xDPo\nrHdPAyuWy5cDB0oaByBp5bbVKiIiOlISdi9sPwZcXc5e81bgfOCGchDa52qLtqN+ERHRODX5aqdc\nw+6D7f3qNn2tbv+Bw1idiIhoUh7+ERERUQFVfrxmusQjIiIqIC3siIjoHNVtYCdhR0RE56hwvk7C\njoiIzlHlQWe5hh0REVEBmemsRTLTWUTEwAznTGcPP7WwqRirjx/btpnO0iUeERGdo8Jd4knYERHR\nMSqcr5OwIyKic2TQWURERLRUWtgREdExqjw1aRJ2RER0jHSJR0REREslYUdERFRAusQjIqJjVLlL\nPAk7IiI6RgadRY+OnjF9yfKUqdOYMnVa2+oSETFSzO6axeyuWW05d5Vb2Jl
LvEUyl3hExMAM51zi\n859b1FSMCeOWylziERERrVbhBnYSdkREdJAKZ+wk7IiI6BgZdBYREVEBVR50lolTIiIiKiAJewRo\n1e0NidvauK2MnbjVjNvK2Ik7NNTkq8eY0q6Sbpf0V0lf6KXM8eX+eZImDabuSdgjQNX+h0nc1sdO\n3GrGbWXsxB0iQ5yxJS0FfA/YFdgY+JCkf60rsxvwRtsbAP8JfH8wVU/CjoiIjqEm/+vBtsCdtu+2\nvRD4BbBnXZk9gJkAtv8IrCRpjUbrnoQdERExeK8H7q1Zv6/c1l+ZtRs9UWY6axFJ+WAjIgZouGY6\nG4o4tXWV9H5gV9v/Ua5/GHiL7U/VlLkAONb21eX65cD/2L6xkfPmtq4WadfUdRER0bMW/V2+H1in\nZn0dihZ0X2XWLrc1JF3iERERg3cDsIGkiZKWAfYBzq8rcz6wP4Ck7YAnbT/U6InSwo6IiBgk2y9J\n+iRwCbAU8GPbf5b08XL/ybYvkrSbpDuBZ4EDB3OuXMOOiIiogLSw20jScrYXtLsew03SKn3tt/34\nEJ1ndWDZmrj3DEXcKpD0uT522/a3huAca1Lc0rIYuN72g83GbBVJm9m+pd31iGhGEnYbSNoBOAVY\nEVhH0pbAf9o+uMm4qwFfAXYCDFwJzLD9WJNV7o4/nuKP/dNNhrqRon69eUMzwSXtAXwTWAt4GFgP\n+DOwSTNxy9hfs/2F/rY1EG9r+vgsGh1FWmPFvuI2S9K/A0cAV5Sbvidphu0fNxl3J4p/wxN5+e+T\nbf9LM3GB70t6DXAacKbt+U3GW0LS9sDxFJNmLEPRLfqM7fFDEHsyxYQbp0l6LbCC7b8PMtb7Kf5N\n9DTwyrbPbaKqMQzSJd4Gkq4D9gLOsz2p3PYn200llPJWgS7gpxT/U+4LTLP99ibjvhk4Fej+A/Qk\n8G+2b2gmbqtIuhl4K3CZ7UmSdgY+YvtjQxD7pu7fWc22W2xvNsh4s+g7Ye88mLhl7KWBTw9Fa7qH\n2H8Btu/+MihpVeAa2xs2GfcO4L8pvtQt6t5u+9Fm4paxNwQ+BuwNXAecZvvSIYg7B/ggcDawDcXg\noo1sf7HJuNOBrctYG0p6PXC27R0HGe90+v63NqjrqjF80sJuE9v36JWPjXlpCMK+zvZRNetHS9pn\nCOKeChxs+0pY0go6Fdh8MMEkbdXX/iZald0W2n5U0hhJS9m+QtJ3mwko6SDgYGB9SbVdqysCVw82\nru1pzdSrn9gvSfoQMOQJG3gUeKZm/ZlyW7OetH3xEMR5Fdt/kXQ4xaje44EtJY0Bvmz7V03G/mv5\nb20RcJqkuUBTCRt4LzAJmFOe435JKzZRxwOarE+0WRJ2e9wjaUeA8jaAT1N02Tbr0vIP9Fnl+t5A\n0y0I4KXuZA1g+ypJzXzB+BZ9d9cOulVZeqL8w3YlcKakh3llchmMnwEXA8cCtd3fzwzFJYeyNbw7\nRVfwUhQ9JENxrfkqSd+j+DfxbPfGIfhSdBdwraTzyvU9gZvLa+fN1PsKSccB5wIvdG9str6StgAO\nAN4FXAa8y/aNktYCrgWaSdjPlt3t8yR9HXiQ3p8T0YgXbC/u/mIvafkhiImk/wW+bvvJcn1l4HO2\nDx+K+NE66RJvg/Ja8/HA2yn+x76UouuyqT/8kp4BlqMYBATFffbdf6Q92Gtqkr4DjAN+Xm7aB3ge\n+EkZuNk//kOq/MP2PMX734+iK//MIbyWvzXFOIHFwNVD8f4lXQw8B9zCy78/bB/ZZNxZ9PDlqJmu\n9jLu9O5Q3ZtqzzPYerewvl3Aj4FzbD9Xt29/22c0EXsi8BDF9evPUPx7O8n2nYOucBH3UOCNwC7A\nVym6839m+/gm4861vWXdtldd6omRJwl7mJUtqZm292t3XQaqVddZy8T6WWBd2/8haQOK63UXDqqi\nL8f9HPAL2w3PJDSA2EdQ9FycS5Gk9gR
+WXcpYjBxb7Y9qEsMMTBlK3gjin/Ld9h+sc1V6pekXSgS\nNsAlti8bgpg3A9vafr5cHwfc0OwYmmi9dIkPs/K64nqSXmP7hf6PGLiym32e7WckfYTi+td3bf+j\niZhjgO/bPqvfwo07jeL63A7l+j+BXwJNJWyK68qXSnqC4sk55wxmVqFefBjYvOaP3VeBeUBTCZui\nvv+f7UuarWAtSa8DjgFeb3tXSRtTDBZrdjT3RsDnefVo7rc2GXclilHiU8pNsyjudGhqVLek3YEf\nAH8rN/2LpI/bvqiJmH3dJuah+AJm+1JJf6T4jC1plSG47fFM4PeSTqX40nkgMOgehhg+aWG3gaSf\nAG+imK6u+z7spq9Xln9AtgA2A06n6ALc2/bUJuPOsb11MzH6ilvbHSdpnu0thij+FsAHKEbk32f7\nbUMQ8wrgfbafKNdXBn41BInqfRSj+8cAC8vNg76MURP3dxRfjA6zvbmkscBNtjdtMu7NFM/0rR3N\nbdtzmox7LsVlgZkUyeQjFF+Q3tdk3DuA3bu7qSWtD1xke6MmYk7sa7/tuwcbu4z/ceBIimv53ZdJ\nmr7FrbzOfgXFJTkDlwNvtf0/zcSN1ksLuz3uKl9jgBWou/7XhJfKQSrvAU60fYqkpm9lAi6T9Hle\nPXCp2W/6L5TdccCSP6JD2evwMMUAoMeA1w5RzKeAP0nqHsz3DuA6SSdQ/DH99CDjfgvYDrjV9uL+\nCjdgNdtnSfoigO2FTQ4Y7LbQ9veHIE699euS83RJ84Yg7lN115T/RvG7HLRmE/IAHApsOhS3tNV5\nR5mcl4zGLwf6JWGPcEnYbWB7eotCPy3pyxTdtpMlLQWMHYK4H6T4QvGJuu1NTXACTAd+B6wt6WfA\njhQjeZsi6WCKlvXqwDnAv9u+rdm4pV+Xr26zapab+dJ1D/CnIU7WAM+UgxyBJQ8eGHT3sopZ6gRc\nIOkTvHo0d7Nf4p6TNLnuFsJBzwaoYrIQgBskXURxrzQU4xCGZB4BtW7ilL9RDEQcEq26NTGGT7rE\n26DsVq03FNf/1qSYLOU621dKWhfY2fbMZuK2iorJNkTRsgT4I03M5FQT91iKQWdzm6zisJE0k+IL\n0MVA92CoobhMsjVFMtkU+BNFT8NetgfVapV0N30PQGx2lrotKa6nTig3PQF8tIn6ns7/3969B9tZ\nlXcc//4ILdcAQlWgchsqCDQgdwIjIqhcahhRAhawFgFxBEXLtNN2RCPE0Q5CW1TsABJAoZVQGaHc\nK0hCLgYItxAE6ii2hSqX0IYAw+3XP9Z6c/Y5OclJzrve8+bd5/nMnJm999n72SsnyVn7XetZzzN8\nJnt1bK52sRA1VzhlL9LW1jwG/5sY1SqOpE2BtzFwNLE6era01AmK0KyYsFsgaZ+eu+sDHyctZ/9l\nS0MakaQ/Jl1B9NbmrpWoImkucGSVUJQTomaWylZVwVrikmbanippEStOWLUTjFZ2TKrAsa4NgDOB\nw0lLwPOBi6qkuZpxz2DgeNs9pOTEIleESmVwsV1r2Xos9ORiLM/0H+7o1Cji3gfMYuCoX/VvYq38\nAB6aFxP2WkLSvbb3HeVr59g+KJ/DHm4yqZu4NA14P6kW903AkcA9to+tGfdPSJ/0jyIdt7kKOLHu\nlbFWUku8zgcBSVvbflrSdgxTFGMM9jNHRdJM0kTdW652U9tT19K41QfY7RlcQObcmnFnDHnIpMAl\nytXOIuUyXAY8Q8qb+FTd5Mk4Gx2Gij3sFmhwt6p1SMtoo55UnWsL29645tBW5lhS9vlC2ydLeifp\naEgttm9SqvR2Byn57mO2H68bF5gOTGZILfE6AfNkvS5whWsW8RhOXg34K9IqRpWIV3ubBNjN9q49\n9++UVGI/v6m4PyHVqr+fVPymlJsY+DC7Aans59OFYv8Z6f/xmaTCKe8ifeio65acKX4DZfMEQkfF\nhN2
O3m5VbwC/Bk4ZbTA1367yFdtvSnoj74P9DthmtMFyRnWvTUhZ82dKqpNpXSleSxyWn6F/S9Jm\nzmUdC7qalIX/EeB0UvLdswXiLpQ02fY8WJ50VuvoVcNx/9D24QXiDGL7ut77OcmxVKLVc8BreTtg\nWk72XK9A3BNIvyeG7oXXTfYMHRUTdgtsb184ZKPtKoF783njS0mZtcuAuTXi3c9Amz/33C+liVri\nlWXAI/lYV+8Z+rofMrbIx/C+YPtu4O68h1nXPsAcSf9J+hlvCzyes4Tr7L03FXeupN1tPzzK16+u\nnSh31O+nwGEM/BvbELiNgYJAo9LA74nQcbGHPYYkHWb7pxroSzvU86S94TeH+d5aQdIOwMTSv1Dz\nKsE2o80GHhJrI9JxmKqW+KYUqiUu6c/zzaHJYbUSgSTNt31A/iBwEWm5dqbtHWvG3X5V3x/t3nvp\nuD3HjCYA7wZ+xcAycK2kPqVqfW/QU1efVPv7r12zS1eOP1xt7hJJZ42U7g3dFVfYY+tg0qfxKQw/\nYbll0agAAAwjSURBVG8BfJmUwDIqSj1zt6Pn79b2rNHGW0lcSTq4blylGuVH55j3A8/mBLovjTLe\nnLyf/1tW/PlOl/QCcL7t79YY9nXkLYL8nhPoyUSv4etKZTnPBr5N2iYY1c+hV1PJcA3EnVKFZsWk\nvrpXFQYWu2Z1t1V4WdLezlXe8imQEtnyTZXuDR0VV9hrGUmXjzZzVdLfkTppLWagXCS2p6z0Re3G\nfdD2eyWdSrq6/qqkR2xPqhN3Fe+3BTDX9cpRzgc+aPulfH8iqSlDreXPkEj6ge1PjvTYKOJeSar+\nt6DWAIePvS+pZv0z+aEtgU/YrrWloYZL94buiSvsFmgVDQ5qHjM5hrRkVrSpSINxJ+RiL8eRVhag\n7F72ILafzxnjdaxfTdY55lJJG9aMWU0oX/TgGuUXlDh21DGDroJzZn6JOvYHACdJeorBLWdLdEjb\ngdRoZzvgY8B+9LRIraHp0r2hY9ZpewDj1OWkM6xTSZPVUtLyV12/JJVGLK2puOeSknN+aXtB/oX0\nZAPvs5ztukd5lilVDwOKLn/uUU3WAPn2XgXidoKkv5W0FJgkaWn1RTqRcEOBtzgc2BE4lLT8PoW0\nHVPCObnAy6bAB0hNUUrUWZ/G4NK9d5LqFoRxKpbEWzDcslaJpS6lTkd7kPbJexN2RlvKsDp+tTXw\n3lJxu2yY5c+tgOMLLH8+RCoj+0K+vzlwd1PbA2srSd+w/Tdtj2NN9GztfBN4xPbVpYqeKNWBr0r3\nznf5RiChQ2JJvB1FGxz0uCF/DVc7eTR6j1/d2HO7yKe8vNx3CisWC1lrl4Ft3ytpF1JlNgOP2359\nhJetjguAeZKuJf2Mp5L6WI83O+cKeLe4fCOUpvy3pEtIyaLfzNXaSq1erkeqp74usKukIkmkoZvi\nCrsFKtzgYEjs9UhnTAF+UWgy6Y1f8vjVdcBjpKNXXyN1GXtsbb5yl3QccKvt/5N0DmnvcrrthQVi\nH0g63wxwn+06Z907SdKHgJNJV5XXAjMKVb9rTD5+dQTwsO0nc17GJNu3j/DSkeI2kuwZuism7Bap\ncIMDSYcAVwJP5Ye2JX0QuLtm3J8x5PgVMOrjVz1xq6XEh23vLun3SOfQ968Tt0lVFnteFZkOfAv4\niu39asY9CziN1K5SwEeBS21fVHfMXZQTMz9BSkb8Dalozw9LfwBdm0l6gjTxR6JZAGJJfExJOrvn\nrnser4pv1GqlCFwIfLi6IpG0E2m/tW7y0mb5ivJU4Krq+FXNmDDQMvB/JU0iNU0oVX2qKdWVzkdI\nE+q/STqvQNxTgf1tLwOqFqHzSUVUxpW8b3tS/loIXEPqCvYp4JD2RjbmqmTPmLADEBP2WJtIg8eW\ngHV7lw9tP5GPxdTV1PGrS/MS+5dJTR82Br5SIG6TmtyvfGslt8cNS
dcD7yFtGU2xXSX3/YtS3+m+\n15Ps+TLwoKRxn+wZkpiwx5DtaQCSrgLO6jlzuzkp6aiu+yVdxkDLwxNJtb/rqo5fzSl8/OoHDLRS\nrEp7vrNA3CYdR9qvPN/2i/mDTIk+5jOAn+dM/2pJ/PICcbvmElIb14OAfSXNJvXZftV2ifPYXdBb\nW/9GyiWRho6LPewWNFh7eH3gDNIvO0jNLy6uuwe2sg8Ytk+uGfc2Blop9ibVlPjw0iildpjLS5La\n/k2BmHuTln4NzLb9QN2YXaOG+mx3kaSNGaYMbrVtEsafmLBb0LUztw1+wFjUYH3nRkg6mrQasjWp\nqMd2pMz23VodWJ+QtNiD+2wP+9h4EGVww1BR6awd1Znb8yRNB+YB59cNKmmKpAckLempFlUiA13q\n6bmdb08oEHeupBKlIcfSdGAy8ITtHUhtFX/e7pD6ykJJk6s7Ktdnu4tWKINLat0ZxqnYw26B7aty\nAs2hpOXPY2wvLhD6H0h1vxcVLjpRtKiHBrdSPFlSsVaKY+B1289JWkfSBNt3SfrHtgfVR5rqs91F\nTXUBCx0VE3ZLbD8KPFo47H8Bj5auENXAB4wuF35YkpcmZwNXS/od8NIIrwmr74i2B7AWOQu4VtKg\nLmAtjie0LPaw+0hePjwXuIuBM84lzneHLFe1epW0nXQiqW/11bafb3Vgoe9UVfUY3AXsnBJV9UI3\nxR52fzmPdLW3PulM88aks9+hnM8CW9p+3fYVti+KyTo0pKkuYKGjYkm8v2xl+0NtD6LPTQRul7SE\nVEVupu3ftjym0J+aqqoXOiqusPvLzZIOb3sQ/cz2tHyE6wxSa81ZuRJVCKVVVfWOB24qXFUvdFDs\nYfcRSS+Rjn28BlRNEmx7k/ZG1Z9yhbNjgT8FNh5n2cthDDTVBSx0V0zYIawBSZ8jlSd9BzAT+FGh\nI3khhLBKsYfdZ3Ihku3p+bu1/ePWBtR/tgW+aPvBtgcSQhhf4gq7j0iaAUwine9efha7bs3vsKIm\naomHEMKqxITdRyQtBnZz/KU2JmqJhxDaEhmH/eVeYNw1SRhjUUs8hNCK2MPuLzNINb//h+7U5u6a\nqCUeQmhFTNj95fvAScAievawQ1FRSzyE0IrYw+4jkubZnjzyM8No5bOxrzBQS3xTopZ4CGEMxITd\nRyRdDGwG3Mjg5h9xrKsmSXNsH5SL0wz9T2PgBeB8298d+9GFEMaDmLD7iKQr8s1Bf6lxrKt5krYA\n5treue2xhBD6U0zYIRQiaWvbT7c9jhBCf4pjXX1E0jaSrpf0bP76V0nvantc40VM1iGEJsWE3V9m\nADeQinpsTdrLntHqiEIIIRQRS+J9RNJDtvcY6bEQQgjdE1fY/eV5SZ+UNEHSupJOAp5re1AhhBDq\niyvsPiJpO+A7wAH5obnA56MxRQghdF9M2H1E0pWk1o9L8v3NgW/Z/nS7IwshhFBXLIn3lz2qyRrA\n9gvAXi2OJ4QQQiExYfcX5avq6s7mwIQWxxNCCKGQaP7RXy4gdeu6FhAwFfh6u0MKIYRQQuxh9xlJ\nuwGHksqT3ml7cctDCiGEUEBM2CGEEEIHxB52CCGE0AExYYcQQggdEBN2CCGE0AExYYfQIElvSnpA\n0iOSrpW0QY1YV0j6eL59qaRdVvHc90uaPIr3+HXv0cCRHh/ynJfW8L2mSTp7TccYwngVE3YIzXrZ\n9p62JwGvAZ/t/aakNTla6fyF7dNsP7aK534AOHBNB1vFX4PH1/Q5dZ4fwrgWE3YIY2c28Ef56ne2\npJ8AiyStI+l8SQskPSTpM5Cq4Ej6jqRfSLoDeEcVSNLPJO2dbx8h6X5JD0q6I9eUPx34Ur66P0jS\n2yVdl99jgaQD82u3kHS7pEWSLiWd31+l3HP9vvya04Z878L8+L9L+oP82I6SbsmvmSVp5zI/zhDG\nlyicEsIYyFfSRwE354f2BHaz/
VSeoF+0vZ+k9YB7JN1OKiu7E7ALsCWwGPh+fr0BS3o7cAnwvhxr\nM9svSvonYKntC/P7XwP8ve05krYFbgV2Bb4KzLI9XdJRwCmr8cf5tO0leXl/gaTrckncjYB7bf+F\npHNy7M/n8Z1u+z8k7Q9cDBw2yh9lCONWTNghNGsDSQ/k27OAy4GDgAW2n8qPfxiYJOnYfH8T4N3A\n+4BrnIolPCPpziGxRerMNquKZfvFId+vfBDYRVr+0ERJG+X3OCa/9mZJSxjZWZI+mm9vk8e6AHgL\n+FF+/IfAj/N7HAjM7Hnv31+N9wghDBETdgjNesX2nr0P5Ilr2ZDnnWn7jiHPO4qRl6hXdx9YwP62\nXxtmLCMug/c8/xDS1fEBtl+VdBew/krez6RttyVDfwYhhDUXe9ghtO824HNVApqknSRtSLoiPz7v\ncW9FSiTrZWA+cLCk7fNrq0zupcDEnufeDnyhuiNpj3xzFnBCfuxI4G0jjHUT0gT8qqT3MNB7HdLv\nk6n59gnAbNtLgV9Vqwd5X373Ed4jhDCMmLBDaNZwV8Ae8vhlpP3phZIeAb4HTLB9PfBk/t6VwNwV\nAtnPAZ8hLT8/CPxz/taNwDFV0hlpst4nJ7U9SkpKA/gaacJfRFoaf4rhVeO9FVhX0mLgG8C8nucs\nA/bLf4ZDgHPz4ycCp+TxLQKOHuHnE0IYRtQSDyGEEDogrrBDCCGEDogJO4QQQuiAmLBDCCGEDogJ\nO4QQQuiAmLBDCCGEDogJO4QQQuiAmLBDCCGEDogJO4QQQuiA/wed+GaSlnp66gAAAABJRU5ErkJg\ngg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n", + "\n", + "# Run classifier, using a model that is too regularized (C too low) to see\n", + "# the impact on the results\n", + "# classifier = svm.SVC(kernel='linear', C=0.01)\n", + "y_pred = classifier.fit(X_train, y_train).predict(X_test)\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "import matplotlib.pylab as pylab\n", + "pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session\n", + "\n", + "my_labels = classifier.classes_\n", + "\n", + "\n", + "def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):\n", + " fig, ax = plt.subplots()\n", + " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", + " plt.title(title)\n", + " plt.colorbar()\n", + "\n", + "\n", + " tick_marks = np.arange(len(classifier.classes_))\n", + " plt.xticks(tick_marks, my_labels, rotation=90)\n", + " plt.yticks(tick_marks, my_labels)\n", + " plt.tight_layout()\n", + " 
plt.ylabel('True label')\n", + " plt.xlabel('Predicted label')\n", + "\n", + " from matplotlib.ticker import MultipleLocator # from http://stackoverflow.com/a/19252430 comments\n", + " ax.xaxis.set_major_locator(MultipleLocator(1))\n", + " ax.yaxis.set_major_locator(MultipleLocator(1))\n", + "\n", + "\n", + "# Compute confusion matrix\n", + "cm = confusion_matrix(y_test, y_pred, labels=my_labels)\n", + "np.set_printoptions(precision=2)\n", + "print('Confusion matrix, without normalization')\n", + "print(cm)\n", + "plt.figure()\n", + "plot_confusion_matrix(cm)\n", + "\n", + "# Normalize the confusion matrix by row (i.e by the number of samples\n", + "# in each class)\n", + "cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", + "#print('Normalized confusion matrix')\n", + "#print(cm_normalized)\n", + "#plt.figure()\n", + "#plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix')\n", + "\n", + "#plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "####Something is incorrectly lining up in the confusion matrix plots\n", + "For instance the java/tcl confusion is not showing up" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "#Conclusions:\n", + "##Got up to 28/32 correct by tweaking features iteratively\n", + "This method is almost guaranteed to yield overfitting, however.\n", + "I think more data is needed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 042789151c72851dee360fefae6b6a8233cc43ff Mon Sep 17 00:00:00 2001 From: John Waldrep Date: Mon, 8 Jun 2015 05:24:46 -0400 Subject: [PATCH 12/12] Add ipython notebook with basic test of web data retrieval --- bs4_testing.ipynb | 2482 +++++++++++++++++++++++++++++++++++++++++ feature_testing.ipynb | 1178 ------------------- requirements.txt | 2 + 3 files changed, 2484 insertions(+), 1178 deletions(-) create mode 100644 bs4_testing.ipynb delete mode 100644 feature_testing.ipynb diff --git a/bs4_testing.ipynb b/bs4_testing.ipynb new file mode 100644 index 0000000..c40c30f --- /dev/null +++ b/bs4_testing.ipynb @@ -0,0 +1,2482 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup\n", + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "html_doc = \"\"\"\n", + "The Dormouse's story\n", + "\n", + "

The Dormouse's story

\n", + "\n", + "

Once upon a time there were three little sisters; and their names were\n", + "Elsie,\n", + "Lacie and\n", + "Tillie;\n", + "and they lived at the bottom of a well.

\n", + "\n", + "

...

\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + " \n", + " The Dormouse's story\n", + " \n", + " \n", + " \n", + "

\n", + " \n", + " The Dormouse's story\n", + " \n", + "

\n", + "

\n", + " Once upon a time there were three little sisters; and their names were\n", + " \n", + " Elsie\n", + " \n", + " ,\n", + " \n", + " Lacie\n", + " \n", + " and\n", + " \n", + " Tillie\n", + " \n", + " ;\n", + "and they lived at the bottom of a well.\n", + "

\n", + "

\n", + " ...\n", + "

\n", + " \n", + "\n" + ] + } + ], + "source": [ + "soup = BeautifulSoup(html_doc)\n", + "\n", + "print(soup.prettify())" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "soup.title\n", + "# The Dormouse's story\n", + "\n", + "soup.title.name\n", + "# u'title'\n", + "\n", + "soup.title.string\n", + "# u'The Dormouse's story'\n", + "\n", + "soup.title.parent.name\n", + "# u'head'\n", + "\n", + "soup.p\n", + "#

The Dormouse's story

\n", + "\n", + "soup.p['class']\n", + "# u'title'\n", + "\n", + "soup.a\n", + "# Elsie\n", + "\n", + "soup.find_all('a')\n", + "# [Elsie,\n", + "# Lacie,\n", + "# Tillie]\n", + "\n", + "soup.find(id=\"link3\")\n", + "# Tillie\n", + "\n", + "\n", + "####Extract links####\n", + "for link in soup.find_all('a'):\n", + " print(link.get('href'))\n", + "# http://example.com/elsie\n", + "# http://example.com/lacie\n", + "# http://example.com/tillie\n", + "\n", + "####Extract text####\n", + "print(soup.get_text())\n", + "# The Dormouse's story\n", + "#\n", + "# The Dormouse's story\n", + "#\n", + "# Once upon a time there were three little sisters; and their names were\n", + "# Elsie,\n", + "# Lacie and\n", + "# Tillie;\n", + "# and they lived at the bottom of a well.\n", + "#\n", + "# ..." + ] + }, + { + "cell_type": "code", + "execution_count": 196, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n", + "#mw-head\n", + "#p-search\n", + "/wiki/Category:Solutions_by_Programming_Task\n", + "/wiki/Rosetta_Code:Solve_a_Task\n", + "/wiki/Rosetta_Code:Multiple_passes\n", + "/wiki/Rosetta_Code:Extra_credit\n", + "/wiki/Talk:100_doors\n", + "/wiki/Rosetta_Code:Optimization\n", + "#4DOS_Batch\n", + "#6502_Assembly\n", + "#68000_Assembly\n", + "#8086_Assembly\n", + "#8th\n", + "#ABAP\n", + "#ACL2\n", + "#ActionScript\n", + "#Acurity_Architect\n", + "#Ada\n", + "#Aikido\n", + "#ALGOL_68\n", + "#ALGOL_W\n", + "#AmigaE\n", + "#APL\n", + "#AppleScript\n", + "#Arbre\n", + "#Argile\n", + "#ATS\n", + "#AutoHotkey\n", + "#Standard_Approach\n", + "#Alternative_Approach\n", + "#Optimized\n", + "#AutoIt\n", + "#Axiom\n", + "#AWK\n", + "#BASIC\n", + "#BASIC256\n", + "#Batch_File\n", + "#BBC_BASIC\n", + "#bc\n", + "#Befunge\n", + "#BlitzMax\n", + "#Bracmat\n", + "#Burlesque\n", + "#C\n", + "#unoptimized\n", + "#optimized_2\n", + "#C.2B.2B\n", + "#C.23\n", + "#Unoptimized_with_Modulus_.25_Operator\n", + 
"#Optimized_for_Increments\n", + "#Optimized_for_Orthogonality\n", + "#Unoptimized_but_Concise\n", + "#Optimized_for_brevity\n", + "#C1R\n", + "#Cach.C3.A9_ObjectScript\n", + "#Clarion\n", + "#CLIPS\n", + "#Clojure\n", + "#COBOL\n", + "#Coco\n", + "#CoffeeScript\n", + "#ColdFusion\n", + "#Common_Lisp\n", + "#Component_Pascal\n", + "#Coq\n", + "#Crystal\n", + "#D\n", + "#Dart\n", + "#DCL\n", + "#Delphi\n", + "#D.C3.A9j.C3.A0_Vu\n", + "#DWScript\n", + "#Dylan\n", + "#E\n", + "#ECL\n", + "#Eero\n", + "#EGL\n", + "#Eiffel\n", + "#Ela\n", + "#Elixir\n", + "#Emacs_Lisp\n", + "#Erlang\n", + "#ERRE\n", + "#Euler_Math_Toolbox\n", + "#Euphoria\n", + "#F.23\n", + "#Factor\n", + "#Falcon\n", + "#Fantom\n", + "#FBSL\n", + "#friendly_interactive_shell\n", + "#Forth\n", + "#Fortran\n", + "#Frink\n", + "#FunL\n", + "#Unoptimized_2\n", + "#Optimized_3\n", + "#GAP\n", + "#GML\n", + "#Go\n", + "#Golfscript\n", + "#Gosu\n", + "#Groovy\n", + "#Harbour\n", + "#Haskell\n", + "#Haxe\n", + "#HicEst\n", + "#Hy\n", + "#Icon_and_Unicon\n", + "#Inform_7\n", + "#Informix_4GL\n", + "#Io\n", + "#Ioke\n", + "#J\n", + "#Java\n", + "#JavaScript\n", + "#ES5\n", + "#unoptimized_3\n", + "#optimized_4\n", + "#ES6\n", + "#jq\n", + "#Julia\n", + "#K\n", + "#Kotlin\n", + "#LabVIEW\n", + "#Lasso\n", + "#Loop\n", + "#Lhogho\n", + "#Liberty_BASIC\n", + "#LiveCode\n", + "#Logo\n", + "#LOLCODE\n", + "#Lua\n", + "#M4\n", + "#Maple\n", + "#Mathematica\n", + "#MATLAB_.2F_Octave\n", + "#Iterative_Method\n", + "#Vectorized_Method\n", + "#Known-Result_Method\n", + "#Maxima\n", + "#MAXScript\n", + "#Mercury\n", + "#Metafont\n", + "#MIPS_Assembly\n", + "#Mirah\n", + "#mIRC_Scripting_Language\n", + "#ML.2FI\n", + "#MMIX\n", + "#Modula-2\n", + "#Modula-3\n", + "#MOO\n", + "#MoonScript\n", + "#MUMPS\n", + "#NetRexx\n", + "#NewLisp\n", + "#Nim\n", + "#Objeck\n", + "#Objective-C\n", + "#OCaml\n", + "#Octave\n", + "#Oforth\n", + "#ooRexx\n", + "#OpenEdge.2FProgress\n", + "#OxygenBasic\n", + "#Oz\n", + "#PARI.2FGP\n", + 
"#Pascal\n", + "#Perl\n", + "#Perl5i\n", + "#Perl_6\n", + "#PHL\n", + "#unoptimized_4\n", + "#optimized_5\n", + "#PHP\n", + "#PicoLisp\n", + "#Piet\n", + "#Pike\n", + "#PL.2FI\n", + "#PL.2FSQL\n", + "#Pop11\n", + "#PostScript\n", + "#Potion\n", + "#PowerShell\n", + "#unoptimized_5\n", + "#Alternative_Method\n", + "#unoptimized_Pipeline\n", + "#unoptimized_Pipeline_2\n", + "#unoptimized_Pipeline_3_.28dynamically_build_pipeline.29\n", + "#Using_Powershell_Workflow_for_Parallelism\n", + "#optimized_6\n", + "#ProDOS\n", + "#Prolog\n", + "#unoptimized_6\n", + "#optimized_7\n", + "#PureBasic\n", + "#Python\n", + "#Q\n", + "#R\n", + "#Racket\n", + "#RapidQ\n", + "#REALbasic\n", + "#REBOL\n", + "#Unoptimized_7\n", + "#Optimized_8\n", + "#Retro\n", + "#REXX\n", + "#version_1\n", + "#version_2.2C_the_hard_way\n", + "#version_3.2C_the_easy_way\n", + "#version_4.2C_easy_way.2C_1.2C000_doors\n", + "#Ruby\n", + "#Run_BASIC\n", + "#Rust\n", + "#S-lang\n", + "#Salmon\n", + "#SAS\n", + "#Scala\n", + "#Sather\n", + "#Scheme\n", + "#Seed7\n", + "#SETL\n", + "#Sidef\n", + "#Slate\n", + "#Smalltalk\n", + "#SNOBOL4\n", + "#Sparkling\n", + "#SQL\n", + "#Swift\n", + "#Tcl\n", + "#TI-83_BASIC\n", + "#Unoptimized_8\n", + "#Optimized_9\n", + "#TI-89_BASIC\n", + "#TorqueScript\n", + "#TSE_SAL\n", + "#TUSCRIPT\n", + "#TXR\n", + "#Uniface\n", + "#UNIX_Shell\n", + "#Ursala\n", + "#Vala\n", + "#VBA\n", + "#VBScript\n", + "#Vedit_macro_language\n", + "#VHDL\n", + "#Visual_Basic_.NET\n", + "#Wart\n", + "#Wortel\n", + "#Wrapl\n", + "#XPL0\n", + "#XSLT_1.0\n", + "#XSLT_2.0\n", + "#Yorick\n", + "#zkl\n", + "#ZX_Spectrum_Basic\n", + "/mw/index.php?title=100_doors&action=edit§ion=1\n", + "/wiki/Category:4DOS_Batch\n", + "/mw/index.php?title=100_doors&action=edit§ion=2\n", + "/wiki/Category:6502_Assembly\n", + "http://www.6502asm.com/beta/index.html\n", + "http://www.6502asm.com/\n", + "/mw/index.php?title=100_doors&action=edit§ion=3\n", + "/wiki/Category:68000_Assembly\n", + "http://www.easy68k.com/\n", 
+ "/mw/index.php?title=100_doors&action=edit§ion=4\n", + "/wiki/Category:8086_Assembly\n", + "/wiki/100_doors/8086_Assembly\n", + "/mw/index.php?title=100_doors&action=edit§ion=5\n", + "/wiki/Category:8th\n", + "/mw/index.php?title=100_doors&action=edit§ion=6\n", + "/wiki/Category:ABAP\n", + "http://help.sap.com/abapdocu/en/ABAPFORM.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDATA.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDATA.htm\n", + "http://help.sap.com/abapdocu/en/ABAPFIELD-SYMBOLS.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDO.htm\n", + "http://help.sap.com/abapdocu/en/ABAPAPPEND.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDDO.htm\n", + "http://help.sap.com/abapdocu/en/ABAPWHILE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPWHILE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPELSE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPADD.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDWHILE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDWHILE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPLOOP.htm\n", + "http://help.sap.com/abapdocu/en/ABAPIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPWRITE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDLOOP.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDFORM.htm\n", + "http://help.sap.com/abapdocu/en/ABAPFORM.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDATA.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDATA.htm\n", + "http://help.sap.com/abapdocu/en/ABAPFIELD-SYMBOLS.htm\n", + "http://help.sap.com/abapdocu/en/ABAPDO.htm\n", + "http://help.sap.com/abapdocu/en/ABAPAPPEND.htm\n", + "http://help.sap.com/abapdocu/en/ABAPIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPADD.htm\n", + "http://help.sap.com/abapdocu/en/ABAPWRITE.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDIF.htm\n", + "http://help.sap.com/abapdocu/en/ABAPENDDO.htm\n", + 
"http://help.sap.com/abapdocu/en/ABAPENDFORM.htm\n", + "/mw/index.php?title=100_doors&action=edit§ion=7\n", + "/wiki/Category:ACL2\n", + "/mw/index.php?title=100_doors&action=edit§ion=8\n", + "/wiki/Category:ActionScript\n", + "/wiki/ActionScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=9\n", + "/wiki/Category:Acurity_Architect\n", + "/mw/index.php?title=100_doors&action=edit§ion=10\n", + "/wiki/Category:Ada\n", + "/mw/index.php?title=100_doors&action=edit§ion=11\n", + "/wiki/Category:Aikido\n", + "/mw/index.php?title=100_doors&action=edit§ion=12\n", + "/wiki/Category:ALGOL_68\n", + "/mw/index.php?title=100_doors&action=edit§ion=13\n", + "/wiki/Category:ALGOL_W\n", + "/mw/index.php?title=100_doors&action=edit§ion=14\n", + "/wiki/Category:AmigaE\n", + "/mw/index.php?title=100_doors&action=edit§ion=15\n", + "/wiki/Category:APL\n", + "/wiki/GNU_APL\n", + "/mw/index.php?title=100_doors&action=edit§ion=16\n", + "/wiki/Category:AppleScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=17\n", + "/wiki/Category:Arbre\n", + "/mw/index.php?title=100_doors&action=edit§ion=18\n", + "/wiki/Category:Argile\n", + "/mw/index.php?title=100_doors&action=edit§ion=19\n", + "/wiki/Category:ATS\n", + "/mw/index.php?title=100_doors&action=edit§ion=20\n", + "/wiki/Category:AutoHotkey\n", + "/mw/index.php?title=100_doors&action=edit§ion=21\n", + "http://www.autohotkey.com/docs/commands/Loop.htm\n", + "http://www.autohotkey.com/docs/commands/Loop.htm\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/commands/Loop.htm\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/commands/MsgBox.htm\n", + "/mw/index.php?title=100_doors&action=edit§ion=22\n", + "http://www.autohotkey.com/docs/commands/Loop.htm\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + 
"http://www.autohotkey.com/docs/commands/MsgBox.htm\n", + "http://www.autohotkey.com/docs/Functions.htm#BuiltIn\n", + "/mw/index.php?title=100_doors&action=edit§ion=23\n", + "http://www.autohotkey.com/docs/Variables.htm#A_Index\n", + "http://www.autohotkey.com/docs/commands/MsgBox.htm\n", + "/mw/index.php?title=100_doors&action=edit§ion=24\n", + "/wiki/Category:AutoIt\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/functions/ConsoleWrite.htm\n", + "http://www.autoitscript.com/autoit3/docs/functions/Number.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/functions/Mod.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "http://www.autoitscript.com/autoit3/docs/functions/ConsoleWrite.htm\n", + "http://www.autoitscript.com/autoit3/docs/macros.htm\n", + "http://www.autoitscript.com/autoit3/docs/keywords.htm\n", + "/mw/index.php?title=100_doors&action=edit§ion=25\n", + "/wiki/Category:Axiom\n", + "/mw/index.php?title=100_doors&action=edit§ion=26\n", + "/wiki/Category:AWK\n", + "/mw/index.php?title=100_doors&action=edit§ion=27\n", + "/wiki/Category:BASIC\n", + "/mw/index.php?title=QBASIC,_QB64&action=edit&redlink=1\n", + "http://www.qbasicnews.com/qboho/qckdefint.shtml\n", + "http://www.qbasicnews.com/qboho/qckconst.shtml\n", + 
"http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qcklet.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckend.shtml\n", + "/wiki/QuickBasic\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckend.shtml\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckint.shtml\n", + "http://www.qbasicnews.com/qboho/qcksqr.shtml\n", + "http://www.qbasicnews.com/qboho/qcksqr.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckend.shtml\n", + "http://www.qbasicnews.com/qboho/qckcls.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckend.shtml\n", + "/mw/index.php?title=100_doors&action=edit§ion=28\n", + "/wiki/Category:BASIC256\n", + "/mw/index.php?title=100_doors&action=edit§ion=29\n", + "/wiki/Category:Batch_File\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/setlocal.html\n", + "http://www.ss64.com/nt/for.html\n", + "http://www.ss64.com/nt/in.html\n", + "http://www.ss64.com/nt/do.html\n", + "http://www.ss64.com/nt/for.html\n", + 
"http://www.ss64.com/nt/in.html\n", + "http://www.ss64.com/nt/do.html\n", + "http://www.ss64.com/nt/set.html\n", + "http://www.ss64.com/nt/for.html\n", + "http://www.ss64.com/nt/in.html\n", + "http://www.ss64.com/nt/do.html\n", + "http://www.ss64.com/nt/if.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/else.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/setlocal.html\n", + "http://www.ss64.com/nt/set.html\n", + "http://www.ss64.com/nt/for.html\n", + "http://www.ss64.com/nt/in.html\n", + "http://www.ss64.com/nt/do.html\n", + "http://www.ss64.com/nt/if.html\n", + "http://www.ss64.com/nt/neq.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/else.html\n", + "http://www.ss64.com/nt/echo.html\n", + "http://www.ss64.com/nt/set.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=30\n", + "/wiki/Category:BBC_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=31\n", + "/wiki/Category:Bc\n", + "/mw/index.php?title=100_doors&action=edit§ion=32\n", + "/wiki/Category:Befunge\n", + "/wiki/CCBI\n", + "/mw/index.php?title=100_doors&action=edit§ion=33\n", + "/wiki/Category:BlitzMax\n", + "/wiki/BlitzMax\n", + "/mw/index.php?title=100_doors&action=edit§ion=34\n", + "/wiki/Category:Bracmat\n", + "/mw/index.php?title=100_doors&action=edit§ion=35\n", + "/wiki/Category:Burlesque\n", + "/mw/index.php?title=100_doors&action=edit§ion=36\n", + "/wiki/Category:C\n", + "/mw/index.php?title=100_doors&action=edit§ion=37\n", + "/wiki/Library/C_Runtime\n", + "/wiki/Library/C_Runtime/printf\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "/wiki/Library/C_Runtime\n", + "/wiki/Library/C_Runtime/printf\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=38\n", + "/wiki/Library/C_Runtime\n", + "/wiki/Library/C_Runtime/printf\n", + 
"http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=39\n", + "/wiki/Category:C%2B%2B\n", + "/wiki/GCC\n", + "/mw/index.php?title=100_doors&action=edit§ion=40\n", + "/wiki/Category:C_sharp\n", + "/mw/index.php?title=100_doors&action=edit§ion=41\n", + "http://www.google.com/search?q=new+msdn.microsoft.com\n", + "/mw/index.php?title=100_doors&action=edit§ion=42\n", + "/mw/index.php?title=100_doors&action=edit§ion=43\n", + "http://www.google.com/search?q=new+msdn.microsoft.com\n", + "/mw/index.php?title=100_doors&action=edit§ion=44\n", + "http://www.google.com/search?q=new+msdn.microsoft.com\n", + "/mw/index.php?title=100_doors&action=edit§ion=45\n", + "/mw/index.php?title=100_doors&action=edit§ion=46\n", + "/wiki/Category:C1R\n", + "/mw/index.php?title=100_doors&action=edit§ion=47\n", + "/wiki/Category:Cach%C3%A9_ObjectScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=48\n", + "/wiki/Category:Clarion\n", + "/mw/index.php?title=100_doors&action=edit§ion=49\n", + "/wiki/Category:CLIPS\n", + "/mw/index.php?title=100_doors&action=edit§ion=50\n", + "/wiki/Category:Clojure\n", + "/mw/index.php?title=100_doors&action=edit§ion=51\n", + "/wiki/Category:COBOL\n", + "/mw/index.php?title=100_doors&action=edit§ion=52\n", + "/wiki/Category:Coco\n", + "/mw/index.php?title=100_doors&action=edit§ion=53\n", + "/wiki/Category:CoffeeScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=54\n", + "/wiki/Category:ColdFusion\n", + "/mw/index.php?title=100_doors&action=edit§ion=55\n", + "/wiki/Category:Common_Lisp\n", + "/mw/index.php?title=100_doors&action=edit§ion=56\n", + "/wiki/Category:Component_Pascal\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=57\n", + "/wiki/Category:Coq\n", + "https://github.com/spanjel/rosetta\n", + "/mw/index.php?title=100_doors&action=edit§ion=58\n", + "/wiki/Category:Crystal\n", + "/mw/index.php?title=100_doors&action=edit§ion=59\n", + "/wiki/Category:D\n", + "/mw/index.php?title=100_doors&action=edit§ion=60\n", + "/wiki/Category:Dart\n", + "/mw/index.php?title=100_doors&action=edit§ion=61\n", + "/wiki/Category:DCL\n", + "/mw/index.php?title=100_doors&action=edit§ion=62\n", + "/wiki/Category:Delphi\n", + "#Pascal\n", + "/mw/index.php?title=100_doors&action=edit§ion=63\n", + "/wiki/Category:D%C3%A9j%C3%A0_Vu\n", + "/mw/index.php?title=100_doors&action=edit§ion=64\n", + "/wiki/Category:DWScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=65\n", + "/wiki/Category:Dylan\n", + "/mw/index.php?title=100_doors&action=edit§ion=66\n", + "/wiki/Category:E\n", + "/wiki/E-on-Java\n", + "http://wiki.erights.org/wiki/var\n", + "http://wiki.erights.org/wiki/var\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/for\n", + "http://wiki.erights.org/wiki/in\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/E\n", + "http://wiki.erights.org/wiki/fn\n", + "http://wiki.erights.org/wiki/fn\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/to\n", + "http://wiki.erights.org/wiki/bind\n", + "http://wiki.erights.org/wiki/true\n", + "http://wiki.erights.org/wiki/match\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/def\n", + "http://wiki.erights.org/wiki/timer\n", + "http://wiki.erights.org/wiki/timer\n", + "http://wiki.erights.org/wiki/fn\n", + "http://wiki.erights.org/wiki/if\n", + "http://wiki.erights.org/wiki/if\n", + "http://wiki.erights.org/wiki/else\n", + "http://wiki.erights.org/wiki/else\n", + "http://wiki.erights.org/wiki/interp\n", + "/mw/index.php?title=100_doors&action=edit§ion=67\n", + 
"/wiki/Category:ECL\n", + "/mw/index.php?title=100_doors&action=edit§ion=68\n", + "/wiki/Category:Eero\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/printf.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/puts.html\n", + "http://www.opengroup.org/onlinepubs/009695399/functions/puts.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=69\n", + "/wiki/Category:EGL\n", + "/mw/index.php?title=100_doors&action=edit§ion=70\n", + "/wiki/Category:Eiffel\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+none&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+linked_list&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+integer&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+integer&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+boolean&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+integer&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+boolean&btnI=I%27m+Feeling+Lucky\n", + "http://www.google.com/search?q=site%3Ahttp%3A%2F%2Fdocs.eiffel.com%2Feiffelstudio%2Flibraries+boolean&btnI=I%27m+Feeling+Lucky\n", + "/mw/index.php?title=100_doors&action=edit§ion=71\n", + "/wiki/Category:Ela\n", + "/mw/index.php?title=100_doors&action=edit§ion=72\n", + "/wiki/Category:Elixir\n", + "/mw/index.php?title=100_doors&action=edit§ion=73\n", + "/wiki/Category:Emacs_Lisp\n", + "/mw/index.php?title=100_doors&action=edit§ion=74\n", + "/wiki/Category:Erlang\n", + "http://erlang.org/doc/man/lists.html\n", + "http://erlang.org/doc/man/lists.html\n", + 
"http://erlang.org/doc/man/lists.html\n", + "http://erlang.org/doc/man/math.html\n", + "http://erlang.org/doc/man/io.html\n", + "http://erlang.org/doc/man/io.html\n", + "http://erlang.org/doc/man/lists.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=75\n", + "/wiki/Category:ERRE\n", + "/mw/index.php?title=100_doors&action=edit§ion=76\n", + "/wiki/Category:Euler_Math_Toolbox\n", + "/mw/index.php?title=100_doors&action=edit§ion=77\n", + "/wiki/Category:Euphoria\n", + "/mw/index.php?title=100_doors&action=edit§ion=78\n", + "/wiki/Category:F_Sharp\n", + "http://research.microsoft.com/en-us/um/cambridge/projects/fsharp/manual/namespaces.html\n", + "http://research.microsoft.com/en-us/um/cambridge/projects/fsharp/manual/namespaces.html\n", + "http://research.microsoft.com/en-us/um/cambridge/projects/fsharp/manual/namespaces.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=79\n", + "/wiki/Category:Factor\n", + "/mw/index.php?title=100_doors&action=edit§ion=80\n", + "/wiki/Category:Falcon\n", + "http://falconpl.org/project_docs/core/functions.html#arrayBuffer\n", + "http://falconpl.org/project_docs/core/functions.html#len\n", + "http://falconpl.org/project_docs/core/functions.html#len\n", + "http://falconpl.org/project_docs/core/functions.html#len\n", + "http://falconpl.org/project_docs/core/functions.html#fract\n", + "/mw/index.php?title=100_doors&action=edit§ion=81\n", + "/wiki/Category:Fantom\n", + "/mw/index.php?title=100_doors&action=edit§ion=82\n", + "/wiki/Category:FBSL\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckas.shtml\n", + "http://www.qbasicnews.com/qboho/qckinteger.shtml\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qcknot.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + "http://www.qbasicnews.com/qboho/qckdim.shtml\n", + "http://www.qbasicnews.com/qboho/qckprint.shtml\n", + 
"http://www.qbasicnews.com/qboho/qckend.shtml\n", + "/mw/index.php?title=100_doors&action=edit§ion=83\n", + "/wiki/Category:Friendly_interactive_shell\n", + "/mw/index.php?title=100_doors&action=edit§ion=84\n", + "/wiki/Category:Forth\n", + "/mw/index.php?title=100_doors&action=edit§ion=85\n", + "/wiki/Category:Fortran\n", + "/wiki/Fortran\n", + "/mw/index.php?title=100_doors&action=edit§ion=86\n", + "/wiki/Category:Frink\n", + "/mw/index.php?title=100_doors&action=edit§ion=87\n", + "/wiki/Category:FunL\n", + "/mw/index.php?title=100_doors&action=edit§ion=88\n", + "/mw/index.php?title=100_doors&action=edit§ion=89\n", + "/mw/index.php?title=100_doors&action=edit§ion=90\n", + "/wiki/Category:GAP\n", + "/mw/index.php?title=100_doors&action=edit§ion=91\n", + "/wiki/Category:GML\n", + "/mw/index.php?title=100_doors&action=edit§ion=92\n", + "/wiki/Category:Go\n", + "/mw/index.php?title=100_doors&action=edit§ion=93\n", + "/wiki/Category:Golfscript\n", + "/mw/index.php?title=100_doors&action=edit§ion=94\n", + "/wiki/Category:Gosu\n", + "/mw/index.php?title=100_doors&action=edit§ion=95\n", + "/wiki/Category:Groovy\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20false\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20step\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20true\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20println\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20println\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20each\n", + "http://www.google.de/search?q=site%3Agroovy.codehaus.org/%20println\n", + "/mw/index.php?title=100_doors&action=edit§ion=96\n", + 
"/wiki/Category:Harbour\n", + "/mw/index.php?title=100_doors&action=edit§ion=97\n", + "/wiki/Category:Haskell\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Show\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Int\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:zipWith\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:cycle\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:id\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:foldl\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Show\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Int\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:zipWith\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:mod\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:otherwise\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#v:foldr\n", + "http://haskell.org/ghc/docs/latest/html/libraries/base/Prelude.html#t:Eq\n", + "/mw/index.php?title=100_doors&action=edit§ion=98\n", + "/wiki/Category:Haxe\n", + "/mw/index.php?title=100_doors&action=edit§ion=99\n", + "/wiki/Category:HicEst\n", + "/mw/index.php?title=100_doors&action=edit§ion=100\n", + "/wiki/Category:Hy\n", + "/wiki/100_doors#Coco\n", + "/mw/index.php?title=100_doors&action=edit§ion=101\n", + "/wiki/Category:Icon\n", + "/wiki/Category:Unicon\n", + "/mw/index.php?title=100_doors&action=edit§ion=102\n", + "/wiki/Category:Inform_7\n", + "/mw/index.php?title=Z-machine&action=edit&redlink=1\n", + "/mw/index.php?title=Glulx_virtual_machine&action=edit&redlink=1\n", + "/mw/index.php?title=100_doors&action=edit§ion=103\n", + "/wiki/Category:Informix_4GL\n", + "/mw/index.php?title=100_doors&action=edit§ion=104\n", + "/wiki/Category:Io\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=105\n", + "/wiki/Category:Ioke\n", + "/mw/index.php?title=100_doors&action=edit§ion=106\n", + "/wiki/Category:J\n", + "/mw/index.php?title=100_doors&action=edit§ion=107\n", + "/wiki/Category:Java\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Astring+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Amath+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "http://www.google.com/search?hl=en&q=allinurl%3Asystem+java.sun.com&btnI=I%27m%20Feeling%20Lucky\n", + "/mw/index.php?title=100_doors&action=edit§ion=108\n", + "/wiki/Category:JavaScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=109\n", + "/mw/index.php?title=100_doors&action=edit§ion=110\n", + "/mw/index.php?title=100_doors&action=edit§ion=111\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=112\n", + "/mw/index.php?title=100_doors&action=edit§ion=113\n", + "/wiki/Category:Jq\n", + "/mw/index.php?title=100_doors&action=edit§ion=114\n", + "/wiki/Category:Julia\n", + "/mw/index.php?title=100_doors&action=edit§ion=115\n", + "/wiki/Category:K\n", + "/mw/index.php?title=100_doors&action=edit§ion=116\n", + "/wiki/Category:Kotlin\n", + "/mw/index.php?title=100_doors&action=edit§ion=117\n", + "/wiki/Category:LabVIEW\n", + "http://zone.ni.com/devzone/cda/tut/p/id/9330\n", + "/wiki/LabVIEW\n", + "/wiki/File:100doors.png\n", + "http://zone.ni.com/devzone/cda/tut/p/id/9330\n", + "/wiki/LabVIEW\n", + "/wiki/File:LabVIEW_100_doors.png\n", + "/mw/index.php?title=100_doors&action=edit§ion=118\n", + "/wiki/Category:Lasso\n", + "/mw/index.php?title=100_doors&action=edit§ion=119\n", + "/mw/index.php?title=100_doors&action=edit§ion=120\n", + "/wiki/Category:Lhogho\n", + "/mw/index.php?title=100_doors&action=edit§ion=121\n", + "/wiki/Category:Liberty_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=122\n", + "/wiki/Category:LiveCode\n", + "/mw/index.php?title=100_doors&action=edit§ion=123\n", + "/wiki/Category:Logo\n", + "/mw/index.php?title=100_doors&action=edit§ion=124\n", + "/wiki/Category:LOLCODE\n", + "/mw/index.php?title=100_doors&action=edit§ion=125\n", + "/wiki/Category:Lua\n", + "/mw/index.php?title=100_doors&action=edit§ion=126\n", + "/wiki/Category:M4\n", + "/mw/index.php?title=100_doors&action=edit§ion=127\n", + "/wiki/Category:Maple\n", + "/mw/index.php?title=100_doors&action=edit§ion=128\n", + "/wiki/Category:Mathematica\n", + "/mw/index.php?title=100_doors&action=edit§ion=129\n", + "/wiki/Category:MATLAB\n", + "/wiki/Category:Octave\n", + "/mw/index.php?title=100_doors&action=edit§ion=130\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/zeros.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + 
"http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/sqrt.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/floor.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/sqrt.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/i.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/zeros.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/sqrt.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=131\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/logical.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/find.html\n", + "http://www.mathworks.com/access/helpdesk/help/techdoc/ref/find.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=132\n", + "/mw/index.php?title=100_doors&action=edit§ion=133\n", + "/wiki/Category:Maxima\n", + "/mw/index.php?title=100_doors&action=edit§ion=134\n", + "/wiki/Category:MAXScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=135\n", + "/wiki/Category:Mercury\n", + "/mw/index.php?title=100_doors&action=edit§ion=136\n", + "/wiki/Category:Metafont\n", + "/mw/index.php?title=100_doors&action=edit§ion=137\n", + "/wiki/Category:MIPS_Assembly\n", + "/mw/index.php?title=100_doors&action=edit§ion=138\n", + "/wiki/Category:Mirah\n", + "/mw/index.php?title=100_doors&action=edit§ion=139\n", + "/wiki/Category:MIRC_Scripting_Language\n", + "http://www.mirc.com/echo\n", + "http://www.mirc.com/echo\n", + "/mw/index.php?title=100_doors&action=edit§ion=140\n", + "/wiki/Category:ML/I\n", + "/mw/index.php?title=100_doors&action=edit§ion=141\n", + "/wiki/Category:MMIX\n", + "/wiki/100_doors/MMIX\n", + "/mw/index.php?title=100_doors&action=edit§ion=142\n", + "/wiki/Category:Modula-2\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=143\n", + "/wiki/Category:Modula-3\n", + "/mw/index.php?title=100_doors&action=edit§ion=144\n", + "/wiki/Category:MOO\n", + "/mw/index.php?title=100_doors&action=edit§ion=145\n", + "/wiki/Category:MoonScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=146\n", + "/wiki/Category:MUMPS\n", + "/mw/index.php?title=100_doors&action=edit§ion=147\n", + "/wiki/Category:NetRexx\n", + "/wiki/100_doors#Java\n", + "/wiki/100_doors#Java\n", + "/mw/index.php?title=100_doors&action=edit§ion=148\n", + "/wiki/Category:NewLisp\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#define\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#let\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#int\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#sqrt\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#if\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#string\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#string\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#dolist\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#map\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#sequence\n", + "http://www.newlisp.org/downloads/newlisp_manual.html#println\n", + "/mw/index.php?title=100_doors&action=edit§ion=149\n", + "/wiki/Category:Nim\n", + "/mw/index.php?title=100_doors&action=edit§ion=150\n", + "/wiki/Category:Objeck\n", + "/mw/index.php?title=100_doors&action=edit§ion=151\n", + "/wiki/Category:Objective-C\n", + "/mw/index.php?title=100_doors&action=edit§ion=152\n", + "/wiki/Category:OCaml\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Array.html\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Printf.html\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Array.html\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Pervasives.html#VALint_of_float\n", + 
"http://caml.inria.fr/pub/docs/manual-ocaml/libref/Pervasives.html#VALsqrt\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Pervasives.html#VALfloat_of_int\n", + "http://caml.inria.fr/pub/docs/manual-ocaml/libref/Array.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=153\n", + "/wiki/Category:Octave\n", + "http://octave.sourceforge.net/octave/function/false.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/j.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/j.html\n", + "http://octave.sourceforge.net/octave/function/j.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "http://octave.sourceforge.net/octave/function/printf.html\n", + "http://octave.sourceforge.net/octave/function/i.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=154\n", + "/wiki/Category:Oforth\n", + "/mw/index.php?title=100_doors&action=edit§ion=155\n", + "/wiki/Category:OoRexx\n", + "/mw/index.php?title=100_doors&action=edit§ion=156\n", + "/wiki/Category:OpenEdge/Progress\n", + "/mw/index.php?title=100_doors&action=edit§ion=157\n", + "/wiki/Category:OxygenBasic\n", + "/mw/index.php?title=100_doors&action=edit§ion=158\n", + "/wiki/Category:Oz\n", + "/mw/index.php?title=100_doors&action=edit§ion=159\n", + "/wiki/Category:PARI/GP\n", + "/mw/index.php?title=100_doors&action=edit§ion=160\n", + "/wiki/Category:Pascal\n", + "/mw/index.php?title=100_doors&action=edit§ion=161\n", + "/wiki/Category:Perl\n", + "/wiki/Perl\n", + "http://perldoc.perl.org/functions/print.html\n", + "/wiki/Perl\n", + "http://perldoc.perl.org/functions/map.html\n", + "http://perldoc.perl.org/functions/int.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "http://perldoc.perl.org/functions/join.html\n", + 
"http://perldoc.perl.org/functions/map.html\n", + "http://perldoc.perl.org/functions/grep.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "/wiki/Perl\n", + "http://perldoc.perl.org/functions/print.html\n", + "http://perldoc.perl.org/functions/map.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "http://perldoc.perl.org/functions/qw.html\n", + "http://perldoc.perl.org/functions/int.html\n", + "http://perldoc.perl.org/functions/sqrt.html\n", + "http://perldoc.perl.org/functions/sqrt.html\n", + "http://perldoc.perl.org/functions/sqrt.html\n", + "http://perldoc.perl.org/functions/int.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "http://perldoc.perl.org/functions/print.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=162\n", + "/wiki/Category:Perl5i\n", + "/mw/index.php?title=100_doors&action=edit§ion=163\n", + "/wiki/Category:Perl_6\n", + "/wiki/Rakudo\n", + "/mw/index.php?title=100_doors&action=edit§ion=164\n", + "/wiki/Category:PHL\n", + "/mw/index.php?title=100_doors&action=edit§ion=165\n", + "/mw/index.php?title=100_doors&action=edit§ion=166\n", + "/wiki/100_doors#C.23\n", + "/mw/index.php?title=100_doors&action=edit§ion=167\n", + "/wiki/Category:PHP\n", + "http://www.thomporter.com/100doors.php\n", + "http://www.php.net/sqrt\n", + "http://www.php.net/ceil\n", + "http://www.php.net/array\n", + "http://www.php.net/array_fill\n", + "http://www.php.net/printf\n", + "/mw/index.php?title=100_doors&action=edit§ion=168\n", + "/wiki/Category:PicoLisp\n", + "/mw/index.php?title=100_doors&action=edit§ion=169\n", + "/wiki/Category:Piet\n", + "http://www.toothycat.net/~sham/piet/100doors.png\n", + "/mw/index.php?title=100_doors&action=edit§ion=170\n", + "/wiki/Category:Pike\n", + "/mw/index.php?title=100_doors&action=edit§ion=171\n", + "/wiki/Category:PL/I\n", + "/mw/index.php?title=100_doors&action=edit§ion=172\n", + "/wiki/Category:PL/SQL\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=DECLARE\n", + 
"http://www.oracle.com/pls/db92/db92.drilldown?word=TYPE\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IS\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=OF\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=BOOLEAN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=BEGIN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FOR\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=COUNT\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FALSE\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FOR\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FOR\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IF\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=MOD\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=THEN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=NOT\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IF\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=FOR\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=IN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=COUNT\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + 
"http://www.oracle.com/pls/db92/db92.drilldown?word=DBMS_OUTPUT\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=CASE\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=WHEN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=THEN\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=ELSE\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=LOOP\n", + "http://www.oracle.com/pls/db92/db92.drilldown?word=END\n", + "/mw/index.php?title=100_doors&action=edit§ion=173\n", + "/wiki/Category:Pop11\n", + "/mw/index.php?title=100_doors&action=edit§ion=174\n", + "/wiki/Category:PostScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=175\n", + "/wiki/Category:Potion\n", + "/mw/index.php?title=100_doors&action=edit§ion=176\n", + "/wiki/Category:PowerShell\n", + "/mw/index.php?title=100_doors&action=edit§ion=177\n", + "/mw/index.php?title=100_doors&action=edit§ion=178\n", + "/mw/index.php?title=100_doors&action=edit§ion=179\n", + "about:blank\n", + "about:blank\n", + "/mw/index.php?title=100_doors&action=edit§ion=180\n", + "about:blank\n", + "about:blank\n", + "about:blank\n", + "about:blank\n", + "/mw/index.php?title=100_doors&action=edit§ion=181\n", + "about:blank\n", + "about:blank\n", + "about:blank\n", + "about:blank\n", + "/mw/index.php?title=100_doors&action=edit§ion=182\n", + "/mw/index.php?title=100_doors&action=edit§ion=183\n", + "about:blank\n", + "about:blank\n", + "/mw/index.php?title=100_doors&action=edit§ion=184\n", + "/wiki/Category:ProDOS\n", + "/mw/index.php?title=100_doors&action=edit§ion=185\n", + "/wiki/Category:Prolog\n", + "/mw/index.php?title=100_doors&action=edit§ion=186\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + 
"http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=187\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "http://pauillac.inria.fr/~deransar/prolog/bips.html\n", + "/mw/index.php?title=100_doors&action=edit§ion=188\n", + "/wiki/Category:PureBasic\n", + "/mw/index.php?title=100_doors&action=edit§ion=189\n", + "/wiki/Category:Python\n", + "/wiki/Python\n", + "/wiki/Python\n", + "/mw/index.php?title=100_doors&action=edit§ion=190\n", + "/wiki/Category:Q\n", + "/mw/index.php?title=100_doors&action=edit§ion=191\n", + "/wiki/Category:R\n", + "/mw/index.php?title=100_doors&action=edit§ion=192\n", + "/wiki/Category:Racket\n", + "/wiki/File:100doors_rkt.png\n", + "/mw/index.php?title=100_doors&action=edit§ion=193\n", + "/wiki/Category:RapidQ\n", + "/mw/index.php?title=100_doors&action=edit§ion=194\n", + 
"/wiki/Category:REALbasic\n", + "/mw/index.php?title=100_doors&action=edit§ion=195\n", + "/wiki/Category:REBOL\n", + "/mw/index.php?title=100_doors&action=edit§ion=196\n", + "/mw/index.php?title=100_doors&action=edit§ion=197\n", + "/mw/index.php?title=100_doors&action=edit§ion=198\n", + "/wiki/Category:Retro\n", + "/mw/index.php?title=100_doors&action=edit§ion=199\n", + "/wiki/Category:REXX\n", + "/mw/index.php?title=100_doors&action=edit§ion=200\n", + "/mw/index.php?title=100_doors&action=edit§ion=201\n", + "/mw/index.php?title=100_doors&action=edit§ion=202\n", + "/mw/index.php?title=100_doors&action=edit§ion=203\n", + "/mw/index.php?title=100_doors&action=edit§ion=204\n", + "/wiki/Category:Ruby\n", + "/mw/index.php?title=100_doors&action=edit§ion=205\n", + "/wiki/Category:Run_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=206\n", + "/wiki/Category:Rust\n", + "/mw/index.php?title=100_doors&action=edit§ion=207\n", + "/wiki/Category:S-lang\n", + "/mw/index.php?title=100_doors&action=edit§ion=208\n", + "/wiki/Category:Salmon\n", + "/mw/index.php?title=100_doors&action=edit§ion=209\n", + "/wiki/Category:SAS\n", + "/mw/index.php?title=100_doors&action=edit§ion=210\n", + "/wiki/Category:Scala\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "http://scala-lang.org\n", + "/mw/index.php?title=100_doors&action=edit§ion=211\n", + "/wiki/Category:Sather\n", + "/mw/index.php?title=100_doors&action=edit§ion=212\n", + "/wiki/Category:Scheme\n", + "/mw/index.php?title=100_doors&action=edit§ion=213\n", + "/wiki/Category:Seed7\n", + "/mw/index.php?title=100_doors&action=edit§ion=214\n", + "/wiki/Category:SETL\n", 
+ "/mw/index.php?title=100_doors&action=edit§ion=215\n", + "/wiki/Category:Sidef\n", + "/mw/index.php?title=100_doors&action=edit§ion=216\n", + "/wiki/Category:Slate\n", + "/mw/index.php?title=100_doors&action=edit§ion=217\n", + "/wiki/Category:Smalltalk\n", + "/wiki/GNU_Smalltalk\n", + "/mw/index.php?title=Squeak_Smalltalk&action=edit&redlink=1\n", + "/mw/index.php?title=100_doors&action=edit§ion=218\n", + "/wiki/Category:SNOBOL4\n", + "/mw/index.php?title=100_doors&action=edit§ion=219\n", + "/wiki/Category:Sparkling\n", + "/mw/index.php?title=100_doors&action=edit§ion=220\n", + "/wiki/Category:SQL\n", + "/mw/index.php?title=100_doors&action=edit§ion=221\n", + "/wiki/Category:Swift\n", + "/mw/index.php?title=100_doors&action=edit§ion=222\n", + "/wiki/Category:Tcl\n", + "/wiki/Category:Tk\n", + "/mw/index.php?title=100_doors&action=edit§ion=223\n", + "/wiki/Category:TI-83_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=224\n", + "/mw/index.php?title=100_doors&action=edit§ion=225\n", + "/mw/index.php?title=100_doors&action=edit§ion=226\n", + "/wiki/Category:TI-89_BASIC\n", + "/mw/index.php?title=100_doors&action=edit§ion=227\n", + "/wiki/Category:TorqueScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=228\n", + "/wiki/Category:TSE_SAL\n", + "/mw/index.php?title=100_doors&action=edit§ion=229\n", + "/wiki/Category:TUSCRIPT\n", + "/mw/index.php?title=100_doors&action=edit§ion=230\n", + "/wiki/Category:TXR\n", + "/mw/index.php?title=100_doors&action=edit§ion=231\n", + "/wiki/Category:Uniface\n", + "/mw/index.php?title=Uniface_9.6&action=edit&redlink=1\n", + "/mw/index.php?title=100_doors&action=edit§ion=232\n", + "/wiki/Category:UNIX_Shell\n", + "/wiki/Bourne_Again_SHell\n", + "/mw/index.php?title=100_doors&action=edit§ion=233\n", + "/wiki/Category:Ursala\n", + "/mw/index.php?title=100_doors&action=edit§ion=234\n", + "/wiki/Category:Vala\n", + "/mw/index.php?title=100_doors&action=edit§ion=235\n", + "/wiki/Category:VBA\n", + 
"/mw/index.php?title=100_doors&action=edit§ion=236\n", + "/wiki/Category:VBScript\n", + "/wiki/Windows_Script_Host\n", + "/mw/index.php?title=100_doors&action=edit§ion=237\n", + "/wiki/Category:Vedit_macro_language\n", + "/mw/index.php?title=100_doors&action=edit§ion=238\n", + "/wiki/Category:VHDL\n", + "/mw/index.php?title=100_doors&action=edit§ion=239\n", + "/wiki/Category:Visual_Basic_.NET\n", + "/wiki/Visual_Basic_.NET\n", + "/mw/index.php?title=100_doors&action=edit§ion=240\n", + "/wiki/Category:Wart\n", + "/mw/index.php?title=100_doors&action=edit§ion=241\n", + "/wiki/Category:Wortel\n", + "/wiki/100_doors#JavaScript\n", + "/mw/index.php?title=100_doors&action=edit§ion=242\n", + "/wiki/Category:Wrapl\n", + "/mw/index.php?title=100_doors&action=edit§ion=243\n", + "/wiki/Category:XPL0\n", + "/mw/index.php?title=100_doors&action=edit§ion=244\n", + "/wiki/Category:XSLT_1.0\n", + "/wiki/100_doors/XSLT\n", + "/mw/index.php?title=100_doors&action=edit§ion=245\n", + "/wiki/Category:XSLT_2.0\n", + "/mw/index.php?title=100_doors&action=edit§ion=246\n", + "/wiki/Category:Yorick\n", + "/mw/index.php?title=100_doors&action=edit§ion=247\n", + "/wiki/Category:Zkl\n", + "/mw/index.php?title=100_doors&action=edit§ion=248\n", + "/wiki/Category:ZX_Spectrum_Basic\n", + "http://rosettacode.org/mw/index.php?title=100_doors&oldid=205312\n", + "/wiki/Special:Categories\n", + "/wiki/Category:Programming_Tasks\n", + "/wiki/Category:Solutions_by_Programming_Task\n", + "/wiki/Category:4DOS_Batch\n", + "/wiki/Category:6502_Assembly\n", + "/wiki/Category:68000_Assembly\n", + "/wiki/Category:8086_Assembly\n", + "/wiki/Category:8th\n", + "/wiki/Category:ABAP\n", + "/wiki/Category:ACL2\n", + "/wiki/Category:ActionScript\n", + "/wiki/Category:Acurity_Architect\n", + "/wiki/Category:Ada\n", + "/wiki/Category:Aikido\n", + "/wiki/Category:ALGOL_68\n", + "/wiki/Category:ALGOL_W\n", + "/wiki/Category:AmigaE\n", + "/wiki/Category:APL\n", + "/wiki/Category:AppleScript\n", + "/wiki/Category:Arbre\n", 
+ "/wiki/Category:Argile\n", + "/wiki/Category:ATS\n", + "/wiki/Category:AutoHotkey\n", + "/wiki/Category:AutoIt\n", + "/wiki/Category:Axiom\n", + "/wiki/Category:AWK\n", + "/wiki/Category:BASIC\n", + "/wiki/Category:BASIC256\n", + "/wiki/Category:Batch_File\n", + "/wiki/Category:BBC_BASIC\n", + "/wiki/Category:Bc\n", + "/wiki/Category:Befunge\n", + "/wiki/Category:BlitzMax\n", + "/wiki/Category:Bracmat\n", + "/wiki/Category:Burlesque\n", + "/wiki/Category:C\n", + "/mw/index.php?title=Category:C_Runtime&action=edit&redlink=1\n", + "/wiki/Category:C%2B%2B\n", + "/wiki/Category:C_sharp\n", + "/wiki/Category:C1R\n", + "/wiki/Category:Cach%C3%A9_ObjectScript\n", + "/wiki/Category:Clarion\n", + "/wiki/Category:CLIPS\n", + "/wiki/Category:Clojure\n", + "/wiki/Category:COBOL\n", + "/wiki/Category:Coco\n", + "/wiki/Category:CoffeeScript\n", + "/wiki/Category:ColdFusion\n", + "/wiki/Category:Common_Lisp\n", + "/wiki/Category:Component_Pascal\n", + "/wiki/Category:Coq\n", + "/wiki/Category:Crystal\n", + "/wiki/Category:D\n", + "/wiki/Category:Dart\n", + "/wiki/Category:DCL\n", + "/wiki/Category:Delphi\n", + "/wiki/Category:D%C3%A9j%C3%A0_Vu\n", + "/wiki/Category:DWScript\n", + "/wiki/Category:Dylan\n", + "/wiki/Category:E\n", + "/wiki/Category:ECL\n", + "/wiki/Category:Eero\n", + "/wiki/Category:EGL\n", + "/wiki/Category:Eiffel\n", + "/wiki/Category:Ela\n", + "/wiki/Category:Elixir\n", + "/wiki/Category:Emacs_Lisp\n", + "/wiki/Category:Erlang\n", + "/wiki/Category:ERRE\n", + "/wiki/Category:Euler_Math_Toolbox\n", + "/wiki/Category:Euphoria\n", + "/wiki/Category:F_Sharp\n", + "/wiki/Category:Factor\n", + "/wiki/Category:Falcon\n", + "/wiki/Category:Fantom\n", + "/wiki/Category:FBSL\n", + "/wiki/Category:Friendly_interactive_shell\n", + "/wiki/Category:Forth\n", + "/wiki/Category:Fortran\n", + "/wiki/Category:Frink\n", + "/wiki/Category:FunL\n", + "/wiki/Category:GAP\n", + "/wiki/Category:GML\n", + "/wiki/Category:Go\n", + "/wiki/Category:Golfscript\n", + 
"/wiki/Category:Gosu\n", + "/wiki/Category:Groovy\n", + "/wiki/Category:Harbour\n", + "/wiki/Category:Haskell\n", + "/wiki/Category:Haxe\n", + "/wiki/Category:HicEst\n", + "/wiki/Category:Hy\n", + "/wiki/Category:Icon\n", + "/wiki/Category:Unicon\n", + "/wiki/Category:Inform_7\n", + "/wiki/Category:Informix_4GL\n", + "/wiki/Category:Io\n", + "/wiki/Category:Ioke\n", + "/wiki/Category:J\n", + "/wiki/Category:Java\n", + "/wiki/Category:JavaScript\n", + "/wiki/Category:Jq\n", + "/wiki/Category:Julia\n", + "/wiki/Category:K\n", + "/wiki/Category:Kotlin\n", + "/wiki/Category:LabVIEW\n", + "/wiki/Category:Lasso\n", + "/wiki/Category:Lhogho\n", + "/wiki/Category:Liberty_BASIC\n", + "/wiki/Category:LiveCode\n", + "/wiki/Category:Logo\n", + "/wiki/Category:LOLCODE\n", + "/wiki/Category:Lua\n", + "/wiki/Category:M4\n", + "/wiki/Category:Maple\n", + "/wiki/Category:Mathematica\n", + "/wiki/Category:MATLAB\n", + "/wiki/Category:Octave\n", + "/wiki/Category:Maxima\n", + "/wiki/Category:MAXScript\n", + "/wiki/Category:Mercury\n", + "/wiki/Category:Metafont\n", + "/wiki/Category:MIPS_Assembly\n", + "/wiki/Category:Mirah\n", + "/wiki/Category:MIRC_Scripting_Language\n", + "/wiki/Category:ML/I\n", + "/wiki/Category:MMIX\n", + "/wiki/Category:Modula-2\n", + "/wiki/Category:Modula-3\n", + "/wiki/Category:MOO\n", + "/wiki/Category:MoonScript\n", + "/wiki/Category:MUMPS\n", + "/wiki/Category:NetRexx\n", + "/wiki/Category:NewLisp\n", + "/wiki/Category:Nim\n", + "/wiki/Category:Objeck\n", + "/wiki/Category:Objective-C\n", + "/wiki/Category:OCaml\n", + "/wiki/Category:Oforth\n", + "/wiki/Category:OoRexx\n", + "/wiki/Category:OpenEdge/Progress\n", + "/wiki/Category:OxygenBasic\n", + "/wiki/Category:Oz\n", + "/wiki/Category:PARI/GP\n", + "/wiki/Category:Pascal\n", + "/wiki/Category:Perl\n", + "/wiki/Category:Perl5i\n", + "/wiki/Category:Perl_6\n", + "/wiki/Category:PHL\n", + "/wiki/Category:PHP\n", + "/wiki/Category:PicoLisp\n", + "/wiki/Category:Piet\n", + "/wiki/Category:Pike\n", + 
"/wiki/Category:PL/I\n", + "/wiki/Category:PL/SQL\n", + "/wiki/Category:Pop11\n", + "/wiki/Category:PostScript\n", + "/wiki/Category:Potion\n", + "/wiki/Category:PowerShell\n", + "/wiki/Category:ProDOS\n", + "/wiki/Category:Prolog\n", + "/wiki/Category:PureBasic\n", + "/wiki/Category:Python\n", + "/wiki/Category:Q\n", + "/wiki/Category:R\n", + "/wiki/Category:Racket\n", + "/wiki/Category:RapidQ\n", + "/wiki/Category:REALbasic\n", + "/wiki/Category:REBOL\n", + "/wiki/Category:Retro\n", + "/wiki/Category:REXX\n", + "/wiki/Category:Ruby\n", + "/wiki/Category:Run_BASIC\n", + "/wiki/Category:Rust\n", + "/wiki/Category:S-lang\n", + "/wiki/Category:Salmon\n", + "/wiki/Category:SAS\n", + "/wiki/Category:Scala\n", + "/wiki/Category:Sather\n", + "/wiki/Category:Scheme\n", + "/wiki/Category:Seed7\n", + "/wiki/Category:SETL\n", + "/wiki/Category:Sidef\n", + "/wiki/Category:Slate\n", + "/wiki/Category:Smalltalk\n", + "/wiki/Category:SNOBOL4\n", + "/wiki/Category:Sparkling\n", + "/wiki/Category:SQL\n", + "/wiki/Category:Swift\n", + "/wiki/Category:Tcl\n", + "/wiki/Category:Tk\n", + "/wiki/Category:TI-83_BASIC\n", + "/wiki/Category:TI-89_BASIC\n", + "/wiki/Category:TorqueScript\n", + "/wiki/Category:TSE_SAL\n", + "/wiki/Category:TUSCRIPT\n", + "/wiki/Category:TXR\n", + "/wiki/Category:Uniface\n", + "/wiki/Category:UNIX_Shell\n", + "/wiki/Category:Ursala\n", + "/wiki/Category:Vala\n", + "/wiki/Category:VBA\n", + "/wiki/Category:VBScript\n", + "/wiki/Category:Vedit_macro_language\n", + "/wiki/Category:VHDL\n", + "/wiki/Category:Visual_Basic_.NET\n", + "/wiki/Category:Wart\n", + "/wiki/Category:Wortel\n", + "/wiki/Category:Wrapl\n", + "/wiki/Category:XPL0\n", + "/wiki/Category:XSLT_1.0\n", + "/wiki/Category:XSLT_2.0\n", + "/wiki/Category:Yorick\n", + "/wiki/Category:Zkl\n", + "/wiki/Category:GUISS/Omit\n", + "/wiki/Category:ZX_Spectrum_Basic\n", + "/mw/index.php?title=Special:UserLogin&returnto=100+doors&type=signup\n", + "/mw/index.php?title=Special:UserLogin&returnto=100+doors\n", 
+ "/wiki/100_doors\n", + "/wiki/Talk:100_doors\n", + "#\n", + "/wiki/100_doors\n", + "/mw/index.php?title=100_doors&action=edit\n", + "/mw/index.php?title=100_doors&action=history\n", + "#\n", + "/wiki/Rosetta_Code\n", + "http://twitter.com/rosettacode\n", + "/wiki/Special:WebChat\n", + "http://rosettacode.org/planet\n", + "/wiki/Rosetta_Code:Village_Pump\n", + "/wiki/Rosetta_Code:Finances\n", + "/wiki/Category:Programming_Languages\n", + "/wiki/Category:Programming_Tasks\n", + "/wiki/Special:RecentChanges\n", + "/wiki/Help:Similar_Sites\n", + "/wiki/Special:Random\n", + "https://twitter.com/share\n", + "/wiki/Special:WhatLinksHere/100_doors\n", + "/wiki/Special:RecentChangesLinked/100_doors\n", + "/wiki/Special:SpecialPages\n", + "/mw/index.php?title=100_doors&printable=yes\n", + "/mw/index.php?title=100_doors&oldid=205312\n", + "/wiki/Special:Browse/100_doors\n", + "http://www.gnu.org/licenses/fdl-1.2.html\n", + "/wiki/Rosetta_Code:Privacy_policy\n", + "/wiki/Rosetta_Code:About\n", + "/wiki/Rosetta_Code:General_disclaimer\n", + "http://www.gnu.org/licenses/fdl-1.2.html\n", + "//www.mediawiki.org/\n", + "http://www.semantic-mediawiki.org/wiki/Semantic_MediaWiki\n" + ] + } + ], + "source": [ + "#url = input(\"Enter a website to extract the URL's from: \")\n", + "#url = 'rosettacode.org/wiki/Category:C'\n", + "url = 'rosettacode.org/wiki/100_doors'\n", + "r = requests.get(\"http://\" +url)\n", + "\n", + "data = r.text\n", + "\n", + "soup = BeautifulSoup(data)\n", + "\n", + "for link in soup.find_all('a'):\n", + " print(link.get('href'))" + ] + }, + { + "cell_type": "code", + "execution_count": 203, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# c_soup = BeautifulSoup(\n", + "x = soup.find(\"pre\", class_=\"c highlighted_source\")\n", + "#x = soup.select('pre')[0]\n", + "# dir(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "#include \n", + "int main(){ char is_open[100] = { 0 }; int pass, door;\n", + " /* do the 100 passes */ for (pass = 0; pass < 100; ++pass) for (door = pass; door < 100; door += pass+1) is_open[door] = !is_open[door];\n", + " /* output the result */ for (door = 0; door < 100; ++door) printf(\"door #%d is\n", + "%s.\\n\", door+1, (is_open[door]? \"open\" : \"closed\"));\n", + " return 0;}\n" + ] + } + ], + "source": [ + "import re\n", + "print(re.sub(r'\\xa0', r'\\n',x.text))" + ] + }, + { + "cell_type": "code", + "execution_count": 205, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[#include <stdio.h>,\n", + "
,\n", + " '\\xa0',\n", + "
,\n", + " int,\n", + " ' main',\n", + " (,\n", + " ),\n", + "
,\n", + " {,\n", + "
,\n", + " ' ',\n", + " char,\n", + " ' is_open',\n", + " [,\n", + " 100,\n", + " ],\n", + " ' ',\n", + " =,\n", + " ' ',\n", + " {,\n", + " ' ',\n", + " 0,\n", + " ' ',\n", + " },\n", + " ;,\n", + "
,\n", + " ' ',\n", + " int,\n", + " ' pass',\n", + " ,,\n", + " ' door',\n", + " ;,\n", + "
,\n", + " '\\xa0',\n", + "
,\n", + " ' ',\n", + " /* do the 100 passes */,\n", + "
,\n", + " ' ',\n", + " for,\n", + " ' ',\n", + " (,\n", + " 'pass ',\n", + " =,\n", + " ' ',\n", + " 0,\n", + " ;,\n", + " ' pass ',\n", + " <,\n", + " ' ',\n", + " 100,\n", + " ;,\n", + " ' ',\n", + " ++,\n", + " 'pass',\n", + " ),\n", + "
,\n", + " ' ',\n", + " for,\n", + " ' ',\n", + " (,\n", + " 'door ',\n", + " =,\n", + " ' pass',\n", + " ;,\n", + " ' door ',\n", + " <,\n", + " ' ',\n", + " 100,\n", + " ;,\n", + " ' door ',\n", + " +=,\n", + " ' pass',\n", + " +,\n", + " 1,\n", + " ),\n", + "
,\n", + " ' is_open',\n", + " [,\n", + " 'door',\n", + " ],\n", + " ' ',\n", + " =,\n", + " ' ',\n", + " !,\n", + " 'is_open',\n", + " [,\n", + " 'door',\n", + " ],\n", + " ;,\n", + "
,\n", + " '\\xa0',\n", + "
,\n", + " ' ',\n", + " /* output the result */,\n", + "
,\n", + " ' ',\n", + " for,\n", + " ' ',\n", + " (,\n", + " 'door ',\n", + " =,\n", + " ' ',\n", + " 0,\n", + " ;,\n", + " ' door ',\n", + " <,\n", + " ' ',\n", + " 100,\n", + " ;,\n", + " ' ',\n", + " ++,\n", + " 'door',\n", + " ),\n", + "
,\n", + " ' ',\n", + " printf,\n", + " (,\n", + " \"door #%d is %s.\\n\",\n", + " ,,\n", + " ' door',\n", + " +,\n", + " 1,\n", + " ,,\n", + " ' ',\n", + " (,\n", + " 'is_open',\n", + " [,\n", + " 'door',\n", + " ],\n", + " ?,\n", + " ' ',\n", + " \"open\",\n", + " ' ',\n", + " :,\n", + " ' ',\n", + " \"closed\",\n", + " ),\n", + " ),\n", + " ;,\n", + "
,\n", + " '\\xa0',\n", + "
,\n", + " ' ',\n", + " return,\n", + " ' ',\n", + " 0,\n", + " ;,\n", + "
,\n", + " }]" + ] + }, + "execution_count": 205, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.contents" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "while x.br:\n", + " a_tag = x.br\n", + " new_tag = soup.new_tag(\"p\")\n", + " new_tag.string = \"\\n\"\n", + " a_tag.replace_with(new_tag)" + ] + }, + { + "cell_type": "code", + "execution_count": 207, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "
#include <stdio.h>

\n", + "

 

\n", + "

int main()

\n", + "

{

\n", + "

char is_open[100] = { 0 };

\n", + "

int pass, door;

\n", + "

 

\n", + "

/* do the 100 passes */

\n", + "

for (pass = 0; pass < 100; ++pass)

\n", + "

for (door = pass; door < 100; door += pass+1)

\n", + "

is_open[door] = !is_open[door];

\n", + "

 

\n", + "

/* output the result */

\n", + "

for (door = 0; door < 100; ++door)

\n", + "

printf(\"door #%d is %s.\\n\", door+1, (is_open[door]? \"open\" : \"closed\"));

\n", + "

 

\n", + "

return 0;

\n", + "

}
" + ] + }, + "execution_count": 207, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#include \n", + "\n", + "int main()\n", + "{\n", + " char is_open[100] = { 0 };\n", + " int pass, door;\n", + "\n", + " /* do the 100 passes */\n", + " for (pass = 0; pass < 100; ++pass)\n", + " for (door = pass; door < 100; door += pass+1)\n", + " is_open[door] = !is_open[door];\n", + "\n", + " /* output the result */\n", + " for (door = 0; door < 100; ++door)\n", + " printf(\"door #%d is%s.\\n\", door+1, (is_open[door]? \"open\" : \"closed\"));\n", + "\n", + " return 0;\n", + "}\n" + ] + } + ], + "source": [ + "print(re.sub(r'\\xa0', r'',x.text))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "#include \n", + " \n", + "int main()\n", + "{\n", + " char is_open[100] = { 0 };\n", + " int pass, door;\n", + " \n", + " /* do the 100 passes */\n", + " for (pass = 0; pass < 100; ++pass)\n", + " for (door = pass; door < 100; door += pass+1)\n", + " is_open[door] = !is_open[door];\n", + " \n", + " /* output the result */\n", + " for (door = 0; door < 100; ++door)\n", + " printf(\"door #%d is %s.\\n\", door+1, (is_open[door]? 
\"open\" : \"closed\"));\n", + " \n", + " return 0;\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 201, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "c\n", + "c\n", + "c\n", + "c\n", + "c\n", + "c\n", + "ocaml\n", + "ocaml\n" + ] + } + ], + "source": [ + "pres = soup.findAll('pre')\n", + "texts = []\n", + "langs = ['c', 'ocaml']\n", + "for pre in pres:\n", + " lang = pre.get('class', ['',''])[0]\n", + " if lang in langs:\n", + " print(lang)\n", + "# lang" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['_68000devpac', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['abap', 'highlighted_source'],\n", + " ['abap', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['actionscript', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['ada', 'highlighted_source'],\n", + " ['ada', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['algol68', 'highlighted_source'],\n", + " ['algol68', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['', ''],\n", + " ['applescript', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " 
['text', 'highlighted_source'],\n", + " ['autohotkey', 'highlighted_source'],\n", + " ['autohotkey', 'highlighted_source'],\n", + " ['autohotkey', 'highlighted_source'],\n", + " ['autoit', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['awk', 'highlighted_source'],\n", + " ['awk', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['dos', 'highlighted_source'],\n", + " ['dos', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['cpp', 'highlighted_source'],\n", + " ['cpp', 'highlighted_source'],\n", + " ['cpp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['csharp', 'highlighted_source'],\n", + " ['c', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['clojure', 'highlighted_source'],\n", + " ['clojure', 'highlighted_source'],\n", + " ['clojure', 'highlighted_source'],\n", + " ['clojure', 'highlighted_source'],\n", + " ['clojure', 
'highlighted_source'],\n", + " ['cobol', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['coffeescript', 'highlighted_source'],\n", + " ['coffeescript', 'highlighted_source'],\n", + " ['coffeescript', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['cfm', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['', ''],\n", + " ['lisp', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['oberon2', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['d', 'highlighted_source'],\n", + " ['', ''],\n", + " ['d', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['dcl', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['delphi', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['e', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['objc', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['eiffel', 'highlighted_source'],\n", + " ['eiffel', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['lisp', 'highlighted_source'],\n", + " ['erlang', 'highlighted_source'],\n", + " ['erlang', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['euphoria', 'highlighted_source'],\n", + " ['fsharp', 'highlighted_source'],\n", + " ['fsharp', 'highlighted_source'],\n", + " 
['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['falcon', 'highlighted_source'],\n", + " ['falcon', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['qbasic', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['fortran', 'highlighted_source'],\n", + " ['fortran', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['gml', 'highlighted_source'],\n", + " ['go', 'highlighted_source'],\n", + " ['', ''],\n", + " ['go', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['groovy', 'highlighted_source'],\n", + " ['groovy', 'highlighted_source'],\n", + " ['groovy', 'highlighted_source'],\n", + " ['visualfoxpro', 'highlighted_source'],\n", + " ['visualfoxpro', 'highlighted_source'],\n", + " ['', ''],\n", + " ['haskell', 'highlighted_source'],\n", + " ['haskell', 'highlighted_source'],\n", + " ['haskell', 'highlighted_source'],\n", + " ['haskell', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['hicest', 'highlighted_source'],\n", + " ['hicest', 'highlighted_source'],\n", + " ['lisp', 'highlighted_source'],\n", + " ['icon', 'highlighted_source'],\n", + " ['icon', 'highlighted_source'],\n", + " ['icon', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['io', 'highlighted_source'],\n", + " ['io', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['j', 
'highlighted_source'],\n", + " ['j', 'highlighted_source'],\n", + " ['j', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['', ''],\n", + " ['java', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['java', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['javascript', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['lb', 'highlighted_source'],\n", + " ['lb', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['lolcode', 'highlighted_source'],\n", + " ['lua', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", + " ['matlab', 'highlighted_source'],\n", 
+ " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['mirc', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['modula2', 'highlighted_source'],\n", + " ['modula2', 'highlighted_source'],\n", + " ['modula3', 'highlighted_source'],\n", + " ['modula3', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['netrexx', 'highlighted_source'],\n", + " ['netrexx', 'highlighted_source'],\n", + " ['netrexx', 'highlighted_source'],\n", + " ['netrexx', 'highlighted_source'],\n", + " ['newlisp', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['objeck', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['ocaml', 'highlighted_source'],\n", + " ['ocaml', 'highlighted_source'],\n", + " ['octave', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['oorexx', 'highlighted_source'],\n", + " ['progress', 'highlighted_source'],\n", + " ['', ''],\n", + " ['oz', 'highlighted_source'],\n", + " ['', ''],\n", + " ['parigp', 'highlighted_source'],\n", + " ['parigp', 'highlighted_source'],\n", + " ['pascal', 'highlighted_source'],\n", + " ['pascal', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['perl', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['perl6', 'highlighted_source'],\n", + " ['perl6', 'highlighted_source'],\n", + " ['perl6', 'highlighted_source'],\n", + " ['perl6', 
'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['php', 'highlighted_source'],\n", + " ['php', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['pike', 'highlighted_source'],\n", + " ['pike', 'highlighted_source'],\n", + " ['pike', 'highlighted_source'],\n", + " ['', ''],\n", + " ['pli', 'highlighted_source'],\n", + " ['plsql', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['powershell', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['prolog', 'highlighted_source'],\n", + " ['prolog', 'highlighted_source'],\n", + " ['prolog', 'highlighted_source'],\n", + " ['purebasic', 'highlighted_source'],\n", + " ['purebasic', 'highlighted_source'],\n", + " ['', ''],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['q', 'highlighted_source'],\n", + " ['q', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['vb', 'highlighted_source'],\n", + " ['', ''],\n", + " ['vb', 
'highlighted_source'],\n", + " ['rebol', 'highlighted_source'],\n", + " ['rebol', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['rexx', 'highlighted_source'],\n", + " ['rexx', 'highlighted_source'],\n", + " ['', ''],\n", + " ['rexx', 'highlighted_source'],\n", + " ['', ''],\n", + " ['rexx', 'highlighted_source'],\n", + " ['', ''],\n", + " ['ruby', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['sas', 'highlighted_source'],\n", + " ['scala', 'highlighted_source'],\n", + " ['scala', 'highlighted_source'],\n", + " ['scala', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['scheme', 'highlighted_source'],\n", + " ['scheme', 'highlighted_source'],\n", + " ['scheme', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['ruby', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['smalltalk', 'highlighted_source'],\n", + " ['smalltalk', 'highlighted_source'],\n", + " ['smalltalk', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['sql', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['tcl', 'highlighted_source'],\n", + " 
['tcl', 'highlighted_source'],\n", + " ['tcl', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['bash', 'highlighted_source'],\n", + " ['bash', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['vala', 'highlighted_source'],\n", + " ['', ''],\n", + " ['vala', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['vedit', 'highlighted_source'],\n", + " ['vedit', 'highlighted_source'],\n", + " ['', ''],\n", + " ['vhdl', 'highlighted_source'],\n", + " ['vhdl', 'highlighted_source'],\n", + " ['vbnet', 'highlighted_source'],\n", + " ['vbnet', 'highlighted_source'],\n", + " ['python', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['xml', 'highlighted_source'],\n", + " ['xml', 'highlighted_source'],\n", + " ['xml', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['text', 'highlighted_source'],\n", + " ['', ''],\n", + " ['', ''],\n", + " ['', '']]" + ] + }, + "execution_count": 192, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[x.get('class', ['','']) for x in soup.findAll('pre')]\n", + "# y = [x for x in soup.findAll('pre')][0]\n", + "# y" + ] + }, + { + "cell_type": "code", + "execution_count": 188, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + 
"['text', 'highlighted_source']" + ] + }, + "execution_count": 188, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y.get('class')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/feature_testing.ipynb b/feature_testing.ipynb deleted file mode 100644 index d13bfb9..0000000 --- a/feature_testing.ipynb +++ /dev/null @@ -1,1178 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#Initial Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from lang_classifier import *\n", - "from sklearn.pipeline import make_pipeline, make_union\n", - "from sklearn.metrics import classification_report, confusion_matrix" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def setup():\n", - " \"\"\"Load the training benchmark training data and split it for train/test\"\"\"\n", - " df = load_bench_data()\n", - " X = df.text\n", - " y = df.language\n", - " test_data = load_test_data()\n", - " args = train_test_split(X, y, test_size=0.2, random_state=0)\n", - " # X_train, X_test, y_train, y_test\n", - " \n", - " return df, X, y, test_data, args" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - 
"metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "df, X, y, test_data, args = setup() # Load and split the train/test data\n", - "X_train, X_test, y_train, y_test = args" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "####Inspect the test_data dataframe to make sure language and text are properly aligned" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
languagetextguess
item
0perluse warnings;\\nuse strict;\\n\\nmy $initial = jo...NaN
1clojure(defn cf-settings\\n \"Setup settings for campf...NaN
2clojure(ns my-cli.core)\\n\\n(defn -main [& args]\\n (p...NaN
3clojure(extend-type String\\n Person\\n (first-name [...NaN
4clojure(require '[overtone.live :as overtone])\\n\\n(de...NaN
5pythonfrom pkgutil import iter_modules\\nfrom subproc...NaN
6pythonimport re\\nimport subprocess\\n\\ndef cmd_keymap...NaN
7pythonclass NoSuchService(Exception):\\n def __ini...NaN
8pythonfrom collections import namedtuple\\nimport fun...NaN
9javascriptfunction errorHandler(context) {\\n return fun...NaN
10javascriptvar _ = require('lodash'),\\n fs = require('...NaN
11javascript/* Riot v2.0.8, @license MIT, (c) 2015 Muut In...NaN
12javascriptvar r = riot.route = function(arg) {\\n //...NaN
13rubymodule ActiveJob\\n module Core\\n extend Ac...NaN
14rubyrequire 'formula'\\n\\nclass A52dec < Formula\\n ...NaN
15rubymodule Fluent\\n class Input\\n include Conf...NaN
16haskell{-# LANGUAGE ScopedTypeVariables, FlexibleInst...NaN
17haskellreverseDependencies :: ModuleGraph -> M.Map Mo...NaN
18haskell{- git-annex extra config files\\n -\\n - Copyri...NaN
19scheme(define subst-f\\n (lambda (new old l)\\n (c...NaN
20scheme(define add1\\n (lambda (n) (+ n 1)))NaN
21scheme(define-lib-primitive (length lst)\\n (if (nul...NaN
22java/**\\n * Interface to represent a persistence s...NaN
23java/*\\n * Copyright 2002-2008 the original author...NaN
24scalapackage com.github.pathikrit\\n\\nimport scala.a...NaN
25scala/* sbt -- Simple Build Tool\\n * Copyright 2010...NaN
26tclproc isaac::mix {a b c d e f g h} {\\n set a...NaN
27tclproc twitter::follow {nick uhost hand chan arg...NaN
28phpclass View\\n{\\n /**\\n * Data available ...NaN
29phppublic function formatLocalized($format)\\n...NaN
30phpclass Application extends App {\\n\\t/**\\n\\t * @...NaN
31ocamltype name = string\\n\\nlet compare_label label1...NaN
32ocamllet search_compiler_libs () =\\n prerr_endline...NaN
\n", - "
" - ], - "text/plain": [ - " language text guess\n", - "item \n", - "0 perl use warnings;\\nuse strict;\\n\\nmy $initial = jo... NaN\n", - "1 clojure (defn cf-settings\\n \"Setup settings for campf... NaN\n", - "2 clojure (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... NaN\n", - "3 clojure (extend-type String\\n Person\\n (first-name [... NaN\n", - "4 clojure (require '[overtone.live :as overtone])\\n\\n(de... NaN\n", - "5 python from pkgutil import iter_modules\\nfrom subproc... NaN\n", - "6 python import re\\nimport subprocess\\n\\ndef cmd_keymap... NaN\n", - "7 python class NoSuchService(Exception):\\n def __ini... NaN\n", - "8 python from collections import namedtuple\\nimport fun... NaN\n", - "9 javascript function errorHandler(context) {\\n return fun... NaN\n", - "10 javascript var _ = require('lodash'),\\n fs = require('... NaN\n", - "11 javascript /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... NaN\n", - "12 javascript var r = riot.route = function(arg) {\\n //... NaN\n", - "13 ruby module ActiveJob\\n module Core\\n extend Ac... NaN\n", - "14 ruby require 'formula'\\n\\nclass A52dec < Formula\\n ... NaN\n", - "15 ruby module Fluent\\n class Input\\n include Conf... NaN\n", - "16 haskell {-# LANGUAGE ScopedTypeVariables, FlexibleInst... NaN\n", - "17 haskell reverseDependencies :: ModuleGraph -> M.Map Mo... NaN\n", - "18 haskell {- git-annex extra config files\\n -\\n - Copyri... NaN\n", - "19 scheme (define subst-f\\n (lambda (new old l)\\n (c... NaN\n", - "20 scheme (define add1\\n (lambda (n) (+ n 1))) NaN\n", - "21 scheme (define-lib-primitive (length lst)\\n (if (nul... NaN\n", - "22 java /**\\n * Interface to represent a persistence s... NaN\n", - "23 java /*\\n * Copyright 2002-2008 the original author... NaN\n", - "24 scala package com.github.pathikrit\\n\\nimport scala.a... NaN\n", - "25 scala /* sbt -- Simple Build Tool\\n * Copyright 2010... NaN\n", - "26 tcl proc isaac::mix {a b c d e f g h} {\\n set a... 
NaN\n", - "27 tcl proc twitter::follow {nick uhost hand chan arg... NaN\n", - "28 php class View\\n{\\n /**\\n * Data available ... NaN\n", - "29 php public function formatLocalized($format)\\n... NaN\n", - "30 php class Application extends App {\\n\\t/**\\n\\t * @... NaN\n", - "31 ocaml type name = string\\n\\nlet compare_label label1... NaN\n", - "32 ocaml let search_compiler_libs () =\\n prerr_endline... NaN" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use a generic bag of words/naive bayes classifier pipeline as a baseline" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def assess_test_data(pipe):\n", - " test_data['guess'] = pd.DataFrame(pipe.predict(test_data['text']))\n", - " correct = test_data[test_data.language == test_data.guess]\n", - " print('Proportion of test data correctly labeled: {:.3f}'.format(len(correct) / len(test_data)))\n", - " print(test_data[['language', 'guess', 'text']])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train score: 0.989, Test score: 0.949\n", - "Proportion of test data correctly labeled: 0.727\n", - " language guess \\\n", - "item \n", - "0 perl perl \n", - "1 clojure clojure \n", - "2 clojure clojure \n", - "3 clojure clojure \n", - "4 clojure clojure \n", - "5 python python \n", - "6 python clojure \n", - "7 python ruby \n", - "8 python python \n", - "9 javascript javascript \n", - "10 javascript javascript \n", - "11 javascript clojure \n", - "12 javascript php \n", - "13 ruby ruby \n", - "14 ruby clojure \n", - "15 ruby ruby \n", - "16 haskell haskell \n", - "17 haskell haskell \n", - "18 haskell clojure \n", - "19 scheme scheme \n", - "20 
scheme scheme \n", - "21 scheme scheme \n", - "22 java java \n", - "23 java c \n", - "24 scala scala \n", - "25 scala scala \n", - "26 tcl tcl \n", - "27 tcl python \n", - "28 php clojure \n", - "29 php php \n", - "30 php php \n", - "31 ocaml ocaml \n", - "32 ocaml ocaml \n", - "\n", - " text \n", - "item \n", - "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", - "1 (defn cf-settings\\n \"Setup settings for campf... \n", - "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", - "3 (extend-type String\\n Person\\n (first-name [... \n", - "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", - "5 from pkgutil import iter_modules\\nfrom subproc... \n", - "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", - "7 class NoSuchService(Exception):\\n def __ini... \n", - "8 from collections import namedtuple\\nimport fun... \n", - "9 function errorHandler(context) {\\n return fun... \n", - "10 var _ = require('lodash'),\\n fs = require('... \n", - "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", - "12 var r = riot.route = function(arg) {\\n //... \n", - "13 module ActiveJob\\n module Core\\n extend Ac... \n", - "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", - "15 module Fluent\\n class Input\\n include Conf... \n", - "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", - "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", - "18 {- git-annex extra config files\\n -\\n - Copyri... \n", - "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", - "20 (define add1\\n (lambda (n) (+ n 1))) \n", - "21 (define-lib-primitive (length lst)\\n (if (nul... \n", - "22 /**\\n * Interface to represent a persistence s... \n", - "23 /*\\n * Copyright 2002-2008 the original author... \n", - "24 package com.github.pathikrit\\n\\nimport scala.a... \n", - "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", - "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", - "27 proc twitter::follow {nick uhost hand chan arg... 
\n", - "28 class View\\n{\\n /**\\n * Data available ... \n", - "29 public function formatLocalized($format)\\n... \n", - "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", - "31 type name = string\\n\\nlet compare_label label1... \n", - "32 let search_compiler_libs () =\\n prerr_endline... \n" - ] - } - ], - "source": [ - "spam_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", - " ('bayes', MultinomialNB())])\n", - "classifier = assess_classifier(spam_pipe, *args)\n", - "c = classifier.predict(X)\n", - "#print('Guesses: ', c[0:5])\n", - "assess_test_data(spam_pipe)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Inspect the baseline features" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['zin', 'zip', 'zipwith', 'zipwithindex', 'zipwithm', 'zipwithm_', 'ziv', 'ziv1', 'ziv2', 'zizi', 'zoo', 'zotov', 'zq', 'zr', 'zr1', 'zr2', 'zri', 'zrn', 'zrv', 'zrv1', 'zrv2', 'zrzi', 'zrzr', 'zs', 'zt', 'zu', 'zubach', 'zx', 'zy', 'zz']\n", - "Train score: 0.989, Test score: 0.949\n" - ] - } - ], - "source": [ - "cv = CountVectorizer()\n", - "ft = cv.fit_transform(X)\n", - "print(cv.get_feature_names()[-30:])\n", - "spam_pipe = Pipeline([('bag_of_words', cv),\n", - " ('bayes', MultinomialNB())])\n", - "classifier = assess_classifier(spam_pipe, *args)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###Now try making a limited custom vocabulary to discriminate between languages" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false, - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['}', ')', 'var', 'fn', 'function', 'end', 'defn', '===', 'lambda']\n", - "Train score: 0.329, Test score: 0.436\n", - "Proportion of test data correctly labeled: 0.485\n", - " language guess 
\\\n", - "item \n", - "0 perl ruby \n", - "1 clojure clojure \n", - "2 clojure clojure \n", - "3 clojure ruby \n", - "4 clojure clojure \n", - "5 python ruby \n", - "6 python ruby \n", - "7 python ruby \n", - "8 python python \n", - "9 javascript php \n", - "10 javascript javascript \n", - "11 javascript javascript \n", - "12 javascript javascript \n", - "13 ruby ruby \n", - "14 ruby ruby \n", - "15 ruby ruby \n", - "16 haskell ruby \n", - "17 haskell ruby \n", - "18 haskell ruby \n", - "19 scheme scheme \n", - "20 scheme scheme \n", - "21 scheme scheme \n", - "22 java ruby \n", - "23 java ruby \n", - "24 scala ruby \n", - "25 scala ruby \n", - "26 tcl ruby \n", - "27 tcl ruby \n", - "28 php php \n", - "29 php php \n", - "30 php php \n", - "31 ocaml haskell \n", - "32 ocaml ruby \n", - "\n", - " text \n", - "item \n", - "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... \n", - "1 (defn cf-settings\\n \"Setup settings for campf... \n", - "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", - "3 (extend-type String\\n Person\\n (first-name [... \n", - "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", - "5 from pkgutil import iter_modules\\nfrom subproc... \n", - "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", - "7 class NoSuchService(Exception):\\n def __ini... \n", - "8 from collections import namedtuple\\nimport fun... \n", - "9 function errorHandler(context) {\\n return fun... \n", - "10 var _ = require('lodash'),\\n fs = require('... \n", - "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", - "12 var r = riot.route = function(arg) {\\n //... \n", - "13 module ActiveJob\\n module Core\\n extend Ac... \n", - "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", - "15 module Fluent\\n class Input\\n include Conf... \n", - "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", - "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", - "18 {- git-annex extra config files\\n -\\n - Copyri... 
\n", - "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", - "20 (define add1\\n (lambda (n) (+ n 1))) \n", - "21 (define-lib-primitive (length lst)\\n (if (nul... \n", - "22 /**\\n * Interface to represent a persistence s... \n", - "23 /*\\n * Copyright 2002-2008 the original author... \n", - "24 package com.github.pathikrit\\n\\nimport scala.a... \n", - "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", - "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", - "27 proc twitter::follow {nick uhost hand chan arg... \n", - "28 class View\\n{\\n /**\\n * Data available ... \n", - "29 public function formatLocalized($format)\\n... \n", - "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", - "31 type name = string\\n\\nlet compare_label label1... \n", - "32 let search_compiler_libs () =\\n prerr_endline... \n" - ] - } - ], - "source": [ - "cv = CountVectorizer(vocabulary=['}', ')', 'var', 'fn', 'function', 'end', 'defn',\n", - " '===', 'lambda']) #, '(define', 'elif'])\n", - "ft = cv.fit_transform(X)\n", - "print(cv.get_feature_names()[-30:]) \n", - "spam_pipe = Pipeline([('bag_of_words', cv),\n", - " ('bayes', MultinomialNB())])\n", - "classifier = assess_classifier(spam_pipe, *args)\n", - "assess_test_data(spam_pipe)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Vocabulary|Results\n", - "----------|-------\n", - "} | Train score: 0.122, Test score: 0.137\n", - "}, ) | Train score: 0.122, Test score: 0.137\n", - "}, ), var | Train score: 0.161, Test score: 0.179\n", - "}, ), var, fn | Train score: 0.195, Test score: 0.248\n", - "}, ), var, fn, function| Train score: 0.287, Test score: 0.308\n", - "}, ), var, fn, function, end | Train score: 0.278, Test score: 0.325\n", - "}, ), var, fn, function, end, defn | Train score: 0.302, Test score: 0.359\n", - "}, ), var, fn, function, end, defn, === | Train score: 0.300, Test score: 0.368\n", - "}, ), var, fn, function, end, defn, ===, lambda | Train score: 0.334, Test 
score: 0.427\n", - "}, ), var, fn, function, end, defn, ===, lambda | Proportion of test data correctly labeled: 0.485\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The above didn't work nearly as well as the automatically-generated vocabulary with thousands of words. Adding new words quickly yielded diminishing returns. Since we want to get above 80% accuracy, it looks like we will need to add our own features." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##Define new features" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def longest_run_of_caps_feature(text):\n", - " \"\"\"Find the longest run of capitol letters and return their length.\"\"\"\n", - " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", - " if runs:\n", - " return len(runs[-1])\n", - " else:\n", - " return 0" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def percent_character_feature(char):\n", - " \"\"\"Return percentage of text that is a particular char compared to total text length.\"\"\"\n", - " def feature_fn(text):\n", - " chars = text.count(char)\n", - " return chars / len(text)\n", - " return feature_fn\n", - "\n", - "# def percent_characters_feature(char_list):\n", - "# \"\"\"\n", - "# Return percentage of text for each char/substring in char_list,\n", - "# compared to total text length.\n", - "# \"\"\"\n", - "# def feature_fn(text):\n", - "# hits = []\n", - "# for char in char_list:\n", - "# hits.append(text.count(char) / len(text))\n", - "# return hits\n", - "# return feature_fn\n", - "\n", - "def count_word_feature(word):\n", - " \"\"\"Return percentage of text that is a particular char compared to total text length.\"\"\"\n", - " def feature_fn(text):\n", - " num_words = text.count(word)\n", - " return num_words\n", - " return 
feature_fn\n", - "\n", - "def longest_line_feature(text):\n", - " lines = re.findall(r'.*?\\n', text)\n", - " if not lines:\n", - " return len(text)\n", - " lens = [len(line) for line in lines]\n", - " return max(lens) # Includes newline character\n", - "\n", - "def longest_run_of_parens(text):\n", - " matches = re.findall(r'\\)*', text)\n", - " if not matches:\n", - " return 0\n", - " lens = [len(match) for match in matches]\n", - " return max(lens)\n", - "\n", - "def nested_dots(text):\n", - " matches = re.findall(r'\\.([^\\s]*\\.)*', text)\n", - " if not matches:\n", - " return 0\n", - " lens = [match.count('.') for match in matches]\n", - " return max(lens)\n", - "\n", - "def max_paren_depth(text):\n", - " max_depth = 0\n", - " depth = 0\n", - " for char in text:\n", - " if char == '(':\n", - " depth += 1\n", - " max_depth = depth if depth > max_depth else max_depth\n", - " if char == ')':\n", - " depth -= 1\n", - " return max_depth\n", - "\n", - "def max_curly_brace_depth(text):\n", - " max_depth = 0\n", - " depth = 0\n", - " for char in text:\n", - " if char == '{':\n", - " depth += 1\n", - " max_depth = depth if depth > max_depth else max_depth\n", - " if char == '}':\n", - " depth -= 1\n", - " return max_depth\n", - "\n", - "def percent_words_match_regex(regex):\n", - " \"\"\"Return percentage of text that is a matches regex compared to total number words\"\"\"\n", - " def feature_fn(text):\n", - " num_matches = 0\n", - " #words = re.findall(r'[^[\\s]]+\\b', text)\n", - " words = text.split()\n", - " for word in words:\n", - " if re.search(regex, word):\n", - " num_matches +=1\n", - " if len(words) == 0:\n", - " return 0\n", - " return num_matches / len(words)\n", - " return feature_fn\n", - "\n", - "def count_endings_feature(regex): \n", - " def feature_fn(text):\n", - " num_matches = 0\n", - " lines = re.findall(r'.*?\\n', text)\n", - " if not lines:\n", - " return len(text)\n", - "\n", - " words = text.split()\n", - " for word in words:\n", - " if 
re.search(regex + r'\\w*$', word):\n", - " num_matches +=1\n", - " if len(words) == 0:\n", - " return 0\n", - " return num_matches #/ len(words)\n", - " return feature_fn\n", - "\n", - "def count_beginnings_feature(regex): \n", - " def feature_fn(text):\n", - " num_matches = 0\n", - " lines = re.findall(r'.*?\\n', text)\n", - " if not lines:\n", - " return len(text)\n", - "\n", - " words = text.split()\n", - " for word in words:\n", - " if re.search(r'\\w*' + regex, word):\n", - " num_matches +=1\n", - " if len(words) == 0:\n", - " return 0\n", - " return num_matches #/ len(words)\n", - " return feature_fn\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###Look at the new features on a simple python program" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##Feature Ideas\n", - "- +Longest line\n", - "- bag of words with chars, ngrams, let\n", - "- +run of )\n", - "- % _, }, :\\n, \"\"\"\n", - "- key words: def, defn, var `__x__`, module, end, ->, ::, ;, /*, //, case, final, extends, public, protected, $word, @param, self, this\n", - "- + depth nested dots (or % nested dots)\n", - "- +() nest depth\n", - "- ignore/strip comments?\n", - "-Hyphenated or camel or underscored\n", - "-Indentation...\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 2.00e+00, 1.00e+00, 8.00e+00, 0.00e+00, 0.00e+00,\n", - " 0.00e+00, 0.00e+00, 0.00e+00, 0.00e+00, 0.00e+00,\n", - " 0.00e+00, 0.00e+00, 0.00e+00, 3.00e+00, 0.00e+00,\n", - " 0.00e+00, 0.00e+00, 0.00e+00, 0.00e+00, 1.00e+00,\n", - " 4.35e-03]])" - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "txt = '''\n", - "# Test python program\n", - "class MyClass:\n", - " \"\"\"MyClass is a class to do something\"\"\"\n", - " def __init__(self, name='name'):\n", - " self.name = name\n", - 
" def longest_run_of_caps_feature(text):\n", - " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", - " if len(runs) == 0:\n", - " return 0\n", - " longest = runs[-1]\n", - " return len(longest)\n", - " print('{}'.format(self.name))\n", - " $thing \n", - " @thing\n", - " :thing\n", - " end\n", - " end\n", - "end\n", - "\n", - "'''\n", - "featurizer = FunctionFeaturizer(\n", - "# longest_run_of_caps_feature,\n", - "# percent_character_feature('.'),\n", - "# longest_line_feature,\n", - "# longest_run_of_parens,\n", - "# nested_dots,\n", - " max_paren_depth,\n", - "# percent_words_match_regex(r'\\$\\w'),\n", - "# percent_words_match_regex(r'\\@\\w'),\n", - "# percent_words_match_regex(r':\\w'),\n", - "# percent_character_feature(r';'),\n", - " count_word_feature('}'),\n", - " count_word_feature(')'),\n", - " count_word_feature('var'),\n", - " count_word_feature('fn'),\n", - " count_word_feature('function'),\n", - "# count_word_feature('end'),\n", - " count_word_feature('defn'),\n", - " count_word_feature('==='),\n", - "# count_word_feature('lambda'),\n", - " count_word_feature(';'),\n", - " count_word_feature('public'),\n", - "# count_word_feature('val'),\n", - "# count_word_feature('=>'),\n", - " count_word_feature('set'),\n", - " count_word_feature('extends'),\n", - " count_word_feature('module'),\n", - " count_endings_feature(r'end'),\n", - " count_beginnings_feature(r'let'),\n", - " count_word_feature('->'), #.758\n", - " count_beginnings_feature(r'\\(define'), #.818 got scheme\n", - " count_beginnings_feature(r'\\{-'), #.848 less haskell\n", - " count_word_feature('object'), #\n", - " max_curly_brace_depth, #.879 got javascript\n", - "# count_beginnings_feature(r'from'), #added ^ to start of regex, numbers dropped, so removed\n", - " percent_character_feature(']'),\n", - "# count_word_feature('.'),\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " )\n", - "featurizer.transform([txt])" - ] - }, - { - "cell_type": "code", - "execution_count": 138, 
- "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "3" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "count_endings_feature('end')(txt)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###Test the pipeline on the actual test data" - ] - }, - { - "cell_type": "code", - "execution_count": 139, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# pipe = Pipeline([('fnfeaturizer', featurizer),\n", - "# ('bayes', MultinomialNB())])\n", - "pipe = make_pipeline(featurizer, MultinomialNB())\n", - "#pd.DataFrame(args[0]).index.values/pd.DataFrame(args[2]).index.values #, args[2])" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train score: 0.786, Test score: 0.803\n", - "Proportion of test data correctly labeled: 0.879\n", - " language guess \\\n", - "item \n", - "0 perl perl \n", - "1 clojure clojure \n", - "2 clojure clojure \n", - "3 clojure python \n", - "4 clojure clojure \n", - "5 python ruby \n", - "6 python python \n", - "7 python python \n", - "8 python python \n", - "9 javascript javascript \n", - "10 javascript javascript \n", - "11 javascript javascript \n", - "12 javascript javascript \n", - "13 ruby ruby \n", - "14 ruby ruby \n", - "15 ruby ruby \n", - "16 haskell haskell \n", - "17 haskell haskell \n", - "18 haskell haskell \n", - "19 scheme scheme \n", - "20 scheme scheme \n", - "21 scheme scheme \n", - "22 java tcl \n", - "23 java java \n", - "24 scala scala \n", - "25 scala scala \n", - "26 tcl tcl \n", - "27 tcl tcl \n", - "28 php php \n", - "29 php php \n", - "30 php php \n", - "31 ocaml haskell \n", - "32 ocaml ocaml \n", - "\n", - " text \n", - "item \n", - "0 use warnings;\\nuse strict;\\n\\nmy $initial = jo... 
\n", - "1 (defn cf-settings\\n \"Setup settings for campf... \n", - "2 (ns my-cli.core)\\n\\n(defn -main [& args]\\n (p... \n", - "3 (extend-type String\\n Person\\n (first-name [... \n", - "4 (require '[overtone.live :as overtone])\\n\\n(de... \n", - "5 from pkgutil import iter_modules\\nfrom subproc... \n", - "6 import re\\nimport subprocess\\n\\ndef cmd_keymap... \n", - "7 class NoSuchService(Exception):\\n def __ini... \n", - "8 from collections import namedtuple\\nimport fun... \n", - "9 function errorHandler(context) {\\n return fun... \n", - "10 var _ = require('lodash'),\\n fs = require('... \n", - "11 /* Riot v2.0.8, @license MIT, (c) 2015 Muut In... \n", - "12 var r = riot.route = function(arg) {\\n //... \n", - "13 module ActiveJob\\n module Core\\n extend Ac... \n", - "14 require 'formula'\\n\\nclass A52dec < Formula\\n ... \n", - "15 module Fluent\\n class Input\\n include Conf... \n", - "16 {-# LANGUAGE ScopedTypeVariables, FlexibleInst... \n", - "17 reverseDependencies :: ModuleGraph -> M.Map Mo... \n", - "18 {- git-annex extra config files\\n -\\n - Copyri... \n", - "19 (define subst-f\\n (lambda (new old l)\\n (c... \n", - "20 (define add1\\n (lambda (n) (+ n 1))) \n", - "21 (define-lib-primitive (length lst)\\n (if (nul... \n", - "22 /**\\n * Interface to represent a persistence s... \n", - "23 /*\\n * Copyright 2002-2008 the original author... \n", - "24 package com.github.pathikrit\\n\\nimport scala.a... \n", - "25 /* sbt -- Simple Build Tool\\n * Copyright 2010... \n", - "26 proc isaac::mix {a b c d e f g h} {\\n set a... \n", - "27 proc twitter::follow {nick uhost hand chan arg... \n", - "28 class View\\n{\\n /**\\n * Data available ... \n", - "29 public function formatLocalized($format)\\n... \n", - "30 class Application extends App {\\n\\t/**\\n\\t * @... \n", - "31 type name = string\\n\\nlet compare_label label1... \n", - "32 let search_compiler_libs () =\\n prerr_endline... 
\n" - ] - } - ], - "source": [ - "pipe.fit(args[0], args[2]) # X_train, y_train\n", - "pipe.score(args[1], args[3])\n", - "classifier = assess_classifier(pipe, *args)\n", - "assess_test_data(pipe)" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# print(confusion_matrix(classifier.predict(args[1]), args[3]))" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": { - "collapsed": false, - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Confusion matrix, without normalization\n", - "[[ 3 0 0 1 0 1 1 0 3 0 0 0 0 0 0]\n", - " [ 0 11 0 0 0 0 0 0 0 0 1 0 0 0 0]\n", - " [ 0 1 7 0 0 0 0 0 0 0 0 0 0 0 0]\n", - " [ 0 0 0 5 0 2 0 0 0 0 0 0 0 0 0]\n", - " [ 0 0 0 0 6 0 0 0 0 0 1 0 0 0 0]\n", - " [ 0 0 0 1 0 5 0 0 1 0 0 0 0 0 0]\n", - " [ 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]\n", - " [ 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0]\n", - " [ 1 0 0 0 0 0 0 0 4 0 0 0 0 0 0]\n", - " [ 1 0 0 2 0 1 0 0 1 9 0 0 0 0 0]\n", - " [ 0 0 1 0 0 0 0 0 0 0 5 0 1 0 0]\n", - " [ 0 0 0 0 0 0 0 0 0 0 0 15 0 0 0]\n", - " [ 0 0 0 0 1 0 0 0 0 0 0 0 12 0 0]\n", - " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0]\n", - " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAewAAAG4CAYAAACHNdSBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xe8XVWd/vHPEwgSSigiSiiGQcCh904SLPwQEMaCWEYE\nR8cBGWdsoyMMhKaoYwNRGZEqFlAcFUVAhySAQCghCSJNQWnSOwFC8vz+2Psmh8Pt5+x77r7nefM6\nr+yy9nevc+7lfs9ae+21ZZuIiIgY3cZ1ugIRERExsCTsiIiIGkjCjoiIqIEk7IiIiBpIwo6IiKiB\nJOyIiIgaSMKO6DBJEyT9UtLjkn7cQpz3Sbq4nXXrFEm7S7ql0/WIGE2U+7AjBkfSe4FPABsDTwE3\nAifYvrLFuO8HDgd2tr245YqOcpIWA6+z/edO1yWiTtLCjhgESZ8AvgYcD6wJrAucAuzXhvCvBW7r\nhmTdQH3ukJYdyYpE1EUSdsQAJK0CHAMcZvt/bS+wvcj2r2x/pizzCklfl3Rv+fqapOXKfdMk3SPp\nE5IekHSfpIPLfccA/wUcKOkpSR+UNF3SOQ3nnyxpsaRx5frBkv4k6UlJfy5b/j3bL284bhdJ15Zd\n7bMl7dywb4akYyVdUca5WNIr+3j/PfX/tKQHy/r/g6S9Jd0m6RFJn20ov4OkqyQ9VpY9WdL4ct+s\nstjc8v0e0BD/PyTdD3yv3HZ3ecwG5Tm2LtcnSXpI0pSWfrARNZOEHTGwnYHlgZ/1U+YIYAdgy/K1\nA3Bkw/5XAxOBScA/AadIWsX20cDngR/ZXtn26UCf16kkrQh8A9jL9sSybjf2Um514FfA14HVga8C\nv5K0WkOx9wAHU/QYLAd8qp/392rgFcBawFHAacD7gK2B3YGjJL22LPsi8G/AK8v6vRE4DMB2T5Ld\nony/5zfEXw1YD/hI44lt/wn4DPB9SROAM4AzbM8iooskYUcM7JXAwwN0Wb8XONb2w7YfpmiRv79h\n/8Jy/yLbFwFPU1wLh6J7uLGLuM/u4tJiYHNJE2w/YPvmXsrsA9xq+1zbi23/CLiFpV34pkh6d9h+\nDjgP2Kqfcy6kuF6/CPgxxZeAr9t+pjz/zT3H277B9uzyvH8B/geYOoj3dLTthWV9XsL2acAdwGyK\n5H7EAPEixpwk7IiBPQKs0dMl3YdJwF8a1v9ablsSoynhPwusNNSK2H4GOBD4F+A+SRdK2riXopPK\nOjT6S1Od/tawvGCA+jzipSNUF5T/PtB0/IoAkjYq63W/pCeAEyi+9PTnIdsvDFDmNGBT4GTbCwco\nGzHmJGFHDOwq4Hngbf2UuQ+Y3LC+XrltOJ4GVmhYf03jTtuX2N6z3H4L8N1eYtxLMZit0WvL7VX7\nNkWL+3W2V6FoDQ/0t6bf21UkrUTRvX8acExT135EV0jCjhiA7ScortueIml/SStIGi/pLZK+WBb7\nIXCkpDUkrVGWP6evmAO4EZgiad1ywNt/9uyQtGZZhxUpuqmfARb1EuMiYCNJ75G0rKQDgdcDFzaU\nGajrfbhWorjt7VlJrwcObdr/ALDBEGN+A5ht+58prs1/p+VaRtRMEnbEINj+KsU92EcCD1J0Nx/G\n0oFoxwPXAfPK13XltiUh+gvfuN/2bymuE88DrgV+2bB/HPBxipbyIxQDvg5tjmP7EWBf4JPAwxQD\nyva1/WgfdTID17G/9Uaforim/yTF9esfNZWfDpxVjiJ/Zz/nNoCk/YE9Wfo+PwFsI+k9/dQhYszJ\nxCkRERHDJOl0ikGeD9revNw2HfgQ8FBZ7D9t/6aXY/eiuNSzDHCa7S82l3lJ+STsiIiI4ZG0O8W4\nk7MbEvbRwFNlz1xfxy0D3Aq8iaLH7FrgPbb/2Ncx6RKPiIgYJtuXA4/1smugMSI7AHfYvqu86+FH\nwP79HZCEHRER0X7/KmmupO9JWrWX/WsDdzes31Nu61MSdkRER
Ht9G1ifYjKh+4Gv9FJmyNejM8l+\nRSRlcEBExCDZruo2wyXa9Xd5oLrafrDhnKdR3OnR7F6Khwj1WJeild2nJOwKzfnLk4Mq952vfZ5/\n+fjnBlX29ZNWHvT5jz92OkceNX3Q5auK++SCwU1K9eXPH8unP3fUoONOnDB+UOWqqi8Mvc5VxB3s\n5wBD/yxuue+pQZUbyu8wDP73eLT8Do+G2GM57oTxlefqJZbf6qMtHf/cjacMWEbSWrbvL1ffBszv\npdh1wIaSJlNMsnQgxfz+fUrCjoiI7tHvDMPDCCf9kGKu/DXKJ8wdDUyTtBVFt/edlA+0kTQJ+K7t\nfWy/KOlw4GKK27q+198IcUjCjoiIbqL2tuZt99YqPr2PsvdR3LPds34RxayEg5KEPQpst9PulcSd\nMnVareLusvtAD3QanqrqC9XVuW6fRX6Hq4+duG3S5hb2SMrEKRWR5MFewx6KoVzDHi2Gck14KIZy\n7XYoqqpvVar6HGDw17CHqo6/x1GdCeM1YoPOlt/u4y3FeO66r41IXXtT368aA5A0XdInByhzjKQ3\njlSdIiIihmssd4kP2HVg++ihBJS0rO0Xh1+liIjoqBp3ide35k0kHVTOKnOjpLOb9m0l6epy/wU9\ns85IOlPSO8rluyStXi5vJ+mycnm6pHMkXUHxhKE1JP1E0uzytcsIv9WIiBguqbVXB42JFrakTYEj\ngJ1tP1o+3P5jLG1lnw181Pblko6hGHb/cV76WL/+WuSvB3az/bykHwBfs32lpPWA3wCbtP9dRURE\n29W4hT0mEjbwBuC8nmf92n5M5TchSROBVcoJ2gHOAs4fQmwDv7D9fLn+JuDvtfSb1sqSVrD9bIvv\nISIiok9jJWGbgZ+M0qOvci+y9BLB8k37GpOxgB1tvzDQib7ztc8vWd5up93Zbudqbn2JiKiTWTNn\nMGvmjM6cvMPd2q0YKwn7/4CfSfpq2SW+erldtp+U9Jik3WxfAbwfmNFLjLuA7Si6uN/RsL35p3sJ\nRXf7f0Nxfdz2jb1VaihTNUZEdIspU6e95D7tE447ZuROni7xzrJ9s6QTgJmSFgFzKBJwz3XpDwDf\nkbQC8CfgkF7CHAN8T9KTFAm98dp24/XtjwGnSJpL8fnNBA5r6xuKiIhqpIXdebbPphhc1tu+ucDO\nvex6JfBIWeYKYONejj2maf0R4N2t1jciImIoxkzCHipJpwMTgCs6XZeIiBgh6RKvH9sf7HQdIiJi\nhKVLPCIiogbSwo6IiKiBGifs+tY8IiKii6SFHRER3WNcrmFHL6p45u8Wn/tN22MCzPv8XpXEhWqf\n11yFutW3SnV7bnXdnr0eHVDjLvEk7IiI6B41HiVe368aERERXSQt7IiI6B7pEo+IiKiBGneJJ2FH\nRET3SAs7IiKiBmrcwq7vV402knRXzzO0JV3Z6fpEREQ9SDpd0gOS5jdsO07SXEk3SvqdpHX7OPYu\nSfMkzZE0e6BzJWEXljzv2vaunaxIRERUSONae73cGUDzRBZfsr2l7a2A/wWO7qM2BqbZ3tr2DgNV\nvdKELemghm8ZZ0l6raT/K7f9tudbh6QzJX1L0lWS/iRpWln+ZklnNMR7WtKXJN0k6VJJO0maWR7z\n1rLM8pLOKL+13CBpWrn9YEkXSLpI0m2SvthHnZ8u/11L0qzym898Sbs21OGrZR1+K2mNKj/DiIho\nI6m1VxPblwOPNW17qmF1JeDh/mo02KpXlrAlbQocAexRfsv4d+CbwBm2twTOBU5qOGRV2zsDHwd+\nAXwJ2BTYXNIWZZkVgN/Z3gx4CjgWeAPwtnIZ4KPAIttbAO8BzpL0inLflsC7gM2BAyWt3UvVe1rb\n7wV+Y3vr8ri5DXW4tqzDTPr+5hQREaNN+1vYvZ9GOkHSX4EPACf2UczAbyVdJ+nDA8WssoX9BuA8\n248C2H4M2An4Qbn/+8Bu5
bKBX5bLNwF/s/0H2wb+AEwu971g++JyeT5wme1F5TE9ZXYtY2P7VuAv\nwEblOX5n+ynbzwM3A6/tp/6zgUMkHQ1sbvvpcvti4Me9vIeIiAgAbB9hez3gTOBrfRTbtWwUvgX4\nqKTd+4tZ5Shx03tTv6/m/wvlv4uB5xu2L2ZpPRc2bX8BwPZiSY3vpa9zNMZdRD/v3/bl5Ye3L3Cm\npK/aPqepmGi4/t3s+GOnL1meMnUaU6ZO66toRETXmDVzBrNmzujMyYc4SnzRI7ez+JE7WjnjD4Bf\n97bD9v3lvw9J+hmwA3B5X4GqTNj/B/ysTHSPlqOwfw+8m6Jl+j5gVgXnvbyMfZmkjYD1gFuAbYcS\nRNJ6wL22T5O0PLA1cA5Fr8QBFK3s99LPh3vkUdOHU/+IiDGtuQFzwnHHjNzJh3gf9jJrbMwya2y8\nZH3RHQM/gEnShrZvL1f3B+b0UmYFYBnbT0laEdgT6PeDqCxh275Z0gnATEmLgBuAfwXOkPRp4EHg\nkMZD+limn+29HfMt4NuS5gEvAh+wvVCS+4nbW8w9gE9JWkhxvfygcvszwA6SjgQeAA4cRMyIiBgN\n2jxxiqQfAlOBNSTdTTGuaW9JG1P05P4JOLQsOwn4ru19gNcAF6ho8S8LnGv7kn7PVVwmjsGS9JTt\nAZ85KMkLFrb/s63j4zUjRkoer1lPE8YL25XPaCLJy+97SksxnrvwoyNS195kprOhyzeciIi6qvFM\nZ0nYQ2R7YqfrEBERw5S5xCMiImogLeyIiIgaqHELu741j4iI6CJpYUdERPdIl3iMlCv+642VxD3m\nklsriQtw9J4bD1woog1y+1UMREnYERERo1+dE3auYUdERNRAWtgREdE96tvATsKOiIjuUecu8STs\niIjoGnVO2LmGHRERUQNpYUdERNdIC7smJB0s6eRO1yMiIjpDUkuvTuq2FnZLj8aUNM724nZVJiIi\nRlh9G9hjp4Ut6SBJcyXdKOlsSQdIml+uz+gpBkySdJGk2yR9seH4b0m6VtJNkqY3bL9L0omSrgcO\nkDRD0tclzSnjbz+ibzQiIoYtLewOk7QpcASws+1HJa0GzAT2tH2/pMZnWG9Vvl4AbpV0ku17gSNs\nPyZpGeC3kjazfRNFq/xh29uW5/oXYILtrSXtDpwObD5ibzYiIrrSmEjYwBuA82w/ClAm3iuBsySd\nB1xQljPwO9tPAUi6GXgtcC9woKQPU3wmawGbADeVx/246Xw/LM9zuaSJkibafrK5UscfO33J8pSp\n05gydVob3mpERL3NmjmDWTNndOTcnW4lt2KsJGzTdGXC9qGSdgD2Aa6XtG1Z5vmGYouAZSWtD3wS\n2M72E5LOAJZvKPfMIM7/MkceNX1IbyIiohs0N2BOOO6YETt3nRP2WLmG/X8U15dXB5C0uqQNbM+2\nfTTwELAuvSdWAStTJOUnJb0aeMsA5zuwPM9uwOM9LfaIiBjdcg27w2zfLOkEYKakRcAcYKKkDSkS\n8m9tz5W0FS9P2rY9T9Ic4BbgbuCKAU75nKQbKD6/D7b1zURERPRiTCRsANtnA2cPUOYs4KyG9bc2\nLB/SxzHr97L5HNsfH2ZVIyKiU+rbIz52EnZERMRAOt2t3Yok7CGyvUen6xAREcNT54Q9VgadRURE\njDhJp0t6QNL8hm1flvTHcjKvCySt0sexe0m6RdLtkj4z0LmSsCMiomtUMEr8DGCvpm2XAJva3hK4\nDfjPXuqxDPDN8thNgPdI+vv+6p6EHRER3UMtvprYvhx4rGnbpQ3PnbgGWKeXmuwA3GH7LtsLgR8B\n+/dX9VzDjoiIrtGBa9gfpJwds8naFLcR97gH2LG/QEnYNTNxwvhK4h6958aVxAW48Kb7Kom772aT\nKol776MLKom79uoTKokbEYM31IT9wn1/4IX7/zDccx0BvGD7B73sHvLTI5OwIyIi+rDcpE1
ZbtKm\nS9afmXP+oI6TdDCwN/DGPorcSzEDZ491KVrZfUrCjoiIrjESXeKS9gI+DUy1/Vwfxa4DNpQ0GbiP\nYsrr9/QXN4POIiKia7R7lLikHwK/BzaWdLekDwInAysBl0qaI+lbZdlJkn4FYPtF4HDgYuBm4Me2\n/9hf3dPCjoiI7tHmBrbt3lrFp/dR9j6KJ0j2rF8EXDTYc6WFHRERUQNpYUdERNfI1KQdJGly45Rw\nw4xxsKSTh1B+hqRtyuW7ep7DHRERo1ueh11/Q70fzn0sR0TEKNbppNuK2rewS8tI+h9JN0m6WNLy\nkj4sabakGyX9RNIEAEkHSJpfbp9RHr/kJyhpH0m/l/RKSXuWy9dLOk/Sip14cxEREWMlYW8IfNP2\nZsDjwDuAn9rewfZWwB+BfyrL/hewZ7l9v3KbASS9DfgM8BaKJH4E8Ebb2wLXA58YofcTERFVaPNc\n4iNprHSJ32l7Xrl8PTAZ2FzS8cAqFPfD/abcfyVwlqTzgAvKbQLeAGwHvNn205L2pXiCyu/LLpTl\nKO61G7Tjj52+ZHnK1GlMmTptqO8rImLMmTVzBrNmzujIuevcJT5WEvbzDcuLgAkUjzzb3/Z8SR8A\npgHYPlTSDhT3wl0vaVuKFvafgPWBjSmSPsCltt873EodedT04R4aETFmNTdgTjjumBE7d50T9ljp\nEu/NSsDfJI0H/rFno6QNbM+2fTTwEEvncv0L8E7gbEmbUDwSbVdJG5THrShpwxF9BxER0VZ1HiU+\nVhJ2byO1j6JIuldQXMPuKfMlSfPKW8GutD23J4btW4H3AedTJPyDgR9Kmks59Vx1byEiIqJvte8S\nt30XsEXD+lcadn+nl/Lv6GXbWcBZ5fKNQM+jWe6keMh4c/k9GpbXH2bVIyJihHW6ldyK2ifsiIiI\nQatvvk7CjoiI7lHnFvZYuYYdERExpqWFHRERXaPOLewk7IiI6Bo1ztdJ2BER0T3Swo7ox76bTaok\n7mnX3FlJ3A/tmDv16urJBQsriTtxwvhK4kYMRRJ2RER0jRo3sJOwIyKie6RLPCIiogZqnK+TsCMi\nonuMG1ffjJ2JUyIiImogLeyIiOgade4S7+oWtqQrO12HiIgYOe1+Hrak0yU9UD6yuWfb6pIulXSb\npEskrdpHXe4qH/c8R9Lsgere1Qnb9q6drkNERIwcqbVXL84A9mra9lngUtsbAb8r13tjYJrtrW2/\n7FHOzbo6YUt6WtKKkn4r6frym85+5b4TJR3WUHa6pE/2VT4iIrqP7cuBx5o27wecVS6fBfxDPyEG\n3Unf1Qmb4tvNAuBttrcF3gB8pdz3I+BdDWUPKLc910f5iIgY5drdJd6HV9t+oFx+AHh1H+UM/FbS\ndZI+PFDQDDorvrR8QdLuwGJgkqQ1bd8oaU1JawFrAo/ZvlfS+D7KP9i5txAREYMx0hOn2LYk97F7\nV9v3S3oVcKmkW8oWe6+SsOF9wBrANrYXSboTWL7cdz7wTuA1FK3rgcq/xPHHTl+yPGXqNKZMnVZF\n/SMiamXWzBnMmjmjI+cear5++q4befquuUM9zQOSXmP7b2Wjr9cGne37y38fkvQzYAcgCbsfqwAP\nlsl3D+C1Dft+DJwGvBKYUm6b2E/5lzjyqOnV1DgiosaaGzAnHHdM5yozgJUmb8VKk7dasv7AzHMG\nc9gvgA8AXyz//d/mApJWAJax/ZSkFYE9gX4/iG5P2AbOBX4paR5wHfDHJTvtmyWtBNzTcD2iz/IR\nETG6tbtLXNIPganAGpLuBo4CTgTOk/RPwF2U46EkTQK+a3sfip7bC8r6LAuca/uS/s7VtQlb0iuB\nR20/AuzSVznbWzSt91s+IiJGr3Zfwrb9nj52vamXsvcB+5TLfwa2ai7Tn65M2OW3nMuAL3e6LhER\nMXLytK6aKb/lbNzpekRExMiqcb7u+vuwIyIiaqErW9g
REdGd0iUeERFRAzXO10nYERHRPdLCjujH\nkwsWVhL3QzuuX0ncC2+6r5K4+242qZK4VarqZzdxwvhaxY0YDZKwIyKia9S4gZ2EHRER3SNd4hER\nETVQ43yd+7AjIiLqIC3siIjoGukSj4iIqIEa5+vR1SUu6cpO1wGKh4NIOn+AMqtIOnSk6hQREa2T\n1NKrk0ZVwra9a6frIGlZ2/fZPmCAoqsBh41EnSIioj2SsNtE0tOSVpT0W0nXS5onab9y34mSDmso\nO13SJ/spv6KkX0m6UdJ8ST0PEN9e0pXl9qslrSTpYEm/kPQ74FJJr5V0U1n+YEk/l3SZpNskHVVW\n4URgA0lzJH1xRD+oiIjoOqPtGraBBcDbbD8laQ3gKuAXwI+ArwPfKsseAOwJPNdH+b2Ae23vAyBp\noqTlyjjvsn29pJXK8wFsDWxu+3FJk8u69Nge2LQse62kXwGfATa1vXUFn0NERFQg17DbaxzwBUlz\ngUuBSZLWtH0jsKaktSRtCTxm+96+ygPzgDeXLfPdbD9J8Qzs+21fD2D7aduLKJLzJbYf76NOl9h+\nzPZzwAXAbtW9/YiIqEqdu8RHWwsb4H3AGsA2thdJuhNYvtx3PvBO4DUULeU+y9u+XdLWwD7A8WV3\n98/6Oe+zg6yfgMWDKXj8sdOXLE+ZOo0pU6cN8hQREWPXrJkzmDVzRkfOXecW9mhM2KsAD5bJdw/g\ntQ37fgycBrwSmFJum9hbeUlrUbTCz5X0BPBBiuvOa0nazvZ1klamSNQD/QjfLGk1iu73/YFDgKeB\nlfs76Mijpg/2PUdEdI3mBswJxx3TucrUyGhL2AbOBX4paR5wHfDHJTvtm8vrzvfYfqDc3Ff5zYEv\nS1oMLAT+xfZCSQcCJ0uaQJGs31yet/GaNU3rs4GfAusA59i+AYrb0CTNB35t+zPt+QgiIqIqne7W\nbsWoSdiSXgk8avsRYJe+ytneomm9r/J/BS7p5fjrgJ2bNp9VvnrK3AU0nuce22/rJdb7+qpnRESM\nPjXO16MjYUuaBFwGfLnTdelFb63viIiooXE1ztijImHbvo9iBPeoY/slre+IiIhOGI23dUVERFRC\nau3Ve0z9WzlB102S/q2PMidJul3S3PIOpiFLwo6IiK7R7vuwJW0GfIhigq0tgX0lbdBUZm/gdbY3\nBP4Z+PZw6p6EHRERXWOcWnv14vXANbafKyfimgm8vanMfpSXVm1fA6wq6dVDrvtQD4iIiKirCmY6\nuwnYXdLqklagmKxrnaYyawN3N6zf00uZAY2KQWcxtk2cML7TVRiSfTebVEncJxcsrCRulZ9v3X52\nESPN9i3lA6AuAZ4B5tD7bJjN2X7Idx8lYUdERNcY6l1dj9x6PY/cdkO/ZWyfDpxexNfnKeYBaXQv\nsG7D+jrltiFJwo6IiK6hAWeifqk1Nt6ONTbebsn6Hb867eUxiwdUPShpPeBtwI5NRX4BHA78SNJO\nwOMNs3UOWhJ2RER0jT4GjrXqJ+VsnQuBw2w/KekjALZPtf1rSXtLuoOi2/yQ4ZwkCTsiIqIFtqf0\nsu3UpvXDWz1PEnZERHSNPPwjIiKiBmqcr3MfdqskzZC0bafrERERAxsntfTqaN07evaxIU/zioiI\nyo3ZhC3pE+Vk7PN7JmOXdFA58fqNks4qt71V0tWSbpB0qaQ1y+3TJZ0laZakuyS9XdJ/S5on6SJJ\nuZwQEVEzVTz8Y6SMyaRTdlEfDOxA8aXkGknXAkcAO9t+VNJqZfHLbe9UHvch4D+AT5X71gf2ADYF\nrgbeZvtTki6gmH7u5yP0liIiog3G5KAzSSf3c5xtf6yC+rTLbsAFthcAlAl2O+A8248C2H6sLLuu\npPOA1wDLAX8utxu4yPYiSTcB42xfXO6bD0wekXcSERFtU+N83W8L+3qWXpvteYsul0f7NdueevZQ\n07+NTgb+2/aFkqY
C0xv2vQBge7GkxomgFwPLDFSJ449dGmrK1GlMmTptEFWPiBjbZs2cwayZMzpy\n7k4PHGtFnwnb9pmN65JWtP1M5TVqj8uBMyWdSNEl/g/AR4AzJH21p0u8bGVPBO4rjzu4IcZAP9UB\nf+pHHjV9qPWOiBjzmhswJxx3TOcqUyMDDjqTtIukm4FbyvWtJH2r8pq1wPYc4ExgNsW15+/a/j1w\nAjBT0o3AV8ri04HzJV0HPMTS3oPm0d/NvQqjvZchIiKaqMVXJ8nuP+9Img28E/i57a3LbX+wvekI\n1K+2JHnBwuT0WKqOj9eMGAkTxgvbledDSX73WXNaivGjD2w9InXtzaBGidv+a9PIuherqU5ERER1\nKnr4x4gYTML+q6RdASQtB3wM+GOltYqIiIiXGEzCPhT4BrA2xQO3LwE+WmWlIiIiqjAm78PuYfsh\n4L0jUJeIiIhK1ThfD2qU+AaSfinpYUkPSfq5pL8bicpFRES0k6SWXp00mLnEfwCcB6wFTALOB35Y\nZaUiIiLipQZzDXuC7XMa1r8v6dNVVShirKrq9qvTrrmzkrgAH9px/cpiR3TCmBwlLml1ivvEL5L0\nnyxtVR8IXDQCdYuIiGirTndrt6K/FvYNvHQ2r38u/+2ZS/yzVVUqIiKiCvVN1/3PJT55BOsRERFR\nuTH58I9GkjYDNgGW79lm++yqKhUREREvNWDCljQdmApsCvwKeAtwBZCEHRERtVLjBvagbut6J/Am\n4H7bhwBbAqtWWquIiIgK1Pk+7MF0iS+wvUjSi5JWAR4E1q24XhEREW031lvY10paDfgucB0wB/h9\npbWqEUnTJX2y0/WIiIiRJ2ljSXMaXk9I+lhTmWnl9p4yRw7nXIOZS/ywcvE7ki4GJtqeO5yTjTWS\nluWlt75FRMQo1u5R4rZvBbYGkDSO4iFZP+ul6Ezb+7Vyrv4mTtmWPpKRpG1s39DKiUcLSZOB31D0\nHmwD/AE4iGJU/FeAlYCHgYNt/03SDIpeht3IFK0REbVScZf4m4A/2b67t1O3Gry/FvZX6L/1uEer\nJx9FNgIOsX2VpO8BhwP/AOxv+2FJBwInAP9E8ZmMt709gKSjO1XpiIgYmooHjr2b4vkbzQzsImku\nRQv8U7ZvHmrw/iZOmTbUYDV2t+2ryuXvA0cAmwGXlj/cZYD7Gsr/eDBBjz92+pLlKVOnMWXqtDZU\nNSKi3mbNnMGsmTM6XY1Bufem2dx70+wBy0laDngr8Jledt8ArGv7WUlvAf6XoqE4JLK7+xJs2SU+\no2dmN0l8SDHIAAAgAElEQVRvoGhhv8b2Lr2Uvwz4ZM8lgbKF/bTtrzSV84KF3f3ZxsjIwz+i7iaM\nF7YrH78tyYdfMOSG7Ut88+2b9FpXSfsDh9reaxD1uBPY1vajQzn3YEaJd4P1JO1ULr8XuBp4Vc82\nSeMlbdKx2kVERFtUeB/2e+hjXJOkV6s8WNIOFI3lISVrGOTUpF3gVuCjkk6nGHR2EnAxcFJ57/my\nwNeAvr6apSkdEVEDVTxeU9KKFAPOPtyw7SMAtk+lmIDsUEkvAs9SXOsessFMTToOeB+wvu1jJa1H\n0V08cKd+fbxo+/1N2+ZSTMn6Erb3aFo/psqKRURE+1SRsG0/A6zRtO3UhuVTgFNaPc9gusS/BexM\n0VUM8HS5bSxJCzkiIka1wXSJ72h7a0lzAGw/Kml8xfUaMbbvArbodD0iIqJ6nZ4PvBWDSdgvSFqm\nZ0XSq4DF1VUpIiKiGlV0iY+UwSTskymmWVtT0ucpLp4Pax7UiIiITqpxA3tQc4l/X9L1wBvLTfvb\n/mO11YqIiIhGgxklvh7wDPDLcpMlrWf7r5XWLCIios3a/fCPkTSYLvFfs3QU9fLA+hT3LW9aVaXG\niicXLGx7zIkTxsx4v2iTKmcju+ZPQ57bYVB23GD1SuJGDKTOs4UNpkt8s8Z1SdsAH
62sRhERERWp\ncQN76F82yjm0d6ygLhEREdGHwVzD/mTD6jiKZ0bfW1mNIiIiKjLWr2Gv1LD8InAh8NNqqhMREVGd\nGufr/hN2OWHKRNuf7K9cREREHYzJiVMkLWv7RUm7SpK7/cHZERFRe2O1S3w2xfXqG4GfSzqf4rFg\nALZ9QdWV6yRJdwHbDOeZpREREe3WX8Lu+RqyPPAI8Iam/WM6YVPce17fr2IREfEyNW5g95uwXyXp\nE8D8kapMJ0iaDPwGuI6iR+EPwEHl7n+V9FZgPHCA7VslTQc2KF9rAF+yfdoIVzsiIoahztew+7sP\nexlgZYpR4r29xpKNgFNsbwI8ydKJYR6yvS3wbeBTDeU3A/ageE74UZLWGsnKRkTE8KjF/zqpvxb2\n32wfM2I16ay7bV9VLn8f+Ldyuafb/wbg7eWygZ/bfh54XtJlwA7Az5uDfvnzxy5Z3mX3qey6+9QK\nqh4RUS+zZs5g1swZna5G7QzmPuxu0DgCXix93vfz5b+L6P+z6vX54J/+3FGt1ywiYoyZMnUaU6ZO\nW7J+wnEj1zYcq13ibxqxWnTeepJ2KpffC1zRT1kB+0t6haRXAtOAayuuX0REtME4tfbqaN372mH7\nkZGsSIfdCnxU0s3AKhTXrBuZpa1wA/OAy4CrgGNt/22kKhoREcMnqaVXJ6VLvPCi7fc3bVvyzELb\n1/PS29rm2f7AiNQsIiKCJOweQ53FLbO+RUTUUKe7tVvR9Qnb9l3AFkMo3y0j5yMixpw6T5wy5Odh\nR0RE1NU4qaVXbyStKuknkv4o6eaGQcyNZU6SdLukuZK2Hk7du76FHRER3aOiLvFvAL+2/U5JywIr\nNu6UtDfwOtsbStqRYmDzy5L6QNLCjoiIGCZJqwC72z4dwPaLtp9oKrYfcFa5/xpgVUmvHuq5krAj\nIqJrSK29erE+8JCkMyTdIOm7klZoKrM2cHfD+j3AOkOte7rEo3L3Prqgkrhrrz6hkrhPLlhYSdyJ\nE8ZXErdKfz9p5UrivuO02ZXE/emHdqgkbowd44Y4H/htN1zN7XOu7q/IshQPjjrc9rWSvg58Fmie\n6rL5xEO+2ygJOyIiusZQR4lvvO1ObLzt0svNF53xjeYi9wD32O6Z8fInFAm70b3Aug3r65TbhiRd\n4hEREcNUznR5t6SNyk1vonhMc6NfUD62uRxB/rjtB4Z6rrSwIyKia1Q0SvxfgXMlLQf8CfigpI8A\n2D7V9q8l7S3pDuAZ4JDhnCQJOyIiukZf91K3wvZcYPumzac2lTm81fMkYUdERNfITGcRERFRqa5M\n2JI+17A8WdL8TtYnIiJGRhVTk45Y3Tt69s75z05XICIiRl4FE6eMmDGRsMtW8i2Svl9OvH6+pLdI\n+llDmTdLukDSF4AJkuZIOofi5vVlJP2PpJskXSxp+fKYrSRdXU7WfoGkVcvtMySdKOkaSbdK2q0j\nbzwiIoZkXIuvTur0+dtpI+AU25sATwKbAq+XtEa5/xDge7b/E1hge2vb76eYfWZD4Ju2NwMeB95R\nHnM28GnbWwLzgaPL7QaWsb0j8O8N2yMiYhST1NKrk8bSKPG7bV9VLn8f+BhFwv1HSWdSPBnlH/s4\n9k7b88rl64HJkiYCq9i+vNx+FnB+wzEXlP/eAEzuLeiXP3/skuVddp/KrrtPHcr7iYgYk2bNnMGs\nmTM6XY3aGUsJu3FeVpXrZwK/BJ4DzrO9uI9jn29YXgQs30uZ5q9WPccsoo/P8dOfa55KNiIipkyd\nxpSp05asn3DcMSN27hrf1TWmusTXa3ho+HuBy23fD9wHHAmc0VB2YfnM0r7I9pPAYw3Xp98PzGhz\nnSMiYgRllPjocCvwUUk3A6tQPCAc4AfAX23f2lD2f4B5DYPOmp+a0rP+AeDLkuYCWwDH0rshP3Ul\nIiJGnlp8ddJY6hJ/sRxE1mw34LuNG2x/lpc+T
WWLhn1faVieC+zcHND2Hg3LDwN/N/xqR0REDGws\nJeyXtXIlXQ88BXx85KsTERGjTafvpW7FmEjYtu+ioZXcsH3bka9NRESMVp2+NasVYyJhR0REDEad\nB27Vue4RERFdIy3siIjoGukSj4iIqIH6pusk7EpNnDC+01UYtCcXLKws9tqrT6gsdhXq9HOrWlWf\nxU8/tEMlcS+86b5K4u672aRK4lb5/11+j3uXFnZEREQN1HngVp3rHhER0TXSwo6IiK6RLvGIiIga\nqG+6TsKOiIguUuMGdq5hR0RE1EESdh8knSnpHZ2uR0REtM841NKrL5KWkTRH0i972TdN0hPl/jmS\njhxO3bu6S1zF6APZXtzL7jzjOiJijKmwS/zfgJuBlfvYP9P2fq2coOta2JImS7pV0lnAfODFhn3v\nlHRGQ/E3Sbq2LL9PWWampC0bjrlC0uYj9gYiImLY1OJ/vcaU1gH2Bk6j73FtLX9V6LqEXXodcIrt\nzYBnGrY3tqoFvNb29sA+wHckvQL4HnAwgKSNgFfYnj8itY6IiNHoa8Cngd56a6HILbtImivp15I2\nGc5JujVh/8X27AHKGDgPwPYdwJ+BjYHzgX0lLQt8EDijzwgRETGqSK29Xh5P+wIP2p5D363oG4B1\nbW8JnAz873Dq3q3XsPtqVQ806bVtL5B0KfAPwAHANn0VPv7Y6UuWp0ydxpSp04Zc0YiIsWbWzBnM\nmjmjI+fub+BYb+bNvpJ5117ZX5FdgP0k7Q0sD0yUdLbtg3oK2H6qYfkiSd+StLrtR4dSF9ndNbZK\n0mTgl7Y3L9dvB94K3EbRen7C9gclnQm8CtgX+DtgBrCB7RckbQNcSDGI4D19nMcLFtbns81DCGIs\nyMM/lqrT/3cTxgvbld8hLcm/+cODLcXYa9M1+6yrpKnAp2y/tWn7qyla4Za0A3Ce7clDPXe3trAb\nM+lnKZLvQ8B1wIoNZf4KzAYmAh+x/QKA7RskPUG6wyMiamUEJk5xcR59BMD2qcA7gUMlvQg8C7x7\nOIG7roXdDpImAZfZ3rifMmlhl+r0TT/qLS3sper0/91ItrAvvrm1Fvb/26TvFnbVunXQ2bBJOgi4\nGvhcp+sSERFDU8VtXSOlW7vEh8322cDZna5HREQM3bgazyWehB0REV2j063kVqRLPCIiogbSwo6I\niK5R58drJmFHRETXqHOXeBJ2APW6BSSiL1XdfrXa9odXEvexa79ZSdzoW50HneUadkRERA2khR0R\nEV0jXeIRERE1kEFnERERNVDjfJ2EHRER3WNcjZvYGXQWERFRA0nYgyBpsqT5na5HRES0Ri2+Oild\n4hER0T06nXVb0BUJW9KKwHnA2sAywHHAncA3gBWA54E3AmtQPIlrxfLQw21f1RRr8kBlIiJidMpt\nXaPfXsC9tvcBkDQRmAO8y/b1klYCFgAPAG+2/bykDYEfANs3xRpMmYiIiLbqloQ9D/hvSScCFwJP\nAPfbvh7A9tMAkpYDvilpS2ARsFEvsQZTJiIiRqEaDxLvjoRt+3ZJWwP7AMcDl/VR9OMUifz9kpYB\nnhtmGQCOP3b6kuUpU6cxZeq04b2BiIgxZNbMGcyaOaMj565xvu6OhC1pLeAx2+dKegI4FHiNpO1s\nXydpZeBZYCJwT3nYQRTXu5sNpgwARx41vU3vICJi7GhuwJxw3DEjd/IaZ+yuSNjA5sCXJS0GXqBI\n2OOAkyVNoEjWbwK+BfxU0kHAb4CnG2K4/Le/MhEREZWQ7YFLxZBJ8oKF+WwjxoI8XrNaE8YL25W3\nfSX52j8/0VKM7f9ulRGpa2+6pYUdERGRQWcRERF1UON8nalJIyKii7R5blJJy0u6RtKNkm6W9IVe\nTyudJOl2SXPLu5aGLC3siIiIYbL9nKQ9bD8raVngCkm72b6ip4ykvYHX2d5Q0o7At4GdhnquJOyI\niOgaVUxNa
vvZcnE5ilt9H20qsh9wVln2GkmrSnq17QeGcp50iUdERNeQWnv1HlPjJN1IMXX1ZbZv\nbiqyNnB3w/o9wDpDrXta2FFbTy5YWEnciRPGVxI36quq268mH/qTSuIC3PXtd1YWu86qGHRmezGw\nlaRVgIslTbM9Y4BTD/m+3yTsiIiIPlx71eVcd/Xlgypr+wlJvwK2A2Y07LoXWLdhfZ1y25AkYUdE\nRPcYYhN7+112Z/tddl+yfurXT3xpOGkN4EXbj5czZ74ZaJ5r9RfA4cCPJO0EPD7U69eQhB0REV2k\ngkFnawFnSRpHMS7sHNu/k/QRANun2v61pL0l3QE8AxwynBMlYUdERNdo90xntucD2/Sy/dSm9Zbn\nt80o8YiIiBpICzsiIrpGpiYdhSQdLOnkTtcjIiJGkTZPTTqSxnILO8+2jIiIl6hiprORUrsWtqQV\nJf2qnGh9vqR3Sdpe0u/LbVdLWqksPknSRZJuk/TFhhh7luWvl3SepBXL7XdJ+rykOZKuk7SNpEsk\n3dEz4q8s92lJs8tJ3KeP7CcQERHDVcVMZyOlji3svYB7be8DIGkiMAd4l+3ry2S9gKLzYqvy9QJw\nq6STgOeBI4A32l4g6TPAJ4DjKFrlf7G9taSvAmcCOwMTgJuAUyXtSTGJ+w7lMP6fS9rd9uDurI+I\niBiGOibsecB/SzoRuBB4Arjf9vUAtp8GkGTgd7afKtdvBiYDqwGbAL9X8XVpOeD3DfF/Uf47H1jR\n9jPAM5KeL6ed2xPYU9KcstyKwOuAJOyIiFGuvh3iNUzYtm8vnyW6D3A8cFk/xZ9vWF7E0vd7qe33\nDnDMYoqWOQ3rPcd/wfb/DFTX44+dvmR5ytRpTJk6baBDIiLGvFkzZzBr5ozOnLzGGbt2CVvSWsBj\nts+V9ARwKPAaSdvZvk7SysCz9P5jMXA1cIqkDWz/qbx+Pcn27c2n6uP4i4HjJJ1r+xlJawMv2H6o\nufCRR00f9vuMiBirmhswJxzXPJNndeo86Kx2CRvYHPiypJ4W8KEUg+dOLudxfZZiLlfTy0hx2w9L\nOhj4oaRXlJuPAJoTdvPxLo+/VNLfA1eVXepPAf8IvCxhR0REtIvs3P1UBUlesDCfbZXyeM2ouzxe\nszBhvLBdedNXkm+5/5mWYrx+rRVHpK69qWMLOyIiYljq2yGehB0REd2kxhm7dhOnREREdKO0sCMi\nomtklHhEREQNdHp60VYkYUdERNeocb5Owo6IiC5S44yd+7ArkvuwI6KTTrvmzkrifmjH9dsecyTv\nw77jwWdbivG6NVfIfdgRERFVy6CziIiIGsigs4iIiBqocb7OxCkRERF1kBZ2RER0jxo3sZOwIyKi\na9R50Fm6xPsgaRVJhw5Q5kxJ7xipOkVERGuk1l4vj6fTJT0gaX7v59M0SU9ImlO+jhxu3ZOw+7Ya\ncNgAZVy+IiKiO50B7DVAmZm2ty5fxw/3ROkS79uJwAaS5gCXAo8A7wMWA7+2/bmyXH37VyIiuky7\n/2DbvlzS5JE4bRJ23z4DbGp7a0lvAY4EdrD9nKRVO1y3iIgYhg7ch21gF0lzgXuBT9m+eTiBkrD7\n1vhjfRNwuu3nAGw/PpgAxx87fcnylKnTmDJ1WhurFxFRT7NmzmDWzBkdOvvQMvZVV8zkqitmtXLC\nG4B1bT9bNv7+F9hoOIEyl3gfyi6OX9reXNJ/A7fYPq2pzBnAhbZ/2svxmUs8Ijomc4m/nCTf89jz\nLcVYZ7VXvKyujfliEHW4E9jW9qNDPXcGnfXtKWDlcvm3wCGSJgBIWq1jtYqIiNqQ9Gqp6IiXtANF\nQ3nIyRrSJd4n249IurIcqn8R8AvgOkkvAL+iuKYNGSUeEVEb7W7GS/ohMBVYQ9LdwNHAeADbpwLv\nBA6V9CLwLPDuYZ8rXeLVSJd4RHRSusRfTpLve7y1LvFJq768S3ykpIUdERF
dIzOdRURERKXSwo6I\niO5R3wZ2EnZERHSPGufrJOyIiOgeHZjprG1yDTsiIqIGcltXRXJbV0TE4IzkbV0PPrmwpRhrThyf\n27oiIiIqV+Mu8STsiIjoGjXO10nYERHRPTLoLCIiIiqVFnZERHSNOk9NmoQdERFdI13iERERUakk\n7IiIiBpIl3hERHSNOneJJ2FHRETXyKCz6NXxx05fsjxl6jSmTJ3WsbpERIwWs2bOYNbMGR05d51b\n2JlLvCKZSzwiYnBGci7xJxYsainGKhOWyVziERERVatxAzsJOyIiukiNM3YSdkREdI0MOouIiKiB\nOg86y8QpERERNZCEPQpUdXtD4lYbt8rYiVvPuFXGTtz2UIuvXmNKe0m6RdLtkj7TR5mTyv1zJW09\nnLonYY8CdfsfJnGrj5249YxbZezEbZM2Z2xJywDfBPYCNgHeI+nvm8rsDbzO9obAPwPfHk7Vk7Aj\nIqJrqMX/erEDcIftu2wvBH4E7N9UZj/gLADb1wCrSnr1UOuehB0RETF8awN3N6zfU24bqMw6Qz1R\nZjqriKR8sBERgzRSM521I05jXSW9A9jL9ofL9X8EdrT9rw1lfgmcaPvKcv23wH/YvmEo581tXRXp\n1NR1ERHRu4r+Lt8LrNuwvi5FC7q/MuuU24YkXeIRERHDdx2woaTJkpYDDgR+0VTmF8BBAJJ2Ah63\n/cBQT5QWdkRExDDZflHS4cDFwDLA92z/UdJHyv2n2v61pL0l3QE8AxwynHPlGnZEREQNpIXdQZJW\nsP1sp+sx0iSt3t9+24+26TxrAss3xP1rO+LWgaRP9rPbtr/ahnOsRXFLy2LgWtt/azVmVSRtbnt+\np+sR0Yok7A6QtAtwGrAysK6krYB/tn1Yi3HXAI4GdgMMXA4ca/uRFqvcE38ixR/7p1oMdQNF/fqy\nfivBJe0HfAWYBDwIvBb4I7BpK3HL2F+0/ZmBtg0h3rb081kMdRRpg5X7i9sqSR8CjgIuKzd9U9Kx\ntr/XYtzdKH6HJ7P075Nt/10rcYFvS3oFcAZwru0nWoy3hKSdgZMoJs1YjqJb9GnbE9sQe3eKCTfO\nkPQqYCXbdw4z1jsofid6G3hl2xe0UNUYAekS7wBJs4F3Aj+3vXW57Q+2W0oo5a0CM4HvU/xP+V5g\nmu03tRh3e+B0oOcP0OPAP9m+rpW4VZE0D3gDcKntrSXtAbzf9gfbEHtOz8+sYdt825sPM94M+k/Y\newwnbhl7WeBj7WhN9xL7NmDnni+Dkl4JXGV7oxbj3gr8O8WXukU9220/3ErcMvZGwAeBA4DZwBm2\nL2lD3OuBdwPnAdtRDC7a2PZnW4w7Hdi2jLWRpLWB82zvOsx4Z9L/79qwrqvGyEkLu0Ns/1UvfWzM\ni20I+xrbxzWsHy/pwDbEPR04zPblsKQVdDqwxXCCSdqmv/0ttCp7LLT9sKRxkpaxfZmkb7QSUNKh\nwGHABpIau1ZXBq4cblzb01qp1wCxX5T0HqDtCRt4GHi6Yf3pclurHrd9URvivIzt2yQdSTGq9yRg\nK0njgM/Z/mmLsW8vf9cWAWdIuhFoKWEDbwO2Bq4vz3GvpJVbqOPBLdYnOiwJuzP+KmlXgPI2gI9R\ndNm26pLyD/SPy/UDgJZbEMCLPckawPYVklr5gvFV+u+uHXarsvRY+YftcuBcSQ/y0uQyHD8ALgJO\nBBq7v59uxyWHsjW8D0VX8DIUPSTtuNZ8haRvUvxOPNOzsQ1fiv4EXC3p5+X6/sC88tp5K/W+TNKX\ngQuA53s2tlpfSVsCBwP7ApcC+9q+QdIk4GqglYT9TNndPlfSl4C/0fdzIobieduLe77YS1qxDTGR\n9HngS7YfL9dXAz5p+8h2xI/qpEu8A8przScBb6L4H/sSiq7Llv7wS3oaWIFiEBAU99n3/JH2cK+p\nSfo6MAH4YbnpQOA54JwycKt//Nuq/MP
2HMX7fx9FV/65bbyWvy3FOIHFwJXteP+SLgIWAPNZ+vPD\n9jEtxp1BL1+OWulqL+NO7wnVs6nxPMOtd4X1nQl8Dzjf9oKmfQfZPruF2JOBByiuX3+c4vftW7bv\nGHaFi7ifBl4H7Al8gaI7/we2T2ox7o22t2ra9rJLPTH6JGGPsLIldZbt93W6LoNV1XXWMrF+AljP\n9oclbUhxve7CYVV0adxPAj+yPeSZhAYR+yiKnosLKJLU/sBPmi5FDCfuPNvDusQQg1O2gjem+F2+\n1fYLHa7SgCTtSZGwAS62fWkbYs4DdrD9XLk+Abiu1TE0Ub10iY+w8rriayW9wvbzAx8xeGU3+1zb\nT0t6P8X1r2/Y/ksLMccB37b94wELD90ZFNfndinX7wN+ArSUsCmuK18i6TGKJ+ecP5xZhfrwj8AW\nDX/svgDMBVpK2BT1/X+2L261go0kvQY4AVjb9l6SNqEYLNbqaO6NgU/x8tHcb2gx7qoUo8SnlJtm\nUNzp0NKobkn7AN8B/lxu+jtJH7H96xZi9nebmNvxBcz2JZKuofiMLWn1Ntz2eC7wO0mnU3zpPAQY\ndg9DjJy0sDtA0jnA6ymmq+u5D7vl65XlH5Atgc2BMym6AA+wPbXFuNfb3raVGP3FbeyOkzTX9pZt\nir8l8C6KEfn32H5jG2JeBrzd9mPl+mrAT9uQqN5OMbp/HLCw3DzsyxgNcX9D8cXoCNtbSBoPzLG9\nWYtx51E807dxNLdtX99i3AsoLgucRZFM3k/xBentLca9Fdinp5ta0gbAr21v3ELMyf3tt33XcGOX\n8T8CHENxLb/nMknLt7iV19kvo7gkZ+C3wBts/0crcaN6aWF3xp/K1zhgJZqu/7XgxXKQyj8Ap9g+\nTVLLtzIBl0r6FC8fuNTqN/3ny+44YMkf0Xb2OjxIMQDoEeBVbYr5JPAHST2D+d4MzJZ0MsUf048N\nM+5XgZ2Am2wvHqjwEKxh+8eSPgtge2GLAwZ7LLT97TbEabZBU3KeLmluG+I+2XRN+c8UP8thazUh\nD8Kngc3acUtbkzeXyXnJaPxyoF8S9iiXhN0BtqdXFPopSZ+j6LbdXdIywPg2xH03xReKjzZtb2mC\nE2A68BtgHUk/AHalGMnbEkmHUbSs1wTOBz5k++ZW45Z+Vr56zGhYbuVL11//f3vnHmxXWZ7x30No\nuYYgVAUqt6GCgOF+CwzIReVSw4gSsIClCIgjKFqmnbYjGiGOdBDaomIHkBAUWgmVEcq9giTkQoAQ\nIAkCtYJtocoltCHAcHv6x/etnH12TnKSvdY6K2uf9zdzZvZee693fdnnZL/rey/PCyyq2FkDvJqL\nHIHlgwd6Di8rqdQJuEXS2axYzV32Ju51SQd3tRD2rAaoJBYC8JCk20i90pDqECrREVB9win/QSpE\nrIS6WhODkSNC4g2Qw6rdVJH/25IkljLP9kxJ2wCH2Z5Wxm5dKIltiLSzBHiAEkpOHXYvIhWdLSi5\nxBFD0jTSDdDtQFEMVUWaZG+SM/kwsIgUaTjedk+7VknPsOoCxLIqdXuQ8qnj8qElwKkl1nsNQ1ey\nF21zpcVCVJ9wyl6k1NYcBv9N9BTFkTQOeA8DrYlF69nSqjoognoJh90AkvbpeLo+8GlSOPsvGlrS\nsEj6MGkH0anNXapQRdJs4OiioCgXRE2vqlpVFWqJS5pue5KkhazosEoXGK2sTaqCtq4NgHOAI0kh\n4LnAZUXRXEm7ZzPQ3nY/qTixkh2hkgwutkuFrUeCjlqM5ZX+Q7VO9WD3IWAGA61+xd/EWnkDHtRP\nOOy1BEkP2t63x3Nn2T4o92EP5UzKFi5NBj5C0uK+FTgauN/28SXt/jHpTv8YUrvNtcDJZXfGWomW\neJkbAUlb2X5O0rYMIYoxAvnMnpA0neSoO+Vqx9metJbaLW5gt2OwgMwFJe1O7TpkkuEq5GpnkGoZ\nrgK
eJ9VNnFq2eDJ6o4NuIofdABo8rWodUhitZ6fqrC1se+OSS1sZx5Oqz+fbPk3S+0mtIaWwfauS\n0tvdpOK7T9l+sqxdYAowgS4t8TIGs7NeF7jGJUU8hiJHA/6SFMUoCvFKp0mAXW3v0vH8HklV5PPr\nsvszklb9wyTxm6q4lYGb2Q1Isp/PVWT7T0n/j88hCad8gHTTUZbbc6X4zVRbJxC0lHDYzdA5rept\n4Bng9F6Nqf5xla/bfkfS2zkP9jtg616N5YrqTjYhVc2fI6lMpXVB5VrisLyH/l1JmzrLOlbIdaQq\n/E8AZ5GK716owO58SRNsz4HlRWelWq9qtvuHto+swM4gbN/Y+TwXOVZVaPUi8GZOB0zOxZ7rVWD3\nJNL3RHcuvGyxZ9BSwmE3gO3tKjZZ67hK4MHcb3wlqbJ2GTC7hL2HGRjz547nVVGHlnjBMuDx3NbV\n2UNf9iZj89yG92Xb9wH35RxmWfYBZkn6T9JnvA3wZK4SLpN7r8vubEm72X6sx/NXlx2prtXv58AR\nDEodD0sAAAzPSURBVPyNbQjcyYAgUE/U8D0RtJzIYY8gko6w/XMNzKXt5iVSbvidIV5bK5C0PTC2\n6i/UHCXYutdq4C5bG5HaYQot8XFUpCUu6c/yw+7isFKFQJLm2j4g3whcRgrXTre9Q0m7263q9V5z\n71Xb7WgzGgN8EPg1A2HgUkV9Smp9b9Ohq0/S/v4rl5zSle0Ppc1dRdFZLdK9QXuJHfbIcgjpbnwi\nQzvszYGvkQpYekJpZu62dPxubc/o1d5K7ErSIWXtKmmUH5ttPgy8kAvovtqjvVk5n/9bVvx8p0h6\nGbjY9vdLLPtGcoogX3MMHZXoJfiWkiznecB3SWmCnj6HTuoqhqvB7sTCNCsW9ZXdVRhY7JLqbqvg\nNUl7O6u85S6QKqrl65LuDVpK7LDXMiRd3WvlqqS/JU3SWsyAXCS2J670pGbtLrC9h6QzSLvrb0h6\n3Pb4MnZXcb3NgdkuJ0c5F/io7Vfz87GkoQylwp9BQtKPbH92uGM92J1GUv+bV2qBQ9vel6RZ/3w+\ntAXwGdulUhqqWbo3aB+xw24ArWLAQck2k+NIIbNKh4rUaHdMFns5gRRZgGpz2YOw/VKuGC/D+oWz\nzjaXStqwpM3CoXzFgzXKL6mi7ahlDNoF58r8KnTsDwBOkfQsg0fOVjEhbXvSoJ1tgU8B+9ExIrUE\ndUv3Bi1jnaYXMEq5mtTDOonkrJaSwl9l+RVJGrFq6rJ7Aak451e25+UvpKdruM5ybJdt5VmmpB4G\nVBr+3L1w1gD58V4V2G0Fkv5G0lJgvKSlxQ+pI+HmCi5xJLADcDgp/D6RlI6pgvOzwMs44DDSUJQq\ndNYnM1i69x6SbkEwSomQeAMMFdaqItSlNOlod1KevLNgp1cpw6L9aitgj6rstpkhwp9bAidWEP58\nlCQj+3J+vhlwX13pgbUVSd+2/ddNr2NN6EjtXAQ8bvu6qkRPlHTgC+neua5+EEjQIiIk3gyVDjjo\n4Ob8M5R2ci90tl/d0vG4kru8HO47nRXFQtbaMLDtByXtTFJmM/Ck7beGOW11uASYI+kG0mc8iTTH\nerSxU1bAu93VD0Kpi/+WdAWpWPSirNZWVfRyPZKe+rrALpIqKSIN2knssBtAFQ846LK9HqnHFOCX\nFTmTTvtVtl/dCDxBar36JmnK2BNr885d0gnAHbb/T9L5pNzlFNvzK7B9IKm/GeAh22V63VuJpI8B\np5F2lTcAUytSv6uN3H51FPCY7adzXcZ423cNc+pwdmsp9gzaSzjsBlHFAw4kHQpMA57Nh7Yh3Qjc\nV9LuL+hqvwJ6br/qsFuEEh+zvZuk3yP1oe9fxm6dFFXsOSoyBfgO8HXb+5W0ey5wJmlcpYBPAlfa\nvqzsmttILsz8DKkY8Tck0Z4fV30DujYj6SmS449CswCIkPiIIum8j
qfuOF6Ib5QapQhcCny82JFI\n2pGUby1bvLRp3lGeAVxbtF+VtAkDIwP/V9J40tCEqtSn6qLY6XyC5FD/VdKFFdg9A9jf9jKgGBE6\nlySiMqrIedtT8s984HrSVLBTgUObW9mIUxR7hsMOgHDYI81YamxbAtbtDB/afiq3xZSlrvarK3OI\n/WukoQ8bA1+vwG6d1JmvfHclj0cNkm4CPkRKGU20XRT3/bPS3Om+p6PY8zVggaRRX+wZJMJhjyC2\nJwNIuhY4t6PndjNS0VFZHpZ0FQMjD08maX+XpWi/mlVx+9WPGBilWEh7vr8Cu3VyAilfebHtV/KN\nTBVzzKcCD+RK/yIkfnUFdtvGFaQxrgcB+0qaSZqz/YbtKvqx20Cntv4tVFdEGrScyGE3QI3aw+sD\nZ5O+7CANv7i8bA5sZTcYtk8rafdOBkYpdhbVVHHzUitK4zCXS5La/k0FNvcmhX4NzLT9SFmbbUM1\nzdluI5I2ZggZ3CJtEow+wmE3QNt6bmu8wVhYo75zLUg6lhQN2Yok6rEtqbJ910YX1idIWuzBc7aH\nPDYaCBncoJtQOmuGouf2QklTgDnAxWWNSpoo6RFJSzrUoqqoQJc6Zm7nx2MqsDtbUhXSkCPJFGAC\n8JTt7UljFR9odkl9xXxJE4onqm7OdhtZQQaXNLozGKVEDrsBbF+bC2gOJ4U/j7O9uALTf0/S/V5Y\nsehEpaIeGjxK8TRJlY1SHAHesv2ipHUkjbF9r6R/aHpRfURdc7bbSF1TwIKWEg67IWwvAhZVbPa/\ngEVVK0TVcIPRZuGHJTk0ORO4TtLvgFeHOSdYfY5qegFrEecCN0gaNAWswfUEDRM57D4ihw8vAO5l\noMe5iv7uIJNVrd4gpZNOJs2tvs72S40uLOg7ClU9Bk8BO78KVb2gnUQOu7+4kLTbW5/U07wxqfc7\nqI4vAFvYfsv2NbYvC2cd1ERdU8CClhIh8f5iS9sfa3oRfc5Y4C5JS0gqctNt/7bhNQX9SV2qekFL\niR12f3GbpCObXkQ/Y3tybuE6mzRac0ZWogqCqilU9U4Ebq1YVS9oIZHD7iMkvUpq+3gTKIYk2PYm\nza2qP8kKZ8cDfwJsPMqql4MRoK4pYEF7CYcdBGuApC+S5EnfB0wHflJRS14QBMEqiRx2n5GFSLaj\n43dr+6eNLaj/2Ab4iu0FTS8kCILRReyw+whJU4HxpP7u5b3YZTW/gxWpQ0s8CIJgVYTD7iMkLQZ2\ndfxSayO0xIMgaIqoOOwvHgRG3ZCEESa0xIMgaITIYfcXU0ma3/9De7S520ZoiQdB0AjhsPuLHwKn\nAAvpyGEHlRJa4kEQNELksPsISXNsTxj+nUGv5N7Y1xnQEh9HaIkHQTAChMPuIyRdDmwK3MLg4R/R\n1lUSSbNsH5TFabr/0xh4GbjY9vdHfnVBEIwGwmH3EZKuyQ8H/VKjrat+JG0OzLa9U9NrCYKgPwmH\nHQQVIWkr2881vY4gCPqTaOvqIyRtLekmSS/kn3+R9IGm1zVaCGcdBEGdhMPuL6YCN5NEPbYi5bKn\nNrqiIAiCoBIiJN5HSHrU9u7DHQuCIAjaR+yw+4uXJH1W0hhJ60o6BXix6UUFQRAE5Ykddh8haVvg\ne8AB+dBs4EsxmCIIgqD9hMPuIyRNI41+XJKfbwZ8x/bnml1ZEARBUJYIifcXuxfOGsD2y8BeDa4n\nCIIgqIhw2P2F8q66eLIZMKbB9QRBEAQVEcM/+otLSNO6bgAETAK+1eySgiAIgiqIHHafIWlX4HCS\nPOk9thc3vKQgCIKgAsJhB0EQBEELiBx2EARBELSAcNhBEARB0ALCYQdBEARBCwiHHQQ1IukdSY9I\nelzSDZI2KGHrGkmfzo+vlLTzKt77EUkTerjGM52tgcMd73rPq2t4rcmSzlvTNQbBaCUcdhDUy2u2\n97Q9HngT+ELni5LWpLXS+QfbZ
9p+YhXvPQw4cE0XW9hfg+Nr+p4y7w+CUU047CAYOWYCf5R3vzMl\n/QxYKGkdSRdLmifpUUmfh6SCI+l7kn4p6W7gfYUhSb+QtHd+fJSkhyUtkHR31pQ/C/hq3t0fJOm9\nkm7M15gn6cB87uaS7pK0UNKVpP79VZJnrj+Uzzmz67VL8/F/k/QH+dgOkm7P58yQtFM1H2cQjC5C\nOCUIRoC8kz4GuC0f2hPY1faz2UG/Yns/SesB90u6iyQruyOwM7AFsBj4YT7fgCW9F7gCODjb2tT2\nK5L+EVhq+9J8/euBv7M9S9I2wB3ALsA3gBm2p0g6Bjh9Nf45n7O9JIf350m6MUvibgQ8aPvPJZ2f\nbX8pr+8s2/8uaX/gcuCIHj/KIBi1hMMOgnrZQNIj+fEM4GrgIGCe7Wfz8Y8D4yUdn59vAnwQOBi4\n3kks4XlJ93TZFmky24zClu1Xul4v+Ciws7T80FhJG+VrHJfPvU3SEobnXEmfzI+3zmudB7wL/CQf\n/zHw03yNA4HpHdf+/dW4RhAEXYTDDoJ6ed32np0HsuNa1vW+c2zf3fW+Yxg+RL26eWAB+9t+c4i1\nDBsG73j/oaTd8QG235B0L7D+Sq5nUtptSfdnEATBmhM57CBonjuBLxYFaJJ2lLQhaUd+Ys5xb0kq\nJOvEwFzgEEnb5XOLSu6lwNiO994FfLl4Imn3/HAGcFI+djTwnmHWugnJAb8h6UMMzF6H9H0yKT8+\nCZhpeynw6yJ6kPPyuw1zjSAIhiAcdhDUy1A7YHcdv4qUn54v6XHgB8AY2zcBT+fXpgGzVzBkvwh8\nnhR+XgD8U37pFuC4ouiM5Kz3yUVti0hFaQDfJDn8haTQ+LMMTbHeO4B1JS0Gvg3M6XjPMmC//G84\nFLggHz8ZOD2vbyFw7DCfTxAEQxBa4kEQBEHQAmKHHQRBEAQtIBx2EARBELSAcNhBEARB0ALCYQdB\nEARBCwiHHQRBEAQtIBx2EARBELSAcNhBEARB0ALCYQdBEARBC/h/sAeZuBshiqwAAAAASUVORK5C\nYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n", - "\n", - "# Run classifier, using a model that is too regularized (C too low) to see\n", - "# the impact on the results\n", - "# classifier = svm.SVC(kernel='linear', C=0.01)\n", - "y_pred = classifier.fit(X_train, y_train).predict(X_test)\n", - "\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "\n", - "import matplotlib.pylab as pylab\n", - "pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session\n", - "\n", - "my_labels = classifier.classes_\n", - "\n", - "\n", - "def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):\n", - " fig, ax = plt.subplots()\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - 
" plt.colorbar()\n", - "\n", - "\n", - " tick_marks = np.arange(len(classifier.classes_))\n", - " plt.xticks(tick_marks, my_labels, rotation=90)\n", - " plt.yticks(tick_marks, my_labels)\n", - " plt.tight_layout()\n", - " plt.ylabel('True label')\n", - " plt.xlabel('Predicted label')\n", - "\n", - " from matplotlib.ticker import MultipleLocator # from http://stackoverflow.com/a/19252430 comments\n", - " ax.xaxis.set_major_locator(MultipleLocator(1))\n", - " ax.yaxis.set_major_locator(MultipleLocator(1))\n", - "\n", - "\n", - "# Compute confusion matrix\n", - "cm = confusion_matrix(y_test, y_pred, labels=my_labels)\n", - "np.set_printoptions(precision=2)\n", - "print('Confusion matrix, without normalization')\n", - "print(cm)\n", - "plt.figure()\n", - "plot_confusion_matrix(cm)\n", - "\n", - "# Normalize the confusion matrix by row (i.e by the number of samples\n", - "# in each class)\n", - "cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - "#print('Normalized confusion matrix')\n", - "#print(cm_normalized)\n", - "#plt.figure()\n", - "#plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix')\n", - "\n", - "#plt.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.4.3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/requirements.txt b/requirements.txt index 2cc845f..c00e8ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ 
-5,3 +5,5 @@ numpy matplotlib pytest ipython[notebook] +beautifulsoup4 +requests