From e4b5d09b485b1e1ebffaa69bc068d72983830a2e Mon Sep 17 00:00:00 2001 From: Will Flowers Date: Sun, 7 Jun 2015 23:48:22 -0400 Subject: [PATCH 1/3] Submitting work so far --- Ideas.ipynb | 3953 ++++++++++++++++++++++++++++ More ideas.ipynb | 58 + New_Trial.ipynb | 114 + Untitled.ipynb | 47 + programming_language_classifier.py | 129 + requirements.txt | 1 + 6 files changed, 4302 insertions(+) create mode 100644 Ideas.ipynb create mode 100644 More ideas.ipynb create mode 100644 New_Trial.ipynb create mode 100644 Untitled.ipynb create mode 100644 programming_language_classifier.py diff --git a/Ideas.ipynb b/Ideas.ipynb new file mode 100644 index 0000000..4fcc739 --- /dev/null +++ b/Ideas.ipynb @@ -0,0 +1,3953 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# from scraper import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# def get_text(url):\n", + " \n", + "\n", + "# def scrape_links():\n", + "# \"\"\"Creates list of links to use with create_url to gather code.\"\"\"\n", + "# with open(\"links_list.txt\", \"r\") as myfile:\n", + "# data=myfile.read()\n", + "# return findall(r\"wiki/(.+)\" ti), data)\n", + " \n", + "# def create_url_for_scraping(task_string):\n", + "# return\"http://www.rosettacode.org/wiki/{}\".format(task_string)\n", + "\n", + "# language_start = [\"C\", \"C#\", \"Common Lisp\", \"Clojure\", \"Haskell\", \"Java\", \"JavaScript\", \n", + "# \"Java\", \"JavaScript\", \"OCaml\", \"Perl\", \"PHP\", \"Python\", \"Ruby\", \"Scala\", \"Scheme\"]\n", + "\n", + "# #def make_data(languages=language_start, num_links=50):\n", + "# #grab data for all of the links in the task list\n", + "# #go through for each of the languages and grab the associated code\n", + "# #return a df with the code you need in a column and the type of code as the index\n", + " \n", + "# def scrape_data(url):\n", + "# req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})\n", + "# content = urllib.request.urlopen(req).read()\n", + "# soup = BeautifulSoup(content)\n", + "# return soup.find_all(\"pre\", class = \"highlighted_source\")\n", + "# #pre is an html tag. We want all text from pre with class highlighted_source \n", + "# #returns a list of soup objects\n", + " \n", + "# def pull_code_from_soup(soup_list):\n", + "# []\n", + "# language = [(soup_list[i]['class'][0]), soup_list[i].get_text() for i in len(soup_list)]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# new_df = df[df[0]!='text']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from programming_language_classifier import *" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tc = make_data(['http://rosettacode.org/wiki/Temperature_conversion'])\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0text: KtoC \\ n -- n\\t273.15 n:- ; : KtoF \\ n -- n\\...
1adawith Ada.Float_Text_IO, Ada.Text_IO; use Ada....
2textvoidshow(integer symbol, real temperature){ ...
3autohotkeyMsgBox, % \"Kelvin:`t`t 21.00 K`n\" . \"Ce...
4awk# syntax: AWK -f TEMPERATURE_CONVERSION.AWKBEG...
5awk# usage: gawk -f temperature_conversion.awk i...
6text10 REM TRANSLATION OF AWK VERSION20 INPUT \"KE...
7textdo print \"Kelvin degrees (>=0): \"; input K ...
8textREPEAT INPUT \"Kelvin degrees (>=0): \" KUNTIL...
9text( ( rational2fixedpoint = minus fixedpointn...
10c#include <stdio.h>#include <stdlib.h> double k...
11cpp#include <iostream>#include <iomanip> //-----...
12csharpusing System; namespace TemperatureConversion{...
13clojure(defn to-celsius [k] (- k 273.15))(defn to-fa...
14cobolIDENTIFICATION DIVISION. PROGRAM-...
15lisp(defun to-celsius (k) (- k 273.15))(defun t...
16ddouble kelvinToCelsius(in double k) pure nothr...
17delphiprogram Temperature; {$APPTYPE CONSOLE} uses ...
18erlang% Implemented by Arjun Sunel-module(temp_conv)...
19textinclude std/console.e atom Kwhile 1 do\\tK = p...
20textA1 : KelvinB1 : CelsiusC1 : FahrenheitD1 : Ran...
21text# convert from Kelvinநிரல்பாகம் கெல்வின்_இருந...
22fsharp// Define units of measure[<Measure>] type k[...
23fortranProgram Temperature implicit none  real :: k...
24gopackage main import ( \"fmt\" \"os\" \"str...
25haskellmain = do putStrLn \"Please enter temperature...
26uniconprocedure main(A) k := A[1] | 21.00 writ...
27jNB. Temp conversions are all linear polyno...
28jNB. Format matrix for printing & tag each ...
29javapublic class TemperatureConversion { public...
.........
35texttempConvert[t_] :=Grid[Transpose@{{\"K\", \"C\", \"...
36textП7\\t0\\t,\\t8\\t*\\tП8\\tИП7\\t9\\t*\\t5/\\t3\\t2\\t+\\tП9...
37ocamlfun KtoC n = n - 273.15;fun KtoF n = n * 1.8 -...
38netrexx/* NetRexx */options replace format comments j...
39textimport rdstdin, strutils, strfmt while true: ...
40objeckclass Temperature { function : Main(args : S...
41objc#import <Foundation/Foundation.h> int main(int...
42ocamllet print_temp s t = print_string s; print_...
43text: kelvinToCelsius { 273.15 - }: kelvinToFahren...
44parigpf(x)=[x,x-273.15,1.8*x-459.67,1.8*x]
45perlmy %scale = ( Celcius => { factor => 1 ...
46perl6while my $answer = prompt 'Temperature: ' { ...
47phperror_reporting(E_ALL & ~ ( E_NOTICE | E_WARNI...
48text(scl 2) (de convertKelvin (Kelvin) (for X ...
49text(convertKelvin 21.0)
50pli*process source attributes xref; /* PL/I *****...
51python>>> while True:\\tk = float(input('K ? '))\\tpri...
52python>>> toK = {'C': (lambda c: c + 273.15), ...
53text#lang racket(define (converter temp init final...
54rexx/*REXX program converts temperatures for a num...
55rubymodule TempConvert  FROM_TEMP_SCALE_TO_K = ...
56rubyTempConvert.kelvin_to_celsius 100 #=> -173.15T...
57text[loop]input \"Kelvin Degrees\";kelvinif kelvin <...
58scalaobject TemperatureConversion extends App {  d...
59text$ include \"seed7_05.s7i\"; include \"float.s7i\"...
60tclproc temps {k} { set c [expr {$k - 273.15}]...
61tclputs -nonewline \"Enter a temperature in K: \"fl...
62textinclude c:\\cxpl\\codes;real K, C, F, R;[ChOut(0...
63textK:=ask(0,\"Kelvin: \").toFloat();println(\"K %.2f...
64zxbasic10 REM Translation of traditional basic versio...
\n", + "

65 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 text : KtoC \\ n -- n\\t273.15 n:- ; : KtoF \\ n -- n\\...\n", + "1 ada with Ada.Float_Text_IO, Ada.Text_IO; use Ada....\n", + "2 text voidshow(integer symbol, real temperature){ ...\n", + "3 autohotkey MsgBox, % \"Kelvin:`t`t 21.00 K`n\" . \"Ce...\n", + "4 awk # syntax: AWK -f TEMPERATURE_CONVERSION.AWKBEG...\n", + "5 awk # usage: gawk -f temperature_conversion.awk i...\n", + "6 text  10 REM TRANSLATION OF AWK VERSION20 INPUT \"KE...\n", + "7 text  do print \"Kelvin degrees (>=0): \"; input K ...\n", + "8 text  REPEAT INPUT \"Kelvin degrees (>=0): \" KUNTIL...\n", + "9 text ( ( rational2fixedpoint = minus fixedpointn...\n", + "10 c #include #include  double k...\n", + "11 cpp  #include #include  //-----...\n", + "12 csharp using System; namespace TemperatureConversion{...\n", + "13 clojure (defn to-celsius [k] (- k 273.15))(defn to-fa...\n", + "14 cobol IDENTIFICATION DIVISION. PROGRAM-...\n", + "15 lisp  (defun to-celsius (k) (- k 273.15))(defun t...\n", + "16 d double kelvinToCelsius(in double k) pure nothr...\n", + "17 delphi  program Temperature; {$APPTYPE CONSOLE} uses ...\n", + "18 erlang % Implemented by Arjun Sunel-module(temp_conv)...\n", + "19 text  include std/console.e atom Kwhile 1 do\\tK = p...\n", + "20 text A1 : KelvinB1 : CelsiusC1 : FahrenheitD1 : Ran...\n", + "21 text  # convert from Kelvinநிரல்பாகம் கெல்வின்_இருந...\n", + "22 fsharp  // Define units of measure[] type k[...\n", + "23 fortran Program Temperature implicit none  real :: k...\n", + "24 go package main import ( \"fmt\" \"os\" \"str...\n", + "25 haskell  main = do putStrLn \"Please enter temperature...\n", + "26 unicon procedure main(A) k := A[1] | 21.00 writ...\n", + "27 j NB. Temp conversions are all linear polyno...\n", + "28 j NB. Format matrix for printing & tag each ...\n", + "29 java public class TemperatureConversion { public...\n", + ".. ... ...\n", + "35 text tempConvert[t_] :=Grid[Transpose@{{\"K\", \"C\", \"...\n", + "36 text П7\\t0\\t,\\t8\\t*\\tП8\\tИП7\\t9\\t*\\t5/\\t3\\t2\\t+\\tП9...\n", + "37 ocaml fun KtoC n = n - 273.15;fun KtoF n = n * 1.8 -...\n", + "38 netrexx /* NetRexx */options replace format comments j...\n", + "39 text import rdstdin, strutils, strfmt while true: ...\n", + "40 objeck  class Temperature { function : Main(args : S...\n", + "41 objc #import  int main(int...\n", + "42 ocaml  let print_temp s t = print_string s; print_...\n", + "43 text : kelvinToCelsius { 273.15 - }: kelvinToFahren...\n", + "44 parigp f(x)=[x,x-273.15,1.8*x-459.67,1.8*x]\n", + "45 perl my %scale = ( Celcius => { factor => 1 ...\n", + "46 perl6 while my $answer = prompt 'Temperature: ' { ...\n", + "47 php error_reporting(E_ALL & ~ ( E_NOTICE | E_WARNI...\n", + "48 text (scl 2) (de convertKelvin (Kelvin) (for X ...\n", + "49 text (convertKelvin 21.0)\n", + "50 pli *process source attributes xref; /* PL/I *****...\n", + "51 python >>> while True:\\tk = float(input('K ? '))\\tpri...\n", + "52 python >>> toK = {'C': (lambda c: c + 273.15), ...\n", + "53 text #lang racket(define (converter temp init final...\n", + "54 rexx /*REXX program converts temperatures for a num...\n", + "55 ruby module TempConvert  FROM_TEMP_SCALE_TO_K = ...\n", + "56 ruby TempConvert.kelvin_to_celsius 100 #=> -173.15T...\n", + "57 text [loop]input \"Kelvin Degrees\";kelvinif kelvin <...\n", + "58 scala object TemperatureConversion extends App {  d...\n", + "59 text $ include \"seed7_05.s7i\"; include \"float.s7i\"...\n", + "60 tcl proc temps {k} { set c [expr {$k - 273.15}]...\n", + "61 tcl puts -nonewline \"Enter a temperature in K: \"fl...\n", + "62 text include c:\\cxpl\\codes;real K, C, F, R;[ChOut(0...\n", + "63 text K:=ask(0,\"Kelvin: \").toFloat();println(\"K %.2f...\n", + "64 zxbasic 10 REM Translation of traditional basic versio...\n", + "\n", + "[65 rows x 2 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tc" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(65, 2)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tc.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(125, 2)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pn = make_data(['http://rosettacode.org/wiki/Perfect_numbers'])\n", + "pn.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [0, 1]\n", + "Index: []" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code_snippets = pd.DataFrame(columns=([0, 1]))\n", + "code_snippets" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0text: KtoC \\ n -- n\\t273.15 n:- ; : KtoF \\ n -- n\\...
1adawith Ada.Float_Text_IO, Ada.Text_IO; use Ada....
2textvoidshow(integer symbol, real temperature){ ...
3autohotkeyMsgBox, % \"Kelvin:`t`t 21.00 K`n\" . \"Ce...
4awk# syntax: AWK -f TEMPERATURE_CONVERSION.AWKBEG...
5awk# usage: gawk -f temperature_conversion.awk i...
6text10 REM TRANSLATION OF AWK VERSION20 INPUT \"KE...
7textdo print \"Kelvin degrees (>=0): \"; input K ...
8textREPEAT INPUT \"Kelvin degrees (>=0): \" KUNTIL...
9text( ( rational2fixedpoint = minus fixedpointn...
10c#include <stdio.h>#include <stdlib.h> double k...
11cpp#include <iostream>#include <iomanip> //-----...
12csharpusing System; namespace TemperatureConversion{...
13clojure(defn to-celsius [k] (- k 273.15))(defn to-fa...
14cobolIDENTIFICATION DIVISION. PROGRAM-...
15lisp(defun to-celsius (k) (- k 273.15))(defun t...
16ddouble kelvinToCelsius(in double k) pure nothr...
17delphiprogram Temperature; {$APPTYPE CONSOLE} uses ...
18erlang% Implemented by Arjun Sunel-module(temp_conv)...
19textinclude std/console.e atom Kwhile 1 do\\tK = p...
20textA1 : KelvinB1 : CelsiusC1 : FahrenheitD1 : Ran...
21text# convert from Kelvinநிரல்பாகம் கெல்வின்_இருந...
22fsharp// Define units of measure[<Measure>] type k[...
23fortranProgram Temperature implicit none  real :: k...
24gopackage main import ( \"fmt\" \"os\" \"str...
25haskellmain = do putStrLn \"Please enter temperature...
26uniconprocedure main(A) k := A[1] | 21.00 writ...
27jNB. Temp conversions are all linear polyno...
28jNB. Format matrix for printing & tag each ...
29javapublic class TemperatureConversion { public...
.........
35texttempConvert[t_] :=Grid[Transpose@{{\"K\", \"C\", \"...
36textП7\\t0\\t,\\t8\\t*\\tП8\\tИП7\\t9\\t*\\t5/\\t3\\t2\\t+\\tП9...
37ocamlfun KtoC n = n - 273.15;fun KtoF n = n * 1.8 -...
38netrexx/* NetRexx */options replace format comments j...
39textimport rdstdin, strutils, strfmt while true: ...
40objeckclass Temperature { function : Main(args : S...
41objc#import <Foundation/Foundation.h> int main(int...
42ocamllet print_temp s t = print_string s; print_...
43text: kelvinToCelsius { 273.15 - }: kelvinToFahren...
44parigpf(x)=[x,x-273.15,1.8*x-459.67,1.8*x]
45perlmy %scale = ( Celcius => { factor => 1 ...
46perl6while my $answer = prompt 'Temperature: ' { ...
47phperror_reporting(E_ALL & ~ ( E_NOTICE | E_WARNI...
48text(scl 2) (de convertKelvin (Kelvin) (for X ...
49text(convertKelvin 21.0)
50pli*process source attributes xref; /* PL/I *****...
51python>>> while True:\\tk = float(input('K ? '))\\tpri...
52python>>> toK = {'C': (lambda c: c + 273.15), ...
53text#lang racket(define (converter temp init final...
54rexx/*REXX program converts temperatures for a num...
55rubymodule TempConvert  FROM_TEMP_SCALE_TO_K = ...
56rubyTempConvert.kelvin_to_celsius 100 #=> -173.15T...
57text[loop]input \"Kelvin Degrees\";kelvinif kelvin <...
58scalaobject TemperatureConversion extends App {  d...
59text$ include \"seed7_05.s7i\"; include \"float.s7i\"...
60tclproc temps {k} { set c [expr {$k - 273.15}]...
61tclputs -nonewline \"Enter a temperature in K: \"fl...
62textinclude c:\\cxpl\\codes;real K, C, F, R;[ChOut(0...
63textK:=ask(0,\"Kelvin: \").toFloat();println(\"K %.2f...
64zxbasic10 REM Translation of traditional basic versio...
\n", + "

65 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 text : KtoC \\ n -- n\\t273.15 n:- ; : KtoF \\ n -- n\\...\n", + "1 ada with Ada.Float_Text_IO, Ada.Text_IO; use Ada....\n", + "2 text voidshow(integer symbol, real temperature){ ...\n", + "3 autohotkey MsgBox, % \"Kelvin:`t`t 21.00 K`n\" . \"Ce...\n", + "4 awk # syntax: AWK -f TEMPERATURE_CONVERSION.AWKBEG...\n", + "5 awk # usage: gawk -f temperature_conversion.awk i...\n", + "6 text  10 REM TRANSLATION OF AWK VERSION20 INPUT \"KE...\n", + "7 text  do print \"Kelvin degrees (>=0): \"; input K ...\n", + "8 text  REPEAT INPUT \"Kelvin degrees (>=0): \" KUNTIL...\n", + "9 text ( ( rational2fixedpoint = minus fixedpointn...\n", + "10 c #include #include  double k...\n", + "11 cpp  #include #include  //-----...\n", + "12 csharp using System; namespace TemperatureConversion{...\n", + "13 clojure (defn to-celsius [k] (- k 273.15))(defn to-fa...\n", + "14 cobol IDENTIFICATION DIVISION. PROGRAM-...\n", + "15 lisp  (defun to-celsius (k) (- k 273.15))(defun t...\n", + "16 d double kelvinToCelsius(in double k) pure nothr...\n", + "17 delphi  program Temperature; {$APPTYPE CONSOLE} uses ...\n", + "18 erlang % Implemented by Arjun Sunel-module(temp_conv)...\n", + "19 text  include std/console.e atom Kwhile 1 do\\tK = p...\n", + "20 text A1 : KelvinB1 : CelsiusC1 : FahrenheitD1 : Ran...\n", + "21 text  # convert from Kelvinநிரல்பாகம் கெல்வின்_இருந...\n", + "22 fsharp  // Define units of measure[] type k[...\n", + "23 fortran Program Temperature implicit none  real :: k...\n", + "24 go package main import ( \"fmt\" \"os\" \"str...\n", + "25 haskell  main = do putStrLn \"Please enter temperature...\n", + "26 unicon procedure main(A) k := A[1] | 21.00 writ...\n", + "27 j NB. Temp conversions are all linear polyno...\n", + "28 j NB. Format matrix for printing & tag each ...\n", + "29 java public class TemperatureConversion { public...\n", + ".. ... ...\n", + "35 text tempConvert[t_] :=Grid[Transpose@{{\"K\", \"C\", \"...\n", + "36 text П7\\t0\\t,\\t8\\t*\\tП8\\tИП7\\t9\\t*\\t5/\\t3\\t2\\t+\\tП9...\n", + "37 ocaml fun KtoC n = n - 273.15;fun KtoF n = n * 1.8 -...\n", + "38 netrexx /* NetRexx */options replace format comments j...\n", + "39 text import rdstdin, strutils, strfmt while true: ...\n", + "40 objeck  class Temperature { function : Main(args : S...\n", + "41 objc #import  int main(int...\n", + "42 ocaml  let print_temp s t = print_string s; print_...\n", + "43 text : kelvinToCelsius { 273.15 - }: kelvinToFahren...\n", + "44 parigp f(x)=[x,x-273.15,1.8*x-459.67,1.8*x]\n", + "45 perl my %scale = ( Celcius => { factor => 1 ...\n", + "46 perl6 while my $answer = prompt 'Temperature: ' { ...\n", + "47 php error_reporting(E_ALL & ~ ( E_NOTICE | E_WARNI...\n", + "48 text (scl 2) (de convertKelvin (Kelvin) (for X ...\n", + "49 text (convertKelvin 21.0)\n", + "50 pli *process source attributes xref; /* PL/I *****...\n", + "51 python >>> while True:\\tk = float(input('K ? '))\\tpri...\n", + "52 python >>> toK = {'C': (lambda c: c + 273.15), ...\n", + "53 text #lang racket(define (converter temp init final...\n", + "54 rexx /*REXX program converts temperatures for a num...\n", + "55 ruby module TempConvert  FROM_TEMP_SCALE_TO_K = ...\n", + "56 ruby TempConvert.kelvin_to_celsius 100 #=> -173.15T...\n", + "57 text [loop]input \"Kelvin Degrees\";kelvinif kelvin <...\n", + "58 scala object TemperatureConversion extends App {  d...\n", + "59 text $ include \"seed7_05.s7i\"; include \"float.s7i\"...\n", + "60 tcl proc temps {k} { set c [expr {$k - 273.15}]...\n", + "61 tcl puts -nonewline \"Enter a temperature in K: \"fl...\n", + "62 text include c:\\cxpl\\codes;real K, C, F, R;[ChOut(0...\n", + "63 text K:=ask(0,\"Kelvin: \").toFloat();println(\"K %.2f...\n", + "64 zxbasic 10 REM Translation of traditional basic versio...\n", + "\n", + "[65 rows x 2 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code_snippets.append(tc)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(125, 2)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code_snippets.append(pn).shape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "url = 'http://rosettacode.org/wiki/Hailstone_sequence'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0abapCLASS lcl_hailstone DEFINITION. PUBLIC SECTI...
1lisp(defun hailstone (len) (loop for x = len ...
2adawith Ada.Text_IO; use Ada.Text_IO;procedure ha...
3adapackage Hailstones is type Integer_Sequence ...
4adapackage body Hailstones is function Create_S...
5adawith Ada.Text_IO;with Hailstones; procedure Ma...
6textvoidprint_hailstone(integer h){ list l;  ...
7algol68MODE LINT = # LONG ... # INT; PROC hailstone =...
8textseq←hailstone n;next⍝ Returns the hailstone se...
9text5↑hailstone 2727 82 41 124 62 ¯5↑hailstone 27...
10autohotkey; Submitted by MasterFocus --- http://tiny.cc/...
11autoit$Hail = Hailstone(27)ConsoleWrite(\"Sequence-L...
12awk#!/usr/bin/awk -ffunction hailstone(v, verbos...
13text10 HOME 100 N = 27110 GOSUB 400\"HAILSTONE120 D...
14textseqlen% = FNhailstone(27, TRUE) PRI...
15lbprint \"Part 1: Create a routine to generate th...
16textfunction Hailstone(sys *n)'=================...
17purebasicNewList Hailstones.i() ; Make a linked list to...
18textprint \"Part 1: Create a routine to generate th...
19dos@echo offsetlocal enabledelayedexpansionif \"%1...
20dos>hailstone.cmd 2020 10 5 16 8 4 2 1
21text&>:.:1-| >3*^ @ |%2: < V>2/>+
22text( ( hailstone = L len .  !arg:?L ...
23text>,[ [ ----------[ >>>[>>>...
24text27111
25texthailstone = { num | sequence = [num] while {...
26textblsq ) 27{^^^^2.%{3.*1.+}\\/{2./}\\/ie}{1!=}w!b...
27c#include <stdio.h>#include <stdlib.h> int hail...
28c#include <stdio.h> #define N 10000000#define C...
29csharpusing System;using System.Collections.Generic;...
.........
139pythondef hailstone(n): seq = [n] while n>1: ...
140text### PART 1:makeHailstone <- function(n){ hseq...
141text#lang racket (define hailstone (let ([t (mak...
142rexx/*REXX pgm tests a number and a range for hail...
143rexx/*REXX pgm tests a number and a range for hail...
144rubydef hailstone n seq = [n] until n == 1 n ...
145rubymodule Hailstone ListNode = Struct.new(:value...
146textuse std::vec::Vec; fn hailstone(mut n : int) -...
147sas* Create a routine to generate the hailstone ...
148scalaobject HailstoneSequence extends App { def ha...
149scheme(define (collatz n)(if (= n 1) '(1)(cons n (co...
150textfunction x=hailstone(n) // iterative defini...
151text$ include \"seed7_05.s7i\"; const func array int...
152rubyfunc hailstone(n) { var a = [n]; while (...
153smalltalkObject subclass: Sequences [ Sequences class ...
154smalltalk|r|r := Sequences hailstone: 27. \"hailstone '...
155textfunc hailstone(var n:Int) -> [Int] {  var ...
156tclproc hailstone n { while 1 {\\tlappend seq $...
157textprompt NN→M: 0→X: 1→LWhile L=1X+1→XDisp MIf M=...
158textprompt N0→A:0→Bfor(I,1,N)I→M: 0→X: 1→LWhile L=...
159text@(do (defun hailstone (n) (cons n ...
160bash#!/bin/bash# seq is the array genereated by ha...
161bash# Outputs a hailstone sequence from $1, with o...
162text# Outputs a hailstone sequence from !:1, with ...
163text#import std#import nat hail = @iNC ~&h~=1->x ^...
164vbOption ExplicitDim flag As Boolean ' true to p...
165vbnetModule HailstoneSequence Sub Main() ...
166textinclude c:\\cxpl\\codes; \\intrinsic 'code' decl...
167textfcn collatz(n,z=L()){ z.append(n); if(n==1) re...
168text[2..0d100_000].pump(Void, // loop n from 2 to...
\n", + "

169 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 abap  CLASS lcl_hailstone DEFINITION. PUBLIC SECTI...\n", + "1 lisp (defun hailstone (len) (loop for x = len ...\n", + "2 ada with Ada.Text_IO; use Ada.Text_IO;procedure ha...\n", + "3 ada package Hailstones is type Integer_Sequence ...\n", + "4 ada package body Hailstones is function Create_S...\n", + "5 ada with Ada.Text_IO;with Hailstones; procedure Ma...\n", + "6 text voidprint_hailstone(integer h){ list l;  ...\n", + "7 algol68 MODE LINT = # LONG ... # INT; PROC hailstone =...\n", + "8 text seq←hailstone n;next⍝ Returns the hailstone se...\n", + "9 text 5↑hailstone 2727 82 41 124 62 ¯5↑hailstone 27...\n", + "10 autohotkey ; Submitted by MasterFocus --- http://tiny.cc/...\n", + "11 autoit  $Hail = Hailstone(27)ConsoleWrite(\"Sequence-L...\n", + "12 awk  #!/usr/bin/awk -ffunction hailstone(v, verbos...\n", + "13 text 10 HOME 100 N = 27110 GOSUB 400\"HAILSTONE120 D...\n", + "14 text seqlen% = FNhailstone(27, TRUE) PRI...\n", + "15 lb print \"Part 1: Create a routine to generate th...\n", + "16 text   function Hailstone(sys *n)'=================...\n", + "17 purebasic NewList Hailstones.i() ; Make a linked list to...\n", + "18 text print \"Part 1: Create a routine to generate th...\n", + "19 dos @echo offsetlocal enabledelayedexpansionif \"%1...\n", + "20 dos >hailstone.cmd 2020 10 5 16 8 4 2 1\n", + "21 text &>:.:1-| >3*^ @ |%2: < V>2/>+ \n", + "22 text ( ( hailstone = L len .  !arg:?L ...\n", + "23 text >,[ [ ----------[ >>>[>>>...\n", + "24 text 27111\n", + "25 text hailstone = { num | sequence = [num] while {...\n", + "26 text  blsq ) 27{^^^^2.%{3.*1.+}\\/{2./}\\/ie}{1!=}w!b...\n", + "27 c #include #include  int hail...\n", + "28 c #include  #define N 10000000#define C...\n", + "29 csharp using System;using System.Collections.Generic;...\n", + ".. ... ...\n", + "139 python def hailstone(n): seq = [n] while n>1: ...\n", + "140 text ### PART 1:makeHailstone <- function(n){ hseq...\n", + "141 text  #lang racket (define hailstone (let ([t (mak...\n", + "142 rexx /*REXX pgm tests a number and a range for hail...\n", + "143 rexx /*REXX pgm tests a number and a range for hail...\n", + "144 ruby def hailstone n seq = [n] until n == 1 n ...\n", + "145 ruby module Hailstone ListNode = Struct.new(:value...\n", + "146 text use std::vec::Vec; fn hailstone(mut n : int) -...\n", + "147 sas  * Create a routine to generate the hailstone ...\n", + "148 scala object HailstoneSequence extends App { def ha...\n", + "149 scheme (define (collatz n)(if (= n 1) '(1)(cons n (co...\n", + "150 text function x=hailstone(n) // iterative defini...\n", + "151 text $ include \"seed7_05.s7i\"; const func array int...\n", + "152 ruby func hailstone(n) { var a = [n]; while (...\n", + "153 smalltalk Object subclass: Sequences [ Sequences class ...\n", + "154 smalltalk |r|r := Sequences hailstone: 27. \"hailstone '...\n", + "155 text  func hailstone(var n:Int) -> [Int] {  var ...\n", + "156 tcl proc hailstone n { while 1 {\\tlappend seq $...\n", + "157 text prompt NN→M: 0→X: 1→LWhile L=1X+1→XDisp MIf M=...\n", + "158 text prompt N0→A:0→Bfor(I,1,N)I→M: 0→X: 1→LWhile L=...\n", + "159 text @(do (defun hailstone (n) (cons n ...\n", + "160 bash #!/bin/bash# seq is the array genereated by ha...\n", + "161 bash # Outputs a hailstone sequence from $1, with o...\n", + "162 text # Outputs a hailstone sequence from !:1, with ...\n", + "163 text #import std#import nat hail = @iNC ~&h~=1->x ^...\n", + "164 vb Option ExplicitDim flag As Boolean ' true to p...\n", + "165 vbnet Module HailstoneSequence Sub Main() ...\n", + "166 text include c:\\cxpl\\codes; \\intrinsic 'code' decl...\n", + "167 text fcn collatz(n,z=L()){ z.append(n); if(n==1) re...\n", + "168 text [2..0d100_000].pump(Void, // loop n from 2 to...\n", + "\n", + "[169 rows x 2 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code = scrape_data(url)\n", + "codes = pull_code_from_soup(code)\n", + "pd.DataFrame(codes)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "hs = make_data(['http://rosettacode.org/wiki/Hailstone_sequence'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tc = make_data(['http://rosettacode.org/wiki/Temperature_conversion'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((169, 2), (65, 2))" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hs.shape, tc.shape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "hs = hs.append(tc)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pn = make_data(['http://rosettacode.org/wiki/Perfect_numbers'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(359, 2)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hs.append(pn).shape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pn = make_data(['http://rosettacode.org/wiki/Perfect_numbers', 'http://rosettacode.org/wiki/Temperature_conversion', 'http://rosettacode.org/wiki/Hailstone_sequence'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(359, 2)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pn.shape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'task_list' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtask_list\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"_\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mappend\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtask_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'task_list' is not defined" + ] + } + ], + "source": [ + "for item in task_list:\n", + " item.replace(\" \", \"_\")\n", + " append\n", + "print(task_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def scrape_tasks(url):\n", + " req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})\n", + " content = urllib.request.urlopen(req).read()\n", + " soup = BeautifulSoup(content)\n", + " return soup.find_all( \"a\")#, class_=\"li\")\n", + "links = scrape_tasks('http://rosettacode.org/wiki/Category:Programming_Tasks')" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['/wiki/Rosetta_Code:Add_a_Task',\n", + " '/wiki/Category_talk:Programming_Tasks',\n", + " '/wiki/100_doors',\n", + " '/wiki/24_game',\n", + " '/wiki/24_game/Solve',\n", + " '/wiki/9_billion_names_of_God_the_integer',\n", + " '/wiki/99_Bottles_of_Beer',\n", + " '/wiki/A%2BB',\n", + " '/wiki/ABC_Problem',\n", + " '/wiki/Abstract_type',\n", + " '/wiki/Abundant,_deficient_and_perfect_number_classifications',\n", + " '/wiki/Accumulator_factory',\n", + " '/wiki/Ackermann_function',\n", + " '/wiki/Active_Directory/Connect',\n", + " '/wiki/Active_Directory/Search_for_a_user',\n", + " '/wiki/Active_object',\n", + " '/wiki/Add_a_variable_to_a_class_instance_at_runtime',\n", + " '/wiki/Address_of_a_variable',\n", + " '/wiki/AKS_test_for_primes',\n", + " '/wiki/Align_columns',\n", + " '/wiki/Aliquot_sequence_classifications',\n", + " '/wiki/Almost_prime',\n", + " '/wiki/Amb',\n", + " '/wiki/Amicable_pairs',\n", + " '/wiki/Anagrams',\n", + " '/wiki/Anagrams/Deranged_anagrams',\n", + " '/wiki/Animate_a_pendulum',\n", + " '/wiki/Animation',\n", + " '/wiki/Anonymous_recursion',\n", + " '/wiki/Append_a_record_to_the_end_of_a_text_file',\n", + " '/wiki/Apply_a_callback_to_an_array',\n", + " '/wiki/Arbitrary-precision_integers_(included)',\n", + " '/wiki/Arena_storage_pool',\n", + " '/wiki/Arithmetic_evaluation',\n", + " '/wiki/Arithmetic-geometric_mean',\n", + " '/wiki/Arithmetic-geometric_mean/Calculate_Pi',\n", + " '/wiki/Arithmetic/Complex',\n", + " '/wiki/Arithmetic/Integer',\n", + " '/wiki/Arithmetic/Rational',\n", + " '/wiki/Array_concatenation',\n", + " '/wiki/Arrays',\n", + " '/wiki/Assertions',\n", + " '/wiki/Associative_array/Creation',\n", + " '/wiki/Associative_array/Iteration',\n", + " '/wiki/Atomic_updates',\n", + " '/wiki/Average_loop_length',\n", + " '/wiki/Averages/Arithmetic_mean',\n", + " '/wiki/Averages/Mean_angle',\n", + " '/wiki/Averages/Mean_time_of_day',\n", + " '/wiki/Averages/Median',\n", + " '/wiki/Averages/Mode',\n", + " '/wiki/Averages/Pythagorean_means',\n", + " '/wiki/Averages/Root_mean_square',\n", + " '/wiki/Averages/Simple_moving_average',\n", + " '/wiki/Balanced_brackets',\n", + " '/wiki/Balanced_ternary',\n", + " '/wiki/Benford%27s_law',\n", + " '/wiki/Bernoulli_numbers',\n", + " '/wiki/Best_shuffle',\n", + " '/wiki/Binary_digits',\n", + " '/wiki/Binary_search',\n", + " '/wiki/Binary_strings',\n", + " '/wiki/Bitcoin/address_validation',\n", + " '/wiki/Bitcoin/public_point_to_address',\n", + " '/wiki/Bitmap',\n", + " '/wiki/Bitmap/Bresenham%27s_line_algorithm',\n", + " '/wiki/Bitmap/B%C3%A9zier_curves/Cubic',\n", + " '/wiki/Bitmap/B%C3%A9zier_curves/Quadratic',\n", + " '/wiki/Bitmap/Flood_fill',\n", + " '/wiki/Bitmap/Histogram',\n", + " '/wiki/Bitmap/Midpoint_circle_algorithm',\n", + " '/wiki/Bitmap/PPM_conversion_through_a_pipe',\n", + " '/wiki/Bitmap/Read_a_PPM_file',\n", + " '/wiki/Bitmap/Read_an_image_through_a_pipe',\n", + " '/wiki/Bitmap/Write_a_PPM_file',\n", + " '/wiki/Bitwise_IO',\n", + " '/wiki/Bitwise_operations',\n", + " '/wiki/Boolean_values',\n", + " '/wiki/Box_the_compass',\n", + " '/wiki/Break_OO_privacy',\n", + " '/wiki/Brownian_tree',\n", + " '/wiki/Bulls_and_cows',\n", + " '/wiki/Bulls_and_cows/Player',\n", + " '/wiki/Caesar_cipher',\n", + " '/wiki/Calendar',\n", + " '/wiki/Calendar_-_for_%22REAL%22_programmers',\n", + " '/wiki/Call_a_foreign-language_function',\n", + " '/wiki/Call_a_function',\n", + " '/wiki/Call_a_function_in_a_shared_library',\n", + " '/wiki/Call_an_object_method',\n", + " '/wiki/Canny_edge_detector',\n", + " '/wiki/Carmichael_3_strong_pseudoprimes',\n", + " '/wiki/Case-sensitivity_of_identifiers',\n", + " '/wiki/Casting_out_nines',\n", + " '/wiki/Catalan_numbers',\n", + " '/wiki/Catalan_numbers/Pascal%27s_triangle',\n", + " '/wiki/Catamorphism',\n", + " '/wiki/Catmull%E2%80%93Clark_subdivision_surface',\n", + " '/wiki/Character_codes',\n", + " '/wiki/Chat_server',\n", + " '/wiki/Check_Machin-like_formulas',\n", + " '/wiki/Check_that_file_exists',\n", + " '/wiki/Checkpoint_synchronization',\n", + " '/wiki/Chinese_remainder_theorem',\n", + " '/wiki/Cholesky_decomposition',\n", + " '/wiki/Circles_of_given_radius_through_two_points',\n", + " '/wiki/Classes',\n", + " '/wiki/Closest-pair_problem',\n", + " '/wiki/Closures/Value_capture',\n", + " '/wiki/Collections',\n", + " '/wiki/Color_of_a_screen_pixel',\n", + " '/wiki/Color_quantization',\n", + " '/wiki/Colour_bars/Display',\n", + " '/wiki/Colour_pinstripe/Display',\n", + " '/wiki/Colour_pinstripe/Printer',\n", + " '/wiki/Combinations',\n", + " '/wiki/Combinations_and_permutations',\n", + " '/wiki/Combinations_with_repetitions',\n", + " '/wiki/Comma_quibbling',\n", + " '/wiki/Command-line_arguments',\n", + " '/wiki/Comments',\n", + " '/wiki/Compare_sorting_algorithms%27_performance',\n", + " '/wiki/Compile-time_calculation',\n", + " '/wiki/Compound_data_type',\n", + " '/wiki/Concurrent_computing',\n", + " '/wiki/Conditional_structures',\n", + " '/wiki/Conjugate_transpose',\n", + " '/wiki/Constrained_genericity',\n", + " '/wiki/Constrained_random_points_on_a_circle',\n", + " '/wiki/Continued_fraction',\n", + " '/wiki/Continued_fraction/Arithmetic/Construct_from_rational_number',\n", + " '/wiki/Continued_fraction/Arithmetic/G(matrix_NG,_Contined_Fraction_N)',\n", + " '/wiki/Continued_fraction/Arithmetic/G(matrix_NG,_Contined_Fraction_N1,_Contined_Fraction_N2)',\n", + " '/wiki/Convert_decimal_number_to_rational',\n", + " '/wiki/Conway%27s_Game_of_Life',\n", + " '/wiki/Copy_a_string',\n", + " '/wiki/Count_in_factors',\n", + " '/wiki/Count_in_octal',\n", + " '/wiki/Count_occurrences_of_a_substring',\n", + " '/wiki/Count_the_coins',\n", + " '/wiki/CRC-32',\n", + " '/wiki/Create_a_file',\n", + " '/wiki/Create_a_file_on_magnetic_tape',\n", + " '/wiki/Create_a_two-dimensional_array_at_runtime',\n", + " '/wiki/Create_an_HTML_table',\n", + " '/wiki/Create_an_object_at_a_given_address',\n", + " '/wiki/CSV_data_manipulation',\n", + " '/wiki/CSV_to_HTML_translation',\n", + " '/wiki/Currying',\n", + " '/wiki/Cut_a_rectangle',\n", + " '/wiki/Date_format',\n", + " '/wiki/Date_manipulation',\n", + " '/wiki/Day_of_the_week',\n", + " '/wiki/Deal_cards_for_FreeCell',\n", + " '/wiki/Death_Star',\n", + " '/wiki/Deconvolution/1D',\n", + " '/wiki/Deconvolution/2D%2B',\n", + " '/wiki/Deepcopy',\n", + " '/wiki/Define_a_primitive_data_type',\n", + " '/wiki/Delegates',\n", + " '/wiki/Delete_a_file',\n", + " '/wiki/Detect_division_by_zero',\n", + " '/wiki/Determine_if_a_string_is_numeric',\n", + " '/wiki/Determine_if_only_one_instance_is_running',\n", + " '/wiki/Digital_root',\n", + " '/wiki/Digital_root/Multiplicative_digital_root',\n", + " '/wiki/Dinesman%27s_multiple-dwelling_problem',\n", + " '/wiki/Dining_philosophers',\n", + " '/wiki/Discordian_date',\n", + " '/wiki/Distributed_programming',\n", + " '/wiki/DNS_query',\n", + " '/wiki/Documentation',\n", + " '/wiki/Dot_product',\n", + " '/wiki/Doubly-linked_list/Definition',\n", + " '/wiki/Doubly-linked_list/Element_definition',\n", + " '/wiki/Doubly-linked_list/Element_insertion',\n", + " '/wiki/Doubly-linked_list/Traversal',\n", + " '/wiki/Dragon_curve',\n", + " '/wiki/Draw_a_clock',\n", + " '/wiki/Draw_a_cuboid',\n", + " '/wiki/Draw_a_sphere',\n", + " '/wiki/Dutch_national_flag_problem',\n", + " '/wiki/Dynamic_variable_names',\n", + " '/wiki/Echo_server',\n", + " '/wiki/Element-wise_operations',\n", + " '/wiki/Empty_directory',\n", + " '/wiki/Empty_program',\n", + " '/wiki/Empty_string',\n", + " '/wiki/Enforced_immutability',\n", + " '/wiki/Entropy',\n", + " '/wiki/Enumerations',\n", + " '/wiki/Environment_variables',\n", + " '/wiki/Equilibrium_index',\n", + " '/wiki/Ethiopian_multiplication',\n", + " '/wiki/Euler_method',\n", + " '/wiki/Euler%27s_sum_of_powers_conjecture',\n", + " '/wiki/Evaluate_binomial_coefficients',\n", + " '/wiki/Even_or_odd',\n", + " '/wiki/Events',\n", + " '/wiki/Evolutionary_algorithm',\n", + " '/wiki/Exceptions',\n", + " '/wiki/Exceptions/Catch_an_exception_thrown_in_a_nested_call',\n", + " '/wiki/Executable_library',\n", + " '/wiki/Execute_a_Markov_algorithm',\n", + " '/wiki/Execute_a_system_command',\n", + " '/wiki/Execute_Brain****',\n", + " '/wiki/Execute_HQ9%2B',\n", + " '/wiki/Execute_SNUSP',\n", + " '/wiki/Exponentiation_operator',\n", + " '/wiki/Extend_your_language',\n", + " '/wiki/Extensible_prime_generator',\n", + " '/wiki/Extreme_floating_point_values',\n", + " '/wiki/Factorial',\n", + " '/wiki/Factors_of_a_Mersenne_number',\n", + " '/wiki/Factors_of_an_integer',\n", + " '/wiki/Fast_Fourier_transform',\n", + " '/wiki/Fibonacci_n-step_number_sequences',\n", + " '/wiki/Fibonacci_sequence',\n", + " '/wiki/Fibonacci_word',\n", + " '/wiki/Fibonacci_word/fractal',\n", + " '/wiki/File_input/output',\n", + " '/wiki/File_modification_time',\n", + " '/wiki/File_size',\n", + " '/wiki/Filter',\n", + " '/wiki/Find_common_directory_path',\n", + " '/wiki/Find_largest_left_truncatable_prime_in_a_given_base',\n", + " '/wiki/Find_limit_of_recursion',\n", + " '/wiki/Find_the_last_Sunday_of_each_month',\n", + " '/wiki/Find_the_missing_permutation',\n", + " '/wiki/First_class_environments',\n", + " '/wiki/First-class_functions',\n", + " '/wiki/First-class_functions/Use_numbers_analogously',\n", + " '/wiki/Five_weekends',\n", + " '/wiki/FizzBuzz',\n", + " '/wiki/Flatten_a_list',\n", + " '/wiki/Flipping_bits_game',\n", + " '/wiki/Flow-control_structures',\n", + " '/wiki/Floyd%27s_triangle',\n", + " '/wiki/Forest_fire',\n", + " '/wiki/Fork',\n", + " '/wiki/Formal_power_series',\n", + " '/wiki/Formatted_numeric_output',\n", + " '/wiki/Forward_difference',\n", + " '/wiki/Four_bit_adder',\n", + " '/wiki/Fractal_tree',\n", + " '/wiki/Fractran',\n", + " '/wiki/Function_composition',\n", + " '/wiki/Function_definition',\n", + " '/wiki/Function_frequency',\n", + " '/wiki/Function_prototype',\n", + " '/wiki/Galton_box_animation',\n", + " '/wiki/Gamma_function',\n", + " '/wiki/Gaussian_elimination',\n", + " '/wiki/Generate_Chess960_starting_position',\n", + " '/wiki/Generate_lower_case_ASCII_alphabet',\n", + " '/wiki/Generator/Exponential',\n", + " '/wiki/Generic_swap',\n", + " '/wiki/Globally_replace_text_in_several_files',\n", + " '/wiki/Go_Fish',\n", + " '/wiki/Gray_code',\n", + " '/wiki/Grayscale_image',\n", + " '/wiki/Greatest_common_divisor',\n", + " '/wiki/Greatest_element_of_a_list',\n", + " '/wiki/Greatest_subsequential_sum',\n", + " '/wiki/Greyscale_bars/Display',\n", + " '/wiki/Guess_the_number',\n", + " '/wiki/Guess_the_number/With_feedback',\n", + " '/wiki/Guess_the_number/With_feedback_(player)',\n", + " '/wiki/GUI_component_interaction',\n", + " '/wiki/GUI_enabling/disabling_of_controls',\n", + " '/wiki/GUI/Maximum_window_dimensions',\n", + " '/wiki/Hailstone_sequence',\n", + " '/wiki/Hamming_numbers',\n", + " '/wiki/Handle_a_signal',\n", + " '/wiki/Happy_numbers',\n", + " '/wiki/Harshad_or_Niven_series',\n", + " '/wiki/Hash_from_two_arrays',\n", + " '/wiki/Hash_join',\n", + " '/wiki/Haversine_formula',\n", + " '/wiki/Hello_world/Graphical',\n", + " '/wiki/Hello_world/Line_printer',\n", + " '/wiki/Hello_world/Newbie',\n", + " '/wiki/Hello_world/Newline_omission',\n", + " '/wiki/Hello_world/Standard_error',\n", + " '/wiki/Hello_world/Text',\n", + " '/wiki/Hello_world/Web_server',\n", + " '/wiki/Here_document',\n", + " '/wiki/Heronian_triangles',\n", + " '/wiki/Hickerson_series_of_almost_integers',\n", + " '/wiki/Higher-order_functions',\n", + " '/wiki/History_variables',\n", + " '/wiki/Hofstadter_Figure-Figure_sequences',\n", + " '/wiki/Hofstadter_Q_sequence',\n", + " '/wiki/Hofstadter-Conway_$10,000_sequence',\n", + " '/wiki/Holidays_related_to_Easter',\n", + " '/wiki/Honeycombs',\n", + " '/wiki/Horizontal_sundial_calculations',\n", + " '/wiki/Horner%27s_rule_for_polynomial_evaluation',\n", + " '/wiki/Host_introspection',\n", + " '/wiki/Hostname',\n", + " '/wiki/Hough_transform',\n", + " '/wiki/HTTP',\n", + " '/wiki/HTTPS',\n", + " '/wiki/HTTPS/Authenticated',\n", + " '/wiki/HTTPS/Client-authenticated',\n", + " '/wiki/Huffman_coding',\n", + " '/wiki/I_before_E_except_after_C',\n", + " '/wiki/IBAN',\n", + " '/wiki/Identity_matrix',\n", + " '/wiki/Image_convolution',\n", + " '/wiki/Image_noise',\n", + " '/wiki/Include_a_file',\n", + " '/wiki/Increment_a_numerical_string',\n", + " '/wiki/Infinity',\n", + " '/wiki/Inheritance/Multiple',\n", + " '/wiki/Inheritance/Single',\n", + " '/wiki/Input_loop',\n", + " '/wiki/Integer_comparison',\n", + " '/wiki/Integer_overflow',\n", + " '/wiki/Integer_sequence',\n", + " '/wiki/Interactive_programming',\n", + " '/wiki/Introspection',\n", + " '/wiki/Inverted_index',\n", + " '/wiki/Inverted_syntax',\n", + " '/wiki/Iterated_digits_squaring',\n", + " '/wiki/Jensen%27s_Device',\n", + " '/wiki/JortSort',\n", + " '/wiki/Josephus_problem',\n", + " '/wiki/Joystick_position',\n", + " '/wiki/JSON',\n", + " '/wiki/Jump_anywhere',\n", + " '/wiki/K-d_tree',\n", + " '/wiki/K-means%2B%2B_clustering',\n", + " '/wiki/Kaprekar_numbers',\n", + " '/wiki/Keyboard_input/Flush_the_keyboard_buffer',\n", + " '/wiki/Keyboard_input/Keypress_check',\n", + " '/wiki/Keyboard_input/Obtain_a_Y_or_N_response',\n", + " '/wiki/Keyboard_macros',\n", + " '/wiki/Knapsack_problem/0-1',\n", + " '/wiki/Knapsack_problem/Bounded',\n", + " '/wiki/Knapsack_problem/Continuous',\n", + " '/wiki/Knapsack_problem/Unbounded',\n", + " '/wiki/Knight%27s_tour',\n", + " '/wiki/Knuth_shuffle',\n", + " '/wiki/Knuth%27s_algorithm_S',\n", + " '/wiki/Langton%27s_ant',\n", + " '/wiki/Largest_int_from_concatenated_ints',\n", + " '/wiki/Last_Friday_of_each_month',\n", + " '/wiki/Last_letter-first_letter',\n", + " '/wiki/Leap_year',\n", + " '/wiki/Least_common_multiple',\n", + " '/wiki/Left_factorials',\n", + " '/wiki/Letter_frequency',\n", + " '/wiki/Levenshtein_distance',\n", + " '/wiki/Linear_congruential_generator',\n", + " '/wiki/List_comprehensions',\n", + " '/wiki/Literals/Floating_point',\n", + " '/wiki/Literals/Integer',\n", + " '/wiki/Literals/String',\n", + " '/wiki/Logical_operations',\n", + " '/wiki/Long_multiplication',\n", + " '/wiki/Longest_common_subsequence',\n", + " '/wiki/Longest_increasing_subsequence',\n", + " '/wiki/Longest_string_challenge',\n", + " '/wiki/Look-and-say_sequence',\n", + " '/wiki/Loop_over_multiple_arrays_simultaneously',\n", + " '/wiki/Loops/Break',\n", + " '/wiki/Loops/Continue',\n", + " '/wiki/Loops/Do-while',\n", + " '/wiki/Loops/Downward_for',\n", + " '/wiki/Loops/For',\n", + " '/wiki/Loops/For_with_a_specified_step',\n", + " '/wiki/Loops/Foreach',\n", + " '/wiki/Loops/Infinite',\n", + " '/wiki/Loops/N_plus_one_half',\n", + " '/wiki/Loops/Nested',\n", + " '/wiki/Loops/While',\n", + " '/wiki/LU_decomposition',\n", + " '/wiki/Lucas-Lehmer_test',\n", + " '/wiki/Ludic_numbers',\n", + " '/wiki/Luhn_test_of_credit_card_numbers',\n", + " '/wiki/LZW_compression',\n", + " '/wiki/Machine_code',\n", + " '/wiki/Mad_Libs',\n", + " '/wiki/Magic_squares_of_odd_order',\n", + " '/wiki/Main_step_of_GOST_28147-89',\n", + " '/wiki/Make_directory_path',\n", + " '/wiki/Man_or_boy_test',\n", + " '/wiki/Mandelbrot_set',\n", + " '/wiki/Map_range',\n", + " '/wiki/Matrix_arithmetic',\n", + " '/wiki/Matrix_multiplication',\n", + " '/wiki/Matrix_transposition',\n", + " '/wiki/Matrix-exponentiation_operator',\n", + " '/wiki/Maximum_triangle_path_sum',\n", + " '/wiki/Maze_generation',\n", + " '/wiki/Maze_solving',\n", + " '/wiki/MD4',\n", + " '/wiki/MD5',\n", + " '/wiki/MD5/Implementation',\n", + " '/wiki/Median_filter',\n", + " '/wiki/Memory_allocation',\n", + " '/wiki/Memory_layout_of_a_data_structure',\n", + " '/wiki/Menu',\n", + " '/wiki/Metaprogramming',\n", + " '/wiki/Metered_concurrency',\n", + " '/wiki/Metronome',\n", + " '/wiki/Middle_three_digits',\n", + " '/wiki/Miller-Rabin_primality_test',\n", + " '/wiki/Minesweeper_game',\n", + " '/wiki/Modular_exponentiation',\n", + " '/wiki/Modular_inverse',\n", + " '/wiki/Monte_Carlo_methods',\n", + " '/wiki/Monty_Hall_problem',\n", + " '/wiki/Morse_code',\n", + " '/wiki/Mouse_position',\n", + " '/wiki/Move-to-front_algorithm',\n", + " '/wiki/Multifactorial',\n", + " '/wiki/Multiple_distinct_objects',\n", + " '/wiki/Multiple_regression',\n", + " '/wiki/Multiplication_tables',\n", + " '/wiki/Multiplicative_order',\n", + " '/wiki/Multisplit',\n", + " '/wiki/Munching_squares',\n", + " '/wiki/Mutual_recursion',\n", + " '/wiki/N%27th',\n", + " '/wiki/N-queens_problem',\n", + " '/wiki/Named_parameters',\n", + " '/wiki/Narcissist',\n", + " '/wiki/Narcissistic_decimal_number',\n", + " '/wiki/Natural_sorting',\n", + " '/wiki/Nautical_bell',\n", + " '/wiki/Non-continuous_subsequences',\n", + " '/wiki/Non-decimal_radices/Convert',\n", + " '/wiki/Non-decimal_radices/Input',\n", + " '/wiki/Non-decimal_radices/Output',\n", + " '/wiki/Nth_root',\n", + " '/wiki/Null_object',\n", + " '/wiki/Number_names',\n", + " '/wiki/Number_reversal_game',\n", + " '/wiki/Numeric_error_propagation',\n", + " '/wiki/Numerical_integration',\n", + " '/wiki/Numerical_integration/Gauss-Legendre_Quadrature',\n", + " '/wiki/Object_serialization',\n", + " '/wiki/Odd_word_problem',\n", + " '/wiki/Old_lady_swallowed_a_fly',\n", + " '/wiki/OLE_Automation',\n", + " '/wiki/One_of_n_lines_in_a_file',\n", + " '/wiki/One-dimensional_cellular_automata',\n", + " '/wiki/OpenGL',\n", + " '/wiki/Operator_precedence',\n", + " '/wiki/Optional_parameters',\n", + " '/wiki/Order_disjoint_list_items',\n", + " '/wiki/Order_two_numerical_lists',\n", + " '/wiki/Ordered_Partitions',\n", + " '/wiki/Ordered_words',\n", + " '/wiki/Palindrome_detection',\n", + " '/wiki/Pangram_checker',\n", + " '/wiki/Paraffins',\n", + " '/wiki/Parallel_calculations',\n", + " '/wiki/Parametric_polymorphism',\n", + " '/wiki/Parametrized_SQL_statement',\n", + " '/wiki/Parse_an_IP_Address',\n", + " '/wiki/Parsing/RPN_calculator_algorithm',\n", + " '/wiki/Parsing/RPN_to_infix_conversion',\n", + " '/wiki/Parsing/Shunting-yard_algorithm',\n", + " '/wiki/Partial_function_application',\n", + " '/wiki/Pascal_matrix_generation',\n", + " '/wiki/Pascal%27s_triangle',\n", + " '/wiki/Pascal%27s_triangle/Puzzle',\n", + " '/wiki/Pattern_matching',\n", + " '/wiki/Penney%27s_game',\n", + " '/wiki/Percentage_difference_between_images',\n", + " '/wiki/Percolation/Bond_percolation',\n", + " '/wiki/Percolation/Mean_cluster_density',\n", + " '/wiki/Percolation/Mean_run_density',\n", + " '/wiki/Percolation/Site_percolation',\n", + " '/wiki/Perfect_numbers',\n", + " '/wiki/Permutation_test',\n", + " '/wiki/Permutations',\n", + " '/wiki/Permutations_by_swapping',\n", + " '/wiki/Permutations/Derangements',\n", + " '/wiki/Permutations/Rank_of_a_permutation',\n", + " '/wiki/Pernicious_numbers',\n", + " '/wiki/Phrase_reversals',\n", + " '/wiki/Pi',\n", + " '/wiki/Pick_random_element',\n", + " '/wiki/Pig_the_dice_game',\n", + " '/wiki/Pig_the_dice_game/Player',\n", + " '/wiki/Pinstripe/Display',\n", + " '/wiki/Pinstripe/Printer',\n", + " '/wiki/Play_recorded_sounds',\n", + " '/wiki/Playing_cards',\n", + " '/wiki/Plot_coordinate_pairs',\n", + " '/wiki/Pointers_and_references',\n", + " '/wiki/Polymorphic_copy',\n", + " '/wiki/Polymorphism',\n", + " '/wiki/Polynomial_long_division',\n", + " '/wiki/Polynomial_regression',\n", + " '/wiki/Power_set',\n", + " '/wiki/Pragmatic_directives',\n", + " '/wiki/Price_fraction',\n", + " '/wiki/Primality_by_trial_division',\n", + " '/wiki/Prime_decomposition',\n", + " '/wiki/Primes_-_allocate_descendants_to_their_ancestors',\n", + " '/wiki/Priority_queue',\n", + " '/wiki/Probabilistic_choice',\n", + " '/wiki/Problem_of_Apollonius',\n", + " '/wiki/Program_name',\n", + " '/wiki/Program_termination',\n", + " '/wiki/Pythagorean_triples',\n", + " '/wiki/QR_decomposition',\n", + " '/wiki/Quaternion_type',\n", + " '/wiki/Queue/Definition',\n", + " '/wiki/Queue/Usage',\n", + " '/wiki/Quickselect_algorithm',\n", + " '/wiki/Quine',\n", + " '/wiki/Random_number_generator_(device)',\n", + " '/wiki/Random_number_generator_(included)',\n", + " '/wiki/Random_numbers',\n", + " '/wiki/Range_expansion',\n", + " '/wiki/Range_extraction',\n", + " '/wiki/Ranking_methods',\n", + " '/wiki/Rate_counter',\n", + " '/wiki/Ray-casting_algorithm',\n", + " '/wiki/RCRPG',\n", + " '/wiki/Read_a_configuration_file',\n", + " '/wiki/Read_a_file_line_by_line',\n", + " '/wiki/Read_a_specific_line_from_a_file',\n", + " '/wiki/Read_entire_file',\n", + " '/wiki/Real_constants_and_functions',\n", + " '/wiki/Record_sound',\n", + " '/wiki/Reduced_row_echelon_form',\n", + " '/wiki/Regular_expressions',\n", + " '/wiki/Remove_duplicate_elements',\n", + " '/wiki/Remove_lines_from_a_file',\n", + " '/wiki/Rename_a_file',\n", + " '/wiki/Rendezvous',\n", + " '/wiki/Rep-string',\n", + " '/wiki/Repeat_a_string',\n", + " '/wiki/Resistor_mesh',\n", + " '/wiki/Respond_to_an_unknown_method_call',\n", + " '/wiki/Return_multiple_values',\n", + " '/wiki/Reverse_a_string',\n", + " '/wiki/Reverse_words_in_a_string',\n", + " '/wiki/RIPEMD-160',\n", + " '/wiki/Rock-paper-scissors',\n", + " '/wiki/Roman_numerals/Decode',\n", + " '/wiki/Roman_numerals/Encode',\n", + " '/wiki/Roots_of_a_function',\n", + " '/wiki/Roots_of_a_quadratic_function',\n", + " '/wiki/Roots_of_unity',\n", + " '/wiki/Rosetta_Code/Count_examples',\n", + " '/wiki/Rosetta_Code/Find_bare_lang_tags',\n", + " '/wiki/Rosetta_Code/Find_unimplemented_tasks',\n", + " '/wiki/Rosetta_Code/Fix_code_tags',\n", + " '/wiki/Rosetta_Code/Rank_languages_by_popularity',\n", + " '/wiki/Rot-13',\n", + " '/wiki/RSA_code',\n", + " '/wiki/Run-length_encoding',\n", + " '/wiki/Runge-Kutta_method',\n", + " '/wiki/Runtime_evaluation',\n", + " '/wiki/Runtime_evaluation/In_an_environment',\n", + " '/wiki/S-Expressions',\n", + " '/wiki/Safe_addition',\n", + " '/wiki/Sailors,_coconuts_and_a_monkey_problem',\n", + " '/wiki/Same_Fringe',\n", + " '/wiki/Scope_modifiers',\n", + " '/wiki/Scope/Function_names_and_labels',\n", + " '/wiki/Search_a_list',\n", + " '/wiki/Secure_temporary_file',\n", + " '/wiki/SEDOLs',\n", + " '/wiki/Self-describing_numbers',\n", + " '/wiki/Self-referential_sequence',\n", + " '/wiki/Semiprime',\n", + " '/wiki/Semordnilap',\n", + " '/wiki/Send_an_unknown_method_call',\n", + " '/wiki/Send_email',\n", + " '/wiki/Sequence_of_non-squares',\n", + " '/wiki/Sequence_of_primes_by_Trial_Division',\n", + " '/wiki/Set',\n", + " '/wiki/Set_consolidation',\n", + " '/wiki/Set_of_real_numbers',\n", + " '/wiki/Set_puzzle',\n", + " '/wiki/Seven-sided_dice_from_five-sided_dice',\n", + " '/wiki/SHA-1',\n", + " '/wiki/SHA-256',\n", + " '/wiki/Shell_one-liner',\n", + " '/wiki/Short-circuit_evaluation',\n", + " '/wiki/Show_the_epoch',\n", + " '/wiki/Sierpinski_carpet',\n", + " '/wiki/Sierpinski_triangle',\n", + " '/wiki/Sierpinski_triangle/Graphical',\n", + " '/wiki/Sieve_of_Eratosthenes',\n", + " '/wiki/Simple_database',\n", + " '/wiki/Simple_windowed_application',\n", + " '/wiki/Simulate_input/Keyboard',\n", + " '/wiki/Simulate_input/Mouse',\n", + " '/wiki/Singleton',\n", + " '/wiki/Singly-linked_list/Element_definition',\n", + " '/wiki/Singly-linked_list/Element_insertion',\n", + " '/wiki/Singly-linked_list/Traversal',\n", + " '/wiki/Sleep',\n", + " '/wiki/SOAP',\n", + " '/wiki/Sockets',\n", + " '/wiki/Sokoban',\n", + " '/wiki/Solve_a_Hidato_puzzle',\n", + " '/wiki/Solve_a_Holy_Knight%27s_tour',\n", + " '/wiki/Solve_a_Hopido_puzzle',\n", + " '/wiki/Solve_a_Numbrix_puzzle',\n", + " '/wiki/Solve_the_no_connection_puzzle',\n", + " '/wiki/Sort_an_array_of_composite_structures',\n", + " '/wiki/Sort_an_integer_array',\n", + " '/wiki/Sort_disjoint_sublist',\n", + " '/wiki/Sort_stability',\n", + " '/wiki/Sort_using_a_custom_comparator',\n", + " '/wiki/Sorting_algorithms/Bead_sort',\n", + " '/wiki/Sorting_algorithms/Bogosort',\n", + " '/wiki/Sorting_algorithms/Bubble_sort',\n", + " '/wiki/Sorting_algorithms/Cocktail_sort',\n", + " '/wiki/Sorting_algorithms/Comb_sort',\n", + " '/wiki/Sorting_algorithms/Counting_sort',\n", + " '/wiki/Sorting_algorithms/Gnome_sort',\n", + " '/wiki/Sorting_algorithms/Heapsort',\n", + " '/wiki/Sorting_algorithms/Insertion_sort',\n", + " '/wiki/Sorting_algorithms/Merge_sort',\n", + " '/wiki/Sorting_algorithms/Pancake_sort',\n", + " '/wiki/Sorting_algorithms/Permutation_sort',\n", + " '/wiki/Sorting_algorithms/Quicksort',\n", + " '/wiki/Sorting_algorithms/Radix_sort',\n", + " '/wiki/Sorting_algorithms/Selection_sort',\n", + " '/wiki/Sorting_algorithms/Shell_sort',\n", + " '/wiki/Sorting_algorithms/Sleep_sort',\n", + " '/wiki/Sorting_algorithms/Stooge_sort',\n", + " '/wiki/Sorting_algorithms/Strand_sort',\n", + " '/wiki/Soundex',\n", + " '/wiki/Sparkline_in_unicode',\n", + " '/wiki/Special_characters',\n", + " '/wiki/Special_variables',\n", + " '/wiki/Speech_synthesis',\n", + " '/wiki/Spiral_matrix',\n", + " '/wiki/SQL-based_authentication',\n", + " '/wiki/Stable_marriage_problem',\n", + " '/wiki/Stack',\n", + " '/wiki/Stack_traces',\n", + " '/wiki/Stair-climbing_puzzle',\n", + " '/wiki/Standard_deviation',\n", + " '/wiki/Start_from_a_main_routine',\n", + " '/wiki/State_name_puzzle',\n", + " '/wiki/Statistics/Basic',\n", + " '/wiki/Stem-and-leaf_plot',\n", + " '/wiki/Stern-Brocot_sequence',\n", + " '/wiki/String_append',\n", + " '/wiki/String_case',\n", + " '/wiki/String_comparison',\n", + " '/wiki/String_concatenation',\n", + " '/wiki/String_interpolation_(included)',\n", + " '/wiki/String_length',\n", + " '/wiki/String_matching',\n", + " '/wiki/String_prepend',\n", + " '/wiki/Strip_a_set_of_characters_from_a_string',\n", + " '/wiki/Strip_block_comments',\n", + " '/wiki/Strip_comments_from_a_string',\n", + " '/wiki/Strip_control_codes_and_extended_characters_from_a_string',\n", + " '/wiki/Strip_whitespace_from_a_string/Top_and_tail',\n", + " '/wiki/Subleq',\n", + " '/wiki/Substring',\n", + " '/wiki/Substring/Top_and_tail',\n", + " '/wiki/Subtractive_generator',\n", + " '/wiki/Sudoku',\n", + " '/wiki/Sum_and_product_of_an_array',\n", + " '/wiki/Sum_digits_of_an_integer',\n", + " '/wiki/Sum_multiples_of_3_and_5',\n", + " '/wiki/Sum_of_a_series',\n", + " '/wiki/Sum_of_squares',\n", + " '/wiki/Sutherland-Hodgman_polygon_clipping',\n", + " '/wiki/Symmetric_difference',\n", + " '/wiki/Synchronous_concurrency',\n", + " '/wiki/System_time',\n", + " '/wiki/Table_creation/Postal_addresses',\n", + " '/wiki/Take_notes_on_the_command_line',\n", + " '/wiki/Temperature_conversion',\n", + " '/wiki/Terminal_control/Clear_the_screen',\n", + " '/wiki/Terminal_control/Coloured_text',\n", + " '/wiki/Terminal_control/Cursor_movement',\n", + " '/wiki/Terminal_control/Cursor_positioning',\n", + " '/wiki/Terminal_control/Dimensions',\n", + " '/wiki/Terminal_control/Display_an_extended_character',\n", + " '/wiki/Terminal_control/Hiding_the_cursor',\n", + " '/wiki/Terminal_control/Inverse_video',\n", + " '/wiki/Terminal_control/Positional_read',\n", + " '/wiki/Terminal_control/Preserve_screen',\n", + " '/wiki/Terminal_control/Ringing_the_terminal_bell',\n", + " '/wiki/Terminal_control/Unicode_output',\n", + " '/wiki/Ternary_logic',\n", + " '/wiki/Test_a_function',\n", + " '/wiki/Text_processing/1',\n", + " '/wiki/Text_processing/2',\n", + " '/wiki/Text_processing/Max_licenses_in_use',\n", + " '/wiki/Textonyms',\n", + " '/wiki/The_ISAAC_Cipher',\n", + " '/wiki/The_Twelve_Days_of_Christmas',\n", + " '/wiki/Thiele%27s_interpolation_formula',\n", + " '/wiki/Tic-tac-toe',\n", + " '/wiki/Time_a_function',\n", + " '/wiki/Tokenize_a_string',\n", + " '/wiki/Top_rank_per_group',\n", + " '/wiki/Topic_variable',\n", + " '/wiki/Topological_sort',\n", + " '/wiki/Topswops',\n", + " '/wiki/Total_circles_area',\n", + " '/wiki/Towers_of_Hanoi',\n", + " '/wiki/Trabb_Pardo%E2%80%93Knuth_algorithm',\n", + " '/wiki/Tree_traversal',\n", + " '/wiki/Trigonometric_functions',\n", + " '/wiki/Truncatable_primes',\n", + " '/wiki/Truncate_a_file',\n", + " '/wiki/Twelve_statements',\n", + " '/wiki/Ulam_spiral_(for_primes)',\n", + " '/wiki/Unbias_a_random_generator',\n", + " '/wiki/Undefined_values',\n", + " '/wiki/Unicode_strings',\n", + " '/wiki/Unicode_variable_names',\n", + " '/wiki/Universal_Turing_machine',\n", + " '/wiki/Unix/ls',\n", + " '/wiki/Update_a_configuration_file',\n", + " '/wiki/URL_decoding',\n", + " '/wiki/URL_encoding',\n", + " '/wiki/Use_another_language_to_call_a_function',\n", + " '/wiki/User_input/Graphical',\n", + " '/wiki/User_input/Text',\n", + " '/wiki/Vampire_number',\n", + " '/wiki/Van_der_Corput_sequence',\n", + " '/wiki/Variable_size/Get',\n", + " '/wiki/Variable_size/Set',\n", + " '/wiki/Variable-length_quantity',\n", + " '/wiki/Variables',\n", + " '/wiki/Variadic_function',\n", + " '/wiki/Vector_products',\n", + " '/wiki/Verify_distribution_uniformity/Chi-squared_test',\n", + " '/wiki/Verify_distribution_uniformity/Naive',\n", + " '/wiki/Video_display_modes',\n", + " '/wiki/Vigen%C3%A8re_cipher',\n", + " '/wiki/Vigen%C3%A8re_cipher/Cryptanalysis',\n", + " '/wiki/Visualize_a_tree',\n", + " '/wiki/Vogel%27s_approximation_method',\n", + " '/wiki/Voronoi_diagram',\n", + " '/wiki/Walk_a_directory/Non-recursively',\n", + " '/wiki/Walk_a_directory/Recursively',\n", + " '/wiki/Web_scraping',\n", + " '/wiki/Window_creation',\n", + " '/wiki/Window_creation/X11',\n", + " '/wiki/Window_management',\n", + " '/wiki/Wireworld',\n", + " '/wiki/Word_wrap',\n", + " '/wiki/World_Cup_group_stage',\n", + " '/wiki/Write_float_arrays_to_a_text_file',\n", + " '/wiki/Write_language_name_in_3D_ASCII',\n", + " '/wiki/Write_to_Windows_event_log',\n", + " '/wiki/Xiaolin_Wu%27s_line_algorithm',\n", + " '/wiki/XML/DOM_serialization',\n", + " '/wiki/XML/Input',\n", + " '/wiki/XML/Output',\n", + " '/wiki/XML/XPath',\n", + " '/wiki/Y_combinator',\n", + " '/wiki/Yahoo!_search_interface',\n", + " '/wiki/Yin_and_yang',\n", + " '/wiki/Zebra_puzzle',\n", + " '/wiki/Zeckendorf_arithmetic',\n", + " '/wiki/Zeckendorf_number_representation',\n", + " '/wiki/Zero_to_the_zero_power',\n", + " '/wiki/Zhang-Suen_thinning_algorithm',\n", + " '/wiki/Zig-zag_matrix',\n", + " '/wiki/Category:Programming_Tasks',\n", + " '/wiki/Category_talk:Programming_Tasks',\n", + " '/wiki/Category:Programming_Tasks',\n", + " '/wiki/Rosetta_Code',\n", + " '/wiki/Special:WebChat',\n", + " '/wiki/Rosetta_Code:Village_Pump',\n", + " '/wiki/Rosetta_Code:Finances',\n", + " '/wiki/Category:Programming_Languages',\n", + " '/wiki/Category:Programming_Tasks',\n", + " '/wiki/Special:RecentChanges',\n", + " '/wiki/Help:Similar_Sites',\n", + " '/wiki/Special:Random',\n", + " '/wiki/Special:WhatLinksHere/Category:Programming_Tasks',\n", + " '/wiki/Special:RecentChangesLinked/Category:Programming_Tasks',\n", + " '/wiki/Special:SpecialPages',\n", + " '/wiki/Special:Browse/Category:Programming_Tasks',\n", + " '/wiki/Rosetta_Code:Privacy_policy',\n", + " '/wiki/Rosetta_Code:About',\n", + " '/wiki/Rosetta_Code:General_disclaimer']" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def scrape_links():\n", + " req = urllib.request.Request('http://rosettacode.org/wiki/Category:Programming_Tasks', headers={'User-Agent': 'Mozilla/5.0'})\n", + " content = urllib.request.urlopen(req).read()\n", + " soup = BeautifulSoup(content)\n", + " link_list = [link.get('href') for link in soup.find_all('a')]\n", + " return [link for link in link_list[1:] if link.startswith('/wiki/')]\n", + "scrape_links()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "web_scraper = make_links_list(500)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['http://www.rosettacode.org/wiki/Hailstone_sequence',\n", + " 'http://www.rosettacode.org/wiki/Keyboard_macros',\n", + " 'http://www.rosettacode.org/wiki/Average_loop_length',\n", + " 'http://www.rosettacode.org/wiki/Knight%27s_tour',\n", + " 'http://www.rosettacode.org/wiki/Category:Programming_Languages',\n", + " 'http://www.rosettacode.org/wiki/Metered_concurrency',\n", + " 'http://www.rosettacode.org/wiki/Continued_fraction',\n", + " 'http://www.rosettacode.org/wiki/System_time',\n", + " 'http://www.rosettacode.org/wiki/Search_a_list',\n", + " 'http://www.rosettacode.org/wiki/Rate_counter',\n", + " 'http://www.rosettacode.org/wiki/List_comprehensions',\n", + " 'http://www.rosettacode.org/wiki/Matrix_transposition',\n", + " 'http://www.rosettacode.org/wiki/Loops/For_with_a_specified_step',\n", + " 'http://www.rosettacode.org/wiki/Cholesky_decomposition',\n", + " 'http://www.rosettacode.org/wiki/HTTPS/Client-authenticated',\n", + " 'http://www.rosettacode.org/wiki/Digital_root/Multiplicative_digital_root',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Line_printer',\n", + " 'http://www.rosettacode.org/wiki/Benford%27s_law',\n", + " 'http://www.rosettacode.org/wiki/Partial_function_application',\n", + " 'http://www.rosettacode.org/wiki/Loops/Foreach',\n", + " 'http://www.rosettacode.org/wiki/Evolutionary_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Huffman_coding',\n", + " 'http://www.rosettacode.org/wiki/Monty_Hall_problem',\n", + " 'http://www.rosettacode.org/wiki/DNS_query',\n", + " 'http://www.rosettacode.org/wiki/Topic_variable',\n", + " 'http://www.rosettacode.org/wiki/Parsing/RPN_to_infix_conversion',\n", + " 'http://www.rosettacode.org/wiki/Memory_allocation',\n", + " 'http://www.rosettacode.org/wiki/Flipping_bits_game',\n", + " 'http://www.rosettacode.org/wiki/Determine_if_only_one_instance_is_running',\n", + " 'http://www.rosettacode.org/wiki/Higher-order_functions',\n", + " 'http://www.rosettacode.org/wiki/Case-sensitivity_of_identifiers',\n", + " 'http://www.rosettacode.org/wiki/Priority_queue',\n", + " 'http://www.rosettacode.org/wiki/Ethiopian_multiplication',\n", + " 'http://www.rosettacode.org/wiki/Here_document',\n", + " 'http://www.rosettacode.org/wiki/SEDOLs',\n", + " 'http://www.rosettacode.org/wiki/Extreme_floating_point_values',\n", + " 'http://www.rosettacode.org/wiki/Verify_distribution_uniformity/Naive',\n", + " 'http://www.rosettacode.org/wiki/Zhang-Suen_thinning_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Draw_a_sphere',\n", + " 'http://www.rosettacode.org/wiki/JortSort',\n", + " 'http://www.rosettacode.org/wiki/JSON',\n", + " 'http://www.rosettacode.org/wiki/Compound_data_type',\n", + " 'http://www.rosettacode.org/wiki/Random_numbers',\n", + " 'http://www.rosettacode.org/wiki/Least_common_multiple',\n", + " 'http://www.rosettacode.org/wiki/Rep-string',\n", + " 'http://www.rosettacode.org/wiki/Sleep',\n", + " 'http://www.rosettacode.org/wiki/Semiprime',\n", + " 'http://www.rosettacode.org/wiki/Greyscale_bars/Display',\n", + " 'http://www.rosettacode.org/wiki/Iterated_digits_squaring',\n", + " 'http://www.rosettacode.org/wiki/Roots_of_unity',\n", + " 'http://www.rosettacode.org/wiki/Sierpinski_triangle/Graphical',\n", + " 'http://www.rosettacode.org/wiki/Create_an_object_at_a_given_address',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/PPM_conversion_through_a_pipe',\n", + " 'http://www.rosettacode.org/wiki/Set_consolidation',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Cursor_positioning',\n", + " 'http://www.rosettacode.org/wiki/Combinations_and_permutations',\n", + " 'http://www.rosettacode.org/wiki/Singly-linked_list/Element_insertion',\n", + " 'http://www.rosettacode.org/wiki/Documentation',\n", + " 'http://www.rosettacode.org/wiki/Doubly-linked_list/Traversal',\n", + " 'http://www.rosettacode.org/wiki/Numeric_error_propagation',\n", + " 'http://www.rosettacode.org/wiki/Natural_sorting',\n", + " 'http://www.rosettacode.org/wiki/URL_decoding',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code/Count_examples',\n", + " 'http://www.rosettacode.org/wiki/Playing_cards',\n", + " 'http://www.rosettacode.org/wiki/9_billion_names_of_God_the_integer',\n", + " 'http://www.rosettacode.org/wiki/Use_another_language_to_call_a_function',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Standard_error',\n", + " 'http://www.rosettacode.org/wiki/Almost_prime',\n", + " 'http://www.rosettacode.org/wiki/Sierpinski_triangle',\n", + " 'http://www.rosettacode.org/wiki/Minesweeper_game',\n", + " 'http://www.rosettacode.org/wiki/S-Expressions',\n", + " 'http://www.rosettacode.org/wiki/Comma_quibbling',\n", + " 'http://www.rosettacode.org/wiki/Mad_Libs',\n", + " 'http://www.rosettacode.org/wiki/Sort_using_a_custom_comparator',\n", + " 'http://www.rosettacode.org/wiki/Sequence_of_non-squares',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Comb_sort',\n", + " 'http://www.rosettacode.org/wiki/Five_weekends',\n", + " 'http://www.rosettacode.org/wiki/Send_an_unknown_method_call',\n", + " 'http://www.rosettacode.org/wiki/Pointers_and_references',\n", + " 'http://www.rosettacode.org/wiki/Bitwise_IO',\n", + " 'http://www.rosettacode.org/wiki/String_case',\n", + " 'http://www.rosettacode.org/wiki/Generic_swap',\n", + " 'http://www.rosettacode.org/wiki/Set_of_real_numbers',\n", + " 'http://www.rosettacode.org/wiki/Binary_digits',\n", + " 'http://www.rosettacode.org/wiki/Array_concatenation',\n", + " 'http://www.rosettacode.org/wiki/Word_wrap',\n", + " 'http://www.rosettacode.org/wiki/24_game',\n", + " 'http://www.rosettacode.org/wiki/Non-decimal_radices/Input',\n", + " 'http://www.rosettacode.org/wiki/Averages/Mean_angle',\n", + " 'http://www.rosettacode.org/wiki/Call_a_foreign-language_function',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Flood_fill',\n", + " 'http://www.rosettacode.org/wiki/Percolation/Mean_cluster_density',\n", + " 'http://www.rosettacode.org/wiki/Play_recorded_sounds',\n", + " 'http://www.rosettacode.org/wiki/Function_frequency',\n", + " 'http://www.rosettacode.org/wiki/One_of_n_lines_in_a_file',\n", + " 'http://www.rosettacode.org/wiki/Kaprekar_numbers',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:General_disclaimer',\n", + " 'http://www.rosettacode.org/wiki/Matrix-exponentiation_operator',\n", + " 'http://www.rosettacode.org/wiki/Caesar_cipher',\n", + " 'http://www.rosettacode.org/wiki/Sum_of_squares',\n", + " 'http://www.rosettacode.org/wiki/Solve_a_Holy_Knight%27s_tour',\n", + " 'http://www.rosettacode.org/wiki/XML/Input',\n", + " 'http://www.rosettacode.org/wiki/Empty_program',\n", + " 'http://www.rosettacode.org/wiki/Gray_code',\n", + " 'http://www.rosettacode.org/wiki/Best_shuffle',\n", + " 'http://www.rosettacode.org/wiki/Thiele%27s_interpolation_formula',\n", + " 'http://www.rosettacode.org/wiki/Remove_lines_from_a_file',\n", + " 'http://www.rosettacode.org/wiki/Narcissistic_decimal_number',\n", + " 'http://www.rosettacode.org/wiki/XML/Output',\n", + " 'http://www.rosettacode.org/wiki/Dragon_curve',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Bead_sort',\n", + " 'http://www.rosettacode.org/wiki/Vigen%C3%A8re_cipher/Cryptanalysis',\n", + " 'http://www.rosettacode.org/wiki/Colour_pinstripe/Display',\n", + " 'http://www.rosettacode.org/wiki/Permutations/Rank_of_a_permutation',\n", + " 'http://www.rosettacode.org/wiki/K-d_tree',\n", + " 'http://www.rosettacode.org/wiki/Special_characters',\n", + " 'http://www.rosettacode.org/wiki/Read_entire_file',\n", + " 'http://www.rosettacode.org/wiki/Simple_database',\n", + " 'http://www.rosettacode.org/wiki/Ludic_numbers',\n", + " 'http://www.rosettacode.org/wiki/SHA-1',\n", + " 'http://www.rosettacode.org/wiki/Circles_of_given_radius_through_two_points',\n", + " 'http://www.rosettacode.org/wiki/Holidays_related_to_Easter',\n", + " 'http://www.rosettacode.org/wiki/Category:Programming_Tasks',\n", + " 'http://www.rosettacode.org/wiki/Integer_comparison',\n", + " 'http://www.rosettacode.org/wiki/Knapsack_problem/0-1',\n", + " 'http://www.rosettacode.org/wiki/Death_Star',\n", + " 'http://www.rosettacode.org/wiki/Random_number_generator_(included)',\n", + " 'http://www.rosettacode.org/wiki/Continued_fraction/Arithmetic/Construct_from_rational_number',\n", + " 'http://www.rosettacode.org/wiki/Enforced_immutability',\n", + " 'http://www.rosettacode.org/wiki/Machine_code',\n", + " 'http://www.rosettacode.org/wiki/Perfect_numbers',\n", + " 'http://www.rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string',\n", + " 'http://www.rosettacode.org/wiki/Arithmetic_evaluation',\n", + " 'http://www.rosettacode.org/wiki/Same_Fringe',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code/Fix_code_tags',\n", + " 'http://www.rosettacode.org/wiki/Scope/Function_names_and_labels',\n", + " 'http://www.rosettacode.org/wiki/Boolean_values',\n", + " 'http://www.rosettacode.org/wiki/XML/DOM_serialization',\n", + " 'http://www.rosettacode.org/wiki/Jensen%27s_Device',\n", + " 'http://www.rosettacode.org/wiki/Percolation/Site_percolation',\n", + " 'http://www.rosettacode.org/wiki/Forward_difference',\n", + " 'http://www.rosettacode.org/wiki/OpenGL',\n", + " 'http://www.rosettacode.org/wiki/Cut_a_rectangle',\n", + " 'http://www.rosettacode.org/wiki/Hofstadter_Figure-Figure_sequences',\n", + " 'http://www.rosettacode.org/wiki/Zebra_puzzle',\n", + " 'http://www.rosettacode.org/wiki/MD5/Implementation',\n", + " 'http://www.rosettacode.org/wiki/Reverse_a_string',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Counting_sort',\n", + " 'http://www.rosettacode.org/wiki/Associative_array/Creation',\n", + " 'http://www.rosettacode.org/wiki/Queue/Definition',\n", + " 'http://www.rosettacode.org/wiki/Unix/ls',\n", + " 'http://www.rosettacode.org/wiki/Loops/Break',\n", + " 'http://www.rosettacode.org/wiki/Magic_squares_of_odd_order',\n", + " 'http://www.rosettacode.org/wiki/Echo_server',\n", + " 'http://www.rosettacode.org/wiki/Flow-control_structures',\n", + " 'http://www.rosettacode.org/wiki/Ordered_Partitions',\n", + " 'http://www.rosettacode.org/wiki/Pythagorean_triples',\n", + " 'http://www.rosettacode.org/wiki/Introspection',\n", + " 'http://www.rosettacode.org/wiki/Exponentiation_operator',\n", + " 'http://www.rosettacode.org/wiki/Primes_-_allocate_descendants_to_their_ancestors',\n", + " 'http://www.rosettacode.org/wiki/Fibonacci_word',\n", + " 'http://www.rosettacode.org/wiki/Start_from_a_main_routine',\n", + " 'http://www.rosettacode.org/wiki/Secure_temporary_file',\n", + " 'http://www.rosettacode.org/wiki/Linear_congruential_generator',\n", + " 'http://www.rosettacode.org/wiki/Set',\n", + " 'http://www.rosettacode.org/wiki/Record_sound',\n", + " 'http://www.rosettacode.org/wiki/Call_an_object_method',\n", + " 'http://www.rosettacode.org/wiki/First-class_functions/Use_numbers_analogously',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:Privacy_policy',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Bubble_sort',\n", + " 'http://www.rosettacode.org/wiki/Active_Directory/Connect',\n", + " 'http://www.rosettacode.org/wiki/Inheritance/Multiple',\n", + " 'http://www.rosettacode.org/wiki/Tokenize_a_string',\n", + " 'http://www.rosettacode.org/wiki/Sokoban',\n", + " 'http://www.rosettacode.org/wiki/Infinity',\n", + " 'http://www.rosettacode.org/wiki/Object_serialization',\n", + " 'http://www.rosettacode.org/wiki/Text_processing/Max_licenses_in_use',\n", + " 'http://www.rosettacode.org/wiki/Arbitrary-precision_integers_(included)',\n", + " 'http://www.rosettacode.org/wiki/SOAP',\n", + " 'http://www.rosettacode.org/wiki/Ray-casting_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Zig-zag_matrix',\n", + " 'http://www.rosettacode.org/wiki/Keyboard_input/Flush_the_keyboard_buffer',\n", + " 'http://www.rosettacode.org/wiki/Levenshtein_distance',\n", + " 'http://www.rosettacode.org/wiki/Set_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Carmichael_3_strong_pseudoprimes',\n", + " 'http://www.rosettacode.org/wiki/Left_factorials',\n", + " 'http://www.rosettacode.org/wiki/Chinese_remainder_theorem',\n", + " 'http://www.rosettacode.org/wiki/Soundex',\n", + " 'http://www.rosettacode.org/wiki/Mandelbrot_set',\n", + " 'http://www.rosettacode.org/wiki/Anagrams',\n", + " 'http://www.rosettacode.org/wiki/Number_names',\n", + " 'http://www.rosettacode.org/wiki/CRC-32',\n", + " 'http://www.rosettacode.org/wiki/Find_the_last_Sunday_of_each_month',\n", + " 'http://www.rosettacode.org/wiki/Metaprogramming',\n", + " 'http://www.rosettacode.org/wiki/Ordered_words',\n", + " 'http://www.rosettacode.org/wiki/99_Bottles_of_Beer',\n", + " 'http://www.rosettacode.org/wiki/Execute_a_Markov_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Events',\n", + " 'http://www.rosettacode.org/wiki/Knapsack_problem/Unbounded',\n", + " 'http://www.rosettacode.org/wiki/MD4',\n", + " 'http://www.rosettacode.org/wiki/Truncatable_primes',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Insertion_sort',\n", + " 'http://www.rosettacode.org/wiki/Exceptions',\n", + " 'http://www.rosettacode.org/wiki/Arrays',\n", + " 'http://www.rosettacode.org/wiki/Help:Similar_Sites',\n", + " 'http://www.rosettacode.org/wiki/Sequence_of_primes_by_Trial_Division',\n", + " 'http://www.rosettacode.org/wiki/SHA-256',\n", + " 'http://www.rosettacode.org/wiki/Update_a_configuration_file',\n", + " 'http://www.rosettacode.org/wiki/IBAN',\n", + " 'http://www.rosettacode.org/wiki/Multiple_distinct_objects',\n", + " 'http://www.rosettacode.org/wiki/Parametrized_SQL_statement',\n", + " 'http://www.rosettacode.org/wiki/Parallel_calculations',\n", + " 'http://www.rosettacode.org/wiki/Guess_the_number/With_feedback_(player)',\n", + " 'http://www.rosettacode.org/wiki/Non-decimal_radices/Output',\n", + " 'http://www.rosettacode.org/wiki/Jump_anywhere',\n", + " 'http://www.rosettacode.org/wiki/Entropy',\n", + " 'http://www.rosettacode.org/wiki/Strip_a_set_of_characters_from_a_string',\n", + " 'http://www.rosettacode.org/wiki/Averages/Mode',\n", + " 'http://www.rosettacode.org/wiki/Include_a_file',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Bresenham%27s_line_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Universal_Turing_machine',\n", + " 'http://www.rosettacode.org/wiki/Monte_Carlo_methods',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:Finances',\n", + " 'http://www.rosettacode.org/wiki/Pi',\n", + " 'http://www.rosettacode.org/wiki/Repeat_a_string',\n", + " 'http://www.rosettacode.org/wiki/Sum_of_a_series',\n", + " 'http://www.rosettacode.org/wiki/Map_range',\n", + " 'http://www.rosettacode.org/wiki/Primality_by_trial_division',\n", + " 'http://www.rosettacode.org/wiki/Catalan_numbers/Pascal%27s_triangle',\n", + " 'http://www.rosettacode.org/wiki/Special:SpecialPages',\n", + " 'http://www.rosettacode.org/wiki/Go_Fish',\n", + " 'http://www.rosettacode.org/wiki/Day_of_the_week',\n", + " 'http://www.rosettacode.org/wiki/Combinations_with_repetitions',\n", + " 'http://www.rosettacode.org/wiki/Last_Friday_of_each_month',\n", + " 'http://www.rosettacode.org/wiki/Bitcoin/address_validation',\n", + " 'http://www.rosettacode.org/wiki/Y_combinator',\n", + " 'http://www.rosettacode.org/wiki/K-means%2B%2B_clustering',\n", + " 'http://www.rosettacode.org/wiki/Arithmetic/Rational',\n", + " 'http://www.rosettacode.org/wiki/Roots_of_a_quadratic_function',\n", + " 'http://www.rosettacode.org/wiki/Verify_distribution_uniformity/Chi-squared_test',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Unicode_output',\n", + " 'http://www.rosettacode.org/wiki/String_prepend',\n", + " 'http://www.rosettacode.org/wiki/N%27th',\n", + " 'http://www.rosettacode.org/wiki/Arithmetic/Integer',\n", + " 'http://www.rosettacode.org/wiki/Abstract_type',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Sleep_sort',\n", + " 'http://www.rosettacode.org/wiki/Special:WhatLinksHere/Category:Programming_Tasks',\n", + " 'http://www.rosettacode.org/wiki/Arithmetic-geometric_mean/Calculate_Pi',\n", + " 'http://www.rosettacode.org/wiki/Dynamic_variable_names',\n", + " 'http://www.rosettacode.org/wiki/Roots_of_a_function',\n", + " 'http://www.rosettacode.org/wiki/Arithmetic-geometric_mean',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Graphical',\n", + " 'http://www.rosettacode.org/wiki/Closest-pair_problem',\n", + " 'http://www.rosettacode.org/wiki/Interactive_programming',\n", + " 'http://www.rosettacode.org/wiki/Sailors,_coconuts_and_a_monkey_problem',\n", + " 'http://www.rosettacode.org/wiki/RSA_code',\n", + " 'http://www.rosettacode.org/wiki/HTTPS',\n", + " 'http://www.rosettacode.org/wiki/Calendar',\n", + " 'http://www.rosettacode.org/wiki/Factorial',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Read_an_image_through_a_pipe',\n", + " 'http://www.rosettacode.org/wiki/Sparkline_in_unicode',\n", + " 'http://www.rosettacode.org/wiki/Globally_replace_text_in_several_files',\n", + " 'http://www.rosettacode.org/wiki/Greatest_common_divisor',\n", + " 'http://www.rosettacode.org/wiki/Color_of_a_screen_pixel',\n", + " 'http://www.rosettacode.org/wiki/Loops/For',\n", + " 'http://www.rosettacode.org/wiki/Amicable_pairs',\n", + " 'http://www.rosettacode.org/wiki/Quickselect_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Midpoint_circle_algorithm',\n", + " 'http://www.rosettacode.org/wiki/HTTPS/Authenticated',\n", + " 'http://www.rosettacode.org/wiki/User_input/Graphical',\n", + " 'http://www.rosettacode.org/wiki/Call_a_function_in_a_shared_library',\n", + " 'http://www.rosettacode.org/wiki/State_name_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Sum_and_product_of_an_array',\n", + " 'http://www.rosettacode.org/wiki/Range_expansion',\n", + " 'http://www.rosettacode.org/wiki/LZW_compression',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Text',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Newline_omission',\n", + " 'http://www.rosettacode.org/wiki/Morse_code',\n", + " 'http://www.rosettacode.org/wiki/Fibonacci_n-step_number_sequences',\n", + " 'http://www.rosettacode.org/wiki/Window_management',\n", + " 'http://www.rosettacode.org/wiki/Associative_array/Iteration',\n", + " 'http://www.rosettacode.org/wiki/Munching_squares',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/B%C3%A9zier_curves/Quadratic',\n", + " 'http://www.rosettacode.org/wiki/Abundant,_deficient_and_perfect_number_classifications',\n", + " 'http://www.rosettacode.org/wiki/Dutch_national_flag_problem',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Preserve_screen',\n", + " 'http://www.rosettacode.org/wiki/Substring/Top_and_tail',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Strand_sort',\n", + " 'http://www.rosettacode.org/wiki/Non-continuous_subsequences',\n", + " 'http://www.rosettacode.org/wiki/Loops/While',\n", + " 'http://www.rosettacode.org/wiki/Assertions',\n", + " 'http://www.rosettacode.org/wiki/Ackermann_function',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Write_a_PPM_file',\n", + " 'http://www.rosettacode.org/wiki/Animate_a_pendulum',\n", + " 'http://www.rosettacode.org/wiki/GUI/Maximum_window_dimensions',\n", + " 'http://www.rosettacode.org/wiki/Integer_overflow',\n", + " 'http://www.rosettacode.org/wiki/Create_an_HTML_table',\n", + " 'http://www.rosettacode.org/wiki/String_interpolation_(included)',\n", + " 'http://www.rosettacode.org/wiki/Bitwise_operations',\n", + " 'http://www.rosettacode.org/wiki/Fractal_tree',\n", + " 'http://www.rosettacode.org/wiki/Solve_a_Hidato_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Accumulator_factory',\n", + " 'http://www.rosettacode.org/wiki/OLE_Automation',\n", + " 'http://www.rosettacode.org/wiki/File_size',\n", + " 'http://www.rosettacode.org/wiki/Permutations',\n", + " 'http://www.rosettacode.org/wiki/Matrix_arithmetic',\n", + " 'http://www.rosettacode.org/wiki/24_game/Solve',\n", + " 'http://www.rosettacode.org/wiki/Continued_fraction/Arithmetic/G(matrix_NG,_Contined_Fraction_N1,_Contined_Fraction_N2)',\n", + " 'http://www.rosettacode.org/wiki/Optional_parameters',\n", + " 'http://www.rosettacode.org/wiki/Paraffins',\n", + " 'http://www.rosettacode.org/wiki/Parsing/Shunting-yard_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Mouse_position',\n", + " 'http://www.rosettacode.org/wiki/Conditional_structures',\n", + " 'http://www.rosettacode.org/wiki/Factors_of_a_Mersenne_number',\n", + " 'http://www.rosettacode.org/wiki/Doubly-linked_list/Definition',\n", + " 'http://www.rosettacode.org/wiki/Extensible_prime_generator',\n", + " 'http://www.rosettacode.org/wiki/Doubly-linked_list/Element_definition',\n", + " 'http://www.rosettacode.org/wiki/Canny_edge_detector',\n", + " 'http://www.rosettacode.org/wiki/Remove_duplicate_elements',\n", + " 'http://www.rosettacode.org/wiki/AKS_test_for_primes',\n", + " 'http://www.rosettacode.org/wiki/Function_prototype',\n", + " 'http://www.rosettacode.org/wiki/Named_parameters',\n", + " 'http://www.rosettacode.org/wiki/Define_a_primitive_data_type',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Pancake_sort',\n", + " 'http://www.rosettacode.org/wiki/Memory_layout_of_a_data_structure',\n", + " 'http://www.rosettacode.org/wiki/Sudoku',\n", + " 'http://www.rosettacode.org/wiki/Count_in_octal',\n", + " 'http://www.rosettacode.org/wiki/Pig_the_dice_game/Player',\n", + " 'http://www.rosettacode.org/wiki/Sockets',\n", + " 'http://www.rosettacode.org/wiki/Unbias_a_random_generator',\n", + " 'http://www.rosettacode.org/wiki/Dining_philosophers',\n", + " 'http://www.rosettacode.org/wiki/Polymorphism',\n", + " 'http://www.rosettacode.org/wiki/Maze_generation',\n", + " 'http://www.rosettacode.org/wiki/Sort_an_array_of_composite_structures',\n", + " 'http://www.rosettacode.org/wiki/Averages/Mean_time_of_day',\n", + " 'http://www.rosettacode.org/wiki/Category:Programming_Tasks',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Permutation_sort',\n", + " 'http://www.rosettacode.org/wiki/Truncate_a_file',\n", + " 'http://www.rosettacode.org/wiki/Special_variables',\n", + " 'http://www.rosettacode.org/wiki/Sort_disjoint_sublist',\n", + " 'http://www.rosettacode.org/wiki/Maze_solving',\n", + " 'http://www.rosettacode.org/wiki/Haversine_formula',\n", + " 'http://www.rosettacode.org/wiki/Program_name',\n", + " 'http://www.rosettacode.org/wiki/Function_composition',\n", + " 'http://www.rosettacode.org/wiki/Aliquot_sequence_classifications',\n", + " 'http://www.rosettacode.org/wiki/Gamma_function',\n", + " 'http://www.rosettacode.org/wiki/Simple_windowed_application',\n", + " 'http://www.rosettacode.org/wiki/First_class_environments',\n", + " 'http://www.rosettacode.org/wiki/XML/XPath',\n", + " 'http://www.rosettacode.org/wiki/Checkpoint_synchronization',\n", + " 'http://www.rosettacode.org/wiki/GUI_enabling/disabling_of_controls',\n", + " 'http://www.rosettacode.org/wiki/Draw_a_clock',\n", + " 'http://www.rosettacode.org/wiki/Write_language_name_in_3D_ASCII',\n", + " 'http://www.rosettacode.org/wiki/Singly-linked_list/Traversal',\n", + " 'http://www.rosettacode.org/wiki/Stable_marriage_problem',\n", + " 'http://www.rosettacode.org/wiki/Knapsack_problem/Bounded',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Clear_the_screen',\n", + " 'http://www.rosettacode.org/wiki/Spiral_matrix',\n", + " 'http://www.rosettacode.org/wiki/Host_introspection',\n", + " 'http://www.rosettacode.org/wiki/Ulam_spiral_(for_primes)',\n", + " 'http://www.rosettacode.org/wiki/Command-line_arguments',\n", + " 'http://www.rosettacode.org/wiki/Simulate_input/Keyboard',\n", + " 'http://www.rosettacode.org/wiki/Vigen%C3%A8re_cipher',\n", + " 'http://www.rosettacode.org/wiki/Balanced_brackets',\n", + " 'http://www.rosettacode.org/wiki/Show_the_epoch',\n", + " 'http://www.rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping',\n", + " 'http://www.rosettacode.org/wiki/Horner%27s_rule_for_polynomial_evaluation',\n", + " 'http://www.rosettacode.org/wiki/Empty_directory',\n", + " 'http://www.rosettacode.org/wiki/Stem-and-leaf_plot',\n", + " 'http://www.rosettacode.org/wiki/Fibonacci_sequence',\n", + " 'http://www.rosettacode.org/wiki/Category_talk:Programming_Tasks',\n", + " 'http://www.rosettacode.org/wiki/Character_codes',\n", + " 'http://www.rosettacode.org/wiki/Count_occurrences_of_a_substring',\n", + " 'http://www.rosettacode.org/wiki/Permutation_test',\n", + " 'http://www.rosettacode.org/wiki/Constrained_random_points_on_a_circle',\n", + " 'http://www.rosettacode.org/wiki/Handle_a_signal',\n", + " 'http://www.rosettacode.org/wiki/Seven-sided_dice_from_five-sided_dice',\n", + " 'http://www.rosettacode.org/wiki/Special:Browse/Category:Programming_Tasks',\n", + " 'http://www.rosettacode.org/wiki/N-queens_problem',\n", + " 'http://www.rosettacode.org/wiki/Rendezvous',\n", + " 'http://www.rosettacode.org/wiki/Tree_traversal',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code',\n", + " 'http://www.rosettacode.org/wiki/Even_or_odd',\n", + " 'http://www.rosettacode.org/wiki/Knuth%27s_algorithm_S',\n", + " 'http://www.rosettacode.org/wiki/Quaternion_type',\n", + " 'http://www.rosettacode.org/wiki/Image_noise',\n", + " 'http://www.rosettacode.org/wiki/String_matching',\n", + " 'http://www.rosettacode.org/wiki/Make_directory_path',\n", + " 'http://www.rosettacode.org/wiki/Prime_decomposition',\n", + " 'http://www.rosettacode.org/wiki/File_modification_time',\n", + " 'http://www.rosettacode.org/wiki/Special:RecentChangesLinked/Category:Programming_Tasks',\n", + " 'http://www.rosettacode.org/wiki/Rock-paper-scissors',\n", + " 'http://www.rosettacode.org/wiki/Longest_increasing_subsequence',\n", + " 'http://www.rosettacode.org/wiki/Check_that_file_exists',\n", + " 'http://www.rosettacode.org/wiki/Runtime_evaluation',\n", + " 'http://www.rosettacode.org/wiki/Numerical_integration/Gauss-Legendre_Quadrature',\n", + " 'http://www.rosettacode.org/wiki/Classes',\n", + " 'http://www.rosettacode.org/wiki/Happy_numbers',\n", + " 'http://www.rosettacode.org/wiki/Find_largest_left_truncatable_prime_in_a_given_base',\n", + " 'http://www.rosettacode.org/wiki/Increment_a_numerical_string',\n", + " 'http://www.rosettacode.org/wiki/Identity_matrix',\n", + " 'http://www.rosettacode.org/wiki/Walk_a_directory/Recursively',\n", + " 'http://www.rosettacode.org/wiki/CSV_to_HTML_translation',\n", + " 'http://www.rosettacode.org/wiki/Colour_pinstripe/Printer',\n", + " 'http://www.rosettacode.org/wiki/Amb',\n", + " 'http://www.rosettacode.org/wiki/Loop_over_multiple_arrays_simultaneously',\n", + " 'http://www.rosettacode.org/wiki/String_comparison',\n", + " 'http://www.rosettacode.org/wiki/Yahoo!_search_interface',\n", + " 'http://www.rosettacode.org/wiki/Parse_an_IP_Address',\n", + " 'http://www.rosettacode.org/wiki/Longest_common_subsequence',\n", + " 'http://www.rosettacode.org/wiki/Palindrome_detection',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Newbie',\n", + " 'http://www.rosettacode.org/wiki/Date_manipulation',\n", + " 'http://www.rosettacode.org/wiki/Special:RecentChanges',\n", + " 'http://www.rosettacode.org/wiki/Special:Random',\n", + " 'http://www.rosettacode.org/wiki/Guess_the_number/With_feedback',\n", + " 'http://www.rosettacode.org/wiki/Miller-Rabin_primality_test',\n", + " 'http://www.rosettacode.org/wiki/Permutations/Derangements',\n", + " 'http://www.rosettacode.org/wiki/Logical_operations',\n", + " 'http://www.rosettacode.org/wiki/Category_talk:Programming_Tasks',\n", + " 'http://www.rosettacode.org/wiki/Permutations_by_swapping',\n", + " 'http://www.rosettacode.org/wiki/Element-wise_operations',\n", + " 'http://www.rosettacode.org/wiki/Safe_addition',\n", + " 'http://www.rosettacode.org/wiki/Averages/Median',\n", + " 'http://www.rosettacode.org/wiki/Standard_deviation',\n", + " 'http://www.rosettacode.org/wiki/Deconvolution/1D',\n", + " 'http://www.rosettacode.org/wiki/Percentage_difference_between_images',\n", + " 'http://www.rosettacode.org/wiki/Vampire_number',\n", + " 'http://www.rosettacode.org/wiki/Main_step_of_GOST_28147-89',\n", + " 'http://www.rosettacode.org/wiki/Simulate_input/Mouse',\n", + " 'http://www.rosettacode.org/wiki/Singly-linked_list/Element_definition',\n", + " 'http://www.rosettacode.org/wiki/HTTP',\n", + " 'http://www.rosettacode.org/wiki/Execute_Brain****',\n", + " 'http://www.rosettacode.org/wiki/Variable_size/Get',\n", + " 'http://www.rosettacode.org/wiki/Order_disjoint_list_items',\n", + " 'http://www.rosettacode.org/wiki/Hash_from_two_arrays',\n", + " 'http://www.rosettacode.org/wiki/Matrix_multiplication',\n", + " 'http://www.rosettacode.org/wiki/Zeckendorf_number_representation',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Cursor_movement',\n", + " 'http://www.rosettacode.org/wiki/Median_filter',\n", + " 'http://www.rosettacode.org/wiki/Determine_if_a_string_is_numeric',\n", + " 'http://www.rosettacode.org/wiki/Solve_a_Hopido_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Cocktail_sort',\n", + " 'http://www.rosettacode.org/wiki/Create_a_file',\n", + " 'http://www.rosettacode.org/wiki/Short-circuit_evaluation',\n", + " 'http://www.rosettacode.org/wiki/Window_creation',\n", + " 'http://www.rosettacode.org/wiki/Bitmap',\n", + " 'http://www.rosettacode.org/wiki/Euler%27s_sum_of_powers_conjecture',\n", + " 'http://www.rosettacode.org/wiki/QR_decomposition',\n", + " 'http://www.rosettacode.org/wiki/Synchronous_concurrency',\n", + " 'http://www.rosettacode.org/wiki/Sum_digits_of_an_integer',\n", + " 'http://www.rosettacode.org/wiki/Table_creation/Postal_addresses',\n", + " 'http://www.rosettacode.org/wiki/Subleq',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Selection_sort',\n", + " 'http://www.rosettacode.org/wiki/Strip_whitespace_from_a_string/Top_and_tail',\n", + " 'http://www.rosettacode.org/wiki/Textonyms',\n", + " 'http://www.rosettacode.org/wiki/FizzBuzz',\n", + " 'http://www.rosettacode.org/wiki/Active_Directory/Search_for_a_user',\n", + " 'http://www.rosettacode.org/wiki/Ternary_logic',\n", + " 'http://www.rosettacode.org/wiki/Yin_and_yang',\n", + " 'http://www.rosettacode.org/wiki/Loops/Nested',\n", + " 'http://www.rosettacode.org/wiki/Stern-Brocot_sequence',\n", + " 'http://www.rosettacode.org/wiki/Comments',\n", + " 'http://www.rosettacode.org/wiki/Unicode_strings',\n", + " 'http://www.rosettacode.org/wiki/Color_quantization',\n", + " 'http://www.rosettacode.org/wiki/Odd_word_problem',\n", + " 'http://www.rosettacode.org/wiki/Keyboard_input/Obtain_a_Y_or_N_response',\n", + " 'http://www.rosettacode.org/wiki/Solve_a_Numbrix_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Respond_to_an_unknown_method_call',\n", + " 'http://www.rosettacode.org/wiki/Bitcoin/public_point_to_address',\n", + " 'http://www.rosettacode.org/wiki/Reduced_row_echelon_form',\n", + " 'http://www.rosettacode.org/wiki/Empty_string',\n", + " 'http://www.rosettacode.org/wiki/The_Twelve_Days_of_Christmas',\n", + " 'http://www.rosettacode.org/wiki/Multifactorial',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Display_an_extended_character',\n", + " 'http://www.rosettacode.org/wiki/Hofstadter_Q_sequence',\n", + " 'http://www.rosettacode.org/wiki/Four_bit_adder',\n", + " 'http://www.rosettacode.org/wiki/Undefined_values',\n", + " 'http://www.rosettacode.org/wiki/Fibonacci_word/fractal',\n", + " 'http://www.rosettacode.org/wiki/Metronome',\n", + " 'http://www.rosettacode.org/wiki/A%2BB',\n", + " 'http://www.rosettacode.org/wiki/Flatten_a_list',\n", + " 'http://www.rosettacode.org/wiki/Twelve_statements',\n", + " 'http://www.rosettacode.org/wiki/Active_object',\n", + " 'http://www.rosettacode.org/wiki/Program_termination',\n", + " 'http://www.rosettacode.org/wiki/Operator_precedence',\n", + " 'http://www.rosettacode.org/wiki/Phrase_reversals',\n", + " 'http://www.rosettacode.org/wiki/Atomic_updates',\n", + " 'http://www.rosettacode.org/wiki/Add_a_variable_to_a_class_instance_at_runtime',\n", + " 'http://www.rosettacode.org/wiki/Parametric_polymorphism',\n", + " 'http://www.rosettacode.org/wiki/Pinstripe/Display',\n", + " 'http://www.rosettacode.org/wiki/Price_fraction',\n", + " 'http://www.rosettacode.org/wiki/Bulls_and_cows/Player',\n", + " 'http://www.rosettacode.org/wiki/Penney%27s_game',\n", + " 'http://www.rosettacode.org/wiki/Averages/Pythagorean_means',\n", + " 'http://www.rosettacode.org/wiki/Variable_size/Set',\n", + " 'http://www.rosettacode.org/wiki/Middle_three_digits',\n", + " 'http://www.rosettacode.org/wiki/SQL-based_authentication',\n", + " 'http://www.rosettacode.org/wiki/Special:WebChat',\n", + " 'http://www.rosettacode.org/wiki/CSV_data_manipulation']" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "web_scraper\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0adatype Link;type Link_Access is access Link;type...
1adageneric type Element_Type is private;package...
2adatype Link is limited record Next : not null ...
3algol68# -*- coding: utf-8 -*- #CO REQUIRES: MODE OB...
5cstruct link { struct link *next; struct link...
6csharpclass Link{ public int item; public Link...
7clojure(defrecord Node [prev next data]) (defn new-no...
8lisp(defstruct dlist head tail)(defstruct dlink co...
9dstruct Node(T) { T data; typeof(this)* p...
10dstruct Node(T) { type  pList = ^List ;  ...
11edef makeElement(var value, var next, var prev)...
12erlangnew( Data ) -> erlang:spawn( fun() -> loop( D...
13fortrantype node real :: data type(node), pointer...
14gotype dlNode struct { string next, prev *...
15goimport \"container/list\" var node list.Element/...
16haskelldata DList a = Leaf | Node (DList a) a (DList...
17uniconclass DoubleLink (value, prev_link, next_link...
18jcoclass'DoublyLinkedListElement'create=:3 :0 ...
19jcoclass'DoublyLinkedListHead'create=:3 :0 pre...
20javapublic class Node<T> { private T element; ...
21javascriptfunction DoublyLinkedList(value, next, prev) {...
22modula2TYPE Link = POINTER TO LinkRcd; LinkRcd = RE...
24ocamltype 'a dlink = { mutable data: 'a; mutable ...
25ocaml# let dl = dlink_of_list [1;2;3;4;5] in iter_...
26ocamltype 'a nav_list = 'a list * 'a * 'a list
27ocamllet nav_list_of_list = function | hd::tl -> [...
28ocaml# let nl = nav_list_of_list [1;2;3;4;5] ;;val ...
29ozfun {CreateNewNode Value} node(prev:{NewCell...
30pascaltype link_ptr = ^link; data_ptr = ^data; (...
31perlmy %node = ( data => 'say what', next ...
.........
272java5import java.util.*; public class IntConcat {  ...
273java5import java.util.Comparator;import java.util.s...
277luafunction icsort(numbers)\\ttable.sort(numbers,f...
279netrexx/* NetRexx */options replace format comments j...
281ocamllet myCompare a b = compare (b ^ a) (a ^ b)let...
283pascalconst base = 10; MaxDigitCnt = 11; sourc...
284pascalconst base = 10; MaxDigitCnt = 11; sourc...
285parigplarge(v)=eval(concat(apply(n->Str(n),vecsort(v...
286perlsub maxnum { join '', sort { \"$b$a\" cmp \"$a...
287perl6sub maxnum(@x) { [~] @x.sort: -> $a, $b { $...
288phpfunction maxnum($nums) { usort($nums, func...
293pli/* Largest catenation of integers ...
294prologlargest_int_v1(In, Out) :-\\tmaplist(name, In, ...
295prologlargest_int_v2(In, Out) :-\\tmaplist(name, In, ...
296pythontry: cmp # Python 2 OK or NameError in ...
297pythondef maxnum(x): maxlen = len(str(max(x))) ...
298pythonfrom fractions import Fractionfrom math import...
299pythonfrom itertools import permutationsdef maxnum(x...
301rexx/*REXX pgm constructs largest integer from a ...
302rubydef icsort nums nums.sort { |x, y| \"#{y}#{x}\"...
303rubydef icsort nums maxlen = nums.max.to_s.length...
304rubyrequire 'rational' #Only needed in Ruby < 1.9 ...
306scalaobject LIFCI extends App {  def lifci(list: L...
307scheme(define (cat . nums) (apply string-append (ma...
308rubyfunc maxnum(nums) { nums.sort {|x,y| \"#{y}...
309tclproc intcatsort {nums} { lsort -command {ap...
310tclforeach collection { {1 34 3 98 9 76 45 4} ...
311vbFunction largestint(list)\\tnums = Split(list,...
312vim%s/\\(.\\+\\)/\\1\\1/ | sort! | %s/\\(.\\+\\)\\1\\n/\\1/
313bash$ paste -s nums1\\t34\\t3\\t98\\t9\\t76\\t45\\t4$ vim...
\n", + "

236 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 ada type Link;type Link_Access is access Link;type...\n", + "1 ada generic type Element_Type is private;package...\n", + "2 ada type Link is limited record Next : not null ...\n", + "3 algol68 # -*- coding: utf-8 -*- #CO REQUIRES: MODE OB...\n", + "5 c struct link { struct link *next; struct link...\n", + "6 csharp class Link{ public int item; public Link...\n", + "7 clojure (defrecord Node [prev next data]) (defn new-no...\n", + "8 lisp (defstruct dlist head tail)(defstruct dlink co...\n", + "9 d struct Node(T) { T data; typeof(this)* p...\n", + "10 d struct Node(T) { type  pList = ^List ;  ...\n", + "11 e def makeElement(var value, var next, var prev)...\n", + "12 erlang  new( Data ) -> erlang:spawn( fun() -> loop( D...\n", + "13 fortran type node real :: data type(node), pointer...\n", + "14 go type dlNode struct { string next, prev *...\n", + "15 go import \"container/list\" var node list.Element/...\n", + "16 haskell  data DList a = Leaf | Node (DList a) a (DList...\n", + "17 unicon  class DoubleLink (value, prev_link, next_link...\n", + "18 j coclass'DoublyLinkedListElement'create=:3 :0 ...\n", + "19 j coclass'DoublyLinkedListHead'create=:3 :0 pre...\n", + "20 java public class Node { private T element; ...\n", + "21 javascript function DoublyLinkedList(value, next, prev) {...\n", + "22 modula2 TYPE Link = POINTER TO LinkRcd; LinkRcd = RE...\n", + "24 ocaml type 'a dlink = { mutable data: 'a; mutable ...\n", + "25 ocaml # let dl = dlink_of_list [1;2;3;4;5] in iter_...\n", + "26 ocaml type 'a nav_list = 'a list * 'a * 'a list\n", + "27 ocaml let nav_list_of_list = function | hd::tl -> [...\n", + "28 ocaml # let nl = nav_list_of_list [1;2;3;4;5] ;;val ...\n", + "29 oz fun {CreateNewNode Value} node(prev:{NewCell...\n", + "30 pascal type link_ptr = ^link; data_ptr = ^data; (...\n", + "31 perl my %node = ( data => 'say what', next ...\n", + ".. ... ...\n", + "272 java5 import java.util.*; public class IntConcat {  ...\n", + "273 java5 import java.util.Comparator;import java.util.s...\n", + "277 lua function icsort(numbers)\\ttable.sort(numbers,f...\n", + "279 netrexx /* NetRexx */options replace format comments j...\n", + "281 ocaml let myCompare a b = compare (b ^ a) (a ^ b)let...\n", + "283 pascal const base = 10; MaxDigitCnt = 11; sourc...\n", + "284 pascal const base = 10; MaxDigitCnt = 11; sourc...\n", + "285 parigp large(v)=eval(concat(apply(n->Str(n),vecsort(v...\n", + "286 perl sub maxnum { join '', sort { \"$b$a\" cmp \"$a...\n", + "287 perl6 sub maxnum(@x) { [~] @x.sort: -> $a, $b { $...\n", + "288 php function maxnum($nums) { usort($nums, func...\n", + "293 pli  /* Largest catenation of integers ...\n", + "294 prolog largest_int_v1(In, Out) :-\\tmaplist(name, In, ...\n", + "295 prolog largest_int_v2(In, Out) :-\\tmaplist(name, In, ...\n", + "296 python try: cmp # Python 2 OK or NameError in ...\n", + "297 python def maxnum(x): maxlen = len(str(max(x))) ...\n", + "298 python from fractions import Fractionfrom math import...\n", + "299 python from itertools import permutationsdef maxnum(x...\n", + "301 rexx /*REXX pgm constructs largest integer from a ...\n", + "302 ruby def icsort nums nums.sort { |x, y| \"#{y}#{x}\"...\n", + "303 ruby def icsort nums maxlen = nums.max.to_s.length...\n", + "304 ruby require 'rational' #Only needed in Ruby < 1.9 ...\n", + "306 scala object LIFCI extends App {  def lifci(list: L...\n", + "307 scheme (define (cat . nums) (apply string-append (ma...\n", + "308 ruby func maxnum(nums) { nums.sort {|x,y| \"#{y}...\n", + "309 tcl proc intcatsort {nums} { lsort -command {ap...\n", + "310 tcl foreach collection { {1 34 3 98 9 76 45 4} ...\n", + "311 vb  Function largestint(list)\\tnums = Split(list,...\n", + "312 vim %s/\\(.\\+\\)/\\1\\1/ | sort! | %s/\\(.\\+\\)\\1\\n/\\1/\n", + "313 bash $ paste -s nums1\\t34\\t3\\t98\\t9\\t76\\t45\\t4$ vim...\n", + "\n", + "[236 rows x 2 columns]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = scrape_and_clean(5)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0adatype Link;type Link_Access is access Link;type...
1adageneric type Element_Type is private;package...
2adatype Link is limited record Next : not null ...
3algol68# -*- coding: utf-8 -*- #CO REQUIRES: MODE OB...
5cstruct link { struct link *next; struct link...
6csharpclass Link{ public int item; public Link...
7clojure(defrecord Node [prev next data]) (defn new-no...
8lisp(defstruct dlist head tail)(defstruct dlink co...
9dstruct Node(T) { T data; typeof(this)* p...
10dstruct Node(T) { type  pList = ^List ;  ...
11edef makeElement(var value, var next, var prev)...
12erlangnew( Data ) -> erlang:spawn( fun() -> loop( D...
13fortrantype node real :: data type(node), pointer...
14gotype dlNode struct { string next, prev *...
15goimport \"container/list\" var node list.Element/...
16haskelldata DList a = Leaf | Node (DList a) a (DList...
17uniconclass DoubleLink (value, prev_link, next_link...
18jcoclass'DoublyLinkedListElement'create=:3 :0 ...
19jcoclass'DoublyLinkedListHead'create=:3 :0 pre...
20javapublic class Node<T> { private T element; ...
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 ada type Link;type Link_Access is access Link;type...\n", + "1 ada generic type Element_Type is private;package...\n", + "2 ada type Link is limited record Next : not null ...\n", + "3 algol68 # -*- coding: utf-8 -*- #CO REQUIRES: MODE OB...\n", + "5 c struct link { struct link *next; struct link...\n", + "6 csharp class Link{ public int item; public Link...\n", + "7 clojure (defrecord Node [prev next data]) (defn new-no...\n", + "8 lisp (defstruct dlist head tail)(defstruct dlink co...\n", + "9 d struct Node(T) { T data; typeof(this)* p...\n", + "10 d struct Node(T) { type  pList = ^List ;  ...\n", + "11 e def makeElement(var value, var next, var prev)...\n", + "12 erlang  new( Data ) -> erlang:spawn( fun() -> loop( D...\n", + "13 fortran type node real :: data type(node), pointer...\n", + "14 go type dlNode struct { string next, prev *...\n", + "15 go import \"container/list\" var node list.Element/...\n", + "16 haskell  data DList a = Leaf | Node (DList a) a (DList...\n", + "17 unicon  class DoubleLink (value, prev_link, next_link...\n", + "18 j coclass'DoublyLinkedListElement'create=:3 :0 ...\n", + "19 j coclass'DoublyLinkedListHead'create=:3 :0 pre...\n", + "20 java public class Node { private T element; ..." + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(20)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "new_df = df[df[0]!='text']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0adatype Link;type Link_Access is access Link;type...
1adageneric type Element_Type is private;package...
2adatype Link is limited record Next : not null ...
3algol68# -*- coding: utf-8 -*- #CO REQUIRES: MODE OB...
5cstruct link { struct link *next; struct link...
6csharpclass Link{ public int item; public Link...
7clojure(defrecord Node [prev next data]) (defn new-no...
8lisp(defstruct dlist head tail)(defstruct dlink co...
9dstruct Node(T) { T data; typeof(this)* p...
10dstruct Node(T) { type  pList = ^List ;  ...
11edef makeElement(var value, var next, var prev)...
12erlangnew( Data ) -> erlang:spawn( fun() -> loop( D...
13fortrantype node real :: data type(node), pointer...
14gotype dlNode struct { string next, prev *...
15goimport \"container/list\" var node list.Element/...
16haskelldata DList a = Leaf | Node (DList a) a (DList...
17uniconclass DoubleLink (value, prev_link, next_link...
18jcoclass'DoublyLinkedListElement'create=:3 :0 ...
19jcoclass'DoublyLinkedListHead'create=:3 :0 pre...
20javapublic class Node<T> { private T element; ...
21javascriptfunction DoublyLinkedList(value, next, prev) {...
22modula2TYPE Link = POINTER TO LinkRcd; LinkRcd = RE...
24ocamltype 'a dlink = { mutable data: 'a; mutable ...
25ocaml# let dl = dlink_of_list [1;2;3;4;5] in iter_...
26ocamltype 'a nav_list = 'a list * 'a * 'a list
27ocamllet nav_list_of_list = function | hd::tl -> [...
28ocaml# let nl = nav_list_of_list [1;2;3;4;5] ;;val ...
29ozfun {CreateNewNode Value} node(prev:{NewCell...
30pascaltype link_ptr = ^link; data_ptr = ^data; (...
31perlmy %node = ( data => 'say what', next ...
.........
272java5import java.util.*; public class IntConcat {  ...
273java5import java.util.Comparator;import java.util.s...
277luafunction icsort(numbers)\\ttable.sort(numbers,f...
279netrexx/* NetRexx */options replace format comments j...
281ocamllet myCompare a b = compare (b ^ a) (a ^ b)let...
283pascalconst base = 10; MaxDigitCnt = 11; sourc...
284pascalconst base = 10; MaxDigitCnt = 11; sourc...
285parigplarge(v)=eval(concat(apply(n->Str(n),vecsort(v...
286perlsub maxnum { join '', sort { \"$b$a\" cmp \"$a...
287perl6sub maxnum(@x) { [~] @x.sort: -> $a, $b { $...
288phpfunction maxnum($nums) { usort($nums, func...
293pli/* Largest catenation of integers ...
294prologlargest_int_v1(In, Out) :-\\tmaplist(name, In, ...
295prologlargest_int_v2(In, Out) :-\\tmaplist(name, In, ...
296pythontry: cmp # Python 2 OK or NameError in ...
297pythondef maxnum(x): maxlen = len(str(max(x))) ...
298pythonfrom fractions import Fractionfrom math import...
299pythonfrom itertools import permutationsdef maxnum(x...
301rexx/*REXX pgm constructs largest integer from a ...
302rubydef icsort nums nums.sort { |x, y| \"#{y}#{x}\"...
303rubydef icsort nums maxlen = nums.max.to_s.length...
304rubyrequire 'rational' #Only needed in Ruby < 1.9 ...
306scalaobject LIFCI extends App {  def lifci(list: L...
307scheme(define (cat . nums) (apply string-append (ma...
308rubyfunc maxnum(nums) { nums.sort {|x,y| \"#{y}...
309tclproc intcatsort {nums} { lsort -command {ap...
310tclforeach collection { {1 34 3 98 9 76 45 4} ...
311vbFunction largestint(list)\\tnums = Split(list,...
312vim%s/\\(.\\+\\)/\\1\\1/ | sort! | %s/\\(.\\+\\)\\1\\n/\\1/
313bash$ paste -s nums1\\t34\\t3\\t98\\t9\\t76\\t45\\t4$ vim...
\n", + "

236 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 ada type Link;type Link_Access is access Link;type...\n", + "1 ada generic type Element_Type is private;package...\n", + "2 ada type Link is limited record Next : not null ...\n", + "3 algol68 # -*- coding: utf-8 -*- #CO REQUIRES: MODE OB...\n", + "5 c struct link { struct link *next; struct link...\n", + "6 csharp class Link{ public int item; public Link...\n", + "7 clojure (defrecord Node [prev next data]) (defn new-no...\n", + "8 lisp (defstruct dlist head tail)(defstruct dlink co...\n", + "9 d struct Node(T) { T data; typeof(this)* p...\n", + "10 d struct Node(T) { type  pList = ^List ;  ...\n", + "11 e def makeElement(var value, var next, var prev)...\n", + "12 erlang  new( Data ) -> erlang:spawn( fun() -> loop( D...\n", + "13 fortran type node real :: data type(node), pointer...\n", + "14 go type dlNode struct { string next, prev *...\n", + "15 go import \"container/list\" var node list.Element/...\n", + "16 haskell  data DList a = Leaf | Node (DList a) a (DList...\n", + "17 unicon  class DoubleLink (value, prev_link, next_link...\n", + "18 j coclass'DoublyLinkedListElement'create=:3 :0 ...\n", + "19 j coclass'DoublyLinkedListHead'create=:3 :0 pre...\n", + "20 java public class Node { private T element; ...\n", + "21 javascript function DoublyLinkedList(value, next, prev) {...\n", + "22 modula2 TYPE Link = POINTER TO LinkRcd; LinkRcd = RE...\n", + "24 ocaml type 'a dlink = { mutable data: 'a; mutable ...\n", + "25 ocaml # let dl = dlink_of_list [1;2;3;4;5] in iter_...\n", + "26 ocaml type 'a nav_list = 'a list * 'a * 'a list\n", + "27 ocaml let nav_list_of_list = function | hd::tl -> [...\n", + "28 ocaml # let nl = nav_list_of_list [1;2;3;4;5] ;;val ...\n", + "29 oz fun {CreateNewNode Value} node(prev:{NewCell...\n", + "30 pascal type link_ptr = ^link; data_ptr = ^data; (...\n", + "31 perl my %node = ( data => 'say what', next ...\n", + ".. ... ...\n", + "272 java5 import java.util.*; public class IntConcat {  ...\n", + "273 java5 import java.util.Comparator;import java.util.s...\n", + "277 lua function icsort(numbers)\\ttable.sort(numbers,f...\n", + "279 netrexx /* NetRexx */options replace format comments j...\n", + "281 ocaml let myCompare a b = compare (b ^ a) (a ^ b)let...\n", + "283 pascal const base = 10; MaxDigitCnt = 11; sourc...\n", + "284 pascal const base = 10; MaxDigitCnt = 11; sourc...\n", + "285 parigp large(v)=eval(concat(apply(n->Str(n),vecsort(v...\n", + "286 perl sub maxnum { join '', sort { \"$b$a\" cmp \"$a...\n", + "287 perl6 sub maxnum(@x) { [~] @x.sort: -> $a, $b { $...\n", + "288 php function maxnum($nums) { usort($nums, func...\n", + "293 pli  /* Largest catenation of integers ...\n", + "294 prolog largest_int_v1(In, Out) :-\\tmaplist(name, In, ...\n", + "295 prolog largest_int_v2(In, Out) :-\\tmaplist(name, In, ...\n", + "296 python try: cmp # Python 2 OK or NameError in ...\n", + "297 python def maxnum(x): maxlen = len(str(max(x))) ...\n", + "298 python from fractions import Fractionfrom math import...\n", + "299 python from itertools import permutationsdef maxnum(x...\n", + "301 rexx /*REXX pgm constructs largest integer from a ...\n", + "302 ruby def icsort nums nums.sort { |x, y| \"#{y}#{x}\"...\n", + "303 ruby def icsort nums maxlen = nums.max.to_s.length...\n", + "304 ruby require 'rational' #Only needed in Ruby < 1.9 ...\n", + "306 scala object LIFCI extends App {  def lifci(list: L...\n", + "307 scheme (define (cat . nums) (apply string-append (ma...\n", + "308 ruby func maxnum(nums) { nums.sort {|x,y| \"#{y}...\n", + "309 tcl proc intcatsort {nums} { lsort -command {ap...\n", + "310 tcl foreach collection { {1 34 3 98 9 76 45 4} ...\n", + "311 vb  Function largestint(list)\\tnums = Split(list,...\n", + "312 vim %s/\\(.\\+\\)/\\1\\1/ | sort! | %s/\\(.\\+\\)\\1\\n/\\1/\n", + "313 bash $ paste -s nums1\\t34\\t3\\t98\\t9\\t76\\t45\\t4$ vim...\n", + "\n", + "[236 rows x 2 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/More ideas.ipynb b/More ideas.ipynb new file mode 100644 index 0000000..a346053 --- /dev/null +++ b/More ideas.ipynb @@ -0,0 +1,58 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#from stack overflow\n", + "import urllib2\n", + "from BeautifulSoup import BeautifulSoup\n", + "# or if you're using BeautifulSoup4:\n", + "# from bs4 import BeautifulSoup\n", + "\n", + "soup = BeautifulSoup(urllib2.urlopen('http://example.com').read())\n", + "\n", + "for row in soup('table', {'class': 'spad'})[0].tbody('tr'):\n", + " tds = row('td')\n", + " print tds[0].string, tds[1].string\n", + " # will print date and sunrise" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/New_Trial.ipynb b/New_Trial.ipynb new file mode 100644 index 0000000..d9c19bd --- /dev/null +++ b/New_Trial.ipynb @@ -0,0 +1,114 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import csv\n", + "import re\n", + "import numpy as np\n", + "import random\n", + "\n", + "from collections import Counter\n", + "\n", + "from sklearn.pipeline import make_pipeline, make_union\n", + "from sklearn.base import TransformerMixin\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.metrics import classification_report, confusion_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def longest_run_of_capitol_letters_feature(char, text):\n", + " \"\"\"Find the longest run of capitol letters and return their length.\"\"\"\n", + " if char == '~':\n", + " runs = sorted(re.findall(r\"~+\", text), key=len)\n", + " elif char == '.':\n", + " runs = sorted(re.findall(r\"\\.+\", text), key=len)\n", + " elif char == '|':\n", + " runs = sorted(re.findall(r\"\\|+\", text), key=len)\n", + " elif char == ':':\n", + " runs = sorted(re.findall(r\"\\:+\", text), key=len)\n", + " elif char == ';':\n", + " runs = sorted(re.findall(r\";+\", text), key=len)\n", + " elif char == '$':\n", + " runs = sorted(re.findall(r\"\\$+\", text), key=len)\n", + " elif char == '(':\n", + " runs = sorted(re.findall(r\"\\(+\", text), key=len)\n", + " elif char == ')':\n", + " runs = sorted(re.findall(r\"\\)+\", text), key=len)\n", + " elif char == '-':\n", + " runs = sorted(re.findall(r\"\\-+\", text), key=len)\n", + " if runs:\n", + " return len(runs[-1])\n", + " else:\n", + " return 0\n", + "\n", + "def longest_run_of_character_feature(text):\n", + " \"\"\"Find the longest run of characters and return their length.\"\"\"\n", + " runs = sorted(re.findall(r\"[A-Z]+\", text), key=len)\n", + " if runs:\n", + " return len(runs[-1])\n", + " else:\n", + " return 0 \n", + " \n", + "def percent_character_feature(char, text):\n", + " \"\"\"Return percentage of text that is a particular char compared to total text length.\"\"\"\n", + " def feature_fn(text):\n", + " periods = text.count(char)\n", + " return periods / len(text)\n", + " return feature_fn\n", + "\n", + "class FunctionFeaturizer(TransformerMixin):\n", + " def __init__(self, *featurizers):\n", + " self.featurizers = featurizers\n", + " \n", + " def fit(self, X, y=None):\n", + " \"\"\"All SciKit-Learn compatible transformers and classifiers have the\n", + " same interface. `fit` always returns the same object.\"\"\"\n", + " return self\n", + " \n", + " def transform(self, X):\n", + " \"\"\"Given a list of original data, return a list of feature vectors.\"\"\"\n", + " fvs = []\n", + " for datum in X:\n", + " fv = [f(datum) for f in self.featurizers]\n", + " fvs.append(fv)\n", + " return np.array(fvs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..2789b9e --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,47 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from programming_language_classifier import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df = scrape_and_clean" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/programming_language_classifier.py b/programming_language_classifier.py new file mode 100644 index 0000000..063806d --- /dev/null +++ b/programming_language_classifier.py @@ -0,0 +1,129 @@ +from bs4 import BeautifulSoup +import urllib +from re import findall +import pandas as pd +import random +from sklearn.naive_bayes import GaussianNB +from sklearn.naive_bayes import MultinomialNB +from sklearn.naive_bayes import BernoulliNB +from sklearn.cross_validation import train_test_split +from sklearn.pipeline import Pipeline +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.cross_validation import cross_val_score + +# C (.gcc, .c) +# C# +# Common Lisp (.sbcl) +# Clojure +# Haskell +# Java +# JavaScript +# OCaml +# Perl +# PHP (.hack, .php) +# Python +# Ruby (.jruby, .yarv) +# Scala +# Scheme (.racket) + +# def get_text(url): +# """Takes a url and returns text""" +# req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'}) +# content = urllib.request.urlopen(req).read() +# page_text=BeautifulSoup(content) +# return page_text.get_text() + +# def scrape_text(text): +# data_crop = findall("[EDIT] \n.+\n", text) +# return data_crop + + +# def scrape_text(text): +# """Takes text from get_text and returns a list of tuples with +# language in [0] and code in [1]""" +# data_crop = findall(r"edit] (.+)\n(.+)\n", text) +# return data_crop +# ##Should maybe grab all of the text +# +# def scrape_links(): +# """Creates list of links to use with create_url to gather code.""" +# with open ("links_list.txt", "r") as myfile: +# data=myfile.read() +# return findall(r"wiki/(.+)\" ti", data) + + +# def create_url_for_scraping(task_string): +# return "http://www.rosettacode.org{}".format(task_string) + +language_start = ["C", "C#", "Common Lisp", "Clojure", "Haskell", + "Java", "JavaScript", "OCaml", "Perl", "PHP", + "Python", "Ruby", "Scala", "Scheme"] + + +def scrape_data(url): + req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'}) + content = urllib.request.urlopen(req).read() + soup = BeautifulSoup(content) + return soup.find_all( "pre", class_="highlighted_source") + #pre is an html tag. We want all text from pre with class highlighted_source + #returns a list of soup objects + + +def pull_code_from_soup(soup_list): + return [[soup_list[i]['class'][0], soup_list[i].get_text()] for i in range(len(soup_list))] + + +def make_data(url_list): + code_snippets = pd.DataFrame(columns=([0, 1])) + for url in url_list: + soup_list = scrape_data(url) + code_snippets = code_snippets.append(pd.DataFrame(pull_code_from_soup(soup_list)), ignore_index=True) + return code_snippets + + +def scrape_links(): + req = urllib.request.Request('http://rosettacode.org/wiki/Category:Programming_Tasks', headers={'User-Agent': 'Mozilla/5.0'}) + content = urllib.request.urlopen(req).read() + soup = BeautifulSoup(content) + link_list = [link.get('href') for link in soup.find_all('a')] + return ["http://www.rosettacode.org{}".format(link) for link in link_list[1:] if link.startswith('/wiki/')] + + +def make_links_list(num_links=30): + return random.sample(scrape_links(), num_links) + + +def scrape_and_clean(num_links=30): + df = make_data(make_links_list(num_links)) + new_df = df[df[0]!='text'] + return new_df + + +def scrape_clean_cut(num_links=100, min_examples=40): + df = make_data(make_links_list(num_links)) + new_df = df[df[0]!='text'] + new_df = new_df.groupby(0).filter(lambda x: len(x) >= min_examples) + return new_df + +def pipeline_runner(dataframe, estimator): + ##Re-testing with MultinomialNB + y = dataframe.loc[:, 0] + X = dataframe.loc[:, 1] + #splitting data + X_train, X_test, y_train, y_test = train_test_split(X, y) + #running pipe to vectorize and run estimator + if estimator == 'Multinomial': + estimator_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('mnb', MultinomialNB())]) + elif estimator == 'Gaussian': + estimator_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('gnb', GaussianNB())]) + elif estimator == 'Bernoulli': + estimator_pipe = Pipeline([('bag_of_words', CountVectorizer(binary=True)), + ('bnb', BernoulliNB())]) + else: + return pipeline_runner(dataframe, estimator) + #fitting + estimator_pipe.fit(X_train, y_train) + #checking score + return estimator_pipe.score(X_train, y_train), estimator_pipe.score(X_test, y_test) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 473a3b2..894d818 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ scipy pandas numpy matplotlib +beautifulsoup4 From fff49f531b20e630853e9a6fcd4e88d6a02505f7 Mon Sep 17 00:00:00 2001 From: Will Flowers Date: Mon, 8 Jun 2015 01:28:22 -0400 Subject: [PATCH 2/3] attempt/partial understanding --- README.md | 13 +++++++++++++ data_scrape.py | 0 final.py | 0 3 files changed, 13 insertions(+) create mode 100644 data_scrape.py create mode 100644 final.py diff --git a/README.md b/README.md index 394f93b..b1c308c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,16 @@ +Clinton, + +Hello, hope you had a good weekend. So, I worked with Josh and Tripp and Sovello for this assignment. +Overall, I learned more this way than I have in the past, working on my own. I didn't ask quite enough +questions as there are a few things I don't fully understand/still need some practice on. I also made +the mistake of not working on my own computer; I was more or less working from and with them on their +computers. I know the work I'm producing is not good, but I'm still learning a lot, and overall, +making progress. I'll probably just pick one person next time instead of getting in a group of 4, that +way it will be easier to ask questions, and do more work on my own. + + + + # Classify code snippets into programming languages ## Description diff --git a/data_scrape.py b/data_scrape.py new file mode 100644 index 0000000..e69de29 diff --git a/final.py b/final.py new file mode 100644 index 0000000..e69de29 From 40c5ae6239497b78ca2ffc647c77015759adf4e8 Mon Sep 17 00:00:00 2001 From: Will Flowers Date: Fri, 20 May 2016 13:39:10 -0400 Subject: [PATCH 3/3] Update README --- README.md | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/README.md b/README.md index b1c308c..394f93b 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,3 @@ -Clinton, - -Hello, hope you had a good weekend. So, I worked with Josh and Tripp and Sovello for this assignment. -Overall, I learned more this way than I have in the past, working on my own. I didn't ask quite enough -questions as there are a few things I don't fully understand/still need some practice on. I also made -the mistake of not working on my own computer; I was more or less working from and with them on their -computers. I know the work I'm producing is not good, but I'm still learning a lot, and overall, -making progress. I'll probably just pick one person next time instead of getting in a group of 4, that -way it will be easier to ask questions, and do more work on my own. - - - - # Classify code snippets into programming languages ## Description