diff --git a/.gitignore b/.gitignore index f00dbf2..4f29d80 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,13 @@ docs/_build/ # PyBuilder target/ +.direnv/ +.envrc +.idea/ +.DS_Store +scraper_500x100.pkl +scraper_50x10.pkl +scraper_50x1_17.pkl +scraper_700x1_17.pkl +test_X_values.pkl +test_y_values.pkl diff --git a/bs4_scratch_work.ipynb b/bs4_scratch_work.ipynb new file mode 100644 index 0000000..8ce7fd5 --- /dev/null +++ b/bs4_scratch_work.ipynb @@ -0,0 +1,6549 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup\n", + "import urllib" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "url = 'http://www.pythonforbeginners.com'" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "content = urllib.request.urlopen(req).read()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "soup = BeautifulSoup(content)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " Pythonforbeginners.com - Learn Python by Example\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " python\n", + " \n", + " for beginners\n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " ©\n", + " \n", + " Perceptive Minds\n", + " \n", + " 2012-2014\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(soup.prettify())" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Pythonforbeginners.com - Learn Python by Example \n" + ] + } + ], + "source": [ + "print(soup.title)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[ Pythonforbeginners.com - Learn Python by Example ]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soup.find_all(\"title\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[\n", + " python for beginners\n", + " \n", + " ,\n", + " basics,\n", + " lists,\n", + " dictionary,\n", + " code snippets,\n", + " modules,\n", + " Getting the most popular pages from your Apache logfile,\n", + " ,\n", + " \n", + " read more →,\n", + "   Code snippets,\n", + "   collection,\n", + "   IO,\n", + " Make your life easier with Virtualenvwrapper,\n", + " ,\n", + " \n", + " read more →,\n", + "   Pip,\n", + "   Virtualenv,\n", + " This site now runs on Django,\n", + " ,\n", + " \n", + " read more →,\n", + "   News,\n", + " PythonForBeginners.com has a new owner,\n", + " ,\n", + " \n", + " read more →,\n", + "   News,\n", + " How to use Pillow, a fork of PIL,\n", + " ,\n", + " \n", + " read more →,\n", + "   gui,\n", + "   imaging,\n", + "   Modules,\n", + "   pil,\n", + " How to use the Python Imaging Library,\n", + " ,\n", + " \n", + " read more →,\n", + "   gui,\n", + "   imaging,\n", + "   Modules,\n", + "   pil,\n", + " Python Websites and Tutorials,\n", + " ,\n", + " \n", + " read more →,\n", + "   Basics,\n", + " How to use Envoy,\n", + " ,\n", + " 
\n", + " read more →,\n", + "   envoy,\n", + "   subprocess,\n", + "   System & OS,\n", + " Using Feedparser in Python,\n", + " ,\n", + " \n", + " read more →,\n", + "   Feedparser,\n", + "   Scrapers,\n", + "   Web & Internet,\n", + " Subprocess and Shell Commands in Python,\n", + " ,\n", + " \n", + " read more →,\n", + "   OS,\n", + "   subprocess,\n", + "   System & OS,\n", + " «,\n", + " ,\n", + " 1,\n", + " 2,\n", + " 3,\n", + " 4,\n", + " 5,\n", + " 6,\n", + " 7,\n", + " 8,\n", + " 9,\n", + " 10,\n", + " ,\n", + " »,\n", + " follow @pythonbeginners,\n", + " Basics,\n", + " Cheatsheet,\n", + " Code snippets,\n", + " Development,\n", + " Dictionary,\n", + " Error Handling,\n", + " Lists,\n", + " Loops,\n", + " Modules,\n", + " Strings,\n", + " System & OS,\n", + " Uncategorized,\n", + " Web & Internet,\n", + " Perceptive Minds]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soup.find_all(\"a\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n", + " Pythonforbeginners.com - Learn Python by Example \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "python for beginners\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "basics\n", + "lists\n", + "dictionary\n", + "code snippets\n", + "modules\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "(adsbygoogle = window.adsbygoogle || []).push({});\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Getting the most popular pages from your Apache logfile\n", + "\n", + "\n", + "\n", + " An Apache logfile can be huge and hard to read. 
Here is a way to get a list of the most visited pages (or files) from an Apache logfile. In this example, we only want to know the URLs from GET requests. We will use the wonderful Counter which is ...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  Code snippets\n", + "  collection\n", + "  IO\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Make your life easier with Virtualenvwrapper\n", + "\n", + "\n", + "\n", + " When you do a lot of Python programming, you can make a mess of your system with Pip. Different apps need different requirements. One app needs version 1.2 of a package and another one needs 1.5. And then... you're in trouble. When you want to know what packages have been ...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  Pip\n", + "  Virtualenv\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "This site now runs on Django\n", + "\n", + "\n", + "\n", + " Today we finished migrating the old PythonForBeginners.com from PHP/WordPress to Python/Django.There's still some legacy WordPress inline HTML included in the posts, causing example code blocks to render in a strange way. They will be cleaned up in the near future.This site now has a responsive design, using Twitter Bootstrap and ...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  News\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "PythonForBeginners.com has a new owner\n", + "\n", + "\n", + "\n", + " I am pleased to announce that this blog has a new owner. This week, my company Perceptive Minds acquired PythonForBeginners.com. From now on I will be the main developer and author of this blog. 
I am a developer of high traffic websites, mostly Python/Django, and I do a lot of ...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  News\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "(adsbygoogle = window.adsbygoogle || []).push({});\n", + "\n", + "\n", + "\n", + "\n", + "How to use Pillow, a fork of PIL\n", + "\n", + "\n", + "\n", + " Overview In last post I was writing about PIL, also known as Python Imaging Library, this library can be used to manipulate images quite easy. PIL hasn't seen any development since 2009. Therefore, the kind users of this site suggested to take a look at Pillow. This article well tell ...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  gui\n", + "  imaging\n", + "  Modules\n", + "  pil\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "How to use the Python Imaging Library\n", + "\n", + "\n", + "\n", + " PIL is deprecated, obsolete. please use Pillow. Find out How to use the Pillow\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  gui\n", + "  imaging\n", + "  Modules\n", + "  pil\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Python Websites and Tutorials\n", + "\n", + "\n", + "\n", + " The list below is made to help new Python programmers to find the best online resources to learn Python. Python Websites Tutorials Learn Python in 10 minutes Python for Beginners Python Documentation Index Welcome to Python for you and me Python Articles Dive Into Python Hyperpolyglot Learn X in Y ...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  Basics\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "How to use Envoy\n", + "\n", + "\n", + "\n", + " About Envoy Recently I stumble upon Envoy. Envoy is a wrapper around the subprocess module and is supposed to humanize subprocess of Python. Its written by Kenneth Reitz (the author of \"Requests: HTTP for Humans\") Why use Envoy? It was written to be an easy to use alternative to subprocess. 
...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  envoy\n", + "  subprocess\n", + "  System & OS\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Using Feedparser in Python\n", + "\n", + "\n", + "\n", + " Overview In this post we will take a look on how we can download and parse syndicated feeds with Python. The Python module we will use for that is \"Feedparser\". The complete documentation can be found here. What is RSS? RSS stands for Rich Site Summary and uses standard web ...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  Feedparser\n", + "  Scrapers\n", + "  Web & Internet\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Subprocess and Shell Commands in Python\n", + "\n", + "\n", + "\n", + " Subprocess Overview For a long time I have been using os.system() when dealing with system administration tasks in Python. The main reason for that, was that I thought that was the simplest way of running Linux commands. In the official python documentation we can read that subprocess should be used ...\n", + " \n", + "\n", + "\n", + " read more →\n", + "\n", + "  OS\n", + "  subprocess\n", + "  System & OS\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "«\n", + "\n", + "\n", + "←\n", + "\n", + "\n", + "1\n", + "\n", + "\n", + "2\n", + "\n", + "\n", + "3\n", + "\n", + "\n", + "4\n", + "\n", + "\n", + "5\n", + "\n", + "\n", + "6\n", + "\n", + "\n", + "7\n", + "\n", + "\n", + "8\n", + "\n", + "\n", + "9\n", + "\n", + "\n", + "10\n", + "\n", + "\n", + "→\n", + "\n", + "\n", + "»\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "SEARCH\n", + "\n", + "\n", + "\n", + "\n", + " follow @pythonbeginners\n", + "\n", + "\n", + "\n", + "\n", + "(adsbygoogle = window.adsbygoogle || []).push({});\n", + "\n", + "\n", + "Categories\n", + "\n", + "Basics\n", + "Cheatsheet\n", + "Code snippets\n", + "Development\n", + "Dictionary\n", + "Error Handling\n", + "Lists\n", + "Loops\n", + "Modules\n", + "Strings\n", + "System & OS\n", + 
"Uncategorized\n", + "Web & Internet\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " © Perceptive Minds 2012-2014\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + " (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){\n", + " (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),\n", + " m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)\n", + " })(window,document,'script','//www.google-analytics.com/analytics.js','ga');\n", + "\n", + " ga('create', 'UA-262833-88', 'pythonforbeginners.com');\n", + " ga('send', 'pageview');\n", + " \n", + "(function(d, s, id) {\n", + " var js, fjs = d.getElementsByTagName(s)[0];\n", + " if (d.getElementById(id)) return;\n", + " js = d.createElement(s); js.id = id;\n", + " js.src = \"//connect.facebook.net/en_US/all.js#xfbml=1&appId=1474940332733250\";\n", + " fjs.parentNode.insertBefore(js, fjs);\n", + " }(document, 'script', 'facebook-jssdk'));\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(soup.get_text())" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " \n", + " \n", + " Knuth shuffle - Rosetta Code\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "

\n", + " \n", + " Knuth shuffle\n", + " \n", + "

\n", + "
\n", + "
\n", + " From Rosetta Code\n", + "
\n", + "
\n", + "
\n", + "
\n", + " Jump to:\n", + " \n", + " navigation\n", + " \n", + " ,\n", + " \n", + " search\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \"Task\"\n", + " \n", + "
\n", + " \n", + " Knuth shuffle\n", + " \n", + "
\n", + " You are encouraged to\n", + " \n", + " solve this task\n", + " \n", + " according to the task description, using any language you may know.\n", + "
\n", + "

\n", + " Implement the\n", + " \n", + " Knuth shuffle\n", + " \n", + " (a.k.a. the Fisher-Yates shuffle) for an integer array (or, if possible, an array of any type).\n", + "The Knuth shuffle is used to create a random permutation of an array.\n", + "

\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "

\n", + " Contents\n", + "

\n", + "
\n", + " \n", + "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " ACL2\n", + " \n", + " \n", + "

\n", + "
:set-state-ok t
 
(defun array-swap (name array i j)
(let ((ai (aref1 name array i))
(aj (aref1 name array j)))
(aset1 name
(aset1 name array j ai)
i aj)))
 
(defun shuffle-r (name array m state)
(if (zp m)
(mv array state)
(mv-let (i state)
(random$ m state)
(shuffle-r name
(array-swap name array i m)
(1- m)
state))))
 
(defun shuffle (name array state)
(shuffle-r name
array
(1- (first (dimensions name array)))
state))
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Ada\n", + " \n", + " \n", + "

\n", + "

\n", + " This implementation is a generic shuffle routine, able to shuffle an array of any type.\n", + "

\n", + "
generic
type Element_Type is private;
type Array_Type is array (Positive range <>) of Element_Type;
 
procedure Generic_Shuffle (List : in out Array_Type);
\n", + "
with Ada.Numerics.Discrete_Random;
 
procedure Generic_Shuffle (List : in out Array_Type) is
package Discrete_Random is new Ada.Numerics.Discrete_Random(Result_Subtype => Integer);
use Discrete_Random;
K : Integer;
G : Generator;
T : Element_Type;
begin
Reset (G);
for I in reverse List'Range loop
K := (Random(G) mod I) + 1;
T := List(I);
List(I) := List(K);
List(K) := T;
end loop;
end Generic_Shuffle;
\n", + "

\n", + " An example using Generic_Shuffle.\n", + "

\n", + "
with Ada.Text_IO;
with Generic_Shuffle;
 
procedure Test_Shuffle is
 
type Integer_Array is array (Positive range <>) of Integer;
 
Integer_List : Integer_Array
 := (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18);
procedure Integer_Shuffle is new Generic_Shuffle(Element_Type => Integer,
Array_Type => Integer_Array);
begin
 
for I in Integer_List'Range loop
Ada.Text_IO.Put(Integer'Image(Integer_List(I)));
end loop;
Integer_Shuffle(List => Integer_List);
Ada.Text_IO.New_Line;
for I in Integer_List'Range loop
Ada.Text_IO.Put(Integer'Image(Integer_List(I)));
end loop;
end Test_Shuffle;
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Aime\n", + " \n", + " \n", + "

\n", + "

\n", + " The shuffle function works on any type (the lists are heterogenous).\n", + "

\n", + "
void
shuffle(list l)
{
integer i;
 
i = l_length(l);
if (i) {
i -= 1;
while (i) {
l_spin(l, i, drand(i));
i -= 1;
}
}
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " ALGOL 68\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " ALGOL 68G\n", + " \n", + "
\n", + "
PROC between = (INT a, b)INT :
(
ENTIER (random * ABS (b-a+1) + (a<b|a|b))
);
 
PROC knuth shuffle = (REF[]INT a)VOID:
(
FOR i FROM LWB a TO UPB a DO
INT j = between(LWB a, UPB a);
INT t = a[i];
a[i] := a[j];
a[j] := t
OD
);
\n", + "
main:(
[20]INT a;
FOR i FROM 1 TO 20 DO a[i] := i OD;
knuth shuffle(a);
print(a)
)
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " AppleScript\n", + " \n", + " \n", + "

\n", + "
set n to 25
 
set array to {}
repeat with i from 1 to n
\tset end of array to i
end repeat
copy {array, array} to {unshuffled, shuffled}
repeat with i from n to 1 by -1
\tset j to (((random number) * (i - 1)) as integer) + 1
\tset shuffled's item i to array's item j
\tif j ≠ i's contents then set array's item j to array's item i
end repeat
 
return {unshuffled, shuffled}
\n", + "

\n", + " Example:\n", + "

\n", + "
{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25},
{14, 25, 3, 1, 12, 18, 11, 20, 16, 15, 21, 5, 22, 19, 2, 24, 8, 10, 13, 6, 17, 23, 9, 7, 4}}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " AutoHotkey\n", + " \n", + " \n", + "

\n", + "

\n", + " ahk forum:\n", + " \n", + " discussion\n", + " \n", + "

\n", + "
MsgBox % shuffle(\"1,2,3,4,5,6,7,8,9\")
MsgBox % shuffle(\"1,2,3,4,5,6,7,8,9\")
 
shuffle(list) { ; shuffle comma separated list, converted to array
StringSplit a, list, `, ; make array (length = a0)
Loop % a0-1 {
Random i, A_Index, a0 ; swap item 1,2... with a random item to the right of it
t := a%i%, a%i% := a%A_Index%, a%A_Index% := t
}
Loop % a0 ; construct string from sorted array
s .= \",\" . a%A_Index%
Return SubStr(s,2) ; drop leading comma
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " AutoIt\n", + " \n", + " \n", + "

\n", + "
 
Dim $a[10]
ConsoleWrite('array before permutation:' & @CRLF)
For $i = 0 To 9
\t$a[$i] = Random(20,100,1)
\tConsoleWrite($a[$i] & ' ')
Next
ConsoleWrite(@CRLF)
 
_Permute($a)
ConsoleWrite('array after permutation:' & @CRLF)
For $i = 0 To UBound($a) -1
\tConsoleWrite($a[$i] & ' ')
Next
ConsoleWrite(@CRLF)
 
 
Func _Permute(ByRef $array)
\tLocal $random, $tmp
\tFor $i = UBound($array) -1 To 0 Step -1
\t\t$random = Random(0,$i,1)
\t\t$tmp = $array[$random]
\t\t$array[$random] = $array[$i]
\t\t$array[$i] = $tmp
\tNext
EndFunc
 
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + "
\n",
+      " array before permutation:\n",
+      " 43 57 37 20 97 98 69 76 97 70 \n",
+      " array after permutation:\n",
+      " 57 69 97 70 37 97 20 76 43 98 \n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " AWK\n", + " \n", + " \n", + "

\n", + "

\n", + " Many\n", + " \n", + " arrays in AWK\n", + " \n", + " have the first index at 1.\n", + "This example shows how to shuffle such arrays.\n", + "The elements can be integers, floating-point numbers, or strings.\n", + "

\n", + "
# Shuffle an _array_ with indexes from 1 to _len_.
function shuffle(array, len, i, j, t) {
\tfor (i = len; i > 1; i--) {
\t\t# j = random integer from 1 to i
\t\tj = int(i * rand()) + 1
 
\t\t# swap array[i], array[j]
\t\tt = array[i]
\t\tarray[i] = array[j]
\t\tarray[j] = t
\t}
}
 
# Test program.
BEGIN {
\tlen = split(\"11 22 33 44 55 66 77 88 99 110\", array)
\tshuffle(array, len)
 
\tfor (i = 1; i < len; i++) printf \"%s \", array[i]
\tprintf \"%s\\n\", array[len]
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " BASIC\n", + " \n", + " \n", + "

\n", + "
RANDOMIZE TIMER
 
DIM cards(51) AS INTEGER
DIM L0 AS LONG, card AS LONG
 
PRINT \"before:\"
FOR L0 = 0 TO 51
cards(L0) = L0
PRINT LTRIM$(STR$(cards(L0))); \" \";
NEXT
 
FOR L0 = 51 TO 0 STEP -1
card = INT(RND * (L0 + 1))
IF card <> L0 THEN SWAP cards(card), cards(L0)
NEXT
 
PRINT : PRINT \"after:\"
FOR L0 = 0 TO 51
PRINT LTRIM$(STR$(cards(L0))); \" \";
NEXT
PRINT
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      " before:\n",
+      " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29\n",
+      " 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51\n",
+      " after:\n",
+      " 27 14 37 35 3 44 25 38 46 1 22 49 2 51 16 32 20 30 4 33 36 6 31 21 41 34 9 13 0\n",
+      " 50 47 48 40 39 7 18 19 26 24 10 29 5 12 28 11 17 43 45 8 23 42 15\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " BBC BASIC\n", + " \n", + " \n", + "

\n", + "
      cards% = 52
DIM pack%(cards%)
FOR I% = 1 TO cards%
pack%(I%) = I%
NEXT I%
FOR N% = cards% TO 2 STEP -1
SWAP pack%(N%),pack%(RND(N%))
NEXT N%
FOR I% = 1 TO cards%
PRINT pack%(I%);
NEXT I%
PRINT
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " bc\n", + " \n", + " \n", + "

\n", + "

\n", + " I provide a\n", + " \n", + " shuffle()\n", + " \n", + " function. It can only shuffle an array of numbers. It fails if the array has more than 32768 elements. It always shuffles the array named\n", + " \n", + " shuffle[]\n", + " \n", + " ; the array is not a function parameter because\n", + " \n", + " bc\n", + " \n", + " passes arrays by copying.\n", + "

\n", + "

\n", + " This code requires a\n", + " \n", + " bc\n", + " \n", + " with long names; the test program also requires a\n", + " \n", + " bc\n", + " \n", + " with the\n", + " \n", + " print\n", + " \n", + " statement.\n", + "

\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " OpenBSD bc\n", + " \n", + "
\n", + "
seed = 1   /* seed of the random number generator */
scale = 0
 
/* Random number from 0 to 32767. */
define rand() {
\t/* Formula (from POSIX) for random numbers of low quality. */
\tseed = (seed * 1103515245 + 12345) % 4294967296
\treturn ((seed / 65536) % 32768)
}
 
/* Shuffle the first _count_ elements of shuffle[]. */
define shuffle(count) {
\tauto b, i, j, t
 
\ti = count
\twhile (i > 0) {
\t\t/* j = random number in [0, i) */
\t\tb = 32768 % i /* want rand() >= b */
\t\twhile (1) {
\t\t\tj = rand()
\t\t\tif (j >= b) break
\t\t}
\t\tj = j % i
 
\t\t/* decrement i, swap shuffle[i] and shuffle[j] */
\t\tt = shuffle[--i]
\t\tshuffle[i] = shuffle[j]
\t\tshuffle[j] = t
\t}
}
 
/* Test program. */
define print_array(count) {
\tauto i
\tfor (i = 0; i < count - 1; i++) print shuffle[i], \", \"
\tprint shuffle[i], \"\\n\"
}
 
for (i = 0; i < 10; i++) shuffle[i] = 11 * (i + 1)
\"Original array: \"; trash = print_array(10)
 
trash = shuffle(10)
\"Shuffled array: \"; trash = print_array(10)
quit
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
Original array: 11, 22, 33, 44, 55, 66, 77, 88, 99, 110\n",
+      "Shuffled array: 66, 44, 11, 55, 33, 77, 110, 22, 88, 99
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Brat\n", + " \n", + " \n", + "

\n", + "
shuffle = { a |
(a.length - 1).to 1 { i |
random_index = random(0, i)
temp = a[i]
a[i] = a[random_index]
a[random_index] = temp
}
 
a
}
 
p shuffle [1 2 3 4 5 6 7]
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " C\n", + " \n", + " \n", + "

\n", + "

\n", + " This shuffles any \"object\"; it imitates\n", + " \n", + " qsort\n", + " \n", + " in the syntax.\n", + "

\n", + "
#include <stdlib.h>
#include <string.h>
 
int rrand(int m)
{
return (int)((double)m * ( rand() / (RAND_MAX+1.0) ));
}
 
#define BYTE(X) ((unsigned char *)(X))
void shuffle(void *obj, size_t nmemb, size_t size)
{
void *temp = malloc(size);
size_t n = nmemb;
while ( n > 1 ) {
size_t k = rrand(n--);
memcpy(temp, BYTE(obj) + n*size, size);
memcpy(BYTE(obj) + n*size, BYTE(obj) + k*size, size);
memcpy(BYTE(obj) + k*size, temp, size);
}
free(temp);
}
\n", + "

\n", + " Alternatively, using Durstenfeld's method (swapping selected item and last item in each iteration instead of literally shifting everything), and macro'd function declaration/definition:\n", + "

\n", + "
#include <stdio.h>
#include <stdlib.h>
 
/* define a shuffle function. e.g. decl_shuffle(double).
* advantage: compiler is free to optimize the swap operation without
* indirection with pointers, which could be much faster.
* disadvantage: each datatype needs a separate instance of the function.
* for a small funciton like this, it's not very big a deal.
*/

#define decl_shuffle(type)\t\t\t\t\\
void shuffle_##type(type *list, size_t len) {\t\t\\
\tint j;\t\t\t\t\t\t\\
\ttype tmp;\t\t\t\t\t\\
\twhile(len) {\t\t\t\t\t\\
\t\tj = irand(len);\t\t\t\t\\
\t\tif (j != len - 1) {\t\t\t\\
\t\t\ttmp = list[j];\t\t\t\\
\t\t\tlist[j] = list[len - 1];\t\\
\t\t\tlist[len - 1] = tmp;\t\t\\
\t\t}\t\t\t\t\t\\
\t\tlen--;\t\t\t\t\t\\
\t}\t\t\t\t\t\t\\
}\t\t\t\t\t\t\t\\

/* random integer from 0 to n-1 */
int irand(int n)
{
\tint r, rand_max = RAND_MAX - (RAND_MAX % n);
\t/* reroll until r falls in a range that can be evenly
\t * distributed in n bins. Unless n is comparable to
\t * to RAND_MAX, it's not *that* important really. */

\twhile ((r = rand()) >= rand_max);
\treturn r / (rand_max / n);
}
 
/* declare and define int type shuffle function from macro */
decl_shuffle(int);
 
int main()
{
\tint i, x[20];
 
\tfor (i = 0; i < 20; i++) x[i] = i;
\tfor (printf(\"before:\"), i = 0; i < 20 || !printf(\"\\n\"); i++)
\t\tprintf(\" %d\", x[i]);
 
\tshuffle_int(x, 20);
 
\tfor (printf(\"after: \"), i = 0; i < 20 || !printf(\"\\n\"); i++)
\t\tprintf(\" %d\", x[i]);
\treturn 0;
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " C++\n", + " \n", + " \n", + "

\n", + "

\n", + " \n", + " Compiler:\n", + " \n", + " \n", + " g++\n", + " \n", + " (version 4.3.2 20081105 (Red Hat 4.3.2-7))\n", + "

\n", + "
#include <cstdlib>
#include <algorithm>
#include <iterator>
 
template<typename RandomAccessIterator>
void knuthShuffle(RandomAccessIterator begin, RandomAccessIterator end) {
for(unsigned int n = end - begin - 1; n >= 1; --n) {
unsigned int k = rand() % (n + 1);
if(k != n) {
std::iter_swap(begin + k, begin + n);
}
}
}
\n", + "

\n", + " The standard library provides this in the form of\n", + " \n", + " std::random_shuffle\n", + " \n", + " .\n", + "

\n", + "
#include <algorithm>
#include <vector>
 
int main()
{
int array[] = { 1,2,3,4,5,6,7,8,9 }; // C-style array of integers
std::vector<int> vec(array, array + 9); // build STL container from int array
 
std::random_shuffle(array, array + 9); // shuffle C-style array
std::random_shuffle(vec.begin(), vec.end()); // shuffle STL container
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " \n", + " C#\n", + " \n", + " \n", + " \n", + "

\n", + "
public static void KnuthShuffle<T>(T[] array)
{
System.Random random = new System.Random();
for (int i = 0; i < array.Length; i++)
{
int j = random.Next(i, array.Length); // Don't select from the entire array on subsequent loops
T temp = array[i]; array[i] = array[j]; array[j] = temp;
}
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Clojure\n", + " \n", + " \n", + "

\n", + "
(defn shuffle [vect]
(reduce (fn [v i] (let [r (rand-int i)]
(assoc v i (v r) r (v i)))
vect (range (dec (count vect)) 1 -1)))
\n", + "

\n", + " This works by generating a sequence of end-indices from n-1 to 1, then reducing that sequence (starting with the original vector) through a function that, given a vector and end-index, performs a swap between the end-index and some random index less than the end-index.\n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " COBOL\n", + " \n", + " \n", + "

\n", + "
       IDENTIFICATION DIVISION.
PROGRAM-ID. knuth-shuffle.
 
DATA DIVISION.
LOCAL-STORAGE SECTION.
01 i PIC 9(8).
01 j PIC 9(8).
 
01 temp PIC 9(8).
 
LINKAGE SECTION.
78 Table-Len VALUE 10.
01 ttable-area.
03 ttable PIC 9(8) OCCURS Table-Len TIMES.
 
PROCEDURE DIVISION USING ttable-area.
MOVE FUNCTION RANDOM(FUNCTION CURRENT-DATE (11:6)) TO i
 
PERFORM VARYING i FROM Table-Len BY -1 UNTIL i = 0
COMPUTE j =
FUNCTION MOD(FUNCTION RANDOM * 10000, Table-Len) + 1
 
MOVE ttable (i) TO temp
MOVE ttable (j) TO ttable (i)
MOVE temp TO ttable (j)
END-PERFORM
 
GOBACK
.
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " CMake\n", + " \n", + " \n", + "

\n", + "
# shuffle(<output variable> [<value>...]) shuffles the values, and
# stores the result in a list.
function(shuffle var)
set(forever 1)
 
# Receive ARGV1, ARGV2, ..., ARGV${last} as an array of values.
math(EXPR last \"${ARGC} - 1\")
 
# Shuffle the array with Knuth shuffle (Fisher-Yates shuffle).
foreach(i RANGE ${last} 1)
# Roll j = a random number from 1 to i.
math(EXPR min \"100000000 % ${i}\")
while(forever)
string(RANDOM LENGTH 8 ALPHABET 0123456789 j)
if(NOT j LESS min) # Prevent modulo bias when j < min.
break() # Break loop when j >= min.
endif()
endwhile()
math(EXPR j \"${j} % ${i} + 1\")
 
# Swap ARGV${i} with ARGV${j}.
set(t ${ARGV${i}})
set(ARGV${i} ${ARGV${j}})
set(ARGV${j} ${t})
endforeach(i)
 
# Convert array to list.
set(answer)
foreach(i RANGE 1 ${last})
list(APPEND answer ${ARGV${i}})
endforeach(i)
set(\"${var}\" ${answer} PARENT_SCOPE)
endfunction(shuffle)
\n", + "
shuffle(result 11 22 33 44 55 66)
message(STATUS \"${result}\")
# One possible output:
# -- 66;33;22;55;44;11
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " CoffeeScript\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Translation of\n", + " \n", + " :\n", + " \n", + " JavaScript\n", + " \n", + "
\n", + "
knuth_shuffle = (a) ->
n = a.length
while n > 1
r = Math.floor(n * Math.random())
n -= 1
[a[n], a[r]] = [a[r], a[n]]
a
 
counts =
\"1,2,3\": 0
\"1,3,2\": 0
\"2,1,3\": 0
\"2,3,1\": 0
\"3,1,2\": 0
\"3,2,1\": 0
 
for i in [1..100000]
counts[knuth_shuffle([ 1, 2, 3 ]).join(\",\")] += 1
 
for key, val of counts
console.log \"#{key}: #{val}\"
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "> coffee knuth_shuffle.coffee \n",
+      "1,2,3: 16714\n",
+      "1,3,2: 16566\n",
+      "2,1,3: 16460\n",
+      "2,3,1: 16715\n",
+      "3,1,2: 16750\n",
+      "3,2,1: 16795\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Common Lisp\n", + " \n", + " \n", + "

\n", + "
(defun nshuffle (sequence)
(loop for i from (length sequence) downto 2
do (rotatef (elt sequence (random i))
(elt sequence (1- i))))
sequence)
\n", + "

\n", + " This operates on arbitrary sequences, but will be inefficient applied to a list as opposed to a vector. Dispatching on type, and using an intermediate vector to hold the contents of list can make both cases more efficient (since the array specific case can use\n", + " \n", + " aref\n", + " \n", + " rather than\n", + " \n", + " elt\n", + " \n", + " ):\n", + "

\n", + "
(defun nshuffle (sequence)
(etypecase sequence
(list (nshuffle-list sequence))
(array (nshuffle-array sequence))))
 
(defun nshuffle-list (list)
\"Shuffle the list using an intermediate vector.\"
(let ((array (nshuffle-array (coerce list 'vector))))
(declare (dynamic-extent array))
(map-into list 'identity array)))
 
(defun nshuffle-array (array)
(loop for i from (length array) downto 2
do (rotatef (aref array (random i))
(aref array (1- i)))
finally (return array)))
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " D\n", + " \n", + " \n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " Standard Version\n", + " \n", + "

\n", + "

\n", + " A variant of the Knuth shuffle is in the D standard library Phobos:\n", + "

\n", + "
void main() {
import std.stdio, std.random;
 
auto a = [1, 2, 3, 4, 5, 6, 7, 8, 9];
a.randomShuffle;
a.writeln;
}
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
[8, 9, 3, 1, 7, 5, 4, 6, 2]
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " One Implementation\n", + " \n", + "

\n", + "

\n", + " This shuffles any collection that supports random access, length and swapping of items:\n", + "

\n", + "
import std.stdio, std.algorithm, std.random, std.range;
 
void knuthShuffle(Range)(Range r)
if (isRandomAccessRange!Range && hasLength!Range &&
hasSwappableElements!Range) {
foreach_reverse (immutable i, ref ri; r[1 .. $ - 1])
ri.swap(r[uniform(0, i + 1)]);
}
 
void main() {
auto a = [1, 2, 3, 4, 5, 6, 7, 8, 9];
a.knuthShuffle;
a.writeln;
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Delphi\n", + " \n", + " \n", + "

\n", + "
\n", + "
\n", + " \n", + " See\n", + " \n", + " Pascal\n", + " \n", + " or\n", + " \n", + " DWScript\n", + " \n", + " \n", + "
\n", + "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " DWScript\n", + " \n", + " \n", + "

\n", + "
procedure KnuthShuffle(a : array of Integer);
var
i, j, tmp : Integer;
begin
for i:=a.High downto 1 do begin
j:=RandomInt(a.Length);
tmp:=a[i]; a[i]:=a[j]; a[j]:=tmp;
end;
end;
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " E\n", + " \n", + " \n", + "

\n", + "
def shuffle(array, random) {
for bound in (2..(array.size())).descending() {
def i := random.nextInt(bound)
def swapTo := bound - 1
def t := array[swapTo]
array[swapTo] := array[i]
array[i] := t
}
}
\n", + "
? def arr := [1,2,3,4,5,6,7,8,9,10].diverge()
# value: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].diverge()
 
? shuffle(arr, entropy)
? arr
# value: [4, 5, 2, 9, 7, 8, 1, 3, 6, 10].diverge()
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Eiffel\n", + " \n", + " \n", + "

\n", + "
 
class
\tAPPLICATION
 
create
\tmake
 
feature {NONE} -- Initialization
 
\tmake
\t\tdo
\t\t\ttest := <<1, 2>>
\t\t\tio.put_string (\"Initial: \")
\t\t\tacross
\t\t\t\ttest as t
\t\t\tloop
\t\t\t\tio.put_string (t.item.out + \" \")
\t\t\tend
\t\t\ttest := shuffle (test)
\t\t\tio.new_line
\t\t\tio.put_string (\"Shuffled: \")
\t\t\tacross
\t\t\t\ttest as t
\t\t\tloop
\t\t\t\tio.put_string (t.item.out + \" \")
\t\t\tend
\t\tend
 
\ttest: ARRAY [INTEGER]
 
\tshuffle (ar: ARRAY [INTEGER]): ARRAY [INTEGER]
\t\t\t-- Array containing the same elements as 'ar' in a shuffled order.
\t\trequire
\t\t\tmore_than_one_element: ar.count > 1
\t\tlocal
\t\t\tcount, j, ith: INTEGER
\t\t\trandom: V_RANDOM
\t\tdo
\t\t\tcreate random
\t\t\tcreate Result.make_empty
\t\t\tResult.deep_copy (ar)
\t\t\tcount := ar.count
\t\t\tacross
\t\t\t\t1 |..| count as c
\t\t\tloop
\t\t\t\tj := random.bounded_item (c.item, count)
\t\t\t\tith := Result [c.item]
\t\t\t\tResult [c.item] := Result [j]
\t\t\t\tResult [j] := ith
\t\t\t\trandom.forth
\t\t\tend
\t\tensure
\t\t\tsame_elements: across ar as a all Result.has (a.item) end
\t\tend
 
end
 
 
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "Initial: 1 2 3 4 5 6 7\n",
+      "Shuffeld: 1 5 3 4 7 6 2\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Erlang\n", + " \n", + " \n", + "

\n", + "
 
-module( knuth_shuffle ).
 
-export( [list/1] ).
 
list( Inputs ) ->
\tN = erlang:length( Inputs ),
\t{[], Acc} = lists:foldl( fun random_move/2, {Inputs, []}, lists:reverse(lists:seq(1, N)) ),
\tAcc.
 
 
 
random_move( N, {Inputs, Acc} ) ->
\tItem = lists:nth( random:uniform(N), Inputs ),
\t{lists:delete(Item, Inputs), [Item | Acc]}.
 
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "21> knuth_shuffle:list(lists:seq(1,9)).\n",
+      "[5,7,8,1,4,2,3,9,6]\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " ERRE\n", + " \n", + " \n", + "

\n", + "
PROGRAM KNUTH_SHUFFLE
 
CONST CARDS%=52
 
DIM PACK%[CARDS%]
 
BEGIN
RANDOMIZE(TIMER)
FOR I%=1 TO CARDS% DO
PACK%[I%]=I%
END FOR
FOR N%=CARDS% TO 2 STEP -1 DO
SWAP(PACK%[N%],PACK%[1+INT(N%*RND(1))])
END FOR
FOR I%=1 TO CARDS% DO
PRINT(PACK%[I%];)
END FOR
PRINT
END PROGRAM
 
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Euphoria\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Translation of\n", + " \n", + " :\n", + " \n", + " BASIC\n", + " \n", + "
\n", + "
sequence cards
cards = repeat(0,52)
integer card,temp
 
puts(1,\"Before:\\n\")
for i = 1 to 52 do
cards[i] = i
printf(1,\"%d \",cards[i])
end for
 
for i = 52 to 1 by -1 do
card = rand(i)
if card != i then
temp = cards[card]
cards[card] = cards[i]
cards[i] = temp
end if
end for
 
puts(1,\"\\nAfter:\\n\")
for i = 1 to 52 do
printf(1,\"%d \",cards[i])
end for
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Factor\n", + " \n", + " \n", + "

\n", + "

\n", + " There is a\n", + " \n", + " randomize\n", + " \n", + " word already in the standard library. Implementation:\n", + "

\n", + "
: randomize ( seq -- seq )
dup length [ dup 1 > ]
[ [ iota random ] [ 1 - ] bi [ pick exchange ] keep ]
while drop ;
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Fantom\n", + " \n", + " \n", + "

\n", + "
class Main
{
static Void knuthShuffle (List array)
{
((array.size-1)..1).each |Int i|
{
r := Int.random(0..i)
array.swap (i, r)
}
}
 
public static Void main ()
{
List a := [1,2,3,4,5]
knuthShuffle (a)
echo (a)
 
List b := [\"apples\", \"oranges\", \"pears\", \"bananas\"]
knuthShuffle (b)
echo (b)
}
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Forth\n", + " \n", + " \n", + "

\n", + "
include random.fs
 
: shuffle ( deck size -- )
2 swap do
dup i random cells +
over @ over @ swap
rot  ! over !
cell+
-1 +loop drop ;
 
: .array 0 do dup @ . cell+ loop drop ;
 
create deck 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ,
 
deck 10 2dup shuffle .array
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Fortran\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " Fortran\n", + " \n", + " version 90 and later\n", + "
\n", + "
program Knuth_Shuffle
implicit none
 
integer, parameter :: reps = 1000000
integer :: i, n
integer, dimension(10) :: a, bins = 0, initial = (/ (n, n=1,10) /)
 
do i = 1, reps
a = initial
\tcall Shuffle(a)
where (a == initial) bins = bins + 1 ! skew tester
end do
write(*, \"(10(i8))\") bins
! prints 100382 100007 99783 100231 100507 99921 99941 100270 100290 100442
 
contains
 
subroutine Shuffle(a)
integer, intent(inout) :: a(:)
integer :: i, randpos, temp
real :: r
 
do i = size(a), 2, -1
call random_number(r)
randpos = int(r * i) + 1
temp = a(randpos)
a(randpos) = a(i)
a(i) = temp
end do
 
end subroutine Shuffle
 
end program Knuth_Shuffle
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Frink\n", + " \n", + " \n", + "

\n", + "

\n", + " The built-in method\n", + " \n", + " \n", + " array\n", + " \n", + " .shuffle[]\n", + " \n", + " implements the Fisher-Yates-Knuth shuffle algorithm:\n", + "

\n", + "
 
a = [1,2,3]
a.shuffle[]
 
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " \n", + " F#\n", + " \n", + " \n", + " \n", + "

\n", + "

\n", + " Allows a shuffle of arrays of arbitrary items. Requires 2010 beta of F#. Lazily returns a sequence.\n", + "

\n", + "

\n", + " This is the original Fisher-Yates shuffle as described by the link:\n", + "

\n", + "
open System
 
let FisherYatesShuffle (initialList : array<'a>) = // '
let availableFlags = Array.init initialList.Length (fun i -> (i, true))
// Which items are available and their indices
let rnd = new Random()
let nextItem nLeft =
let nItem = rnd.Next(0, nLeft) // Index out of available items
let index = // Index in original deck
availableFlags // Go through available array
|> Seq.filter (fun (ndx,f) -> f) // and pick out only the available tuples
|> Seq.nth nItem // Get the one at our chosen index
|> fst // and retrieve it's index into the original array
availableFlags.[index] <- (index, false) // Mark that index as unavailable
initialList.[index] // and return the original item
seq {(initialList.Length) .. -1 .. 1} // Going from the length of the list down to 1
|> Seq.map (fun i -> nextItem i) // yield the next item
\n", + "

\n", + " Here's the modified Knuth shuffle which shuffles the original array in place\n", + "

\n", + "
let KnuthShuffle (lst : array<'a>) =                   // '
let Swap i j = // Standard swap
let item = lst.[i]
lst.[i] <- lst.[j]
lst.[j] <- item
let rnd = new Random()
let ln = lst.Length
[0..(ln - 2)] // For all indices except the last
|> Seq.iter (fun i -> Swap i (rnd.Next(i, ln))) // swap th item at the index with a random one following it (or itself)
lst // Return the list shuffled in place
\n", + "

\n", + " Example:\n", + "

\n", + "
> KnuthShuffle [| \"Darrell\"; \"Marvin\"; \"Doug\"; \"Greg\"; \"Sam\"; \"Ken\" |];;
val it : string array = [|\"Marvin\"; \"Doug\"; \"Sam\"; \"Darrell\"; \"Ken\"; \"Greg\"|]
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " FunL\n", + " \n", + " \n", + "

\n", + "
def shuffle( a ) =
res = array( a )
n = a.length()
 
for i <- 0:n
r = rnd( i:n )
res(i), res(r) = res(r), res(i)
 
res.toList()
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " GAP\n", + " \n", + " \n", + "

\n", + "
# Return the list L after applying Knuth shuffle. GAP also has the function Shuffle, which does the same.
ShuffleAlt := function(a)
local i, j, n, t;
n := Length(a);
for i in [n, n - 1 .. 2] do
j := Random(1, i);
t := a[i];
a[i] := a[j];
a[j] := t;
od;
return a;
end;
 
# Return a \"Permutation\" object (a permutation of 1 .. n).
# They are printed in GAP, in cycle decomposition form.
PermShuffle := n -> PermList(ShuffleAlt([1 .. n]));
 
ShuffleAlt([1 .. 10]);
# [ 4, 7, 1, 5, 8, 2, 6, 9, 10, 3 ]
 
PermShuffle(10);
# (1,9)(2,3,6,4,5,10,8,7)
 
# One may also call the built-in random generator on the symmetric group :
Random(SymmetricGroup(10));
(1,8,2,5,9,6)(3,4,10,7)
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Go\n", + " \n", + " \n", + "

\n", + "
package main
 
import (
\"fmt\"
\"math/rand\"
\"time\"
)
 
func main() {
var a [20]int
for i := range a {
a[i] = i
}
fmt.Println(a)
 
rand.Seed(time.Now().UnixNano())
for i := len(a) - 1; i >= 1; i-- {
j := rand.Intn(i + 1)
a[i], a[j] = a[j], a[i]
}
fmt.Println(a)
}
\n", + "

\n", + " To shuffle any type:\n", + "

\n", + "
package main
 
import (
\"fmt\"
\"math/rand\"
\"time\"
)
 
// Generic Knuth Shuffle algorithm. In Go, this is done with interface
// types. The parameter s of function shuffle is an interface type.
// Any type satisfying the interface \"shuffler\" can be shuffled with
// this function. Since the shuffle function uses the random number
// generator, it's nice to seed the generator at program load time.
func init() {
rand.Seed(time.Now().UnixNano())
}
func shuffle(s shuffler) {
for i := s.Len() - 1; i >= 1; i-- {
j := rand.Intn(i + 1)
s.Swap(i, j)
}
}
 
// Conceptually, a shuffler is an indexed collection of things.
// It requires just two simple methods.
type shuffler interface {
Len() int // number of things in the collection
Swap(i, j int) // swap the two things indexed by i and j
}
 
// ints is an example of a concrete type implementing the shuffler
// interface.
type ints []int
 
func (s ints) Len() int { return len(s) }
func (s ints) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
 
// Example program. Make an ints collection, fill with sequential numbers,
// print, shuffle, print.
func main() {
a := make(ints, 20)
for i := range a {
a[i] = i
}
fmt.Println(a)
shuffle(a)
fmt.Println(a)
}
\n", + "
\n", + "
\n", + "
\n", + " Example output:\n", + "
\n", + "
\n", + "
\n", + "(of either program)\n", + "
\n",
+      "[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]\n",
+      "[11 10 12 19 4 13 15 17 14 2 5 18 8 0 6 9 7 3 1 16]\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Groovy\n", + " \n", + " \n", + "

\n", + "

\n", + " Solution:\n", + "

\n", + "
def shuffle = { list ->
if (list == null || list.empty) return list
def r = new Random()
def n = list.size()
(n..1).each { i ->
def j = r.nextInt(i)
list[[i-1, j]] = list[[j, i-1]]
}
list
}
\n", + "

\n", + " Test:\n", + "

\n", + "
def list = [] + (0..20)
println list
println shuffle(list)
println shuffle(list)
println shuffle(list)
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]\n",
+      "[12, 16, 7, 13, 1, 9, 17, 20, 15, 3, 5, 6, 8, 0, 18, 10, 14, 4, 2, 11, 19]\n",
+      "[17, 6, 10, 1, 18, 5, 7, 13, 2, 11, 16, 3, 14, 0, 4, 20, 19, 12, 8, 9, 15]\n",
+      "[6, 20, 11, 4, 7, 12, 5, 14, 19, 18, 13, 15, 1, 2, 8, 16, 17, 10, 0, 9, 3]
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Haskell\n", + " \n", + " \n", + "

\n", + "
import System.Random
import Data.List
import Control.Monad
import Control.Arrow
 
mkRands = mapM (randomRIO.(,)0 ). enumFromTo 1. pred
 
replaceAt :: Int -> a -> [a] -> [a]
replaceAt i c = let (a,b) = splitAt i l in a++x:(drop 1 b)
 
swapElems :: (Int, Int) -> [a] -> [a]
swapElems (i,j) xs | i==j = xs
| otherwise = replaceAt j (xs!!i) $ replaceAt i (xs!!j) xs
 
knuthShuffle :: [a] -> IO [a]
knuthShuffle xs =
liftM (foldr swapElems xs. zip [1..]) (mkRands (length xs))
\n", + "

\n", + " Examples of use:\n", + "

\n", + "
*Main> knuthShuffle  ['a'..'k']\n",
+      "\"bhjdgfciake\"\n",
+      "\n",
+      "*Main> knuthShuffle $ map(ap (,)(+10)) [0..9]\n",
+      "[(0,10),(8,18),(2,12),(3,13),(9,19),(4,14),(7,17),(1,11),(6,16),(5,15)]
\n", + "

\n", + " Function for showing intermediate results:\n", + "

\n", + "
knuthShuffleProcess :: (Show a) => [a] -> IO ()
knuthShuffleProcess =
(mapM_ print. reverse =<<). ap (fmap. (. zip [1..]). scanr swapElems) (mkRands. length)
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "Detailed example:\n", + "
*Main> knuthShuffleProcess  ['a'..'k']\n",
+      "\"abcdefghijk\"\n",
+      "\"abckefghijd\"\n",
+      "\"jbckefghiad\"\n",
+      "\"jbckeighfad\"\n",
+      "\"jbckeihgfad\"\n",
+      "\"jbhkeicgfad\"\n",
+      "\"jbhiekcgfad\"\n",
+      "\"jbeihkcgfad\"\n",
+      "\"ibejhkcgfad\"\n",
+      "\"iebjhkcgfad\"\n",
+      "\"iebjhkcgfad\"
\n", + "

\n", + " An imperative implementation using arrays and the\n", + " \n", + " ST\n", + " \n", + " monad:\n", + "

\n", + "
import Data.Array.ST
import Data.STRef
import Control.Monad
import Control.Monad.ST
import Control.Arrow
import System.Random
 
shuffle :: RandomGen g => [a] -> g -> ([a], g)
shuffle list g = runST $ do
r <- newSTRef g
let rand range = liftM (randomR range) (readSTRef r) >>=
runKleisli (second (Kleisli $ writeSTRef r) >>> arr fst)
a <- newAry (1, len) list
forM_ [len, len - 1 .. 2] $ \\n -> do
k <- rand (1, n)
liftM2 (,) (readArray a k) (readArray a n) >>=
runKleisli (Kleisli (writeArray a n) *** Kleisli (writeArray a k))
liftM2 (,) (getElems a) (readSTRef r)
where len = length list
newAry :: (Int, Int) -> [a] -> ST s (STArray s Int a)
newAry = newListArray
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Icon\n", + " \n", + " and\n", + " \n", + " Unicon\n", + " \n", + " \n", + "

\n", + "

\n", + " The\n", + " \n", + " shuffle\n", + " \n", + " method used here can shuffle lists, record fields, and strings:\n", + "

\n", + "
procedure main()
show(shuffle([3,1,4,1,5,9,2,6,3]))
show(shuffle(\"this is a string\"))
end
 
procedure shuffle(A)
every A[i := *A to 1 by -1] :=: A[?i]
return A
end
 
procedure show(A)
every writes(!A,\" \")
write()
end
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
->ks\n",
+      "9 6 1 4 3 1 3 5 2 \n",
+      "i n   t i s   r t g   h s a i s \n",
+      "->
\n", + "

\n", + " Note that the gloriously succinct 'standard' Icon shuffle:\n", + "

\n", + "
procedure shuffle(A)
every !A :=: ?A
end
\n", + "

\n", + " is subtly biased.\n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Inform 6\n", + " \n", + " \n", + "

\n", + "
[ shuffle a n i j tmp;
for(i = n - 1: i > 0: i--)
{
j = random(i + 1) - 1;
 
tmp = a->j;
a->j = a->i;
a->i = tmp;
}
];
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " J\n", + " \n", + " \n", + "

\n", + "
KS=:{~ (2&{.@[ {`(|.@[)`]} ])/@(,~(,.?@>:))@i.@#
\n", + "

\n", + " The input array is transformed to a rectangular array of indexes. By doing this all kinds of arrays can serve as input (see examples below). The process is imitated by using using a fold, swapping elements in a restricted part of this index-array in each fold step.\n", + "

\n", + "
process                         J
 
fold swap transform array <==> f / g y
\n", + "

\n", + " Example of a transformed input:\n", + "

\n", + "
(,~(,.?@>:))@i.@# 1+i.6
0 0 0 0 0 0
1 1 0 0 0 0
2 0 0 0 0 0
3 2 0 0 0 0
4 3 0 0 0 0
5 0 0 0 0 0
0 1 2 3 4 5
\n", + "

\n", + " The last row is the index-array that has to be shuffled. The other rows have valid indexes in the first two columns. The second column has a randomized value <= value first column.\n", + "

\n", + "

\n", + " The index-swapping is done by the part:\n", + "

\n", + "
2&{.@[ {`(|.@[)`]} ]
\n", + "

\n", + " Finally, the shuffled indexes select elements from the original array.\n", + "

\n", + "
input { ~ shuffled indexes
\n", + "

\n", + " Alternatively, instead of creating a rectangular array, the swapping indices and the original data can be individually boxed.\n", + "

\n", + "

\n", + " In other words,\n", + " \n", + " (,~ (,. ?@>:))@i.@#\n", + " \n", + " can be replaced with\n", + " \n", + " |.@; ;&~./@(,. ?@>:)@i.@#\n", + " \n", + " , and the swapping can be achieved using\n", + " \n", + " (<@C. >)/\n", + " \n", + " instead of\n", + " \n", + " (2&{.@[ {`(|.@[)`]} ])/\n", + " \n", + " .\n", + "

\n", + "

\n", + " With this approach, the data structure with the swapping indices and the original data could look like this:\n", + "

\n", + "
    (|.@; ;&~./@(,. ?@>:)@i.@#)'abcde'
+---+-+---+---+-+-----+
|4 2|3|2 1|1 0|0|abcde|
+---+-+---+---+-+-----+
\n", + "

\n", + " Note that we have the original data here, instead of indices to select all of its items. Note also that we have only a single value in a box where an item is being \"swapped\" with itself (this is required by J's cycle operation (\n", + " \n", + " C.\n", + " \n", + " )).\n", + "

\n", + "

\n", + " The resulting definition looks like this:\n", + "

\n", + "
KS=: [: > (<@C. >)/@(|.@; ;&~./@(,. ?@>:)@i.@#)
\n", + "

\n", + " Note that here we did not wind up with a list of indices which we used to permute the original data set. That data set is permuted directly. However, it is in a box and we do have to remove it from that box.\n", + "

\n", + "

\n", + " Permuting the data directly, instead of permuting indices, has performance implications when the items being swapped are large, but see the note at the end of this entry for J for how you would do this operation in a \"real\" J program.\n", + "

\n", + "Examples:\n", + "
]A=: 5+i.9
5 6 7 8 9 10 11 12 13
\n", + "Shuffle:\n", + "
KS A
13 10 7 5 11 9 8 6 12
\n", + "Input\n", + "
]M=: /:~(1 2 3,:2 3 4),(11 2 3,: 0 11 2),(1 1 1,:1 0),:1 1 1,:1 0 1
1 1 1
1 0 0
 
1 1 1
1 0 1
 
1 2 3
2 3 4
 
11 2 3
0 11 2
\n", + "Shuffle\n", + "
KS M
11 2 3
0 11 2
 
1 1 1
1 0 1
 
1 1 1
1 0 0
 
1 2 3
2 3 4
\n", + "Input\n", + "
]L=:'aA';'bbB';'cC%$';'dD@'
+--+---+----+---+
|aA|bbB|cC%$|dD@|
+--+---+----+---+
\n", + "Shuffle\n", + "
KS L
+--+----+---+---+
|aA|cC%$|dD@|bbB|
+--+----+---+---+
\n", + "

\n", + " In J the shuffling of an arbitrary array can easily be implemented by the phrase\n", + "( ref\n", + " \n", + " http://www.jsoftware.com/jwiki/JPhrases/RandomNumbers\n", + " \n", + " ):\n", + "

\n", + "
({~?~@#)
\n", + "

\n", + " Applied on the former examples:\n", + "

\n", + "
({~?~@#) A
8 7 13 6 10 11 5 9 12
 
({~?~@#) M
1 1 1
1 0 1
 
1 2 3
2 3 4
 
11 2 3
0 11 2
 
1 1 1
1 0 0
 
({~?~@#) L
+----+---+--+---+
|cC%$|bbB|aA|dD@|
+----+---+--+---+
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Java\n", + " \n", + " \n", + "

\n", + "
import java.util.Random;
 
public static final Random gen = new Random();
 
// version for array of ints
public static void shuffle (int[] array) {
int n = array.length;
while (n > 1) {
int k = gen.nextInt(n--); //decrements after using the value
int temp = array[n];
array[n] = array[k];
array[k] = temp;
}
}
// version for array of references
public static void shuffle (Object[] array) {
int n = array.length;
while (n > 1) {
int k = gen.nextInt(n--); //decrements after using the value
Object temp = array[n];
array[n] = array[k];
array[k] = temp;
}
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " JavaScript\n", + " \n", + " \n", + "

\n", + "
function knuthShuffle(arr) {
var rand, temp, i;
 
for (i = arr.length - 1; i > 0; i -= 1) {
rand = Math.floor((i + 1) * Math.random());//get random between zero and i (inclusive)
temp = arr[rand];//swap i and the zero-indexed number
arr[rand] = arr[i];
arr[i] = temp;
}
return arr;
}
 
var res = {
'1,2,3': 0, '1,3,2': 0,
'2,1,3': 0, '2,3,1': 0,
'3,1,2': 0, '3,2,1': 0
};
 
for (var i = 0; i < 100000; i++) {
res[knuthShuffle([1,2,3]).join(',')] += 1;
}
 
for (var key in res) {
print(key + \"\\t\" + res[key]);
}
\n", + "

\n", + " Results in:\n", + "

\n", + "
1,2,3   16619\n",
+      "1,3,2   16614\n",
+      "2,1,3   16752\n",
+      "2,3,1   16959\n",
+      "3,1,2   16460\n",
+      "3,2,1   16596
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Joy\n", + " \n", + " \n", + "

\n", + "
DEFINE knuth-shuffle ==
 
(* Take the size of the array (without destroying it) *)
dup dup size
 
(* Generate a list of as many random numbers *)
[rand] [rem] enconcat map
 
(* Zip the two lists *)
swap zip
 
(* Sort according to the new index number *)
[small] [] [uncons unswonsd [first >] split [swons] dip2]
[enconcat] binrec
 
(* Delete the new index number *)
[second] map.
\n", + "

\n", + " Using knuth-shuffle (file shuffle.joy):\n", + "

\n", + "
(* Sorted array of 21 integers *)
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]
knuth-shuffle.
\n", + "

\n", + " Command line:\n", + "

\n", + "
\n", + "
\n", + " \n", + " joy shuffle.joy\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "usrlib  is loaded\n",
+      "inilib  is loaded\n",
+      "agglib  is loaded\n",
+      "[12 6 8 4 14 18 7 15 1 0 11 13 5 10 16 2 19 17 9 20 3]\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Julia\n", + " \n", + " \n", + "

\n", + "

\n", + " Julia provides the built-ins\n", + " \n", + " shuffle\n", + " \n", + " and\n", + " \n", + " shuffle!\n", + " \n", + " that implement the Kunth shuffle (with the latter being an in-place version). These methods work for any sort of vector. The current (version 0.3) source for the most general version of\n", + " \n", + " shuffle!\n", + " \n", + " as contained in\n", + " \n", + " \n", + " random.jl\n", + " \n", + " \n", + " is\n", + "

\n", + "
 
function shuffle!(r::AbstractRNG, a::AbstractVector)
for i = length(a):-1:2
j = rand(r, 1:i)
a[i], a[j] = a[j], a[i]
end
return a
end
 
\n", + "

\n", + " As an example, here is\n", + " \n", + " shuffle\n", + " \n", + " in action.\n", + "

\n", + "
 
a = collect(1:20)
b = shuffle(a)
 
print(\"Unshuffled Array:\\n \")
println(a)
print(\"Shuffled Array:\\n \")
println(b)
 
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "Unshuffled Array:\n",
+      "    [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]\n",
+      "Shuffled Array:\n",
+      "    [1,13,19,17,6,4,10,8,18,20,2,5,7,3,12,16,9,15,11,14]\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " LabVIEW\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " LabVIEW\n", + " \n", + " version 8.0 Full Development System\n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " \"Knuth\n", + " \n", + " \n", + " \"Knuth\n", + " \n", + "

\n", + "

\n", + "
\n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Lasso\n", + " \n", + " \n", + "

\n", + "
define staticarray->swap(p1::integer,p2::integer) => {
fail_if(
#p1 < 1 or #p2 < 1 or
#p1 > .size or #p2 > .size,
'invalid parameters'
)
#p1 == #p2
 ? return
 
local(tmp) = .get(#p2)
.get(#p2) = .get(#p1)
.get(#p1) = #tmp
}
define staticarray->knuthShuffle => {
loop(-from=.size, -to=2, -by=-1) => {
.swap(math_random(1, loop_count), loop_count)
}
}
 
(1 to 10)->asStaticArray->knuthShuffle&asString
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
staticarray(9, 5, 6, 1, 10, 8, 3, 4, 2, 7)
\n", + "

\n", + "
\n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Liberty BASIC\n", + " \n", + " \n", + "

\n", + "
'Declared the UpperBound to prevent confusion with lots of 9's floating around....
UpperBound = 9
Dim array(UpperBound)
 
For i = 0 To UpperBound
array(i) = Int(Rnd(1) * 10)
Print array(i)
Next i
 
For i = 0 To UpperBound
'set a random value because we will need to use the same value twice
randval = Int(Rnd(1) * (UpperBound - i))
temp1 = array(randval)
temp2 = array((UpperBound - i))
array(randval) = temp2
array((UpperBound - i)) = temp1
Next i
 
Print
For i = 0 To UpperBound
Print array(i)
Next i
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Logo\n", + " \n", + " \n", + "

\n", + "
to swap :i :j :a
localmake \"t item :i :a
setitem :i :a item :j :a
setitem :j :a :t
end
to shuffle :a
for [i [count :a] 2] [swap 1 + random :i :i :a]
end
 
make \"a {1 2 3 4 5 6 7 8 9 10}
shuffle :a
show :a
\n", + "

\n", + " Lhogho does not have a setitem, and also does things more 'function'ally.\n", + "

\n", + "
to slice :lst :start :finish
\tlocal \"res
\tmake \"res []
\tfor \"i [:start :finish 1] [
\t\tmake \"j item :i :lst
\t\tmake \"res se :res :j
\t]
\top :res
end
 
to setitem :n :lst :val
\tlocal \"lhs
\tlocal \"rhs
\tmake \"lhs slice :lst 1 :n-1
\tmake \"rhs slice :lst :n+1 count :lst
\top (se :lhs :val :rhs)
end
 
to swap :i :j :a
\tlocal \"t
\tmake \"t item :i :a
\tmake \"a setitem :i :a item :j :a
\tmake \"a setitem :j :a :t
\top :a
end
 
to shuffle :a
\tfor \"i [count :a 2]
\t[
\t\tmake \"a swap 1 + random :i :i :a
\t]
\top :a
end
 
make \"a ( list 1 2 3 4 5 6 7 8 9 10 )
make \"a shuffle :a
show :a
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Lua\n", + " \n", + " \n", + "

\n", + "
function table.shuffle(t)
local n = #t
while n > 1 do
local k = math.random(n)
t[n], t[k] = t[k], t[n]
n = n - 1
end
 
return t
end
math.randomseed( os.time() )
a = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
table.shuffle(a)
for i,v in ipairs(a) do print(i,v) end
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " M4\n", + " \n", + " \n", + "

\n", + "
divert(-1)
define(`randSeed',141592653)
define(`rand_t',`eval(randSeed^(randSeed>>13))')
define(`random',
`define(`randSeed',eval((rand_t^(rand_t<<18))&0x7fffffff))randSeed')
define(`for',
`ifelse($#,0,``$0'',
`ifelse(eval($2<=$3),1,
`pushdef(`$1',$2)$4`'popdef(`$1')$0(`$1',incr($2),$3,`$4')')')')
define(`set',`define(`$1[$2]',`$3')')
define(`get',`defn($1[$2])')
define(`new',`set($1,size,0)')
define(`deck',
`new($1)for(`x',1,$2,
`set(`$1',x,x)')`'set(`$1',size,$2)')
define(`show',
`for(`x',1,get($1,size),`get($1,x)`'ifelse(x,get($1,size),`',`, ')')')
define(`swap',`set($1,$2,get($1,$4))`'set($1,$4,$3)')
define(`shuffle',
`define(`s',get($1,size))`'for(`x',1,decr(s),
`swap($1,x,get($1,x),eval(x+random%(s-x+1)))')')
divert
 
deck(`b',52)
show(`b')
shuffle(`b')
show(`b')
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,\n",
+      "24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n",
+      "44, 45, 46, 47, 48, 49, 50, 51, 52\n",
+      "\n",
+      "6, 22, 33, 51, 35, 45, 16, 32, 7, 34, 10, 44, 5, 38, 43, 25, 29, 9, 37, 20, 21,\n",
+      "48, 24, 46, 8, 26, 41, 47, 49, 36, 14, 31, 15, 39, 12, 17, 13, 1, 3, 4, 27, 11,\n",
+      "28, 2, 19, 30, 42, 50, 18, 52, 40, 23\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Mathematica\n", + " \n", + " \n", + "

\n", + "

\n", + " Usage of built-in function:\n", + "

\n", + "
RandomSample[{1, 2, 3, 4, 5, 6}]
\n", + "

\n", + " Custom function:\n", + "

\n", + "
Shuffle[input_List /; Length[input] >= 1] := 
Module[{indices = {}, allindices = Range[Length[input]]},
Do[
AppendTo[indices,
Complement[allindices, indices][[RandomInteger[{1, i}]]]];
,
{i, Length[input], 1, -1}
];
input[[indices]]
]
\n", + "

\n", + " Example:\n", + "

\n", + "
Shuffle[{1, 2, 3, 4, 5, 6}]
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " MATLAB\n", + " \n", + " \n", + "

\n", + "

\n", + " Because this shuffle is done using rounds of operations on subsets of decreasing size, this is not an algorithm that can be vectorized using built-in MATLAB functions. So, we have to go old-school, no fancy MATLAB trickery.\n", + "

\n", + "
function list = knuthShuffle(list)
 
for i = (numel(list):-1:2)
 
j = floor(i*rand(1) + 1); %Generate random int between 1 and i
 
%Swap element i with element j.
list([j i]) = list([i j]);
end
end
\n", + "

\n", + " There is an alternate way to do this that is not a true Knuth Shuffle, but operates with the same spirit.\n", + "This alternate version produces the same output, saves some space,\n", + "and can be implemented in-line without the need to encapsulate it\n", + "in a function call like the Knuth Shuffle.\n", + "

\n", + "
function list = randSort(list)
 
list = list( randperm(numel(list)) );
 
end
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Maxima\n", + " \n", + " \n", + "

\n", + "
/* Maxima has an implementation of Knuth shuffle */
random_permutation([a, b, c]);
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Modula-3\n", + " \n", + " \n", + "

\n", + "
MODULE Shuffle EXPORTS Main;
 
IMPORT IO, Fmt, Random;
 
VAR a := ARRAY [0..9] OF INTEGER {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
 
PROCEDURE Shuffle(VAR a: ARRAY OF INTEGER) =
VAR temp: INTEGER;
n: INTEGER := NUMBER(a);
BEGIN
WITH rand = NEW(Random.Default).init() DO
WHILE n > 1 DO
WITH k = rand.integer(0, n - 1) DO
DEC(n);
temp := a[n];
a[n] := a[k];
a[k] := temp;
END;
END;
END;
END Shuffle;
 
BEGIN
Shuffle(a);
FOR i := FIRST(a) TO LAST(a) DO
IO.Put(Fmt.Int(a[i]) & \" \");
END;
IO.Put(\"\\n\");
END Shuffle.
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "martin@thinkpad:~$ ./shuffle\n",
+      "9 2 7 3 6 8 4 5 1 10 \n",
+      "martin@thinkpad:~$ ./shuffle\n",
+      "1 7 8 10 5 4 6 3 9 2 \n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " MUMPS\n", + " \n", + " \n", + "

\n", + "
Shuffle(items,separator)\tNew ii,item,list,n
\tSet list=\"\",n=0
\tSet ii=\"\" For Set ii=$Order(items(ii)) Quit:ii=\"\" Do
\t. Set n=n+1,list(n)=items(ii),list=list_$Char(n)
\t. Quit
\tFor Quit:list=\"\" Do
\t. Set n=$Random($Length(list))+1
\t. Set item=list($ASCII(list,n))
\t. Set $Extract(list,n)=\"\"
\t. Write item,separator
\t. Quit
\tQuit
CardDeck\tNew card,ii,suite
\tSet ii=0
\tFor suite=\"Spades\",\"Hearts\",\"Clubs\",\"Diamonds\" Do
\t. For card=2:1:10,\"Jack\",\"Queen\",\"King\",\"Ace\" Do
\t. . Set ii=ii+1,items(ii)=card_\" of \"_suite
\t. . Quit
\t. Quit
\tQuit
 
Kill items
Set items(91)=\"Red\"
Set items(82)=\"White\"
Set items(73)=\"Blue\"
Set items(64)=\"Yellow\"
Set items(55)=\"Green\"
Do Shuffle(.items,\" \") ; Red Yellow White Green Blue
Do Shuffle(.items,\" \") ; Red Blue Yellow White Green
Do Shuffle(.items,\" \") ; Green Blue Yellow White Red
 
Kill items Do CardDeck,Shuffle(.items,$Char(13,10))
Queen of Hearts
9 of Diamonds
10 of Hearts
King of Hearts
7 of Diamonds
9 of Clubs
6 of Diamonds
8 of Diamonds
Jack of Spades
Ace of Hearts
Queen of Diamonds
9 of Hearts
2 of Hearts
King of Clubs
10 of Spades
7 of Clubs
6 of Clubs
3 of Diamonds
3 of Spades
Queen of Clubs
Ace of Spades
4 of Hearts
Ace of Diamonds
7 of Spades
Ace of Clubs
King of Spades
10 of Diamonds
Jack of Diamonds
8 of Clubs
4 of Spades
Jack of Hearts
10 of Clubs
4 of Diamonds
3 of Hearts
2 of Diamonds
5 of Hearts
Jack of Clubs
2 of Clubs
5 of Diamonds
6 of Hearts
4 of Clubs
9 of Spades
3 of Clubs
5 of Spades
6 of Spades
7 of Hearts
8 of Spades
8 of Hearts
2 of Spades
Queen of Spades
King of Diamonds
5 of Clubs
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Nemerle\n", + " \n", + " \n", + "

\n", + "
Shuffle[T] (arr : array[T]) : array[T]
{
def rnd = Random();
 
foreach (i in [0 .. (arr.Length - 2)])
arr[i] <-> arr[(rnd.Next(i, arr.Length))];
arr
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " NetRexx\n", + " \n", + " \n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " version 1\n", + " \n", + "

\n", + "
/* NetRexx */
options replace format comments java crossref savelog symbols nobinary
 
import java.util.List
 
cards = [String -
'hA', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7', 'h8', 'h9', 'h10', 'hJ', 'hQ', 'hK' -
, 'cA', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'cJ', 'cQ', 'cK' -
, 'dA', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8', 'd9', 'd10', 'dJ', 'dQ', 'dK' -
, 'sA', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 'sJ', 'sQ', 'sK' -
]
cardsLen = cards.length
deck = ArrayList(cardsLen)
loop c_ = 0 to cardsLen - 1
deck.add(String(cards[c_]))
end c_
 
showHand(deck)
deck = ArrayList shuffle(deck)
showHand(deck)
 
return
 
method shuffle(deck = List) public static binary returns List
 
rn = Random()
dl = deck.size
 
loop i_ = dl - 1 to 1 by -1
j_ = rn.nextInt(i_)
__ = deck.get(i_)
deck.set(i_, deck.get(j_))
deck.set(j_, __)
end i_
 
return deck
 
method showHand(deck = ArrayList) public static binary
 
dl = deck.size
hl = dl % 4
loop c_ = 0 to dl - 1 by hl
d_ = c_ + hl
if d_ >= dl then d_ = dl
say ArrayList(deck.subList(c_, d_)).toString
end c_
say
 
return
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "[hA, h2, h3, h4, h5, h6, h7, h8, h9, h10, hJ, hQ, hK]\n",
+      "[cA, c2, c3, c4, c5, c6, c7, c8, c9, c10, cJ, cQ, cK]\n",
+      "[dA, d2, d3, d4, d5, d6, d7, d8, d9, d10, dJ, dQ, dK]\n",
+      "[sA, s2, s3, s4, s5, s6, s7, s8, s9, s10, sJ, sQ, sK]\n",
+      "\n",
+      "[s8, c10, sJ, c8, h10, h3, s3, d6, hJ, d3, c7, h5, s5]\n",
+      "[h8, d10, cK, s6, dQ, d9, d4, c4, c6, h6, cA, sA, dK]\n",
+      "[dJ, dA, d7, c2, d2, s10, sK, h2, c5, s7, cJ, d5, h9]\n",
+      "[c9, d8, c3, s9, cQ, sQ, h4, s4, hQ, h7, hK, hA, s2]\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " version 2\n", + " \n", + "

\n", + "
/* NetRexx ------------------------------------------------------------
* 08.01.2014 Walter Pachl modified to show state development a la Rexx
*--------------------------------------------------------------------*/

options replace format comments java crossref savelog symbols nobinary
 
import java.util.List
 
cards = [String '1','2','3','4','5','6','7','8','9','10']
cardsLen = cards.length
deck = ArrayList(cardsLen)
loop c_ = 0 to cardsLen - 1
deck.add(String(cards[c_]))
end c_
 
showHand(deck,'In ')
deck = ArrayList shuffle(deck)
showHand(deck,'Out')
return
 
method shuffle(deck = List) public static binary returns List
rn = Random()
dl = deck.size
loop i_ = dl - 1 to 1 by -1
j_ = rn.nextInt(i_)
__ = deck.get(i_)
deck.set(i_, deck.get(j_))
deck.set(j_, __)
say i_ j_ ArrayList(deck.subList(0,i_+1)).toString
end i_
return deck
 
method showHand(deck = ArrayList,tag=REXX) public static binary
say tag ArrayList(deck.subList(0,deck.size)).toString
return
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
In  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n",
+      "9 5 [1, 2, 3, 4, 5, 10, 7, 8, 9, 6]\n",
+      "8 4 [1, 2, 3, 4, 9, 10, 7, 8, 5]\n",
+      "7 2 [1, 2, 8, 4, 9, 10, 7, 3]\n",
+      "6 0 [7, 2, 8, 4, 9, 10, 1]\n",
+      "5 4 [7, 2, 8, 4, 10, 9]\n",
+      "4 1 [7, 10, 8, 4, 2]\n",
+      "3 2 [7, 10, 4, 8]\n",
+      "2 0 [4, 10, 7]\n",
+      "1 0 [10, 4]\n",
+      "Out [10, 4, 7, 8, 2, 9, 1, 3, 5, 6]
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Nim\n", + " \n", + " \n", + "

\n", + "
import math
randomize()
 
proc shuffle[T](x: var seq[T]) =
for i in countdown(x.high, 0):
let j = random(i + 1)
swap(x[i], x[j])
 
var x = @[0,1,2,3,4,5,6,7,8,9]
shuffle(x)
echo x
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Objective-C\n", + " \n", + " \n", + "

\n", + "
#import <Foundation/Foundation.h>
 
@interface NSMutableArray (KnuthShuffle)
- (void)knuthShuffle;
@end
@implementation NSMutableArray (KnuthShuffle)
- (void)knuthShuffle {
for (NSUInteger i = self.count-1; i > 0; i--) {
NSUInteger j = arc4random_uniform(i+1);
[self exchangeObjectAtIndex:i withObjectAtIndex:j];
}
}
@end
 
int main() {
@autoreleasepool {
NSMutableArray *x = [NSMutableArray arrayWithObjects:@0, @1, @2, @3, @4, @5, @6, @7, @8, @9, nil];
[x knuthShuffle];
NSLog(@\"%@\", x);
}
return 0;
}
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "(\n",
+      "    9,\n",
+      "    4,\n",
+      "    0,\n",
+      "    8,\n",
+      "    5,\n",
+      "    3,\n",
+      "    2,\n",
+      "    1,\n",
+      "    7,\n",
+      "    6\n",
+      ")\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " OCaml\n", + " \n", + " \n", + "

\n", + "
let shuffle arr =
for n = Array.length arr - 1 downto 1 do
let k = Random.int (n + 1) in
let temp = arr.(n) in
arr.(n) <- arr.(k);
arr.(k) <- temp
done
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Oforth\n", + " \n", + " \n", + "

\n", + "

\n", + " Works with any object that has the property to be Indexable (Lists, Intervals, ...)\n", + "Returns a new list\n", + "

\n", + "
Indexable method: shuffle
{
| s i l |
self size dup ->s ListBuffer newSize dup addAll(self) ->l
s loop: i [ i l at s rand dup l at i l put l put ]
l dup freeze
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Oz\n", + " \n", + " \n", + "

\n", + "
declare
proc {Shuffle Arr}
Low = {Array.low Arr}
High = {Array.high Arr}
in
for I in High..Low;~1 do
\tJ = Low + {OS.rand} mod (I - Low + 1)
OldI = Arr.I
in
\tArr.I := Arr.J
Arr.J := OldI
end
end
 
X = {Tuple.toArray unit(0 1 2 3 4 5 6 7 8 9)}
in
{Show {Array.toRecord unit X}}
{Shuffle X}
{Show {Array.toRecord unit X}}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " PARI/GP\n", + " \n", + " \n", + "

\n", + "
FY(v)={
forstep(n=#v,2,-1,
my(i=random(n)+1,t=v[i]);
v[i]=v[n];
v[n]=t
);
v
};
 
FY(vector(52,i,i))
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Pascal\n", + " \n", + " \n", + "

\n", + "
program Knuth;
 
const
max = 10;
type
list = array [1..max] of integer;
 
procedure shuffle(var a: list);
var
i,k,tmp: integer;
begin
randomize;
for i := max downto 2 do begin
k := random(i) + 1;
if (a[i] <> a[k]) then begin
tmp := a[i]; a[i] := a[k]; a[k] := tmp
end
end
end;
 
{ Test and display }
var
a: list;
i: integer;
 
begin
for i := 1 to max do
a[i] := i;
shuffle(a);
for i := 1 to max do
write(a[i], ' ');
writeln
end.
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
2 7 10 4 3 5 1 9 6 8
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Perl\n", + " \n", + " \n", + "

\n", + "
sub shuffle {
my @a = @_;
foreach my $n (1 .. $#a) {
my $k = int rand $n + 1;
$k == $n or @a[$k, $n] = @a[$n, $k];
}
return @a;
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Perl 6\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " Rakudo\n", + " \n", + " version #21 \"Seattle\"\n", + "
\n", + "
sub shuffle (@a is copy) {
for 1 ..^ @a -> $n {
my $k = (0 .. $n).pick;
$k == $n or @a[$k, $n] = @a[$n, $k];
}
return @a;
}
\n", + "

\n", + " The shuffle is also built into the pick method on lists when you pass it a \"whatever\" for the number to pick:\n", + "

\n", + "
my @deck = @cards.pick(*);
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " PHP\n", + " \n", + " \n", + "

\n", + "
//The Fisher-Yates original Method
function yates_shuffle($arr){
\t$shuffled = Array();
\twhile($arr){
\t\t$rnd = array_rand($arr);
\t\t$shuffled[] = $arr[$rnd];
\t\tarray_splice($arr, $rnd, 1);
\t}
\treturn $shuffled;
}
 
//The modern Durstenfeld-Knuth algorithm
function knuth_shuffle(&$arr){
\tfor($i=count($arr)-1;$i>0;$i--){
\t\t$rnd = mt_rand(0,$i);
\t\tlist($arr[$i], $arr[$rnd]) = array($arr[$rnd], $arr[$i]);
\t}
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " PicoLisp\n", + " \n", + " \n", + "

\n", + "
(de shuffle (Lst)
(make
(for (N (length Lst) (gt0 N))
(setq Lst
(conc
(cut (rand 0 (dec 'N)) 'Lst)
(prog (link (car Lst)) (cdr Lst)) ) ) ) ) )
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " PL/I\n", + " \n", + " \n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " version 1\n", + " \n", + "

\n", + "
declare T(0:10) fixed binary initial (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
declare (i, j, temp) fixed binary;
do i = lbound(T,1) to hbound(T,1);
j = min(random() * 12, 11);
temp = T(j); T(j) = T(i); T(i) = temp;
end;
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " version 2\n", + " \n", + "

\n", + "
 kn: Proc Options(main);
/*--------------------------------------------------------------------
* 07.01.2014 Walter Pachl translated from REXX version 2
* Iteration i: only the first i elements are candidates for swapping
*-------------------------------------------------------------------*/

Dcl T(10) Bin Fixed(15) Init(1,2,3,4,5,6,7,8,9,10);
Dcl (i,j,temp) Bin Fixed(15) init(0);
Dcl h Char(6);
Call show('In',10); /* show start */
do i = 10 To 2 By -1; /* shuffle */
j=random()*i+1;
Put string(h)Edit(i,j)(f(2),f(3));
temp=t(i); t(i)=t(j); t(j)=temp; /* t(i) <-> t(j) */
Call show(h,i); /* show intermediate states */
end;
Call show('Out',10); /* show final state */
 
show: Proc(txt,n);
Dcl txt Char(*);
Dcl n Bin Fixed(15);
Put Edit(txt,(t(k) do k=1 To n))(Skip,a(7),10(f(3)));
End;
end;
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
In       1  2  3  4  5  6  7  8  9 10\n",
+      "10  5    1  2  3  4 10  6  7  8  9  5\n",
+      " 9  1    9  2  3  4 10  6  7  8  1\n",
+      " 8  7    9  2  3  4 10  6  8  7\n",
+      " 7  2    9  8  3  4 10  6  2\n",
+      " 6  6    9  8  3  4 10  6\n",
+      " 5  3    9  8 10  4  3\n",
+      " 4  2    9  4 10  8\n",
+      " 3  3    9  4 10\n",
+      " 2  1    4  9\n",
+      "Out      4  9 10  8  3  6  2  7  1  5
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " PowerShell\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " PowerShell\n", + " \n", + " version 2\n", + "
\n", + "
function shuffle ($a) {
$c = $a.Clone() # make copy to avoid clobbering $a
1..($c.Length - 1) | ForEach-Object {
$i = Get-Random -Minimum $_ -Maximum $c.Length
$c[$_-1],$c[$i] = $c[$i],$c[$_-1]
$c[$_-1] # return newly-shuffled value
}
$c[-1] # last value
}
\n", + "

\n", + " This yields the values one by one instead of returning the array as a whole, so the rest of the pipeline can work on the values while shuffling is still in progress.\n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " PureBasic\n", + " \n", + " \n", + "

\n", + "
EnableExplicit
 
Procedure KnuthShuffle(Array a(1))
Protected i, last = ArraySize(a())
 
For i = last To 1 Step -1
Swap a(i), a(Random(i))
Next
EndProcedure
 
Procedure.s ArrayToString(Array a(1))
Protected ret$, i, last = ArraySize(a())
 
ret$ = Str(a(0))
For i = 1 To last
ret$ + \",\" + Str(a(i))
Next
ProcedureReturn ret$
EndProcedure
 
 
#NumElements = 10
 
Dim a(#NumElements-1)
Define i
 
For i = 0 To #NumElements-1
a(i) = i
Next
 
KnuthShuffle(a())
Debug \"shuffled: \" + ArrayToString(a())
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
shuffled: 1,8,6,0,5,9,2,4,7,3
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Python\n", + " \n", + " \n", + "

\n", + "

\n", + " Python's standard library function\n", + " \n", + " \n", + " random.shuffle\n", + " \n", + " \n", + " uses this algorithm and so should normally be used.\n", + "The function below is very similar:\n", + "

\n", + "
from random import randrange
 
def knuth_shuffle(x):
for i in range(len(x)-1, 0, -1):
j = randrange(i + 1)
x[i], x[j] = x[j], x[i]
 
x = list(range(10))
knuth_shuffle(x)
print(\"shuffled:\", x)
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "shuffled: [5, 1, 6, 0, 8, 4, 2, 3, 9, 7]\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " R\n", + " \n", + " \n", + "

\n", + "

\n", + " See also, the built-in function 'sample'.\n", + "

\n", + "

\n", + " Original Fisher-Yates version\n", + "

\n", + "
fisheryatesshuffle <- function(n)
{
pool <- seq_len(n)
a <- c()
while(length(pool) > 0)
{
k <- sample.int(length(pool), 1)
a <- c(a, pool[k])
pool <- pool[-k]
}
a
}
\n", + "

\n", + " Knuth variation:\n", + "

\n", + "
fisheryatesknuthshuffle <- function(n)
{
a <- seq_len(n)
while(n >=2)
{
k <- sample.int(n, 1)
if(k != n)
{
temp <- a[k]
a[k] <- a[n]
a[n] <- temp
}
n <- n - 1
}
a
}
 
#Example usage:
fisheryatesshuffle(6) # e.g. 1 3 6 2 4 5
x <- c(\"foo\", \"bar\", \"baz\", \"quux\")
x[fisheryatesknuthshuffle(4)] # e.g. \"bar\" \"baz\" \"quux\" \"foo\"
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Racket\n", + " \n", + " \n", + "

\n", + "
 
#lang racket
 
(define (swap! vec i j)
(let ([tmp (vector-ref vec i)])
(vector-set! vec i (vector-ref vec j))
(vector-set! vec j tmp)))
 
(define (knuth-shuffle x)
(if (list? x)
(vector->list (knuth-shuffle (list->vector x)))
(begin (for ([i (in-range (sub1 (vector-length x)) 0 -1)])
(define r (random i))
(swap! x i r))
x)))
 
(knuth-shuffle '(1 2 3 4))
 
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " REBOL\n", + " \n", + " \n", + "

\n", + "
rebol [
Title: \"Fisher-Yates\"
Purpose: {Fisher-Yates shuffling algorithm}
]

 
fisher-yates: func [b [block!] /local n i j k] [
n: length? b: copy b
i: n
while [i > 1] [
if i <> j: random i [
error? set/any 'k pick b j
change/only at b j pick b i
change/only at b i get/any 'k
]
i: i - 1
]
b
]
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " REXX\n", + " \n", + " \n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " version 0, card pips\n", + " \n", + "

\n", + "
/*REXX program shuffles a deck of playing cards using the Knuth shuffle.*/
rank='A 2 3 4 5 6 7 8 9 10 J Q K' /*pips of the playing cards. */
suit='♣♠♦♥' /*suit \" \" \" \" */
parse arg seed .; if seed\\=='' then call random ,,seed /*repeatability?*/
say '────────────────── getting a new deck out of the box ···'
deck.1='highJoker' /*good decks have a color joker, */
deck.2='lowJoker' /*··· and a black & white joker. */
cards=2 /*now, two cards are in the deck.*/
do j =1 for length(suit)
do k=1 for words(rank); cards=cards+1
deck.cards=substr(suit,j,1)word(rank,k)
end /*k*/
end /*j*/
call showDeck
say '────────────────── shuffling' cards \"cards ···\"
do s=cards by -1 to 2; rand=random(1,s)
parse value deck.rand deck.s with deck.s deck.rand
/* [↑] swap two cards in the deck*/
end /*s*/
call showDeck
exit /*stick a fork in it, we're done.*/
/*──────────────────────────────────SHOWDECK subroutine─────────────────*/
showDeck: _=; do m=1 for cards; _=_ deck.m; end /*m*/; say _; say; return
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "────────────────── getting a new deck out of the box ···\n",
+      " highJoker lowJoker ♣A ♣2 ♣3 ♣4 ♣5 ♣6 ♣7 ♣8 ♣9 ♣10 ♣J ♣Q ♣K ♠A ♠2 ♠3 ♠4 ♠5 ♠6 ♠7 ♠8 ♠9 ♠10 ♠J ♠Q ♠K ♦A ♦2 ♦3 ♦4 ♦5 ♦6 ♦7 ♦8 ♦9 ♦10 ♦J ♦Q ♦K ♥A ♥2 ♥3 ♥4 ♥5 ♥6 ♥7 ♥8 ♥9 ♥10 ♥J ♥Q ♥K\n",
+      "\n",
+      "────────────────── shuffling 54 cards ···\n",
+      " ♥3 ♦7 ♦4 ♥10 ♠K ♠Q ♣K ♣4 ♠3 ♦9 ♥J ♣10 ♦2 ♦3 ♠10 ♦8 ♣9 ♠J ♥5 ♣7 ♠4 ♥9 ♥8 ♠9 ♠7 ♠6 ♠A ♠5 ♥A ♣6 ♣A ♦5 ♥K ♥Q ♦6 ♣Q ♣J ♣8 ♣2 ♦A ♦K ♣3 ♦J ♥4 ♥2 ♥7 ♣5 highJoker ♦10 ♠2 lowJoker ♥6 ♦Q ♠8\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " version 1, card names\n", + " \n", + "

\n", + "
/*REXX program shuffles a deck of playing cards using the Knuth shuffle.*/
rank = 'ace deuce trey 4 5 6 7 8 9 10 jack queen king' /*use pip names.*/
suit = 'club spade diamond heart' /* \" suit \" */
say '────────────────── getting a new deck out of the box ···'
deck.1 = ' color joker' /*good decks have a color joker, */
deck.2 = ' b&w joker' /*··· and a black & white joker. */
cards=2 /*now, two cards are in the deck.*/
do j =1 for words(suit)
do k=1 for words(rank); cards=cards+1 /*bump counter.*/
deck.cards=right(word(suit,j),7) word(rank,k) /*assign.*/
end /*k*/
end /*j*/
 
call showDeck 'ace' /*inserts blank when ACE is found*/
say '────────────────── shuffling' cards \"cards ···\"
 
do s=cards by -1 to 2; rand=random(1,s) /*get random number for swap*/
_=deck.rand; deck.rand=deck.s; deck.s=_ /*swap 2 cards in card deck.*/
end /*s*/
 
call showDeck
exit /*stick a fork in it, we're done.*/
/*──────────────────────────────────SHOWDECK subroutine─────────────────*/
showDeck: parse arg break; say /*get sep card, shows blank line*/
do m=1 for cards /*traipse through the deck. */
if pos(break,deck.m)\\==0 then say /*a blank: easier to read cards.*/
say 'card' right(m,2) '───►' deck.m /*display a particular card. */
end /*m*/
say /*show a trailing blank line. */
return
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "────────────────── getting a new deck out of the box ···\n",
+      "\n",
+      "card  1 ───►   color joker\n",
+      "card  2 ───►     b&w joker\n",
+      "\n",
+      "card  3 ───►    club ace\n",
+      "card  4 ───►    club deuce\n",
+      "card  5 ───►    club trey\n",
+      "card  6 ───►    club 4\n",
+      "card  7 ───►    club 5\n",
+      "card  8 ───►    club 6\n",
+      "card  9 ───►    club 7\n",
+      "card 10 ───►    club 8\n",
+      "card 11 ───►    club 9\n",
+      "card 12 ───►    club 10\n",
+      "card 13 ───►    club jack\n",
+      "card 14 ───►    club queen\n",
+      "card 15 ───►    club king\n",
+      "\n",
+      "card 16 ───►   spade ace\n",
+      "card 17 ───►   spade duece\n",
+      "card 18 ───►   spade trey\n",
+      "card 19 ───►   spade 4\n",
+      "card 20 ───►   spade 5\n",
+      "card 21 ───►   spade 6\n",
+      "card 22 ───►   spade 7\n",
+      "card 23 ───►   spade 8\n",
+      "card 24 ───►   spade 9\n",
+      "card 25 ───►   spade 10\n",
+      "card 26 ───►   spade jack\n",
+      "card 27 ───►   spade queen\n",
+      "card 28 ───►   spade king\n",
+      "\n",
+      "card 29 ───► diamond ace\n",
+      "card 30 ───► diamond duece\n",
+      "card 31 ───► diamond trey\n",
+      "card 32 ───► diamond 4\n",
+      "card 33 ───► diamond 5\n",
+      "card 34 ───► diamond 6\n",
+      "card 35 ───► diamond 7\n",
+      "card 36 ───► diamond 8\n",
+      "card 37 ───► diamond 9\n",
+      "card 38 ───► diamond 10\n",
+      "card 39 ───► diamond jack\n",
+      "card 40 ───► diamond queen\n",
+      "card 41 ───► diamond king\n",
+      "\n",
+      "card 42 ───►   heart ace\n",
+      "card 43 ───►   heart duece\n",
+      "card 44 ───►   heart trey\n",
+      "card 45 ───►   heart 4\n",
+      "card 46 ───►   heart 5\n",
+      "card 47 ───►   heart 6\n",
+      "card 48 ───►   heart 7\n",
+      "card 49 ───►   heart 8\n",
+      "card 50 ───►   heart 9\n",
+      "card 51 ───►   heart 10\n",
+      "card 52 ───►   heart jack\n",
+      "card 53 ───►   heart queen\n",
+      "card 54 ───►   heart king\n",
+      "\n",
+      "────────────────── shuffling 54 cards ···\n",
+      "\n",
+      "card  1 ───► diamond king\n",
+      "card  2 ───►   spade jack\n",
+      "card  3 ───►   spade 7\n",
+      "card  4 ───►    club 4\n",
+      "card  5 ───►   heart 7\n",
+      "card  6 ───►   heart 10\n",
+      "card  7 ───►    club jack\n",
+      "card  8 ───► diamond duece\n",
+      "card  9 ───►    club 10\n",
+      "card 10 ───► diamond 5\n",
+      "card 11 ───►   spade 10\n",
+      "card 12 ───►   heart jack\n",
+      "card 13 ───►    club king\n",
+      "card 14 ───► diamond 8\n",
+      "card 15 ───►   heart 9\n",
+      "card 16 ───►   spade ace\n",
+      "card 17 ───►   spade king\n",
+      "card 18 ───►   spade trey\n",
+      "card 19 ───►   color joker\n",
+      "card 20 ───►   heart 8\n",
+      "card 21 ───► diamond 7\n",
+      "card 22 ───► diamond jack\n",
+      "card 23 ───►    club duece\n",
+      "card 24 ───►    club 9\n",
+      "card 25 ───►    club 5\n",
+      "card 26 ───►   spade 9\n",
+      "card 27 ───►   spade queen\n",
+      "card 28 ───►   heart 5\n",
+      "card 29 ───►   spade 6\n",
+      "card 30 ───►    club 8\n",
+      "card 31 ───►   heart duece\n",
+      "card 32 ───► diamond ace\n",
+      "card 33 ───►   spade 4\n",
+      "card 34 ───► diamond 9\n",
+      "card 35 ───►     b&w joker\n",
+      "card 36 ───► diamond 4\n",
+      "card 37 ───►   heart king\n",
+      "card 38 ───►    club ace\n",
+      "card 39 ───►   spade duece\n",
+      "card 40 ───►    club trey\n",
+      "card 41 ───► diamond queen\n",
+      "card 42 ───► diamond 10\n",
+      "card 43 ───►   spade 8\n",
+      "card 44 ───► diamond trey\n",
+      "card 45 ───►    club queen\n",
+      "card 46 ───►   heart ace\n",
+      "card 47 ───►   heart queen\n",
+      "card 48 ───►   heart trey\n",
+      "card 49 ───►    club 7\n",
+      "card 50 ───►    club 6\n",
+      "card 51 ───►   heart 4\n",
+      "card 52 ───►   heart 6\n",
+      "card 53 ───► diamond 6\n",
+      "card 54 ───►   spade 5\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " version 2\n", + " \n", + "

\n", + "
/* REXX ---------------------------------------------------------------
* 05.01.2014 Walter Pachl
* borrow one improvement from version 1
* 06.01.2014 removed -\"- (many tests cost more than few \"swaps\")
*--------------------------------------------------------------------*/

Call random ,,123456 /* seed for random */
Do i=1 To 10; a.i=i; End; /* fill array */
Call show 'In',10 /* show start */
do i = 10 To 2 By -1 /* shuffle */
j=random(i-1)+1;
h=right(i,2) right(j,2)
Parse Value a.i a.j With a.j a.i /* a.i <-> a.j */
Call show h,i /* show intermediate states */
end;
Call show 'Out',10 /* show fomaö state */
Exit
 
show: Procedure Expose a.
Parse Arg txt,n
ol=left(txt,6);
Do k=1 To n; ol=ol right(a.k,2); End
Say ol
Return
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
In      1  2  3  4  5  6  7  8  9 10\n",
+      "10  2   1 10  3  4  5  6  7  8  9  2\n",
+      " 9  6   1 10  3  4  5  9  7  8  6\n",
+      " 8  6   1 10  3  4  5  8  7  9\n",
+      " 7  3   1 10  7  4  5  8  3\n",
+      " 6  5   1 10  7  4  8  5\n",
+      " 5  1   8 10  7  4  1\n",
+      " 4  1   4 10  7  8\n",
+      " 3  1   7 10  4\n",
+      " 2  1  10  7\n",
+      "Out    10  7  4  8  1  5  3  9  6  2
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Ruby\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Translation of\n", + " \n", + " :\n", + " \n", + " Tcl\n", + " \n", + "
\n", + "
class Array
def knuth_shuffle!
j = length
i = 0
while j > 1
r = i + rand(j)
self[i], self[r] = self[r], self[i]
i += 1
j -= 1
end
self
end
end
 
r = Hash.new(0)
100_000.times do |i|
a = [1,2,3].knuth_shuffle!
r[a] += 1
end
 
r.keys.sort.each {|a| puts \"#{a.inspect} => #{r[a]}\"}
\n", + "

\n", + " results in\n", + "

\n", + "
[1, 2, 3] => 16572\n",
+      "[1, 3, 2] => 16610\n",
+      "[2, 1, 3] => 16633\n",
+      "[2, 3, 1] => 16714\n",
+      "[3, 1, 2] => 16838\n",
+      "[3, 2, 1] => 16633
\n", + "

\n", + " \n", + " More idomatic:\n", + " \n", + "

\n", + "
class Array
def knuth_shuffle!
(length - 1).downto(1) do |i|
j = rand(i + 1)
self[i], self[j] = self[j], self[i]
end
self
end
end
\n", + "

\n", + "
\n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Run BASIC\n", + " \n", + " \n", + "

\n", + "
dim cards(52) 
for i = 1 to 52 ' make deck
cards(i) = i
next
 
for i = 52 to 1 step -1 ' shuffle deck
r = int((rnd(1)*i) + 1)
if r <> i then
hold = cards(r)
cards(r) = cards(i)
cards(i) = hold
end if
next
 
print \"== Shuffled Cards ==\" ' print shuffled cards
for i = 1 to 52
print cards(i);\" \";
if i mod 18 = 0 then print
next
print
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Rust\n", + " \n", + " \n", + "

\n", + "
use std::iter;
use std::rand;
use std::rand::Rng;
use std::vec;
 
fn knuth_shuffle<T>(v: &mut [T]) {
let mut rng = rand::rng();
let l = v.len();
 
for n in iter::range(0, l) {
let i = rng.gen_range(0, l - n);
v.swap(i, l - n - 1);
}
}
 
fn main() {
let mut v = vec::from_fn(10, |i| i);
 
println!(\"before: {:?}\", v);
knuth_shuffle(v);
println!(\"after: {:?}\", v);
}
\n", + "

\n", + " Works as of 2014-01-05.\n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Scala\n", + " \n", + " \n", + "

\n", + "
def shuffle[T](a: Array[T]) = {
for (i <- 1 until a.size reverse) {
val j = util.Random nextInt (i + 1)
val t = a(i)
a(i) = a(j)
a(j) = t
}
a
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Scratch\n", + " \n", + " \n", + "

\n", + "

\n", + " See Knuth's shuffle in action. Visit\n", + " \n", + " this Scratch implementation\n", + " \n", + " to see a demo and inspect its source.\n", + "

\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Seed7\n", + " \n", + " \n", + "

\n", + "
$ include \"seed7_05.s7i\";
 
const type: intArray is array integer;
 
const proc: shuffle (inout intArray: a) is func
local
var integer: i is 0;
var integer: k is 0;
var integer: tmp is 0;
begin
for i range maxIdx(a) downto 2 do
k := rand(1, i);
tmp := a[i];
a[i] := a[k];
a[k] := tmp;
end for;
end func;
 
const proc: main is func
local
var intArray: a is 10 times 0;
var integer: i is 0;
begin
for key i range a do
a[i] := i;
end for;
shuffle(a);
for i range a do
write(i <& \" \");
end for;
writeln;
end func;
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "7 5 6 8 3 10 9 4 2 1 \n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Sidef\n", + " \n", + " \n", + "

\n", + "
func shuffle (a) {
 
{ |n|
var k = (n + 1 -> rand.int);
k == n || (a[k, n] = a[n, k]);
} * a.offset;
 
return a;
}
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Smalltalk\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " GNU Smalltalk\n", + " \n", + "
\n", + "
\"The selector swap:with: is documented, but it seems not
implemented (GNU Smalltalk version 3.0.4); so here it is an implementation\"

SequenceableCollection extend [
swap: i with: j [
|t|
t := self at: i.
self at: i put: (self at: j).
self at: j put: t.
]
].
 
Object subclass: Shuffler [
Shuffler class >> Knuth: aSequenceableCollection [
|n k|
n := aSequenceableCollection size.
[ n > 1 ] whileTrue: [
k := Random between: 1 and: n.
aSequenceableCollection swap: n with: k.
n := n - 1
]
]
].
\n", + "

\n", + " Testing\n", + "

\n", + "
\"Test\"
|c|
c := OrderedCollection new.
c addAll: #( 1 2 3 4 5 6 7 8 9 ).
Shuffler Knuth: c.
c display.
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " SNOBOL4\n", + " \n", + " \n", + "

\n", + "
* Library for random()
-include 'Random.sno'
 
* # String -> array
define('s2a(str,n)i') :(s2a_end)
s2a s2a = array(n); str = str ' '
sa1 str break(' ') . s2a<i = i + 1> span(' ') = :s(sa1)f(return)
s2a_end
 
* # Array -> string
define('a2s(a)i') :(a2s_end)
a2s a2s = a2s a<i = i + 1> ' ' :s(a2s)f(return)
a2s_end
 
* # Knuth shuffle in-place
define('shuffle(a)alen,n,k,tmp') :(shuffle_end)
shuffle n = alen = prototype(a);
sh1 k = convert(random() * alen,'integer') + 1
eq(a<n>,a<k>) :s(sh2)
tmp = a<n>; a<n> = a<k>; a<k> = tmp
sh2 n = gt(n,1) n - 1 :s(sh1)
shuffle = a :(return)
shuffle_end
 
* # Test and display
a = s2a('1 2 3 4 5 6 7 8 9 10',10)
output = a2s(a) '->'
shuffle(a)
output = a2s(a)
end
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
1 2 3 4 5 6 7 8 9 10 ->\n",
+      "2 10 4 9 1 5 6 8 7 3
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Swift\n", + " \n", + " \n", + "

\n", + "

\n", + " Uses generics to allow shuffling arrays of any type.\n", + "

\n", + "
import Darwin
 
func shuffle<T>(inout array: [T]) {
for i in 1..<array.count {
let j = Int(arc4random_uniform(UInt32(i)))
(array[i], array[j]) = (array[j], array[i])
}
}
\n", + "

\n", + " Test program\n", + "

\n", + "
var array = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
println(array)
shuffle(&array)
println(array)
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n",
+      "[2, 5, 7, 1, 6, 10, 4, 3, 8, 9]
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Tcl\n", + " \n", + " \n", + "

\n", + "
proc knuth_shuffle lst {
set j [llength $lst]
for {set i 0} {$j > 1} {incr i;incr j -1} {
set r [expr {$i+int(rand()*$j)}]
set t [lindex $lst $i]
lset lst $i [lindex $lst $r]
lset lst $r $t
}
return $lst
}
 
% knuth_shuffle {1 2 3 4 5}
2 1 3 5 4
% knuth_shuffle {1 2 3 4 5}
5 2 1 4 3
% knuth_shuffle {tom dick harry peter paul mary}
tom paul mary harry peter dick
\n", + "

\n", + " As a test of skewing (an indicator of a poor implementation) this code was used:\n", + "

\n", + "
% for {set i 0} {$i<100000} {incr i} {
foreach val [knuth_shuffle {1 2 3 4 5}] pos {pos0 pos1 pos2 pos3 pos4} {
incr tots($pos) $val
}
}
% parray tots
tots(pos0) = 300006
tots(pos1) = 300223
tots(pos2) = 299701
tots(pos3) = 299830
tots(pos4) = 300240
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " TI-83 BASIC\n", + " \n", + " \n", + "

\n", + "

\n", + " Input L\n", + " \n", + " 1\n", + " \n", + " , output L\n", + " \n", + " 2\n", + " \n", + " .\n", + "

\n", + "
:\"SHUFFLE\"\n",
+      ":L1→L2\n",
+      ":dim(L2)→A\n",
+      ":For(B,1,dim(L2)-1)\n",
+      ":randInt(1,A)→C\n",
+      ":L2(C)→D\n",
+      ":L2(A)→L2(C)\n",
+      ":D→L2(A)\n",
+      ":A-1→A\n",
+      ":End\n",
+      ":DelVar A\n",
+      ":DelVar B\n",
+      ":DelVar C\n",
+      ":DelVar D\n",
+      ":Return\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " TUSCRIPT\n", + " \n", + " \n", + "

\n", + "
$$ MODE TUSCRIPT
oldnumbers=newnumbers=\"\",range=20
LOOP nr=1,#range
oldnumbers=APPEND(oldnumbers,nr)
ENDLOOP
 
PRINT \"before \",oldnumbers
 
LOOP r=#range,1,-1
RANDNR=RANDOM_NUMBERS (1,#r,1)
shuffle=SELECT (oldnumbers,#randnr,oldnumbers)
newnumbers=APPEND(newnumbers,shuffle)
ENDLOOP
 
PRINT \"after \",newnumbers
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "before 1'2'3'4'5'6'7'8'9'10'11'12'13'14'15'16'17'18'19'20\n",
+      "after  7'16'13'11'1'9'15'4'18'14'3'12'17'8'19'20'6'5'2'10\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " UNIX Shell\n", + " \n", + " \n", + "

\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " ksh93\n", + " \n", + "
\n", + "
\n", + " \n", + " Works with\n", + " \n", + " :\n", + " \n", + " pdksh\n", + " \n", + "
\n", + "
# Shuffle array[@].
function shuffle {
\tinteger i j t
 
\t((i = ${#array[@]}))
\twhile ((i > 1)); do
\t\t((j = RANDOM)) # 0 <= j < 32768
\t\t((j < 32768 % i)) && continue # no modulo bias
\t\t((j %= i)) # 0 <= j < i
 
\t\t((i -= 1))
\t\t((t = array[i]))
\t\t((array[i] = array[j]))
\t\t((array[j] = t))
\tdone
}
 
# Test program.
set -A array 11 22 33 44 55 66 77 88 99 110
shuffle
echo \"${array[@]}\"
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Ursala\n", + " \n", + " \n", + "

\n", + "

\n", + " This function works on lists of any type and length, including character strings.\n", + "

\n", + "
shuffle = @iNX ~&l->r ^jrX/~&l ~&lK8PrC
\n", + "

\n", + " test program:\n", + "

\n", + "
#cast %s
 
example = shuffle 'abcdefghijkl'
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
'keacfjlbdigh'
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " VBScript\n", + " \n", + " \n", + "

\n", + "
\n", + "
\n", + " Implementation\n", + "
\n", + "
\n", + "
 
function shuffle( a )
\tdim i
\tdim r
\trandomize timer
\tfor i = lbound( a ) to ubound( a )
\t\tr = int( rnd * ( ubound( a ) + 1 ) )
\t\tif r <> i then
\t\t\tswap a(i), a(r)
\t\tend if
\tnext
\tshuffle = a
end function
 
sub swap( byref a, byref b )
\tdim tmp
\ttmp = a
\ta = b
\tb = tmp
end sub
\n", + "
\n", + "
\n", + " Invocation\n", + "
\n", + "
\n", + "
dim a
a = array( 1,2,3,4,5,6,7,8,9)
wscript.echo \"before: \", join( a, \", \" )
shuffle a
wscript.echo \"after: \", join( a, \", \" )
shuffle a
wscript.echo \"after: \", join( a, \", \" )
wscript.echo \"--\"
a = array( now(), \"cow\", 123, true, sin(1), 16.4 )
wscript.echo \"before: \", join( a, \", \" )
shuffle a
wscript.echo \"after: \", join( a, \", \" )
shuffle a
wscript.echo \"after: \", join( a, \", \" )
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
\n",
+      "before:  1, 2, 3, 4, 5, 6, 7, 8, 9\n",
+      "after:  6, 4, 1, 2, 7, 3, 5, 8, 9\n",
+      "after:  8, 7, 3, 2, 6, 5, 9, 1, 4\n",
+      "--\n",
+      "before:  16/02/2010 5:46:58 PM, cow, 123, True, 0.841470984807897, 16.4\n",
+      "after:  True, 16.4, 16/02/2010 5:46:58 PM, 123, cow, 0.841470984807897\n",
+      "after:  16.4, 16/02/2010 5:46:58 PM, 123, 0.841470984807897, True, cow\n",
+      "
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " Vedit macro language\n", + " \n", + " \n", + "

\n", + "

\n", + " The shuffle routine in\n", + " \n", + " Playing Cards\n", + " \n", + " shuffles text lines in edit buffer.\n", + "This example shuffles numeric registers #0 to #19.\n", + "

\n", + "

\n", + " The output will be inserted in current edit buffer.\n", + "

\n", + "
// Test main
#90 = Time_Tick // seed for random number generator
#99 = 20 // number of items in the array
 
IT(\"Before:\") IN
for (#100 = 0; #100 < #99; #100++) {
#@100 = #100
Num_Ins(#@100, LEFT+NOCR) IT(\" \")
}
IN
 
Call(\"SHUFFLE_NUMBERS\")
 
IT(\"After:\") IN
for (#100 = 0; #100 < #99; #100++) {
Num_Ins(#@100, LEFT+NOCR) IT(\" \")
}
IN
Return
 
//--------------------------------------------------------------
// Shuffle numeric registers #0 to #nn
// #99 = number of registers to shuffle (nn-1)
//
:SHUFFLE_NUMBERS:
for (#91 = #99-1; #91 > 0; #91--) {
Call(\"RANDOM\")
#101 = Return_Value
#102 = #@101; #@101 = #@91; #@91 = #102
}
Return
 
//--------------------------------------------------------------
// Generate random numbers in range 0 <= Return_Value < #91
// #90 = Seed (0 to 0x7fffffff)
// #91 = Scaling (0 to 0x10000)
//
:RANDOM:
#92 = 0x7fffffff / 48271
#93 = 0x7fffffff % 48271
#90 = (48271 * (#90 % #92) - #93 * (#90 / #92)) & 0x7fffffff
Return ((#90 & 0xffff) * #91 / 0x10000)
\n", + "
\n", + "
\n", + "
\n", + " Output:\n", + "
\n", + "
\n", + "
\n", + "
Before:\n",
+      "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 \n",
+      "After:\n",
+      "9 13 8 18 10 1 17 15 0 16 14 19 3 2 7 11 6 4 5 12 
\n", + "

\n", + " \n", + " [\n", + " \n", + " edit\n", + " \n", + " ]\n", + " \n", + " \n", + " \n", + " zkl\n", + " \n", + " \n", + "

\n", + "

\n", + " Two versions, imperative and functional, same results.\n", + "xs has to be a mutable list.\n", + "

\n", + "
fcn kshuffle(xs){foreach i in ([xs.len()-1..1,-1]){
xs.swap(i,(0).random(0,i+1))}
xs
}
fcn kshufflep(xs){[xs.len()-1..1,-1].pump(Void,'wrap(i){
xs.swap(i,(0).random(0,i+1))})
}
\n", + "
\n",
+      "var ns=(1).pump(10,List).copy() // [1..10] made mutable\n",
+      "kshuffle(ns)  //-->L(6,3,8,2,4,5,10,9,1,7)\n",
+      "\n",
+      "ns=\"this is a test foo bar hoho\".split(\" \").copy();\n",
+      "kshufflep(ns)  //-->L(\"a\",\"bar\",\"hoho\",\"foo\",\"test\",\"is\",\"this\")\n",
+      "
\n", + "
\n", + " Retrieved from \"\n", + " \n", + " http://rosettacode.org/mw/index.php?title=Knuth_shuffle&oldid=204996\n", + " \n", + " \"\n", + "
\n", + "
\n", + "
\n", + " \n", + " Categories\n", + " \n", + " :\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " Personal tools\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " Namespaces\n", + "
\n", + " \n", + "
\n", + "
\n", + "

\n", + "

\n", + "
\n", + " \n", + " Variants\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
    \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " Views\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Actions\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
    \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " Community\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " Explore\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " Misc\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " Tweet\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " Toolbox\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "url2 = 'http://rosettacode.org/wiki/Knuth_shuffle'\n", + "req2 = urllib.request.Request(url2, headers={'User-Agent': 'Mozilla/5.0'})\n", + "content = urllib.request.urlopen(req2).read()\n", + "soup2 = BeautifulSoup(content)\n", + "print(soup2.prettify())" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['SSc1|Site selection|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc2|Development density and community connectivity|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc3|Brownfield redevelopment|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc4.1|Alternative transportation - public transportation access|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc4.2|Alternative transportation - bicycle storage and changing rooms|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc4.3|Alternative transportation - low emitting and fuel efficient vehicles|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc4.4|Alternative transportation - parking capacity|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc5.1|Site development - protect or restore habitat|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc5.2|Site development - maximize open space|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc6.1|Stormwater design - quantity control|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc6.2|Stormwater design - quality control|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc7.1|Heat island effect - non-roof|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc7.2|Heat island effect - roof|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'SSc8|Light pollution reduction|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'WEc1.1|Water efficient landscaping - reduce by 50%|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'WEc1.2|Water efficient landscaping - no potable water use or no irrigation|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'WEc2|Innovative wastewater technologies|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'WEc3.1|Water use reduction - 20% reduction|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'WEc3.2|Water use reduction - 
30% reduction|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EAc1|Optimize energy performance|2\\t\\t\\t\\t\\t\\t/ 10',\n", + " 'EAc2|On-site renewable energy|0\\t\\t\\t\\t\\t\\t/ 3',\n", + " 'EAc3|Enhanced commissioning|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EAc4|Enhanced refrigerant management|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EAc5|Measurement and verification|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EAc6|Green power|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc1.1|Building reuse - maintain 75% of existing walls, floors & roof|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc1.2|Building reuse - maintain 95% of existing walls, floors & roof|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc1.3|Building reuse - maintain 50% of interior non-structural elements|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc2.1|Construction waste management - divert 50% from disposal|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc2.2|Construction waste management - divert 75% from disposal|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc3.1|Materials reuse - 5%|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc3.2|Materials reuse - 10%|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc4.1|Recycled content - 10% (post-consumer + 1/2 pre-consumer)|2\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc4.2|Recycled content - 20% (post-consumer + 1/2 pre-consumer)|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc5.1|Regional materials - 10% extracted, processed and manufactured regiona...|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc5.2|Regional materials - 20% extracted, processed and manufactured regiona...|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc6|Rapidly renewable materials|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'MRc7|Certified wood|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc1|Outdoor air delivery monitoring|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc2|Increased ventilation|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc3.1|Construction IAQ management plan - during construction|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc3.2|Construction IAQ management plan - before occupancy|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc4.1|Low-emitting materials - adhesives and 
sealants|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc4.2|Low-emitting materials - paints and coatings|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc4.3|Low-emitting materials - carpet systems|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc4.4|Low-emitting materials - composite wood and agrifiber products|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc5|Indoor chemical and pollutant source control|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc6.1|Controllability of systems - lighting|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc6.2|Controllability of systems - thermal comfort|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc7.1|Thermal comfort - design|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc7.2|Thermal comfort - verification|1\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc8.1|Daylight and views - daylight 75% of spaces|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'EQc8.2|Daylight and views - views for 90% of spaces|0\\t\\t\\t\\t\\t\\t/ 1',\n", + " 'IDc1|Innovation in design|+\\r\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t4',\n", + " 'IDc2|LEED Accredited Professional|+\\r\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t1']" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "leed_url = 'http://www.usgbc.org/projects/human-agricultural-biosciences-bldg-i?view=scorecard'\n", + "leed_req = urllib.request.Request(leed_url, headers={'User-Agent': 'Mozilla/5.0'})\n", + "leed_content = urllib.request.urlopen(leed_req).read()\n", + "leed_soup = BeautifulSoup(leed_content)\n", + "items = leed_soup.find_all( \"ul\", class_=\"sh-content\")\n", + "items_list = []\n", + "for item in items:\n", + " scores = item.find_all(\"li\")\n", + " for score in scores:\n", + " items_list.append(score.get_text(\"|\", strip=True))\n", + "\n", + "items_list\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[SSc1,\n", + " SSc2,\n", + " SSc3,\n", + " SSc4.1,\n", + " SSc4.2,\n", + " SSc4.3,\n", + " SSc4.4,\n", + " SSc5.1,\n", + " 
SSc5.2,\n", + " SSc6.1,\n", + " SSc6.2,\n", + " SSc7.1,\n", + " SSc7.2,\n", + " SSc8,\n", + " WEc1.1,\n", + " WEc1.2,\n", + " WEc2,\n", + " WEc3.1,\n", + " WEc3.2,\n", + " EAc1,\n", + " EAc2,\n", + " EAc3,\n", + " EAc4,\n", + " EAc5,\n", + " EAc6,\n", + " MRc1.1,\n", + " MRc1.2,\n", + " MRc1.3,\n", + " MRc2.1,\n", + " MRc2.2,\n", + " MRc3.1,\n", + " MRc3.2,\n", + " MRc4.1,\n", + " MRc4.2,\n", + " MRc5.1,\n", + " MRc5.2,\n", + " MRc6,\n", + " MRc7,\n", + " EQc1,\n", + " EQc2,\n", + " EQc3.1,\n", + " EQc3.2,\n", + " EQc4.1,\n", + " EQc4.2,\n", + " EQc4.3,\n", + " EQc4.4,\n", + " EQc5,\n", + " EQc6.1,\n", + " EQc6.2,\n", + " EQc7.1,\n", + " EQc7.2,\n", + " EQc8.1,\n", + " EQc8.2,\n", + " IDc1,\n", + " IDc2]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "leed_url = 'http://www.usgbc.org/?q=projectscorecard/0010402551'\n", + "leed_req = urllib.request.Request(leed_url, headers={'User-Agent': 'Mozilla/5.0'})\n", + "leed_content = urllib.request.urlopen(leed_req).read()\n", + "leed_soup = BeautifulSoup(leed_content)\n", + "ids = leed_soup.find_all( \"td\", class_=\"credit-id\")\n", + "names = leed_soup.find_all( \"td\", class_=\"credit-name\")\n", + "points = leed_soup.find_all( \"td\", class_=\"point possible\")\n", + "# items_list = []\n", + "# for item in items:\n", + "# scores = item.find_all(\"li\")\n", + "# for score in scores:\n", + "# items_list.append(score.get_text(\"|\", strip=True))\n", + "\n", + "# items_list\n", + "ids" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'1 / 1'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "points[0].get_text()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "url = 
'http://rosettacode.org/wiki/Hailstone_sequence'\n", + "req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})\n", + "content = urllib.request.urlopen(req).read()\n", + "soup = BeautifulSoup(content)\n", + "code = soup.find_all( \"pre\", class_=\"highlighted_source\")\n", + "\n", + "# items_list = []\n", + "# for item in code:\n", + "# scores = item.find_all(\"pli\")\n", + "# for score in scores:\n", + "# items_list.append(score.get_text(\"|\", strip=True))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'abap'" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code[0]['class'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\xa0CLASS lcl_hailstone DEFINITION. PUBLIC SECTION. TYPES: tty_sequence TYPE STANDARD TABLE OF i WITH NON-UNIQUE EMPTY KEY, BEGIN OF ty_seq_len, start TYPE i, len TYPE i, END OF ty_seq_len, tty_seq_len TYPE HASHED TABLE OF ty_seq_len WITH UNIQUE KEY start.\\xa0 CLASS-METHODS: get_next IMPORTING n TYPE i RETURNING VALUE(r_next_hailstone_num) TYPE i,\\xa0 get_sequence IMPORTING start TYPE i RETURNING VALUE(r_sequence) TYPE tty_sequence,\\xa0 get_longest_sequence_upto IMPORTING limit TYPE i RETURNING VALUE(r_longest_sequence) TYPE ty_seq_len.\\xa0 PRIVATE SECTION. TYPES: BEGIN OF ty_seq, start TYPE i, seq TYPE tty_sequence, END OF ty_seq. CLASS-DATA: sequence_buffer TYPE HASHED TABLE OF ty_seq WITH UNIQUE KEY start.ENDCLASS.\\xa0CLASS lcl_hailstone IMPLEMENTATION. METHOD get_next. r_next_hailstone_num = COND #( WHEN n MOD 2 = 0 THEN n / 2 ELSE ( 3 * n ) + 1 ). ENDMETHOD.\\xa0 METHOD get_sequence. INSERT start INTO TABLE r_sequence. IF start = 1. RETURN. ENDIF.\\xa0 READ TABLE sequence_buffer ASSIGNING FIELD-SYMBOL() WITH TABLE KEY start = start. 
IF sy-subrc = 0. INSERT LINES OF -seq INTO TABLE r_sequence. ELSE. DATA(seq) = get_sequence( get_next( start ) ). INSERT LINES OF seq INTO TABLE r_sequence. INSERT VALUE ty_seq( start = start seq = seq ) INTO TABLE sequence_buffer. ENDIF. ENDMETHOD.\\xa0 METHOD get_longest_sequence_upto. DATA: max_seq TYPE ty_seq_len, act_seq TYPE ty_seq_len.\\xa0 DO limit TIMES. act_seq-len = lines( get_sequence( sy-index ) ).\\xa0 IF act_seq-len > max_seq-len. max_seq-len = act_seq-len. max_seq-start = sy-index. ENDIF. ENDDO.\\xa0 r_longest_sequence = max_seq. ENDMETHOD.ENDCLASS.\\xa0START-OF-SELECTION. cl_demo_output=>begin_section( |Hailstone sequence of 27 is: | ). cl_demo_output=>write( REDUCE string( INIT result = `` FOR item IN lcl_hailstone=>get_sequence( 27 ) NEXT result = |{ result } { item }| ) ). cl_demo_output=>write( |With length: { lines( lcl_hailstone=>get_sequence( 27 ) ) }| ). cl_demo_output=>begin_section( |Longest hailstone sequence upto 100k| ). cl_demo_output=>write( lcl_hailstone=>get_longest_sequence_upto( 100000 ) ). 
cl_demo_output=>display( ).\\xa0'" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code[0].get_text()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'ada'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code[5]['class'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'package Hailstones is type Integer_Sequence is array(Positive range <>) of Integer; function Create_Sequence (N\\xa0: Positive) return Integer_Sequence;end Hailstones;'" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code[3].get_text()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fake_list = [[12, 13], [15, 16]]\n", + "fake_list2 = [[12, 1], [15, 2]]" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df = pd.DataFrame(fake_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
01213
11516
0121
1152
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 12 13\n", + "1 15 16\n", + "0 12 1\n", + "1 15 2" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.append(fake_list2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/classification_accuracy_by_dataframe_size.ipynb b/classification_accuracy_by_dataframe_size.ipynb new file mode 100644 index 0000000..f801e9b --- /dev/null +++ b/classification_accuracy_by_dataframe_size.ipynb @@ -0,0 +1,337 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from scraper import pipeline_runner\n", + "from feature_vectors import *\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Small Dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Only using CountVectorizer, no additional regex feature vectors" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_50x10 = pd.read_pickle('scraper_50x10.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_mnb = pipeline_runner(df_50x10, 'Multinomial')\n", + "df_knn = pipeline_runner(df_50x10, 'KNeighbors')\n", + "df_forest = pipeline_runner(df_50x10, 'Forest')" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df_mnb: (0.89317507418397624, 0.51333333333333331)\n", + "df_knn: (0.49406528189910981, 0.31777777777777777)\n", + "df_forest: (0.99183976261127593, 0.62666666666666671)\n" + ] + } + ], + "source": [ + "print(\"df_mnb: {}\".format(df_mnb))\n", + "print(\"df_knn: {}\".format(df_knn))\n", + "print(\"df_forest: {}\".format(df_forest))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "###Getting low scores. Random Forest is the most accurate with 0.62 mean score. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Larger Dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Still no additional feature vectorizers" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#see scraper.py for how the 500x100 dataframe was pulled from RosettaCode. 
\n", + "df_500x100 = pd.read_pickle('scraper_500x100.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_mnb_500 = pipeline_runner(df_500x100, 'Multinomial')\n", + "df_knn_500 = pipeline_runner(df_500x100, 'KNeighbors')\n", + "df_forest_500 = pipeline_runner(df_500x100, 'Forest')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df_mnb_500: (0.80047166883882581, 0.62267311988086371)\n", + "df_knn_500: (0.60628064295910133, 0.40171258376768426)\n", + "df_forest_500: (0.98907714267982372, 0.72040208488458679)\n" + ] + } + ], + "source": [ + "print(\"df_mnb_500: {}\".format(df_mnb_500))\n", + "print(\"df_knn_500: {}\".format(df_knn_500))\n", + "print(\"df_forest_500: {}\".format(df_forest_500))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Random Forest, again, produces the best mean accuracy. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Adding additional feature vectors to improve score accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from sklearn.naive_bayes import MultinomialNB" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = df_500x100.loc[:, 1]\n", + "y = df_500x100.loc[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y) " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "language_featurizer = make_union(CountVectorizer(),\n", + " FunctionFeaturizer(longest_run_of_capital_letters_feature,\n", + " longest_run_of_character_feature,\n", + " percent_character_combinations,\n", + " percent_character_feature,\n", + " binary_character_combinations))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<21485x67251 sparse matrix of type ''\n", + "\twith 833585 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "language_featurizer.fit_transform(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pipe = make_pipeline(language_featurizer, RandomForestClassifier())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + 
"outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(steps=[('featureunion', FeatureUnion(n_jobs=1,\n", + " transformer_list=[('countvectorizer', CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n", + " dtype=, encoding='utf-8', input='content',\n", + " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", + " ...n_jobs=1,\n", + " oob_score=False, random_state=None, verbose=0,\n", + " warm_start=False))])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.76954579300074455" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.score(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "The test mean accuracy is 0.76 using only the data pulled from RosettaCode (not using the test data from the file)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Score accuracy imporved 0.05 by adding additional feature vectors. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/estimator_mnb_vs_bernoulli.ipynb b/estimator_mnb_vs_bernoulli.ipynb new file mode 100644 index 0000000..6c8163d --- /dev/null +++ b/estimator_mnb_vs_bernoulli.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from scraper import scrape_clean_cut\n", + "from scraper import pipeline_runner" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Dataframe imported 500 example tasks from Rosetta Code and filtered only the languages with 60 or more total examples" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df = pd.read_pickle('scraper_500x100.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.83517375771354341, 0.59132976132489046)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#MultinomialNB\n", + "pipeline_runner(df, 'Multinomial')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.25040597596622283, 0.19142717973697029)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Bernoulli\n", + "pipeline_runner(df, 'Bernoulli')" + ] + }, + { + "cell_type": "markdown", + 
"""Hand-written text featurizers for the programming-language classifier.

Each ``*_feature`` / ``*_combinations`` function maps one source-code string
to a flat list of numbers.  ``FunctionFeaturizer`` concatenates those lists
and ``make_pipe`` joins them with a bag-of-words ``CountVectorizer`` in front
of an arbitrary estimator.
"""
# NOTE: the notebooks use ``from feature_vectors import *`` and rely on the
# names imported here (train_test_split, classification_report, the
# classifiers, ...), so apparently unused imports are kept on purpose.
import csv
import re
import numpy as np
import random
import pickle
import itertools
from collections import Counter

from sklearn.pipeline import make_pipeline, make_union
from sklearn.base import TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn only ships the legacy module
    from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier


# One pattern per character; each matches a consecutive run of that character.
_RUN_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r"~+", r"\.+", r"\|+", r"\:+", r";+", r"\$+", r"\(+", r"\)+", r"\-+",
    r" +", r"\t+",
))

# Characters whose relative frequency is used as a feature.
_PERCENT_CHARS = (
    ".", "|", "$", "_", "!", "#", "@", "%", "^", "&", "*", "(", ")",
    "+", "=", "{", "}", "[", "]", ":", ";", "?", "<", ">",
)

# Multi-character constructs shared by the "percent" and "binary" featurizers.
# The original lists were missing the comma between r"^\#" and r"^def", which
# silently fused them into the never-matching pattern r"^\#^def".
# NOTE(review): the patterns are compiled without re.MULTILINE, so "^" only
# matches at the very start of the text -- confirm that is intended.
# NOTE(review): r"^@w+" looks like a typo for r"^@\w+"; left unchanged.
_COMBINATION_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r"==", r"\->+", r":\-+", r"\+=", r"\n\t+if", r"\n+", r"\n\$+", r"\n\t+",
    r"\ndef", r"%{", r"~=", r"\|\|",
    r"\n\t+\(\w+", r"^\$", r"\.=", r"\{:", r"===", r"!==", r"\*\w+", r"__",
    r"__name__", r"__main__", r"^\#",
    r"^def", r"^@w+", r"^@end", r"^begin", r"^end", r"^functions",
    r"^loop\n", r"^procedure", r"^func",
    r"\+\+",
))


def longest_run_of_capital_letters_feature(text):
    """Return ``[n]`` where n is the length of the longest run of capitals.

    Only ASCII ``A-Z`` counts.  The value is wrapped in a one-element list so
    it can be chained with the other featurizers; ``[0]`` when *text* holds no
    capital letter.
    """
    runs = re.findall(r"[A-Z]+", text)
    if not runs:
        return [0]
    return [max(len(run) for run in runs)]


def longest_run_of_character_feature(text):
    """Return the longest consecutive run of each tracked character.

    The result has one entry per pattern in ``_RUN_PATTERNS`` (``~ . | : ; $
    ( ) -``, space and tab, in that order); an entry is 0 when the character
    does not occur in *text*.
    """
    longest = []
    for pattern in _RUN_PATTERNS:
        runs = pattern.findall(text)
        longest.append(max(len(run) for run in runs) if runs else 0)
    return longest


def percent_character_feature(text):
    """Return the fraction of *text* made up by each tracked character.

    One entry per character in ``_PERCENT_CHARS``.  Empty *text* yields all
    zeros instead of raising ``ZeroDivisionError``.
    """
    total = len(text)
    if not total:
        return [0] * len(_PERCENT_CHARS)
    return [text.count(char) / total for char in _PERCENT_CHARS]


def percent_character_combinations(text):
    """Return, per combination pattern, match count divided by ``len(text)``.

    One entry per pattern in ``_COMBINATION_PATTERNS``; 0 when the pattern
    does not match.
    """
    total = len(text)
    counts = (len(pattern.findall(text)) for pattern in _COMBINATION_PATTERNS)
    # A non-zero count implies non-empty text, so the division is safe.
    return [count / total if count else 0 for count in counts]


def binary_character_combinations(text):
    """Return 1/0 per combination pattern: does it occur in *text* at all?

    One entry per pattern in ``_COMBINATION_PATTERNS``.
    """
    return [1 if pattern.search(text) else 0
            for pattern in _COMBINATION_PATTERNS]


def make_pipe(estimator):
    """Build the full classification pipeline around *estimator*.

    *estimator* must be an estimator instance, e.g.
    ``RandomForestClassifier()``.  Its input is the union of a bag-of-words
    ``CountVectorizer`` and the hand-written featurizers of this module.
    """
    language_featurizer = make_union(
        CountVectorizer(),
        FunctionFeaturizer(longest_run_of_capital_letters_feature,
                           longest_run_of_character_feature,
                           percent_character_combinations,
                           percent_character_feature,
                           binary_character_combinations))

    return make_pipeline(language_featurizer, estimator)


class FunctionFeaturizer(TransformerMixin):
    """Transformer that applies plain functions to every sample.

    Each featurizer is called with one sample and must return an iterable of
    numbers; the results are concatenated, in order, into one feature vector
    per sample.
    """

    def __init__(self, *featurizers):
        self.featurizers = featurizers

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is no state to learn."""
        return self

    def transform(self, X):
        """Return a list with one flat feature list per sample in *X*."""
        return [
            list(itertools.chain.from_iterable(
                featurize(sample) for featurize in self.featurizers))
            for sample in X
        ]
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0adawith Ada.Text_IO; procedure Integers_In_Englis...
1algol68PROC number words = (INT n)STRING:( # returns...
2algol68MODE EXCEPTION = STRUCT(STRING name, PROC VOID...
4autohotkeyLoop { ; TEST ...
5awk# syntax: GAWK -f NUMBER_NAMES.AWKBEGIN { ...
6qbasicDECLARE FUNCTION int2Text$ (number AS LONG) 's...
8c#include <stdio.h>#include <string.h> const ch...
9cpp#include <string>#include <iostream>using std:...
10csharpusing System; class NumberNamer { static re...
11clojure(clojure.pprint/cl-format nil \"~R\" 1234)=> \"on...
12coffeescriptspell_integer = (n) -> tens = [null, null, \"...
13lisp(format nil \"~R\" 1234)=> \"one thousand two hun...
14dimport std.stdio, std.array, std.algorithm, st...
15euphoriafunction abs(atom i) if i < 0 then r...
17fortranprogram spell  implicit none integer :: e i...
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 ada with Ada.Text_IO; procedure Integers_In_Englis...\n", + "1 algol68 PROC number words = (INT n)STRING:( # returns...\n", + "2 algol68 MODE EXCEPTION = STRUCT(STRING name, PROC VOID...\n", + "4 autohotkey Loop { ; TEST ...\n", + "5 awk  # syntax: GAWK -f NUMBER_NAMES.AWKBEGIN { ...\n", + "6 qbasic DECLARE FUNCTION int2Text$ (number AS LONG) 's...\n", + "8 c #include #include  const ch...\n", + "9 cpp #include #include using std:...\n", + "10 csharp using System; class NumberNamer { static re...\n", + "11 clojure (clojure.pprint/cl-format nil \"~R\" 1234)=> \"on...\n", + "12 coffeescript  spell_integer = (n) -> tens = [null, null, \"...\n", + "13 lisp (format nil \"~R\" 1234)=> \"one thousand two hun...\n", + "14 d import std.stdio, std.array, std.algorithm, st...\n", + "15 euphoria function abs(atom i) if i < 0 then r...\n", + "17 fortran program spell  implicit none integer :: e i..." + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_700x200.head(15)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "y = df_700x200.loc[:, 0]\n", + "X = df_700x200.loc[:, 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###(2) Dataframe with only languages from test file (11 total languages)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The train data was scraped from Rosetta Code. \n", + "In total 700 example pages of code and only the languages in the test file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "filtered_df = pd.read_pickle('scraper_filtered_700x1.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "filtered_y_train = filtered_df.loc[:, 0]\n", + "filtered_X_train = filtered_df.loc[:, 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Testing only with dataset from Rosetta Code" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The purpose of testing first with all of the Rosetta Code data and languages, and not just the test languages, is to see if my estimator is overfitting the test file data. The test data, when I split my data using train_test_split, includes much more sample code compared to the 32 lines of code and 11 languages in the test file. " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "rc_X_train, rc_X_test, rc_y_train, rc_y_test = train_test_split(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6182965299684543" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rc_pipe_bayes = make_pipe(MultinomialNB())\n", + "rc_pipe_bayes.fit(rc_X_train, rc_y_train)\n", + "rc_pipe_bayes.score(rc_X_test, rc_y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.76148676450418329" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rc_pipe_tree = make_pipe(DecisionTreeClassifier())\n", + "rc_pipe_tree.fit(rc_X_train, rc_y_train)\n", + 
"rc_pipe_tree.score(rc_X_test, rc_y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7679330681662323" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rc_pipe_forest = make_pipe(RandomForestClassifier())\n", + "rc_pipe_forest.fit(rc_X_train, rc_y_train)\n", + "rc_pipe_forest.score(rc_X_test, rc_y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " ada 0.98 0.76 0.85 298\n", + " algol68 0.83 0.82 0.82 104\n", + " autohotkey 0.73 0.79 0.76 121\n", + " awk 0.83 0.72 0.77 98\n", + " bash 0.65 0.60 0.63 155\n", + " c 0.81 0.74 0.77 285\n", + " clojure 0.59 0.54 0.56 115\n", + " cobol 0.89 0.79 0.84 71\n", + "coffeescript 0.75 0.67 0.71 63\n", + " cpp 0.82 0.85 0.83 168\n", + " csharp 0.79 0.65 0.71 165\n", + " d 0.92 0.90 0.91 222\n", + " delphi 0.71 0.77 0.74 88\n", + " e 0.54 0.59 0.56 87\n", + " erlang 0.83 0.79 0.81 98\n", + " euphoria 0.67 0.72 0.69 46\n", + " fortran 0.83 0.86 0.85 117\n", + " fsharp 0.56 0.57 0.57 82\n", + " go 0.94 0.88 0.91 212\n", + " groovy 0.71 0.69 0.70 153\n", + " haskell 0.84 0.81 0.82 256\n", + " icon 0.83 0.89 0.86 105\n", + " j 0.90 0.66 0.76 512\n", + " java 0.66 0.66 0.66 166\n", + " java5 0.12 0.47 0.19 15\n", + " javascript 0.71 0.76 0.73 129\n", + " lb 0.76 0.78 0.77 67\n", + " lisp 0.75 0.71 0.73 334\n", + " lua 0.65 0.78 0.71 79\n", + " matlab 0.58 0.70 0.64 91\n", + " netrexx 0.85 0.97 0.90 59\n", + " objc 0.77 0.96 0.85 48\n", + " objeck 0.90 0.95 0.92 58\n", + " ocaml 0.76 0.79 0.77 192\n", + " oz 0.79 0.96 0.86 70\n", + " parigp 0.77 0.69 0.73 114\n", + " pascal 0.73 0.80 0.76 66\n", + " perl 0.78 0.65 0.71 289\n", + " perl6 0.65 0.68 0.66 191\n", + " php 0.70 
0.80 0.75 87\n", + " powershell 0.59 0.76 0.67 51\n", + " prolog 0.88 0.88 0.88 85\n", + " purebasic 0.66 0.92 0.77 90\n", + " python 0.82 0.79 0.81 325\n", + " qbasic 0.51 0.68 0.58 41\n", + " rexx 0.89 0.93 0.91 225\n", + " ruby 0.71 0.79 0.75 305\n", + " scala 0.70 0.86 0.77 114\n", + " scheme 0.59 0.83 0.69 69\n", + " smalltalk 0.62 0.84 0.71 57\n", + " tcl 0.89 0.95 0.92 229\n", + " vb 0.47 0.96 0.63 24\n", + "\n", + "avg / total 0.78 0.77 0.77 7291\n", + "\n" + ] + } + ], + "source": [ + "print((classification_report(rc_pipe_forest.predict(rc_X_test), rc_y_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The estimator poorly classifies coffeescript, java5, and e even with the additaionl built in feature vectorizers." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Testing with test samples given in the test folder." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "y_test = pd.read_pickle('test_y_values.pkl')\n", + "X_test = pd.read_pickle('test_X_values.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "y_test = y_test.loc[:, 1]\n", + "X_test = X_test.loc[:, 0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Estimating with Multinomial Bayes, Decision Tree, and Random Forest" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I used my entire data frame (all of my code examples scraped from Rosetta Code) as my training data. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5625" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe_mnb = make_pipe(MultinomialNB())\n", + "pipe_mnb.fit(X, y)\n", + "pipe_mnb.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.59375" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe_tree = make_pipe(DecisionTreeClassifier())\n", + "pipe_tree.fit(X, y)\n", + "pipe_tree.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.78125" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe_forest = make_pipe(RandomForestClassifier())\n", + "pipe_forest.fit(X, y)\n", + "pipe_forest.score(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Classification report to see which language is not well represented. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " ada 0.00 0.00 0.00 1\n", + " algol68 0.00 0.00 0.00 1\n", + " autohotkey 0.00 0.00 0.00 1\n", + " awk 0.00 0.00 0.00 1\n", + " clojure 1.00 0.80 0.89 5\n", + " fsharp 0.00 0.00 0.00 1\n", + " haskell 0.33 1.00 0.50 1\n", + " java 0.00 0.00 0.00 0\n", + " javascript 0.75 1.00 0.86 3\n", + " objc 0.00 0.00 0.00 1\n", + " ocaml 1.00 1.00 1.00 2\n", + " php 0.67 1.00 0.80 2\n", + " python 1.00 1.00 1.00 4\n", + " ruby 1.00 1.00 1.00 3\n", + " scala 0.50 1.00 0.67 1\n", + " scheme 1.00 1.00 1.00 3\n", + " tcl 1.00 1.00 1.00 2\n", + "\n", + "avg / total 0.73 0.78 0.74 32\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/trippshealy/iron_yard/week5/programming-language-classifier/.direnv/python-3.4.3/lib/python3.4/site-packages/sklearn/metrics/classification.py:958: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n", + "/Users/trippshealy/iron_yard/week5/programming-language-classifier/.direnv/python-3.4.3/lib/python3.4/site-packages/sklearn/metrics/classification.py:960: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.\n", + " 'recall', 'true', average, warn_for)\n" + ] + } + ], + "source": [ + "print((classification_report(pipe_forest.predict(X_test), y_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The precision and recall are worse than the Rosetta Code test sample because this test sample is much smaller, only 32 examples. When the esitmator gets an example wrong the relative weight of the wrong answer is much greater. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Using Dataframe 2: only includes languages in the test sample. " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.875" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe_filtered_bayes = make_pipe(MultinomialNB())\n", + "pipe_filtered_bayes.fit(filtered_X_train, filtered_y_train)\n", + "pipe_filtered_bayes.score(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The score significantly improves when only using the languages in the test sample. " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.84375" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe_filtered_tree = make_pipe(DecisionTreeClassifier())\n", + "pipe_filtered_tree.fit(filtered_X_train, filtered_y_train)\n", + "pipe_filtered_tree.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.84375" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe_filtered_forest = make_pipe(RandomForestClassifier())\n", + "pipe_filtered_forest.fit(filtered_X_train, filtered_y_train)\n", + "pipe_filtered_forest.score(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Classification report to see which language is not well represented" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score 
support\n", + "\n", + " clojure 0.75 0.60 0.67 5\n", + " haskell 0.67 0.67 0.67 3\n", + " java 0.00 0.00 0.00 0\n", + " javascript 1.00 0.80 0.89 5\n", + " ocaml 1.00 1.00 1.00 2\n", + " php 0.67 1.00 0.80 2\n", + " python 1.00 0.80 0.89 5\n", + " ruby 1.00 1.00 1.00 3\n", + " scala 1.00 1.00 1.00 2\n", + " scheme 1.00 1.00 1.00 3\n", + " tcl 1.00 1.00 1.00 2\n", + "\n", + "avg / total 0.91 0.84 0.87 32\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/trippshealy/iron_yard/week5/programming-language-classifier/.direnv/python-3.4.3/lib/python3.4/site-packages/sklearn/metrics/classification.py:960: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.\n", + " 'recall', 'true', average, warn_for)\n" + ] + } + ], + "source": [ + "print((classification_report(pipe_filtered_forest.predict(X_test), y_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "A huge improvement compared to the testing results using many languages. However, these results are less representative of a \"real world\" scenario. Mainly, because you may want to allow more than 11 languages. Because I knew what languages to expect, I constrained my fit data to better predict the test languages. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/initial_scrape_and_score.ipynb b/initial_scrape_and_score.ipynb new file mode 100644 index 0000000..d7b027c --- /dev/null +++ b/initial_scrape_and_score.ipynb @@ -0,0 +1,809 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Making the dataframe from RosettaCode using web scraper " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from scraper import scrape_and_clean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Using 50 Rosetta Code tasks (e.g. palindrome, hailstone sequence, random numbers)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df = scrape_and_clean(num_links=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0abapreport z_align no standard page header.start-o...
1adawith Ada.Characters.Latin_1; use Ada.Characte...
2algol68STRING nl = REPR 10;STRING text in list := \"Gi...
3autohotkeylines = (|$|$|$|$|$|$|$|$|$|$|$|Given$a$text$f...
4autoit; == If the given text is in an file, it will...
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 abap report z_align no standard page header.start-o...\n", + "1 ada with Ada.Characters.Latin_1; use Ada.Characte...\n", + "2 algol68 STRING nl = REPR 10;STRING text in list := \"Gi...\n", + "3 autohotkey lines = (|$|$|$|$|$|$|$|$|$|$|$|Given$a$text$f...\n", + "4 autoit  ; == If the given text is in an file, it will..." + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#split dataframe to labels and code \n", + "y = df.loc[:, 0]\n", + "X = df.loc[:, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ruby 82\n", + "python 76\n", + "j 76\n", + "tcl 73\n", + "c 65\n", + "lisp 61\n", + "haskell 57\n", + "rexx 55\n", + "html5 54\n", + "ada 54\n", + "perl6 52\n", + "d 52\n", + "perl 51\n", + "cpp 48\n", + "ocaml 40\n", + "go 40\n", + "java 39\n", + "scala 39\n", + "purebasic 36\n", + "autohotkey 36\n", + "groovy 32\n", + "csharp 31\n", + "fortran 31\n", + "lua 30\n", + "javascript 27\n", + "clojure 26\n", + "icon 26\n", + "php 25\n", + "algol68 24\n", + "parigp 23\n", + " ..\n", + "asm 4\n", + "xml 4\n", + "rsplus 3\n", + "modula2 3\n", + "cfm 3\n", + "cmake 3\n", + "vbnet 3\n", + "newlisp 3\n", + "pli 2\n", + "zxbasic 2\n", + "actionscript3 2\n", + "gml 2\n", + "locobasic 2\n", + "sas 2\n", + "lolcode 1\n", + "sql 1\n", + "dos 1\n", + "vim 1\n", + "freebasic 1\n", + "asymptote 1\n", + "gnuplot 1\n", + "html4strict 1\n", + "povray 1\n", + "io 1\n", + "abap 1\n", + "logtalk 1\n", + "visualfoxpro 1\n", + "make 1\n", + "bf 1\n", + "whitespace 1\n", + "dtype: int64" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#looking at the number of languages in the 
dataframe\n", + "y.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0 abap\n", + " 1 ada\n", + " 2 algol68\n", + " 3 autohotkey\n", + " 4 autoit\n", + " Name: 0, dtype: object, 0 report z_align no standard page header.start-o...\n", + " 1 with Ada.Characters.Latin_1; use Ada.Characte...\n", + " 2 STRING nl = REPR 10;STRING text in list := \"Gi...\n", + " 3 lines = (|$|$|$|$|$|$|$|$|$|$|$|Given$a$text$f...\n", + " 4  ; == If the given text is in an file, it will...\n", + " Name: 1, dtype: object)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y.head(),\\\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.naive_bayes import MultinomialNB\n", + "from sklearn.cross_validation import cross_val_score\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.feature_extraction.text import CountVectorizer" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(steps=[('bag_of_words', CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n", + " dtype=, encoding='utf-8', input='content',\n", + " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", + " ngram_range=(1, 1), preprocessor=None, stop_words=None,\n", + " strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n", + " tokenizer=None, vocabulary=None)), ('bayes', MultinomialNB(alpha=1.0, 
class_prior=None, fit_prior=True))])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nb_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('bayes', MultinomialNB())])\n", + "nb_pipe" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(steps=[('bag_of_words', CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n", + " dtype=, encoding='utf-8', input='content',\n", + " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", + " ngram_range=(1, 1), preprocessor=None, stop_words=None,\n", + " strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n", + " tokenizer=None, vocabulary=None)), ('bayes', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nb_pipe.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.82278481012658233, 0.44303797468354428)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nb_pipe.score(X_train, y_train),\\\n", + "nb_pipe.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['python'], \n", + " dtype='= 20)\n", + "new_df[0].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.80028129395218006, 0.51265822784810122)" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "##Re-testing with MultinomialNB\n", + "new_y = 
df.loc[:, 0]\n", + "new_X = df.loc[:, 1]\n", + "#splitting data\n", + "new_X_train, new_X_test, new_y_train, new_y_test = train_test_split(new_X, new_y)\n", + "#running pipe to vectorize and run Multinomial\n", + "new_nb_pipe = Pipeline([('bag_of_words', CountVectorizer()),\n", + " ('bayes', MultinomialNB())])\n", + "#fitting \n", + "new_nb_pipe.fit(new_X_train, new_y_train)\n", + "#checking score\n", + "new_nb_pipe.score(new_X_train, new_y_train),\\\n", + "new_nb_pipe.score(new_X_test, new_y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###A little better, test score increased by 10%!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Now on to testing with BIGGER dataframe. See the big_scrape_and_score notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Because including only the languages with the most example code improves the score, I changed/improved the function that creates the dataframe to now include a cutoff for number of languages. The cutoff value is now passed in as an argument. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " ###I also created a function that runs the pipeline. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/learning_beautifulsoup_scraper.ipynb b/learning_beautifulsoup_scraper.ipynb new file mode 100644 index 0000000..bf9ff8c --- /dev/null +++ b/learning_beautifulsoup_scraper.ipynb @@ -0,0 +1,4302 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from scraper import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tc = make_data(['Temperature_conversion'])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(65, 2)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tc.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(125, 2)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pn = make_data(['Perfect_numbers'])\n", + "pn.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [0, 1]\n", + "Index: []" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code_snippets = pd.DataFrame(columns=([0, 1]))\n", + "code_snippets" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0text: KtoC \\ n -- n\\t273.15 n:- ; : KtoF \\ n -- n\\...
1adawith Ada.Float_Text_IO, Ada.Text_IO; use Ada....
2textvoidshow(integer symbol, real temperature){ ...
3autohotkeyMsgBox, % \"Kelvin:`t`t 21.00 K`n\" . \"Ce...
4awk# syntax: AWK -f TEMPERATURE_CONVERSION.AWKBEG...
5awk# usage: gawk -f temperature_conversion.awk i...
6text10 REM TRANSLATION OF AWK VERSION20 INPUT \"KE...
7textdo print \"Kelvin degrees (>=0): \"; input K ...
8textREPEAT INPUT \"Kelvin degrees (>=0): \" KUNTIL...
9text( ( rational2fixedpoint = minus fixedpointn...
10c#include <stdio.h>#include <stdlib.h> double k...
11cpp#include <iostream>#include <iomanip> //-----...
12csharpusing System; namespace TemperatureConversion{...
13clojure(defn to-celsius [k] (- k 273.15))(defn to-fa...
14cobolIDENTIFICATION DIVISION. PROGRAM-...
15lisp(defun to-celsius (k) (- k 273.15))(defun t...
16ddouble kelvinToCelsius(in double k) pure nothr...
17delphiprogram Temperature; {$APPTYPE CONSOLE} uses ...
18erlang% Implemented by Arjun Sunel-module(temp_conv)...
19textinclude std/console.e atom Kwhile 1 do\\tK = p...
20textA1 : KelvinB1 : CelsiusC1 : FahrenheitD1 : Ran...
21text# convert from Kelvinநிரல்பாகம் கெல்வின்_இருந...
22fsharp// Define units of measure[<Measure>] type k[...
23fortranProgram Temperature implicit none  real :: k...
24gopackage main import ( \"fmt\" \"os\" \"str...
25haskellmain = do putStrLn \"Please enter temperature...
26uniconprocedure main(A) k := A[1] | 21.00 writ...
27jNB. Temp conversions are all linear polyno...
28jNB. Format matrix for printing & tag each ...
29javapublic class TemperatureConversion { public...
.........
35texttempConvert[t_] :=Grid[Transpose@{{\"K\", \"C\", \"...
36textП7\\t0\\t,\\t8\\t*\\tП8\\tИП7\\t9\\t*\\t5/\\t3\\t2\\t+\\tП9...
37ocamlfun KtoC n = n - 273.15;fun KtoF n = n * 1.8 -...
38netrexx/* NetRexx */options replace format comments j...
39textimport rdstdin, strutils, strfmt while true: ...
40objeckclass Temperature { function : Main(args : S...
41objc#import <Foundation/Foundation.h> int main(int...
42ocamllet print_temp s t = print_string s; print_...
43text: kelvinToCelsius { 273.15 - }: kelvinToFahren...
44parigpf(x)=[x,x-273.15,1.8*x-459.67,1.8*x]
45perlmy %scale = ( Celcius => { factor => 1 ...
46perl6while my $answer = prompt 'Temperature: ' { ...
47phperror_reporting(E_ALL & ~ ( E_NOTICE | E_WARNI...
48text(scl 2) (de convertKelvin (Kelvin) (for X ...
49text(convertKelvin 21.0)
50pli*process source attributes xref; /* PL/I *****...
51python>>> while True:\\tk = float(input('K ? '))\\tpri...
52python>>> toK = {'C': (lambda c: c + 273.15), ...
53text#lang racket(define (converter temp init final...
54rexx/*REXX program converts temperatures for a num...
55rubymodule TempConvert  FROM_TEMP_SCALE_TO_K = ...
56rubyTempConvert.kelvin_to_celsius 100 #=> -173.15T...
57text[loop]input \"Kelvin Degrees\";kelvinif kelvin <...
58scalaobject TemperatureConversion extends App {  d...
59text$ include \"seed7_05.s7i\"; include \"float.s7i\"...
60tclproc temps {k} { set c [expr {$k - 273.15}]...
61tclputs -nonewline \"Enter a temperature in K: \"fl...
62textinclude c:\\cxpl\\codes;real K, C, F, R;[ChOut(0...
63textK:=ask(0,\"Kelvin: \").toFloat();println(\"K %.2f...
64zxbasic10 REM Translation of traditional basic versio...
\n", + "

65 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 text : KtoC \\ n -- n\\t273.15 n:- ; : KtoF \\ n -- n\\...\n", + "1 ada with Ada.Float_Text_IO, Ada.Text_IO; use Ada....\n", + "2 text voidshow(integer symbol, real temperature){ ...\n", + "3 autohotkey MsgBox, % \"Kelvin:`t`t 21.00 K`n\" . \"Ce...\n", + "4 awk # syntax: AWK -f TEMPERATURE_CONVERSION.AWKBEG...\n", + "5 awk # usage: gawk -f temperature_conversion.awk i...\n", + "6 text  10 REM TRANSLATION OF AWK VERSION20 INPUT \"KE...\n", + "7 text  do print \"Kelvin degrees (>=0): \"; input K ...\n", + "8 text  REPEAT INPUT \"Kelvin degrees (>=0): \" KUNTIL...\n", + "9 text ( ( rational2fixedpoint = minus fixedpointn...\n", + "10 c #include #include  double k...\n", + "11 cpp  #include #include  //-----...\n", + "12 csharp using System; namespace TemperatureConversion{...\n", + "13 clojure (defn to-celsius [k] (- k 273.15))(defn to-fa...\n", + "14 cobol IDENTIFICATION DIVISION. PROGRAM-...\n", + "15 lisp  (defun to-celsius (k) (- k 273.15))(defun t...\n", + "16 d double kelvinToCelsius(in double k) pure nothr...\n", + "17 delphi  program Temperature; {$APPTYPE CONSOLE} uses ...\n", + "18 erlang % Implemented by Arjun Sunel-module(temp_conv)...\n", + "19 text  include std/console.e atom Kwhile 1 do\\tK = p...\n", + "20 text A1 : KelvinB1 : CelsiusC1 : FahrenheitD1 : Ran...\n", + "21 text  # convert from Kelvinநிரல்பாகம் கெல்வின்_இருந...\n", + "22 fsharp  // Define units of measure[] type k[...\n", + "23 fortran Program Temperature implicit none  real :: k...\n", + "24 go package main import ( \"fmt\" \"os\" \"str...\n", + "25 haskell  main = do putStrLn \"Please enter temperature...\n", + "26 unicon procedure main(A) k := A[1] | 21.00 writ...\n", + "27 j NB. Temp conversions are all linear polyno...\n", + "28 j NB. Format matrix for printing & tag each ...\n", + "29 java public class TemperatureConversion { public...\n", + ".. ... 
...\n", + "35 text tempConvert[t_] :=Grid[Transpose@{{\"K\", \"C\", \"...\n", + "36 text П7\\t0\\t,\\t8\\t*\\tП8\\tИП7\\t9\\t*\\t5/\\t3\\t2\\t+\\tП9...\n", + "37 ocaml fun KtoC n = n - 273.15;fun KtoF n = n * 1.8 -...\n", + "38 netrexx /* NetRexx */options replace format comments j...\n", + "39 text import rdstdin, strutils, strfmt while true: ...\n", + "40 objeck  class Temperature { function : Main(args : S...\n", + "41 objc #import  int main(int...\n", + "42 ocaml  let print_temp s t = print_string s; print_...\n", + "43 text : kelvinToCelsius { 273.15 - }: kelvinToFahren...\n", + "44 parigp f(x)=[x,x-273.15,1.8*x-459.67,1.8*x]\n", + "45 perl my %scale = ( Celcius => { factor => 1 ...\n", + "46 perl6 while my $answer = prompt 'Temperature: ' { ...\n", + "47 php error_reporting(E_ALL & ~ ( E_NOTICE | E_WARNI...\n", + "48 text (scl 2) (de convertKelvin (Kelvin) (for X ...\n", + "49 text (convertKelvin 21.0)\n", + "50 pli *process source attributes xref; /* PL/I *****...\n", + "51 python >>> while True:\\tk = float(input('K ? 
'))\\tpri...\n", + "52 python >>> toK = {'C': (lambda c: c + 273.15), ...\n", + "53 text #lang racket(define (converter temp init final...\n", + "54 rexx /*REXX program converts temperatures for a num...\n", + "55 ruby module TempConvert  FROM_TEMP_SCALE_TO_K = ...\n", + "56 ruby TempConvert.kelvin_to_celsius 100 #=> -173.15T...\n", + "57 text [loop]input \"Kelvin Degrees\";kelvinif kelvin <...\n", + "58 scala object TemperatureConversion extends App {  d...\n", + "59 text $ include \"seed7_05.s7i\"; include \"float.s7i\"...\n", + "60 tcl proc temps {k} { set c [expr {$k - 273.15}]...\n", + "61 tcl puts -nonewline \"Enter a temperature in K: \"fl...\n", + "62 text include c:\\cxpl\\codes;real K, C, F, R;[ChOut(0...\n", + "63 text K:=ask(0,\"Kelvin: \").toFloat();println(\"K %.2f...\n", + "64 zxbasic 10 REM Translation of traditional basic versio...\n", + "\n", + "[65 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code_snippets.append(tc)." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(125, 2)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code_snippets.append(pn).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "url = 'http://rosettacode.org/wiki/Hailstone_sequence'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0abapCLASS lcl_hailstone DEFINITION. PUBLIC SECTI...
1lisp(defun hailstone (len) (loop for x = len ...
2adawith Ada.Text_IO; use Ada.Text_IO;procedure ha...
3adapackage Hailstones is type Integer_Sequence ...
4adapackage body Hailstones is function Create_S...
5adawith Ada.Text_IO;with Hailstones; procedure Ma...
6textvoidprint_hailstone(integer h){ list l;  ...
7algol68MODE LINT = # LONG ... # INT; PROC hailstone =...
8textseq←hailstone n;next⍝ Returns the hailstone se...
9text5↑hailstone 2727 82 41 124 62 ¯5↑hailstone 27...
10autohotkey; Submitted by MasterFocus --- http://tiny.cc/...
11autoit$Hail = Hailstone(27)ConsoleWrite(\"Sequence-L...
12awk#!/usr/bin/awk -ffunction hailstone(v, verbos...
13text10 HOME 100 N = 27110 GOSUB 400\"HAILSTONE120 D...
14textseqlen% = FNhailstone(27, TRUE) PRI...
15lbprint \"Part 1: Create a routine to generate th...
16textfunction Hailstone(sys *n)'=================...
17purebasicNewList Hailstones.i() ; Make a linked list to...
18textprint \"Part 1: Create a routine to generate th...
19dos@echo offsetlocal enabledelayedexpansionif \"%1...
20dos>hailstone.cmd 2020 10 5 16 8 4 2 1
21text&>:.:1-| >3*^ @ |%2: < V>2/>+
22text( ( hailstone = L len .  !arg:?L ...
23text>,[ [ ----------[ >>>[>>>...
24text27111
25texthailstone = { num | sequence = [num] while {...
26textblsq ) 27{^^^^2.%{3.*1.+}\\/{2./}\\/ie}{1!=}w!b...
27c#include <stdio.h>#include <stdlib.h> int hail...
28c#include <stdio.h> #define N 10000000#define C...
29csharpusing System;using System.Collections.Generic;...
.........
139pythondef hailstone(n): seq = [n] while n>1: ...
140text### PART 1:makeHailstone <- function(n){ hseq...
141text#lang racket (define hailstone (let ([t (mak...
142rexx/*REXX pgm tests a number and a range for hail...
143rexx/*REXX pgm tests a number and a range for hail...
144rubydef hailstone n seq = [n] until n == 1 n ...
145rubymodule Hailstone ListNode = Struct.new(:value...
146textuse std::vec::Vec; fn hailstone(mut n : int) -...
147sas* Create a routine to generate the hailstone ...
148scalaobject HailstoneSequence extends App { def ha...
149scheme(define (collatz n)(if (= n 1) '(1)(cons n (co...
150textfunction x=hailstone(n) // iterative defini...
151text$ include \"seed7_05.s7i\"; const func array int...
152rubyfunc hailstone(n) { var a = [n]; while (...
153smalltalkObject subclass: Sequences [ Sequences class ...
154smalltalk|r|r := Sequences hailstone: 27. \"hailstone '...
155textfunc hailstone(var n:Int) -> [Int] {  var ...
156tclproc hailstone n { while 1 {\\tlappend seq $...
157textprompt NN→M: 0→X: 1→LWhile L=1X+1→XDisp MIf M=...
158textprompt N0→A:0→Bfor(I,1,N)I→M: 0→X: 1→LWhile L=...
159text@(do (defun hailstone (n) (cons n ...
160bash#!/bin/bash# seq is the array genereated by ha...
161bash# Outputs a hailstone sequence from $1, with o...
162text# Outputs a hailstone sequence from !:1, with ...
163text#import std#import nat hail = @iNC ~&h~=1->x ^...
164vbOption ExplicitDim flag As Boolean ' true to p...
165vbnetModule HailstoneSequence Sub Main() ...
166textinclude c:\\cxpl\\codes; \\intrinsic 'code' decl...
167textfcn collatz(n,z=L()){ z.append(n); if(n==1) re...
168text[2..0d100_000].pump(Void, // loop n from 2 to...
\n", + "

169 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 abap  CLASS lcl_hailstone DEFINITION. PUBLIC SECTI...\n", + "1 lisp (defun hailstone (len) (loop for x = len ...\n", + "2 ada with Ada.Text_IO; use Ada.Text_IO;procedure ha...\n", + "3 ada package Hailstones is type Integer_Sequence ...\n", + "4 ada package body Hailstones is function Create_S...\n", + "5 ada with Ada.Text_IO;with Hailstones; procedure Ma...\n", + "6 text voidprint_hailstone(integer h){ list l;  ...\n", + "7 algol68 MODE LINT = # LONG ... # INT; PROC hailstone =...\n", + "8 text seq←hailstone n;next⍝ Returns the hailstone se...\n", + "9 text 5↑hailstone 2727 82 41 124 62 ¯5↑hailstone 27...\n", + "10 autohotkey ; Submitted by MasterFocus --- http://tiny.cc/...\n", + "11 autoit  $Hail = Hailstone(27)ConsoleWrite(\"Sequence-L...\n", + "12 awk  #!/usr/bin/awk -ffunction hailstone(v, verbos...\n", + "13 text 10 HOME 100 N = 27110 GOSUB 400\"HAILSTONE120 D...\n", + "14 text seqlen% = FNhailstone(27, TRUE) PRI...\n", + "15 lb print \"Part 1: Create a routine to generate th...\n", + "16 text   function Hailstone(sys *n)'=================...\n", + "17 purebasic NewList Hailstones.i() ; Make a linked list to...\n", + "18 text print \"Part 1: Create a routine to generate th...\n", + "19 dos @echo offsetlocal enabledelayedexpansionif \"%1...\n", + "20 dos >hailstone.cmd 2020 10 5 16 8 4 2 1\n", + "21 text &>:.:1-| >3*^ @ |%2: < V>2/>+ \n", + "22 text ( ( hailstone = L len .  !arg:?L ...\n", + "23 text >,[ [ ----------[ >>>[>>>...\n", + "24 text 27111\n", + "25 text hailstone = { num | sequence = [num] while {...\n", + "26 text  blsq ) 27{^^^^2.%{3.*1.+}\\/{2./}\\/ie}{1!=}w!b...\n", + "27 c #include #include  int hail...\n", + "28 c #include  #define N 10000000#define C...\n", + "29 csharp using System;using System.Collections.Generic;...\n", + ".. ... 
...\n", + "139 python def hailstone(n): seq = [n] while n>1: ...\n", + "140 text ### PART 1:makeHailstone <- function(n){ hseq...\n", + "141 text  #lang racket (define hailstone (let ([t (mak...\n", + "142 rexx /*REXX pgm tests a number and a range for hail...\n", + "143 rexx /*REXX pgm tests a number and a range for hail...\n", + "144 ruby def hailstone n seq = [n] until n == 1 n ...\n", + "145 ruby module Hailstone ListNode = Struct.new(:value...\n", + "146 text use std::vec::Vec; fn hailstone(mut n : int) -...\n", + "147 sas  * Create a routine to generate the hailstone ...\n", + "148 scala object HailstoneSequence extends App { def ha...\n", + "149 scheme (define (collatz n)(if (= n 1) '(1)(cons n (co...\n", + "150 text function x=hailstone(n) // iterative defini...\n", + "151 text $ include \"seed7_05.s7i\"; const func array int...\n", + "152 ruby func hailstone(n) { var a = [n]; while (...\n", + "153 smalltalk Object subclass: Sequences [ Sequences class ...\n", + "154 smalltalk |r|r := Sequences hailstone: 27. 
\"hailstone '...\n", + "155 text  func hailstone(var n:Int) -> [Int] {  var ...\n", + "156 tcl proc hailstone n { while 1 {\\tlappend seq $...\n", + "157 text prompt NN→M: 0→X: 1→LWhile L=1X+1→XDisp MIf M=...\n", + "158 text prompt N0→A:0→Bfor(I,1,N)I→M: 0→X: 1→LWhile L=...\n", + "159 text @(do (defun hailstone (n) (cons n ...\n", + "160 bash #!/bin/bash# seq is the array genereated by ha...\n", + "161 bash # Outputs a hailstone sequence from $1, with o...\n", + "162 text # Outputs a hailstone sequence from !:1, with ...\n", + "163 text #import std#import nat hail = @iNC ~&h~=1->x ^...\n", + "164 vb Option ExplicitDim flag As Boolean ' true to p...\n", + "165 vbnet Module HailstoneSequence Sub Main() ...\n", + "166 text include c:\\cxpl\\codes; \\intrinsic 'code' decl...\n", + "167 text fcn collatz(n,z=L()){ z.append(n); if(n==1) re...\n", + "168 text [2..0d100_000].pump(Void, // loop n from 2 to...\n", + "\n", + "[169 rows x 2 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code = scrape_data(url)\n", + "codes = pull_code_from_soup(code)\n", + "pd.DataFrame(codes)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "hs = make_data(['Hailstone_sequence'])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tc = make_data(['Temperature_conversion'])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((169, 2), (65, 2))" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hs.shape, tc.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "hs = hs.append(tc)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pn = make_data(['Perfect_numbers'])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(359, 2)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hs.append(pn).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "pn = make_data(['Perfect_numbers', 'Temperature_conversion', 'Hailstone_sequence'])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(359, 2)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pn.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['24 game', '24 game/Solve9', '9 billion names of God the integer', '99 Bottles of BeerA', 'A+B', 'ABC Problem', 'Abstract type', 'Abundant, deficient and perfect number classifications', 'Accumulator factory', 'Ackermann function', 'Active Directory/Connect', 'Active Directory/Search for a user', 'Active object', 'Add a variable to a class instance at runtime', 'Address of a variable', 'AKS test for primes', 'Align columns', 'Aliquot sequence classifications', 'Almost prime', 'Amb', 'Amicable pairs', 'Anagrams', 'Anagrams/Deranged anagrams', 'Animate a pendulum', 'Animation', 'Anonymous recursion', 'Append a record to the end of a text file', 'Apply a callback to an array', 'Arbitrary-precision integers (included)', 'Arena storage pool', 'Arithmetic evaluation', 'Arithmetic-geometric mean', 'Arithmetic-geometric mean/Calculate Pi', 
'Arithmetic/Complex', 'Arithmetic/Integer', 'Arithmetic/Rational', 'Array concatenation', 'Arrays', 'Assertions', 'Associative array/Creation', 'Associative array/Iteration', 'Atomic updates', 'Average loop length', 'Averages/Arithmetic mean', 'Averages/Mean angle', 'Averages/Mean time of day', 'Averages/Median', 'Averages/Mode', 'Averages/Pythagorean means', 'Averages/Root mean square', 'Averages/Simple moving averageB', 'Balanced brackets', 'Balanced ternary', \"Benford's law\", 'Bernoulli numbers', 'Best shuffle', 'Binary digits', 'Binary search', 'Binary strings', 'Bitcoin/address validation', 'Bitcoin/public point to address', 'Bitmap', \"Bitmap/Bresenham's line algorithm\", 'Bitmap/Bézier curves/Cubic', 'Bitmap/Bézier curves/Quadratic', 'Bitmap/Flood fill', 'Bitmap/Histogram', 'Bitmap/Midpoint circle algorithm', 'Bitmap/PPM conversion through a pipe', 'Bitmap/Read a PPM file', 'Bitmap/Read an image through a pipe', 'Bitmap/Write a PPM file', 'Bitwise IO', 'Bitwise operations', 'Boolean values', 'Box the compass', 'Break OO privacy', 'Brownian tree', 'Bulls and cows', 'Bulls and cows/PlayerC', 'Caesar cipher', 'Calendar', 'Calendar - for \"REAL\" programmers', 'Call a foreign-language function', 'Call a function', 'Call a function in a shared library', 'Call an object method', 'Canny edge detector', 'Carmichael 3 strong pseudoprimes', 'Case-sensitivity of identifiers', 'Casting out nines', 'Catalan numbers', \"Catalan numbers/Pascal's triangle\", 'Catamorphism', 'Catmull–Clark subdivision surface', 'Character codes', 'Chat server', 'Check Machin-like formulas', 'Check that file exists', 'Checkpoint synchronization', 'Chinese remainder theorem', 'Cholesky decomposition', 'Circles of given radius through two points', 'Classes', 'Closest-pair problem', 'Closures/Value capture', 'Collections', 'Color of a screen pixel', 'Color quantization', 'Colour bars/Display', 'Colour pinstripe/Display', 'Colour pinstripe/Printer', 'Combinations', 'Combinations and 
permutations', 'Combinations with repetitions', 'Comma quibbling', 'Command-line arguments', 'Comments', \"Compare sorting algorithms' performance\", 'Compile-time calculation', 'Compound data type', 'Concurrent computing', 'Conditional structures', 'Conjugate transpose', 'Constrained genericity', 'Constrained random points on a circle', 'Continued fraction', 'Continued fraction/Arithmetic/Construct from rational number', 'Continued fraction/Arithmetic/G(matrix NG, Contined Fraction N)', 'Continued fraction/Arithmetic/G(matrix NG, Contined Fraction N1, Contined Fraction N2)', 'Convert decimal number to rational', \"Conway's Game of Life\", 'Copy a string', 'Count in factors', 'Count in octal', 'Count occurrences of a substring', 'Count the coins', 'CRC-32', 'Create a file', 'Create a file on magnetic tape', 'Create a two-dimensional array at runtime', 'Create an HTML table', 'Create an object at a given address', 'CSV data manipulation', 'CSV to HTML translation', 'Currying', 'Cut a rectangleD', 'Date format', 'Date manipulation', 'Day of the week', 'Deal cards for FreeCell', 'Death Star', 'Deconvolution/1D', 'Deconvolution/2D+', 'Deepcopy', 'Define a primitive data type', 'Delegates', 'Delete a file', 'Detect division by zero', 'Determine if a string is numeric', 'Determine if only one instance is running', 'Digital root', 'Digital root/Multiplicative digital root', \"Dinesman's multiple-dwelling problem\", 'Dining philosophers', 'Discordian date', 'Distributed programming', 'DNS query', 'Documentation', 'Dot product', 'Doubly-linked list/Definition', 'Doubly-linked list/Element definition', 'Doubly-linked list/Element insertion', 'Doubly-linked list/Traversal', 'Dragon curve', 'Draw a clock', 'Draw a cuboid', 'Draw a sphere', 'Dutch national flag problem', 'Dynamic variable namesE', 'Echo server', 'Element-wise operations', 'Empty directory', 'Empty program', 'Empty string', 'Enforced immutability', 'Entropy', 'Enumerations', 'Environment variables', 'Equilibrium 
index', 'Ethiopian multiplication', 'Euler method', \"Euler's sum of powers conjecture\", 'Evaluate binomial coefficients', 'Even or odd', 'Events', 'Evolutionary algorithm', 'Exceptions', 'Exceptions/Catch an exception thrown in a nested call', 'Executable library', 'Execute a Markov algorithm', 'Execute a system command', 'Execute Brain****', 'Execute HQ9+', 'Execute SNUSP', 'Exponentiation operator', 'Extend your language', 'Extensible prime generator', 'Extreme floating point valuesF', 'Factorial', 'Factors of a Mersenne number', 'Factors of an integer', 'Fast Fourier transform', 'Fibonacci n-step number sequences', 'Fibonacci sequence', 'Fibonacci word', 'Fibonacci word/fractal', 'File input/output', 'File modification time', 'File size', 'Filter', 'Find common directory path', 'Find largest left truncatable prime in a given base', 'Find limit of recursion', 'Find the last Sunday of each month', 'Find the missing permutation', 'First class environments', 'First-class functions', 'First-class functions/Use numbers analogously', 'Five weekends', 'FizzBuzz', 'Flatten a list', 'Flipping bits game', 'Flow-control structures', \"Floyd's triangle\", 'Forest fire', 'Fork', 'Formal power series', 'Formatted numeric output', 'Forward difference', 'Four bit adder', 'Fractal tree', 'Fractran', 'Function composition', 'Function definition', 'Function frequency', 'Function prototypeG', 'Galton box animation', 'Gamma function', 'Gaussian elimination', 'Generate Chess960 starting position', 'Generate lower case ASCII alphabet', 'Generator/Exponential', 'Generic swap', 'Globally replace text in several files', 'Go Fish', 'G cont.', 'Gray code', 'Grayscale image', 'Greatest common divisor', 'Greatest element of a list', 'Greatest subsequential sum', 'Greyscale bars/Display', 'Guess the number', 'Guess the number/With feedback', 'Guess the number/With feedback (player)', 'GUI component interaction', 'GUI enabling/disabling of controls', 'GUI/Maximum window dimensionsH', 
'Hailstone sequence', 'Hamming numbers', 'Handle a signal', 'Happy numbers', 'Harshad or Niven series', 'Hash from two arrays', 'Hash join', 'Haversine formula', 'Hello world/Graphical', 'Hello world/Line printer', 'Hello world/Newbie', 'Hello world/Newline omission', 'Hello world/Standard error', 'Hello world/Text', 'Hello world/Web server', 'Here document', 'Heronian triangles', 'Hickerson series of almost integers', 'Higher-order functions', 'History variables', 'Hofstadter Figure-Figure sequences', 'Hofstadter Q sequence', 'Hofstadter-Conway $10,000 sequence', 'Holidays related to Easter', 'Honeycombs', 'Horizontal sundial calculations', \"Horner's rule for polynomial evaluation\", 'Host introspection', 'Hostname', 'Hough transform', 'HTTP', 'HTTPS', 'HTTPS/Authenticated', 'HTTPS/Client-authenticated', 'Huffman codingI', 'I before E except after C', 'IBAN', 'Identity matrix', 'Image convolution', 'Image noise', 'Include a file', 'Increment a numerical string', 'Infinity', 'Inheritance/Multiple', 'Inheritance/Single', 'Input loop', 'Integer comparison', 'Integer overflow', 'Integer sequence', 'Interactive programming', 'Introspection', 'Inverted index', 'Inverted syntax', 'Iterated digits squaringJ', \"Jensen's Device\", 'JortSort', 'Josephus problem', 'Joystick position', 'JSON', 'Jump anywhereK', 'K-d tree', 'K-means++ clustering', 'Kaprekar numbers', 'Keyboard input/Flush the keyboard buffer', 'Keyboard input/Keypress check', 'Keyboard input/Obtain a Y or N response', 'Keyboard macros', 'Knapsack problem/0-1', 'Knapsack problem/Bounded', 'Knapsack problem/Continuous', 'Knapsack problem/Unbounded', \"Knight's tour\", 'Knuth shuffle', \"Knuth's algorithm SL\", \"Langton's ant\", 'Largest int from concatenated ints', 'Last Friday of each month', 'Last letter-first letter', 'Leap year', 'Least common multiple', 'Left factorials', 'Letter frequency', 'Levenshtein distance', 'Linear congruential generator', 'List comprehensions', 'Literals/Floating point', 
'Literals/Integer', 'Literals/String', 'Logical operations', 'Long multiplication', 'Longest common subsequence', 'Longest increasing subsequence', 'Longest string challenge', 'Look-and-say sequence', 'Loop over multiple arrays simultaneously', 'Loops/Break', 'Loops/Continue', 'Loops/Do-while', 'Loops/Downward for', 'Loops/For', 'Loops/For with a specified step', 'Loops/Foreach', 'Loops/Infinite', 'Loops/N plus one half', 'Loops/Nested', 'Loops/While', 'LU decomposition', 'Lucas-Lehmer test', 'Ludic numbers', 'Luhn test of credit card numbers', 'LZW compressionM', 'Machine code', 'Mad Libs', 'Magic squares of odd order', 'Main step of GOST 28147-89', 'Make directory path', 'Man or boy test', 'Mandelbrot set', 'Map range', 'Matrix arithmetic', 'Matrix multiplication', 'Matrix transposition', 'Matrix-exponentiation operator', 'Maximum triangle path sum', 'Maze generation', 'Maze solving', 'MD4', 'MD5', 'MD5/Implementation', 'Median filter', 'Memory allocation', 'Memory layout of a data structure', 'Menu', 'Metaprogramming', 'Metered concurrency', 'Metronome', 'Middle three digits', 'Miller-Rabin primality test', 'Minesweeper game', 'Modular exponentiation', 'Modular inverse', 'Monte Carlo methods', 'Monty Hall problem', 'Morse code', 'Mouse position', 'Move-to-front algorithm', 'Multifactorial', 'Multiple distinct objects', 'Multiple regression', 'Multiplication tables', 'Multiplicative order', 'Multisplit', 'Munching squares', 'Mutual recursionN', \"N'th\", 'N-queens problem', 'Named parameters', 'Narcissist', 'Narcissistic decimal number', 'Natural sorting', 'Nautical bell', 'Non-continuous subsequences', 'Non-decimal radices/Convert', 'Non-decimal radices/Input', 'Non-decimal radices/Output', 'Nth root', 'Null object', 'Number names', 'Number reversal game', 'Numeric error propagation', 'Numerical integration', 'Numerical integration/Gauss-Legendre QuadratureO', 'Object serialization', 'Odd word problem', 'Old lady swallowed a fly', 'OLE Automation', 'One of n 
lines in a file', 'One-dimensional cellular automata', 'OpenGL', 'Operator precedence', 'Optional parameters', 'Order disjoint list items', 'Order two numerical lists', 'Ordered Partitions', 'Ordered wordsP', 'Palindrome detection', 'Pangram checker', 'Paraffins', 'Parallel calculations', 'Parametric polymorphism', 'Parametrized SQL statement', 'Parse an IP Address', 'Parsing/RPN calculator algorithm', 'Parsing/RPN to infix conversion', 'Parsing/Shunting-yard algorithm', 'Partial function application', 'Pascal matrix generation', \"Pascal's triangle\", \"Pascal's triangle/Puzzle\", 'Pattern matching', \"Penney's game\", 'Percentage difference between images', 'Percolation/Bond percolation', 'Percolation/Mean cluster density', 'Percolation/Mean run density', 'Percolation/Site percolation', 'Perfect numbers', 'Permutation test', 'Permutations', 'Permutations by swapping', 'Permutations/Derangements', 'Permutations/Rank of a permutation', 'Pernicious numbers', 'Phrase reversals', 'Pi', 'Pick random element', 'Pig the dice game', 'Pig the dice game/Player', 'Pinstripe/Display', 'Pinstripe/Printer', 'Play recorded sounds', 'Playing cards', 'Plot coordinate pairs', 'Pointers and references', 'Polymorphic copy', 'Polymorphism', 'Polynomial long division', 'Polynomial regression', 'Power set', 'Pragmatic directives', 'Price fraction', 'Primality by trial division', 'Prime decomposition', 'Primes - allocate descendants to their ancestors', 'Priority queue', 'Probabilistic choice', 'Problem of Apollonius', 'Program name', 'Program termination', 'Pythagorean triplesQ', 'QR decomposition', 'Quaternion type', 'Queue/Definition', 'Queue/Usage', 'Quickselect algorithm', 'Q cont.', 'QuineR', 'Random number generator (device)', 'Random number generator (included)', 'Random numbers', 'Range expansion', 'Range extraction', 'Ranking methods', 'Rate counter', 'Ray-casting algorithm', 'RCRPG', 'Read a configuration file', 'Read a file line by line', 'Read a specific line from a file', 
'Read entire file', 'Real constants and functions', 'Record sound', 'Reduced row echelon form', 'Regular expressions', 'Remove duplicate elements', 'Remove lines from a file', 'Rename a file', 'Rendezvous', 'Rep-string', 'Repeat a string', 'Resistor mesh', 'Respond to an unknown method call', 'Return multiple values', 'Reverse a string', 'Reverse words in a string', 'RIPEMD-160', 'Rock-paper-scissors', 'Roman numerals/Decode', 'Roman numerals/Encode', 'Roots of a function', 'Roots of a quadratic function', 'Roots of unity', 'Rosetta Code/Count examples', 'Rosetta Code/Find bare lang tags', 'Rosetta Code/Find unimplemented tasks', 'Rosetta Code/Fix code tags', 'Rosetta Code/Rank languages by popularity', 'Rot-13', 'RSA code', 'Run-length encoding', 'Runge-Kutta method', 'Runtime evaluation', 'Runtime evaluation/In an environmentS', 'S-Expressions', 'Safe addition', 'Sailors, coconuts and a monkey problem', 'Same Fringe', 'Scope modifiers', 'Scope/Function names and labels', 'Search a list', 'Secure temporary file', 'SEDOLs', 'Self-describing numbers', 'Self-referential sequence', 'Semiprime', 'Semordnilap', 'Send an unknown method call', 'Send email', 'Sequence of non-squares', 'Sequence of primes by Trial Division', 'Set', 'Set consolidation', 'Set of real numbers', 'Set puzzle', 'Seven-sided dice from five-sided dice', 'SHA-1', 'SHA-256', 'Shell one-liner', 'Short-circuit evaluation', 'Show the epoch', 'Sierpinski carpet', 'Sierpinski triangle', 'Sierpinski triangle/Graphical', 'Sieve of Eratosthenes', 'Simple database', 'Simple windowed application', 'Simulate input/Keyboard', 'Simulate input/Mouse', 'Singleton', 'Singly-linked list/Element definition', 'Singly-linked list/Element insertion', 'Singly-linked list/Traversal', 'Sleep', 'SOAP', 'Sockets', 'Sokoban', 'Solve a Hidato puzzle', \"Solve a Holy Knight's tour\", 'Solve a Hopido puzzle', 'Solve a Numbrix puzzle', 'Solve the no connection puzzle', 'Sort an array of composite structures', 'Sort an integer 
array', 'Sort disjoint sublist', 'Sort stability', 'Sort using a custom comparator', 'Sorting algorithms/Bead sort', 'Sorting algorithms/Bogosort', 'Sorting algorithms/Bubble sort', 'Sorting algorithms/Cocktail sort', 'Sorting algorithms/Comb sort', 'Sorting algorithms/Counting sort', 'Sorting algorithms/Gnome sort', 'Sorting algorithms/Heapsort', 'Sorting algorithms/Insertion sort', 'Sorting algorithms/Merge sort', 'Sorting algorithms/Pancake sort', 'Sorting algorithms/Permutation sort', 'Sorting algorithms/Quicksort', 'Sorting algorithms/Radix sort', 'Sorting algorithms/Selection sort', 'Sorting algorithms/Shell sort', 'Sorting algorithms/Sleep sort', 'Sorting algorithms/Stooge sort', 'Sorting algorithms/Strand sort', 'Soundex', 'Sparkline in unicode', 'Special characters', 'Special variables', 'Speech synthesis', 'Spiral matrix', 'SQL-based authentication', 'Stable marriage problem', 'Stack', 'Stack traces', 'Stair-climbing puzzle', 'Standard deviation', 'Start from a main routine', 'State name puzzle', 'Statistics/Basic', 'Stem-and-leaf plot', 'Stern-Brocot sequence', 'String append', 'String case', 'String comparison', 'String concatenation', 'String interpolation (included)', 'String length', 'String matching', 'String prepend', 'Strip a set of characters from a string', 'Strip block comments', 'Strip comments from a string', 'Strip control codes and extended characters from a string', 'Strip whitespace from a string/Top and tail', 'Subleq', 'Substring', 'Substring/Top and tail', 'Subtractive generator', 'Sudoku', 'Sum and product of an array', 'Sum digits of an integer', 'Sum multiples of 3 and 5', 'Sum of a series', 'Sum of squares', 'Sutherland-Hodgman polygon clipping', 'Symmetric difference', 'Synchronous concurrency', 'System timeT', 'Table creation/Postal addresses', 'Take notes on the command line', 'Temperature conversion', 'Terminal control/Clear the screen', 'Terminal control/Coloured text', 'Terminal control/Cursor movement', 'Terminal 
control/Cursor positioning', 'Terminal control/Dimensions', 'Terminal control/Display an extended character', 'Terminal control/Hiding the cursor', 'Terminal control/Inverse video', 'Terminal control/Positional read', 'Terminal control/Preserve screen', 'Terminal control/Ringing the terminal bell', 'Terminal control/Unicode output', 'Ternary logic', 'Test a function', 'Text processing/1', 'Text processing/2', 'Text processing/Max licenses in use', 'Textonyms', 'The ISAAC Cipher', 'The Twelve Days of Christmas', \"Thiele's interpolation formula\", 'Tic-tac-toe', 'Time a function', 'Tokenize a string', 'Top rank per group', 'Topic variable', 'Topological sort', 'Topswops', 'Total circles area', 'Towers of Hanoi', 'Trabb Pardo–Knuth algorithm', 'Tree traversal', 'Trigonometric functions', 'Truncatable primes', 'Truncate a file', 'Twelve statementsU', 'Ulam spiral (for primes)', 'Unbias a random generator', 'Undefined values', 'Unicode strings', 'Unicode variable names', 'Universal Turing machine', 'Unix/ls', 'Update a configuration file', 'URL decoding', 'URL encoding', 'Use another language to call a function', 'User input/Graphical', 'User input/TextV', 'Vampire number', 'Van der Corput sequence', 'Variable size/Get', 'Variable size/Set', 'Variable-length quantity', 'Variables', 'Variadic function', 'Vector products', 'Verify distribution uniformity/Chi-squared test', 'Verify distribution uniformity/Naive', 'Video display modes', 'Vigenère cipher', 'Vigenère cipher/Cryptanalysis', 'Visualize a tree', \"Vogel's approximation method\", 'Voronoi diagramW', 'Walk a directory/Non-recursively', 'Walk a directory/Recursively', 'Web scraping', 'Window creation', 'Window creation/X11', 'Window management', 'Wireworld', 'Word wrap', 'World Cup group stage', 'Write float arrays to a text file', 'Write language name in 3D ASCII', 'Write to Windows event logX', \"Xiaolin Wu's line algorithm\", 'XML/DOM serialization', 'XML/Input', 'XML/Output', 'XML/XPathY', 'Y combinator', 
'Yahoo! search interface', 'Yin and yangZ', 'Zebra puzzle', 'Zeckendorf arithmetic', 'Zeckendorf number representation', 'Zero to the zero power', 'Zhang-Suen thinning algorithm', 'Zig-zag matrix']\n" + ] + } + ], + "source": [ + "for item in task_list:\n", + " item.replace(\" \", \"_\")\n", + " append.\n", + "print(task_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[,\n", + " navigation,\n", + " search,\n", + " guidelines,\n", + " discussion for this page,\n", + " Category:Draft Programming Tasks,\n", + " 100 doors,\n", + " 24 game,\n", + " 24 game/Solve,\n", + " 9 billion names of God the integer,\n", + " 99 Bottles of Beer,\n", + " A+B,\n", + " ABC Problem,\n", + " Abstract type,\n", + " Abundant, deficient and perfect number classifications,\n", + " Accumulator factory,\n", + " Ackermann function,\n", + " Active Directory/Connect,\n", + " Active Directory/Search for a user,\n", + " Active object,\n", + " Add a variable to a class instance at runtime,\n", + " Address of a variable,\n", + " AKS test for primes,\n", + " Align columns,\n", + " Aliquot sequence classifications,\n", + " Almost prime,\n", + " Amb,\n", + " Amicable pairs,\n", + " Anagrams,\n", + " Anagrams/Deranged anagrams,\n", + " Animate a pendulum,\n", + " Animation,\n", + " Anonymous recursion,\n", + " Append a record to the end of a text file,\n", + " Apply a callback to an array,\n", + " Arbitrary-precision integers (included),\n", + " Arena storage pool,\n", + " Arithmetic evaluation,\n", + " Arithmetic-geometric mean,\n", + " Arithmetic-geometric mean/Calculate Pi,\n", + " Arithmetic/Complex,\n", + " Arithmetic/Integer,\n", + " Arithmetic/Rational,\n", + " Array concatenation,\n", + " Arrays,\n", + " Assertions,\n", + " Associative array/Creation,\n", + " Associative array/Iteration,\n", + " Atomic updates,\n", + " Average loop length,\n", + " Averages/Arithmetic mean,\n", + " 
Averages/Mean angle,\n", + " Averages/Mean time of day,\n", + " Averages/Median,\n", + " Averages/Mode,\n", + " Averages/Pythagorean means,\n", + " Averages/Root mean square,\n", + " Averages/Simple moving average,\n", + " Balanced brackets,\n", + " Balanced ternary,\n", + " Benford's law,\n", + " Bernoulli numbers,\n", + " Best shuffle,\n", + " Binary digits,\n", + " Binary search,\n", + " Binary strings,\n", + " Bitcoin/address validation,\n", + " Bitcoin/public point to address,\n", + " Bitmap,\n", + " Bitmap/Bresenham's line algorithm,\n", + " Bitmap/Bézier curves/Cubic,\n", + " Bitmap/Bézier curves/Quadratic,\n", + " Bitmap/Flood fill,\n", + " Bitmap/Histogram,\n", + " Bitmap/Midpoint circle algorithm,\n", + " Bitmap/PPM conversion through a pipe,\n", + " Bitmap/Read a PPM file,\n", + " Bitmap/Read an image through a pipe,\n", + " Bitmap/Write a PPM file,\n", + " Bitwise IO,\n", + " Bitwise operations,\n", + " Boolean values,\n", + " Box the compass,\n", + " Break OO privacy,\n", + " Brownian tree,\n", + " Bulls and cows,\n", + " Bulls and cows/Player,\n", + " Caesar cipher,\n", + " Calendar,\n", + " Calendar - for \"REAL\" programmers,\n", + " Call a foreign-language function,\n", + " Call a function,\n", + " Call a function in a shared library,\n", + " Call an object method,\n", + " Canny edge detector,\n", + " Carmichael 3 strong pseudoprimes,\n", + " Case-sensitivity of identifiers,\n", + " Casting out nines,\n", + " Catalan numbers,\n", + " Catalan numbers/Pascal's triangle,\n", + " Catamorphism,\n", + " Catmull–Clark subdivision surface,\n", + " Character codes,\n", + " Chat server,\n", + " Check Machin-like formulas,\n", + " Check that file exists,\n", + " Checkpoint synchronization,\n", + " Chinese remainder theorem,\n", + " Cholesky decomposition,\n", + " Circles of given radius through two points,\n", + " Classes,\n", + " Closest-pair problem,\n", + " Closures/Value capture,\n", + " Collections,\n", + " Color of a screen pixel,\n", + " Color 
quantization,\n", + " Colour bars/Display,\n", + " Colour pinstripe/Display,\n", + " Colour pinstripe/Printer,\n", + " Combinations,\n", + " Combinations and permutations,\n", + " Combinations with repetitions,\n", + " Comma quibbling,\n", + " Command-line arguments,\n", + " Comments,\n", + " Compare sorting algorithms' performance,\n", + " Compile-time calculation,\n", + " Compound data type,\n", + " Concurrent computing,\n", + " Conditional structures,\n", + " Conjugate transpose,\n", + " Constrained genericity,\n", + " Constrained random points on a circle,\n", + " Continued fraction,\n", + " Continued fraction/Arithmetic/Construct from rational number,\n", + " Continued fraction/Arithmetic/G(matrix NG, Contined Fraction N),\n", + " Continued fraction/Arithmetic/G(matrix NG, Contined Fraction N1, Contined Fraction N2),\n", + " Convert decimal number to rational,\n", + " Conway's Game of Life,\n", + " Copy a string,\n", + " Count in factors,\n", + " Count in octal,\n", + " Count occurrences of a substring,\n", + " Count the coins,\n", + " CRC-32,\n", + " Create a file,\n", + " Create a file on magnetic tape,\n", + " Create a two-dimensional array at runtime,\n", + " Create an HTML table,\n", + " Create an object at a given address,\n", + " CSV data manipulation,\n", + " CSV to HTML translation,\n", + " Currying,\n", + " Cut a rectangle,\n", + " Date format,\n", + " Date manipulation,\n", + " Day of the week,\n", + " Deal cards for FreeCell,\n", + " Death Star,\n", + " Deconvolution/1D,\n", + " Deconvolution/2D+,\n", + " Deepcopy,\n", + " Define a primitive data type,\n", + " Delegates,\n", + " Delete a file,\n", + " Detect division by zero,\n", + " Determine if a string is numeric,\n", + " Determine if only one instance is running,\n", + " Digital root,\n", + " Digital root/Multiplicative digital root,\n", + " Dinesman's multiple-dwelling problem,\n", + " Dining philosophers,\n", + " Discordian date,\n", + " Distributed programming,\n", + " DNS query,\n", + " 
Documentation,\n", + " Dot product,\n", + " Doubly-linked list/Definition,\n", + " Doubly-linked list/Element definition,\n", + " Doubly-linked list/Element insertion,\n", + " Doubly-linked list/Traversal,\n", + " Dragon curve,\n", + " Draw a clock,\n", + " Draw a cuboid,\n", + " Draw a sphere,\n", + " Dutch national flag problem,\n", + " Dynamic variable names,\n", + " Echo server,\n", + " Element-wise operations,\n", + " Empty directory,\n", + " Empty program,\n", + " Empty string,\n", + " Enforced immutability,\n", + " Entropy,\n", + " Enumerations,\n", + " Environment variables,\n", + " Equilibrium index,\n", + " Ethiopian multiplication,\n", + " Euler method,\n", + " Euler's sum of powers conjecture,\n", + " Evaluate binomial coefficients,\n", + " Even or odd,\n", + " Events,\n", + " Evolutionary algorithm,\n", + " Exceptions,\n", + " Exceptions/Catch an exception thrown in a nested call,\n", + " Executable library,\n", + " Execute a Markov algorithm,\n", + " Execute a system command,\n", + " Execute Brain****,\n", + " Execute HQ9+,\n", + " Execute SNUSP,\n", + " Exponentiation operator,\n", + " Extend your language,\n", + " Extensible prime generator,\n", + " Extreme floating point values,\n", + " Factorial,\n", + " Factors of a Mersenne number,\n", + " Factors of an integer,\n", + " Fast Fourier transform,\n", + " Fibonacci n-step number sequences,\n", + " Fibonacci sequence,\n", + " Fibonacci word,\n", + " Fibonacci word/fractal,\n", + " File input/output,\n", + " File modification time,\n", + " File size,\n", + " Filter,\n", + " Find common directory path,\n", + " Find largest left truncatable prime in a given base,\n", + " Find limit of recursion,\n", + " Find the last Sunday of each month,\n", + " Find the missing permutation,\n", + " First class environments,\n", + " First-class functions,\n", + " First-class functions/Use numbers analogously,\n", + " Five weekends,\n", + " FizzBuzz,\n", + " Flatten a list,\n", + " Flipping bits game,\n", + " 
Flow-control structures,\n", + " Floyd's triangle,\n", + " Forest fire,\n", + " Fork,\n", + " Formal power series,\n", + " Formatted numeric output,\n", + " Forward difference,\n", + " Four bit adder,\n", + " Fractal tree,\n", + " Fractran,\n", + " Function composition,\n", + " Function definition,\n", + " Function frequency,\n", + " Function prototype,\n", + " Galton box animation,\n", + " Gamma function,\n", + " Gaussian elimination,\n", + " Generate Chess960 starting position,\n", + " Generate lower case ASCII alphabet,\n", + " Generator/Exponential,\n", + " Generic swap,\n", + " Globally replace text in several files,\n", + " Go Fish,\n", + " Gray code,\n", + " Grayscale image,\n", + " Greatest common divisor,\n", + " Greatest element of a list,\n", + " Greatest subsequential sum,\n", + " Greyscale bars/Display,\n", + " Guess the number,\n", + " Guess the number/With feedback,\n", + " Guess the number/With feedback (player),\n", + " GUI component interaction,\n", + " GUI enabling/disabling of controls,\n", + " GUI/Maximum window dimensions,\n", + " Hailstone sequence,\n", + " Hamming numbers,\n", + " Handle a signal,\n", + " Happy numbers,\n", + " Harshad or Niven series,\n", + " Hash from two arrays,\n", + " Hash join,\n", + " Haversine formula,\n", + " Hello world/Graphical,\n", + " Hello world/Line printer,\n", + " Hello world/Newbie,\n", + " Hello world/Newline omission,\n", + " Hello world/Standard error,\n", + " Hello world/Text,\n", + " Hello world/Web server,\n", + " Here document,\n", + " Heronian triangles,\n", + " Hickerson series of almost integers,\n", + " Higher-order functions,\n", + " History variables,\n", + " Hofstadter Figure-Figure sequences,\n", + " Hofstadter Q sequence,\n", + " Hofstadter-Conway $10,000 sequence,\n", + " Holidays related to Easter,\n", + " Honeycombs,\n", + " Horizontal sundial calculations,\n", + " Horner's rule for polynomial evaluation,\n", + " Host introspection,\n", + " Hostname,\n", + " Hough transform,\n", + " 
HTTP,\n", + " HTTPS,\n", + " HTTPS/Authenticated,\n", + " HTTPS/Client-authenticated,\n", + " Huffman coding,\n", + " I before E except after C,\n", + " IBAN,\n", + " Identity matrix,\n", + " Image convolution,\n", + " Image noise,\n", + " Include a file,\n", + " Increment a numerical string,\n", + " Infinity,\n", + " Inheritance/Multiple,\n", + " Inheritance/Single,\n", + " Input loop,\n", + " Integer comparison,\n", + " Integer overflow,\n", + " Integer sequence,\n", + " Interactive programming,\n", + " Introspection,\n", + " Inverted index,\n", + " Inverted syntax,\n", + " Iterated digits squaring,\n", + " Jensen's Device,\n", + " JortSort,\n", + " Josephus problem,\n", + " Joystick position,\n", + " JSON,\n", + " Jump anywhere,\n", + " K-d tree,\n", + " K-means++ clustering,\n", + " Kaprekar numbers,\n", + " Keyboard input/Flush the keyboard buffer,\n", + " Keyboard input/Keypress check,\n", + " Keyboard input/Obtain a Y or N response,\n", + " Keyboard macros,\n", + " Knapsack problem/0-1,\n", + " Knapsack problem/Bounded,\n", + " Knapsack problem/Continuous,\n", + " Knapsack problem/Unbounded,\n", + " Knight's tour,\n", + " Knuth shuffle,\n", + " Knuth's algorithm S,\n", + " Langton's ant,\n", + " Largest int from concatenated ints,\n", + " Last Friday of each month,\n", + " Last letter-first letter,\n", + " Leap year,\n", + " Least common multiple,\n", + " Left factorials,\n", + " Letter frequency,\n", + " Levenshtein distance,\n", + " Linear congruential generator,\n", + " List comprehensions,\n", + " Literals/Floating point,\n", + " Literals/Integer,\n", + " Literals/String,\n", + " Logical operations,\n", + " Long multiplication,\n", + " Longest common subsequence,\n", + " Longest increasing subsequence,\n", + " Longest string challenge,\n", + " Look-and-say sequence,\n", + " Loop over multiple arrays simultaneously,\n", + " Loops/Break,\n", + " Loops/Continue,\n", + " Loops/Do-while,\n", + " Loops/Downward for,\n", + " Loops/For,\n", + " Loops/For with a 
specified step,\n", + " Loops/Foreach,\n", + " Loops/Infinite,\n", + " Loops/N plus one half,\n", + " Loops/Nested,\n", + " Loops/While,\n", + " LU decomposition,\n", + " Lucas-Lehmer test,\n", + " Ludic numbers,\n", + " Luhn test of credit card numbers,\n", + " LZW compression,\n", + " Machine code,\n", + " Mad Libs,\n", + " Magic squares of odd order,\n", + " Main step of GOST 28147-89,\n", + " Make directory path,\n", + " Man or boy test,\n", + " Mandelbrot set,\n", + " Map range,\n", + " Matrix arithmetic,\n", + " Matrix multiplication,\n", + " Matrix transposition,\n", + " Matrix-exponentiation operator,\n", + " Maximum triangle path sum,\n", + " Maze generation,\n", + " Maze solving,\n", + " MD4,\n", + " MD5,\n", + " MD5/Implementation,\n", + " Median filter,\n", + " Memory allocation,\n", + " Memory layout of a data structure,\n", + " Menu,\n", + " Metaprogramming,\n", + " Metered concurrency,\n", + " Metronome,\n", + " Middle three digits,\n", + " Miller-Rabin primality test,\n", + " Minesweeper game,\n", + " Modular exponentiation,\n", + " Modular inverse,\n", + " Monte Carlo methods,\n", + " Monty Hall problem,\n", + " Morse code,\n", + " Mouse position,\n", + " Move-to-front algorithm,\n", + " Multifactorial,\n", + " Multiple distinct objects,\n", + " Multiple regression,\n", + " Multiplication tables,\n", + " Multiplicative order,\n", + " Multisplit,\n", + " Munching squares,\n", + " Mutual recursion,\n", + " N'th,\n", + " N-queens problem,\n", + " Named parameters,\n", + " Narcissist,\n", + " Narcissistic decimal number,\n", + " Natural sorting,\n", + " Nautical bell,\n", + " Non-continuous subsequences,\n", + " Non-decimal radices/Convert,\n", + " Non-decimal radices/Input,\n", + " Non-decimal radices/Output,\n", + " Nth root,\n", + " Null object,\n", + " Number names,\n", + " Number reversal game,\n", + " Numeric error propagation,\n", + " Numerical integration,\n", + " Numerical integration/Gauss-Legendre Quadrature,\n", + " Object 
serialization,\n", + " Odd word problem,\n", + " Old lady swallowed a fly,\n", + " OLE Automation,\n", + " One of n lines in a file,\n", + " One-dimensional cellular automata,\n", + " OpenGL,\n", + " Operator precedence,\n", + " Optional parameters,\n", + " Order disjoint list items,\n", + " Order two numerical lists,\n", + " Ordered Partitions,\n", + " Ordered words,\n", + " Palindrome detection,\n", + " Pangram checker,\n", + " Paraffins,\n", + " Parallel calculations,\n", + " Parametric polymorphism,\n", + " Parametrized SQL statement,\n", + " Parse an IP Address,\n", + " Parsing/RPN calculator algorithm,\n", + " Parsing/RPN to infix conversion,\n", + " Parsing/Shunting-yard algorithm,\n", + " Partial function application,\n", + " Pascal matrix generation,\n", + " Pascal's triangle,\n", + " Pascal's triangle/Puzzle,\n", + " Pattern matching,\n", + " Penney's game,\n", + " Percentage difference between images,\n", + " Percolation/Bond percolation,\n", + " Percolation/Mean cluster density,\n", + " Percolation/Mean run density,\n", + " Percolation/Site percolation,\n", + " Perfect numbers,\n", + " Permutation test,\n", + " Permutations,\n", + " Permutations by swapping,\n", + " Permutations/Derangements,\n", + " Permutations/Rank of a permutation,\n", + " Pernicious numbers,\n", + " Phrase reversals,\n", + " Pi,\n", + " Pick random element,\n", + " Pig the dice game,\n", + " Pig the dice game/Player,\n", + " Pinstripe/Display,\n", + " Pinstripe/Printer,\n", + " Play recorded sounds,\n", + " Playing cards,\n", + " Plot coordinate pairs,\n", + " Pointers and references,\n", + " Polymorphic copy,\n", + " Polymorphism,\n", + " Polynomial long division,\n", + " Polynomial regression,\n", + " Power set,\n", + " Pragmatic directives,\n", + " Price fraction,\n", + " Primality by trial division,\n", + " Prime decomposition,\n", + " Primes - allocate descendants to their ancestors,\n", + " Priority queue,\n", + " Probabilistic choice,\n", + " Problem of Apollonius,\n", + " 
Program name,\n", + " Program termination,\n", + " Pythagorean triples,\n", + " QR decomposition,\n", + " Quaternion type,\n", + " Queue/Definition,\n", + " Queue/Usage,\n", + " Quickselect algorithm,\n", + " Quine,\n", + " Random number generator (device),\n", + " Random number generator (included),\n", + " Random numbers,\n", + " Range expansion,\n", + " Range extraction,\n", + " Ranking methods,\n", + " Rate counter,\n", + " Ray-casting algorithm,\n", + " RCRPG,\n", + " Read a configuration file,\n", + " Read a file line by line,\n", + " Read a specific line from a file,\n", + " Read entire file,\n", + " Real constants and functions,\n", + " Record sound,\n", + " Reduced row echelon form,\n", + " Regular expressions,\n", + " Remove duplicate elements,\n", + " Remove lines from a file,\n", + " Rename a file,\n", + " Rendezvous,\n", + " Rep-string,\n", + " Repeat a string,\n", + " Resistor mesh,\n", + " Respond to an unknown method call,\n", + " Return multiple values,\n", + " Reverse a string,\n", + " Reverse words in a string,\n", + " RIPEMD-160,\n", + " Rock-paper-scissors,\n", + " Roman numerals/Decode,\n", + " Roman numerals/Encode,\n", + " Roots of a function,\n", + " Roots of a quadratic function,\n", + " Roots of unity,\n", + " Rosetta Code/Count examples,\n", + " Rosetta Code/Find bare lang tags,\n", + " Rosetta Code/Find unimplemented tasks,\n", + " Rosetta Code/Fix code tags,\n", + " Rosetta Code/Rank languages by popularity,\n", + " Rot-13,\n", + " RSA code,\n", + " Run-length encoding,\n", + " Runge-Kutta method,\n", + " Runtime evaluation,\n", + " Runtime evaluation/In an environment,\n", + " S-Expressions,\n", + " Safe addition,\n", + " Sailors, coconuts and a monkey problem,\n", + " Same Fringe,\n", + " Scope modifiers,\n", + " Scope/Function names and labels,\n", + " Search a list,\n", + " Secure temporary file,\n", + " SEDOLs,\n", + " Self-describing numbers,\n", + " Self-referential sequence,\n", + " Semiprime,\n", + " Semordnilap,\n", + " Send 
an unknown method call,\n", + " Send email,\n", + " Sequence of non-squares,\n", + " Sequence of primes by Trial Division,\n", + " Set,\n", + " Set consolidation,\n", + " Set of real numbers,\n", + " Set puzzle,\n", + " Seven-sided dice from five-sided dice,\n", + " SHA-1,\n", + " SHA-256,\n", + " Shell one-liner,\n", + " Short-circuit evaluation,\n", + " Show the epoch,\n", + " Sierpinski carpet,\n", + " Sierpinski triangle,\n", + " Sierpinski triangle/Graphical,\n", + " Sieve of Eratosthenes,\n", + " Simple database,\n", + " Simple windowed application,\n", + " Simulate input/Keyboard,\n", + " Simulate input/Mouse,\n", + " Singleton,\n", + " Singly-linked list/Element definition,\n", + " Singly-linked list/Element insertion,\n", + " Singly-linked list/Traversal,\n", + " Sleep,\n", + " SOAP,\n", + " Sockets,\n", + " Sokoban,\n", + " Solve a Hidato puzzle,\n", + " Solve a Holy Knight's tour,\n", + " Solve a Hopido puzzle,\n", + " Solve a Numbrix puzzle,\n", + " Solve the no connection puzzle,\n", + " Sort an array of composite structures,\n", + " Sort an integer array,\n", + " Sort disjoint sublist,\n", + " Sort stability,\n", + " Sort using a custom comparator,\n", + " Sorting algorithms/Bead sort,\n", + " Sorting algorithms/Bogosort,\n", + " Sorting algorithms/Bubble sort,\n", + " Sorting algorithms/Cocktail sort,\n", + " Sorting algorithms/Comb sort,\n", + " Sorting algorithms/Counting sort,\n", + " Sorting algorithms/Gnome sort,\n", + " Sorting algorithms/Heapsort,\n", + " Sorting algorithms/Insertion sort,\n", + " Sorting algorithms/Merge sort,\n", + " Sorting algorithms/Pancake sort,\n", + " Sorting algorithms/Permutation sort,\n", + " Sorting algorithms/Quicksort,\n", + " Sorting algorithms/Radix sort,\n", + " Sorting algorithms/Selection sort,\n", + " Sorting algorithms/Shell sort,\n", + " Sorting algorithms/Sleep sort,\n", + " Sorting algorithms/Stooge sort,\n", + " Sorting algorithms/Strand sort,\n", + " Soundex,\n", + " Sparkline in unicode,\n", + " 
Special characters,\n", + " Special variables,\n", + " Speech synthesis,\n", + " Spiral matrix,\n", + " SQL-based authentication,\n", + " Stable marriage problem,\n", + " Stack,\n", + " Stack traces,\n", + " Stair-climbing puzzle,\n", + " Standard deviation,\n", + " Start from a main routine,\n", + " State name puzzle,\n", + " Statistics/Basic,\n", + " Stem-and-leaf plot,\n", + " Stern-Brocot sequence,\n", + " String append,\n", + " String case,\n", + " String comparison,\n", + " String concatenation,\n", + " String interpolation (included),\n", + " String length,\n", + " String matching,\n", + " String prepend,\n", + " Strip a set of characters from a string,\n", + " Strip block comments,\n", + " Strip comments from a string,\n", + " Strip control codes and extended characters from a string,\n", + " Strip whitespace from a string/Top and tail,\n", + " Subleq,\n", + " Substring,\n", + " Substring/Top and tail,\n", + " Subtractive generator,\n", + " Sudoku,\n", + " Sum and product of an array,\n", + " Sum digits of an integer,\n", + " Sum multiples of 3 and 5,\n", + " Sum of a series,\n", + " Sum of squares,\n", + " Sutherland-Hodgman polygon clipping,\n", + " Symmetric difference,\n", + " Synchronous concurrency,\n", + " System time,\n", + " Table creation/Postal addresses,\n", + " Take notes on the command line,\n", + " Temperature conversion,\n", + " Terminal control/Clear the screen,\n", + " Terminal control/Coloured text,\n", + " Terminal control/Cursor movement,\n", + " Terminal control/Cursor positioning,\n", + " Terminal control/Dimensions,\n", + " Terminal control/Display an extended character,\n", + " Terminal control/Hiding the cursor,\n", + " Terminal control/Inverse video,\n", + " Terminal control/Positional read,\n", + " Terminal control/Preserve screen,\n", + " Terminal control/Ringing the terminal bell,\n", + " Terminal control/Unicode output,\n", + " Ternary logic,\n", + " Test a function,\n", + " Text processing/1,\n", + " Text processing/2,\n", + 
" Text processing/Max licenses in use,\n", + " Textonyms,\n", + " The ISAAC Cipher,\n", + " The Twelve Days of Christmas,\n", + " Thiele's interpolation formula,\n", + " Tic-tac-toe,\n", + " Time a function,\n", + " Tokenize a string,\n", + " Top rank per group,\n", + " Topic variable,\n", + " Topological sort,\n", + " Topswops,\n", + " Total circles area,\n", + " Towers of Hanoi,\n", + " Trabb Pardo–Knuth algorithm,\n", + " Tree traversal,\n", + " Trigonometric functions,\n", + " Truncatable primes,\n", + " Truncate a file,\n", + " Twelve statements,\n", + " Ulam spiral (for primes),\n", + " Unbias a random generator,\n", + " Undefined values,\n", + " Unicode strings,\n", + " Unicode variable names,\n", + " Universal Turing machine,\n", + " Unix/ls,\n", + " Update a configuration file,\n", + " URL decoding,\n", + " URL encoding,\n", + " Use another language to call a function,\n", + " User input/Graphical,\n", + " User input/Text,\n", + " Vampire number,\n", + " Van der Corput sequence,\n", + " Variable size/Get,\n", + " Variable size/Set,\n", + " Variable-length quantity,\n", + " Variables,\n", + " Variadic function,\n", + " Vector products,\n", + " Verify distribution uniformity/Chi-squared test,\n", + " Verify distribution uniformity/Naive,\n", + " Video display modes,\n", + " Vigenère cipher,\n", + " Vigenère cipher/Cryptanalysis,\n", + " Visualize a tree,\n", + " Vogel's approximation method,\n", + " Voronoi diagram,\n", + " Walk a directory/Non-recursively,\n", + " Walk a directory/Recursively,\n", + " Web scraping,\n", + " Window creation,\n", + " Window creation/X11,\n", + " Window management,\n", + " Wireworld,\n", + " Word wrap,\n", + " World Cup group stage,\n", + " Write float arrays to a text file,\n", + " Write language name in 3D ASCII,\n", + " Write to Windows event log,\n", + " Xiaolin Wu's line algorithm,\n", + " XML/DOM serialization,\n", + " XML/Input,\n", + " XML/Output,\n", + " XML/XPath,\n", + " Y combinator,\n", + " Yahoo! 
search interface,\n", + " Yin and yang,\n", + " Zebra puzzle,\n", + " Zeckendorf arithmetic,\n", + " Zeckendorf number representation,\n", + " Zero to the zero power,\n", + " Zhang-Suen thinning algorithm,\n", + " Zig-zag matrix,\n", + " http://rosettacode.org/mw/index.php?title=Category:Programming_Tasks&oldid=165935,\n", + " Create account,\n", + " Log in,\n", + " Category,\n", + " Discussion,\n", + " ,\n", + " Read,\n", + " Edit,\n", + " View history,\n", + " ,\n", + " ,\n", + " Twitter,\n", + " Chat/IRC,\n", + " Planet,\n", + " The Village Pump,\n", + " Finances,\n", + " Languages,\n", + " Tasks,\n", + " Recent changes,\n", + " Similar sites,\n", + " Random page,\n", + " Tweet,\n", + " What links here,\n", + " Related changes,\n", + " Special pages,\n", + " Printable version,\n", + " Permanent link,\n", + " Browse properties,\n", + " GNU Free Documentation License 1.2,\n", + " Privacy policy,\n", + " About Rosetta Code,\n", + " Disclaimers,\n", + " \"GNU,\n", + " \"Powered,\n", + " \"Powered]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def scrape_tasks(url):\n", + " req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})\n", + " content = urllib.request.urlopen(req).read()\n", + " soup = BeautifulSoup(content)\n", + " return soup.find_all( \"a\")#, class_=\"li\")\n", + "links = scrape_tasks('http://rosettacode.org/wiki/Category:Programming_Tasks')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['/wiki/Rosetta_Code:Add_a_Task',\n", + " '/wiki/Category_talk:Programming_Tasks',\n", + " '/wiki/100_doors',\n", + " '/wiki/24_game',\n", + " '/wiki/24_game/Solve',\n", + " '/wiki/9_billion_names_of_God_the_integer',\n", + " '/wiki/99_Bottles_of_Beer',\n", + " '/wiki/A%2BB',\n", + " '/wiki/ABC_Problem',\n", + " '/wiki/Abstract_type',\n", + " 
'/wiki/Abundant,_deficient_and_perfect_number_classifications',\n", + " '/wiki/Accumulator_factory',\n", + " '/wiki/Ackermann_function',\n", + " '/wiki/Active_Directory/Connect',\n", + " '/wiki/Active_Directory/Search_for_a_user',\n", + " '/wiki/Active_object',\n", + " '/wiki/Add_a_variable_to_a_class_instance_at_runtime',\n", + " '/wiki/Address_of_a_variable',\n", + " '/wiki/AKS_test_for_primes',\n", + " '/wiki/Align_columns',\n", + " '/wiki/Aliquot_sequence_classifications',\n", + " '/wiki/Almost_prime',\n", + " '/wiki/Amb',\n", + " '/wiki/Amicable_pairs',\n", + " '/wiki/Anagrams',\n", + " '/wiki/Anagrams/Deranged_anagrams',\n", + " '/wiki/Animate_a_pendulum',\n", + " '/wiki/Animation',\n", + " '/wiki/Anonymous_recursion',\n", + " '/wiki/Append_a_record_to_the_end_of_a_text_file',\n", + " '/wiki/Apply_a_callback_to_an_array',\n", + " '/wiki/Arbitrary-precision_integers_(included)',\n", + " '/wiki/Arena_storage_pool',\n", + " '/wiki/Arithmetic_evaluation',\n", + " '/wiki/Arithmetic-geometric_mean',\n", + " '/wiki/Arithmetic-geometric_mean/Calculate_Pi',\n", + " '/wiki/Arithmetic/Complex',\n", + " '/wiki/Arithmetic/Integer',\n", + " '/wiki/Arithmetic/Rational',\n", + " '/wiki/Array_concatenation',\n", + " '/wiki/Arrays',\n", + " '/wiki/Assertions',\n", + " '/wiki/Associative_array/Creation',\n", + " '/wiki/Associative_array/Iteration',\n", + " '/wiki/Atomic_updates',\n", + " '/wiki/Average_loop_length',\n", + " '/wiki/Averages/Arithmetic_mean',\n", + " '/wiki/Averages/Mean_angle',\n", + " '/wiki/Averages/Mean_time_of_day',\n", + " '/wiki/Averages/Median',\n", + " '/wiki/Averages/Mode',\n", + " '/wiki/Averages/Pythagorean_means',\n", + " '/wiki/Averages/Root_mean_square',\n", + " '/wiki/Averages/Simple_moving_average',\n", + " '/wiki/Balanced_brackets',\n", + " '/wiki/Balanced_ternary',\n", + " '/wiki/Benford%27s_law',\n", + " '/wiki/Bernoulli_numbers',\n", + " '/wiki/Best_shuffle',\n", + " '/wiki/Binary_digits',\n", + " '/wiki/Binary_search',\n", + " 
'/wiki/Binary_strings',\n", + " '/wiki/Bitcoin/address_validation',\n", + " '/wiki/Bitcoin/public_point_to_address',\n", + " '/wiki/Bitmap',\n", + " '/wiki/Bitmap/Bresenham%27s_line_algorithm',\n", + " '/wiki/Bitmap/B%C3%A9zier_curves/Cubic',\n", + " '/wiki/Bitmap/B%C3%A9zier_curves/Quadratic',\n", + " '/wiki/Bitmap/Flood_fill',\n", + " '/wiki/Bitmap/Histogram',\n", + " '/wiki/Bitmap/Midpoint_circle_algorithm',\n", + " '/wiki/Bitmap/PPM_conversion_through_a_pipe',\n", + " '/wiki/Bitmap/Read_a_PPM_file',\n", + " '/wiki/Bitmap/Read_an_image_through_a_pipe',\n", + " '/wiki/Bitmap/Write_a_PPM_file',\n", + " '/wiki/Bitwise_IO',\n", + " '/wiki/Bitwise_operations',\n", + " '/wiki/Boolean_values',\n", + " '/wiki/Box_the_compass',\n", + " '/wiki/Break_OO_privacy',\n", + " '/wiki/Brownian_tree',\n", + " '/wiki/Bulls_and_cows',\n", + " '/wiki/Bulls_and_cows/Player',\n", + " '/wiki/Caesar_cipher',\n", + " '/wiki/Calendar',\n", + " '/wiki/Calendar_-_for_%22REAL%22_programmers',\n", + " '/wiki/Call_a_foreign-language_function',\n", + " '/wiki/Call_a_function',\n", + " '/wiki/Call_a_function_in_a_shared_library',\n", + " '/wiki/Call_an_object_method',\n", + " '/wiki/Canny_edge_detector',\n", + " '/wiki/Carmichael_3_strong_pseudoprimes',\n", + " '/wiki/Case-sensitivity_of_identifiers',\n", + " '/wiki/Casting_out_nines',\n", + " '/wiki/Catalan_numbers',\n", + " '/wiki/Catalan_numbers/Pascal%27s_triangle',\n", + " '/wiki/Catamorphism',\n", + " '/wiki/Catmull%E2%80%93Clark_subdivision_surface',\n", + " '/wiki/Character_codes',\n", + " '/wiki/Chat_server',\n", + " '/wiki/Check_Machin-like_formulas',\n", + " '/wiki/Check_that_file_exists',\n", + " '/wiki/Checkpoint_synchronization',\n", + " '/wiki/Chinese_remainder_theorem',\n", + " '/wiki/Cholesky_decomposition',\n", + " '/wiki/Circles_of_given_radius_through_two_points',\n", + " '/wiki/Classes',\n", + " '/wiki/Closest-pair_problem',\n", + " '/wiki/Closures/Value_capture',\n", + " '/wiki/Collections',\n", + " 
'/wiki/Color_of_a_screen_pixel',\n", + " '/wiki/Color_quantization',\n", + " '/wiki/Colour_bars/Display',\n", + " '/wiki/Colour_pinstripe/Display',\n", + " '/wiki/Colour_pinstripe/Printer',\n", + " '/wiki/Combinations',\n", + " '/wiki/Combinations_and_permutations',\n", + " '/wiki/Combinations_with_repetitions',\n", + " '/wiki/Comma_quibbling',\n", + " '/wiki/Command-line_arguments',\n", + " '/wiki/Comments',\n", + " '/wiki/Compare_sorting_algorithms%27_performance',\n", + " '/wiki/Compile-time_calculation',\n", + " '/wiki/Compound_data_type',\n", + " '/wiki/Concurrent_computing',\n", + " '/wiki/Conditional_structures',\n", + " '/wiki/Conjugate_transpose',\n", + " '/wiki/Constrained_genericity',\n", + " '/wiki/Constrained_random_points_on_a_circle',\n", + " '/wiki/Continued_fraction',\n", + " '/wiki/Continued_fraction/Arithmetic/Construct_from_rational_number',\n", + " '/wiki/Continued_fraction/Arithmetic/G(matrix_NG,_Contined_Fraction_N)',\n", + " '/wiki/Continued_fraction/Arithmetic/G(matrix_NG,_Contined_Fraction_N1,_Contined_Fraction_N2)',\n", + " '/wiki/Convert_decimal_number_to_rational',\n", + " '/wiki/Conway%27s_Game_of_Life',\n", + " '/wiki/Copy_a_string',\n", + " '/wiki/Count_in_factors',\n", + " '/wiki/Count_in_octal',\n", + " '/wiki/Count_occurrences_of_a_substring',\n", + " '/wiki/Count_the_coins',\n", + " '/wiki/CRC-32',\n", + " '/wiki/Create_a_file',\n", + " '/wiki/Create_a_file_on_magnetic_tape',\n", + " '/wiki/Create_a_two-dimensional_array_at_runtime',\n", + " '/wiki/Create_an_HTML_table',\n", + " '/wiki/Create_an_object_at_a_given_address',\n", + " '/wiki/CSV_data_manipulation',\n", + " '/wiki/CSV_to_HTML_translation',\n", + " '/wiki/Currying',\n", + " '/wiki/Cut_a_rectangle',\n", + " '/wiki/Date_format',\n", + " '/wiki/Date_manipulation',\n", + " '/wiki/Day_of_the_week',\n", + " '/wiki/Deal_cards_for_FreeCell',\n", + " '/wiki/Death_Star',\n", + " '/wiki/Deconvolution/1D',\n", + " '/wiki/Deconvolution/2D%2B',\n", + " '/wiki/Deepcopy',\n", + " 
'/wiki/Define_a_primitive_data_type',\n", + " '/wiki/Delegates',\n", + " '/wiki/Delete_a_file',\n", + " '/wiki/Detect_division_by_zero',\n", + " '/wiki/Determine_if_a_string_is_numeric',\n", + " '/wiki/Determine_if_only_one_instance_is_running',\n", + " '/wiki/Digital_root',\n", + " '/wiki/Digital_root/Multiplicative_digital_root',\n", + " '/wiki/Dinesman%27s_multiple-dwelling_problem',\n", + " '/wiki/Dining_philosophers',\n", + " '/wiki/Discordian_date',\n", + " '/wiki/Distributed_programming',\n", + " '/wiki/DNS_query',\n", + " '/wiki/Documentation',\n", + " '/wiki/Dot_product',\n", + " '/wiki/Doubly-linked_list/Definition',\n", + " '/wiki/Doubly-linked_list/Element_definition',\n", + " '/wiki/Doubly-linked_list/Element_insertion',\n", + " '/wiki/Doubly-linked_list/Traversal',\n", + " '/wiki/Dragon_curve',\n", + " '/wiki/Draw_a_clock',\n", + " '/wiki/Draw_a_cuboid',\n", + " '/wiki/Draw_a_sphere',\n", + " '/wiki/Dutch_national_flag_problem',\n", + " '/wiki/Dynamic_variable_names',\n", + " '/wiki/Echo_server',\n", + " '/wiki/Element-wise_operations',\n", + " '/wiki/Empty_directory',\n", + " '/wiki/Empty_program',\n", + " '/wiki/Empty_string',\n", + " '/wiki/Enforced_immutability',\n", + " '/wiki/Entropy',\n", + " '/wiki/Enumerations',\n", + " '/wiki/Environment_variables',\n", + " '/wiki/Equilibrium_index',\n", + " '/wiki/Ethiopian_multiplication',\n", + " '/wiki/Euler_method',\n", + " '/wiki/Euler%27s_sum_of_powers_conjecture',\n", + " '/wiki/Evaluate_binomial_coefficients',\n", + " '/wiki/Even_or_odd',\n", + " '/wiki/Events',\n", + " '/wiki/Evolutionary_algorithm',\n", + " '/wiki/Exceptions',\n", + " '/wiki/Exceptions/Catch_an_exception_thrown_in_a_nested_call',\n", + " '/wiki/Executable_library',\n", + " '/wiki/Execute_a_Markov_algorithm',\n", + " '/wiki/Execute_a_system_command',\n", + " '/wiki/Execute_Brain****',\n", + " '/wiki/Execute_HQ9%2B',\n", + " '/wiki/Execute_SNUSP',\n", + " '/wiki/Exponentiation_operator',\n", + " '/wiki/Extend_your_language',\n", + " 
'/wiki/Extensible_prime_generator',\n", + " '/wiki/Extreme_floating_point_values',\n", + " '/wiki/Factorial',\n", + " '/wiki/Factors_of_a_Mersenne_number',\n", + " '/wiki/Factors_of_an_integer',\n", + " '/wiki/Fast_Fourier_transform',\n", + " '/wiki/Fibonacci_n-step_number_sequences',\n", + " '/wiki/Fibonacci_sequence',\n", + " '/wiki/Fibonacci_word',\n", + " '/wiki/Fibonacci_word/fractal',\n", + " '/wiki/File_input/output',\n", + " '/wiki/File_modification_time',\n", + " '/wiki/File_size',\n", + " '/wiki/Filter',\n", + " '/wiki/Find_common_directory_path',\n", + " '/wiki/Find_largest_left_truncatable_prime_in_a_given_base',\n", + " '/wiki/Find_limit_of_recursion',\n", + " '/wiki/Find_the_last_Sunday_of_each_month',\n", + " '/wiki/Find_the_missing_permutation',\n", + " '/wiki/First_class_environments',\n", + " '/wiki/First-class_functions',\n", + " '/wiki/First-class_functions/Use_numbers_analogously',\n", + " '/wiki/Five_weekends',\n", + " '/wiki/FizzBuzz',\n", + " '/wiki/Flatten_a_list',\n", + " '/wiki/Flipping_bits_game',\n", + " '/wiki/Flow-control_structures',\n", + " '/wiki/Floyd%27s_triangle',\n", + " '/wiki/Forest_fire',\n", + " '/wiki/Fork',\n", + " '/wiki/Formal_power_series',\n", + " '/wiki/Formatted_numeric_output',\n", + " '/wiki/Forward_difference',\n", + " '/wiki/Four_bit_adder',\n", + " '/wiki/Fractal_tree',\n", + " '/wiki/Fractran',\n", + " '/wiki/Function_composition',\n", + " '/wiki/Function_definition',\n", + " '/wiki/Function_frequency',\n", + " '/wiki/Function_prototype',\n", + " '/wiki/Galton_box_animation',\n", + " '/wiki/Gamma_function',\n", + " '/wiki/Gaussian_elimination',\n", + " '/wiki/Generate_Chess960_starting_position',\n", + " '/wiki/Generate_lower_case_ASCII_alphabet',\n", + " '/wiki/Generator/Exponential',\n", + " '/wiki/Generic_swap',\n", + " '/wiki/Globally_replace_text_in_several_files',\n", + " '/wiki/Go_Fish',\n", + " '/wiki/Gray_code',\n", + " '/wiki/Grayscale_image',\n", + " '/wiki/Greatest_common_divisor',\n", + " 
'/wiki/Greatest_element_of_a_list',\n", + " '/wiki/Greatest_subsequential_sum',\n", + " '/wiki/Greyscale_bars/Display',\n", + " '/wiki/Guess_the_number',\n", + " '/wiki/Guess_the_number/With_feedback',\n", + " '/wiki/Guess_the_number/With_feedback_(player)',\n", + " '/wiki/GUI_component_interaction',\n", + " '/wiki/GUI_enabling/disabling_of_controls',\n", + " '/wiki/GUI/Maximum_window_dimensions',\n", + " '/wiki/Hailstone_sequence',\n", + " '/wiki/Hamming_numbers',\n", + " '/wiki/Handle_a_signal',\n", + " '/wiki/Happy_numbers',\n", + " '/wiki/Harshad_or_Niven_series',\n", + " '/wiki/Hash_from_two_arrays',\n", + " '/wiki/Hash_join',\n", + " '/wiki/Haversine_formula',\n", + " '/wiki/Hello_world/Graphical',\n", + " '/wiki/Hello_world/Line_printer',\n", + " '/wiki/Hello_world/Newbie',\n", + " '/wiki/Hello_world/Newline_omission',\n", + " '/wiki/Hello_world/Standard_error',\n", + " '/wiki/Hello_world/Text',\n", + " '/wiki/Hello_world/Web_server',\n", + " '/wiki/Here_document',\n", + " '/wiki/Heronian_triangles',\n", + " '/wiki/Hickerson_series_of_almost_integers',\n", + " '/wiki/Higher-order_functions',\n", + " '/wiki/History_variables',\n", + " '/wiki/Hofstadter_Figure-Figure_sequences',\n", + " '/wiki/Hofstadter_Q_sequence',\n", + " '/wiki/Hofstadter-Conway_$10,000_sequence',\n", + " '/wiki/Holidays_related_to_Easter',\n", + " '/wiki/Honeycombs',\n", + " '/wiki/Horizontal_sundial_calculations',\n", + " '/wiki/Horner%27s_rule_for_polynomial_evaluation',\n", + " '/wiki/Host_introspection',\n", + " '/wiki/Hostname',\n", + " '/wiki/Hough_transform',\n", + " '/wiki/HTTP',\n", + " '/wiki/HTTPS',\n", + " '/wiki/HTTPS/Authenticated',\n", + " '/wiki/HTTPS/Client-authenticated',\n", + " '/wiki/Huffman_coding',\n", + " '/wiki/I_before_E_except_after_C',\n", + " '/wiki/IBAN',\n", + " '/wiki/Identity_matrix',\n", + " '/wiki/Image_convolution',\n", + " '/wiki/Image_noise',\n", + " '/wiki/Include_a_file',\n", + " '/wiki/Increment_a_numerical_string',\n", + " '/wiki/Infinity',\n", + 
" '/wiki/Inheritance/Multiple',\n", + " '/wiki/Inheritance/Single',\n", + " '/wiki/Input_loop',\n", + " '/wiki/Integer_comparison',\n", + " '/wiki/Integer_overflow',\n", + " '/wiki/Integer_sequence',\n", + " '/wiki/Interactive_programming',\n", + " '/wiki/Introspection',\n", + " '/wiki/Inverted_index',\n", + " '/wiki/Inverted_syntax',\n", + " '/wiki/Iterated_digits_squaring',\n", + " '/wiki/Jensen%27s_Device',\n", + " '/wiki/JortSort',\n", + " '/wiki/Josephus_problem',\n", + " '/wiki/Joystick_position',\n", + " '/wiki/JSON',\n", + " '/wiki/Jump_anywhere',\n", + " '/wiki/K-d_tree',\n", + " '/wiki/K-means%2B%2B_clustering',\n", + " '/wiki/Kaprekar_numbers',\n", + " '/wiki/Keyboard_input/Flush_the_keyboard_buffer',\n", + " '/wiki/Keyboard_input/Keypress_check',\n", + " '/wiki/Keyboard_input/Obtain_a_Y_or_N_response',\n", + " '/wiki/Keyboard_macros',\n", + " '/wiki/Knapsack_problem/0-1',\n", + " '/wiki/Knapsack_problem/Bounded',\n", + " '/wiki/Knapsack_problem/Continuous',\n", + " '/wiki/Knapsack_problem/Unbounded',\n", + " '/wiki/Knight%27s_tour',\n", + " '/wiki/Knuth_shuffle',\n", + " '/wiki/Knuth%27s_algorithm_S',\n", + " '/wiki/Langton%27s_ant',\n", + " '/wiki/Largest_int_from_concatenated_ints',\n", + " '/wiki/Last_Friday_of_each_month',\n", + " '/wiki/Last_letter-first_letter',\n", + " '/wiki/Leap_year',\n", + " '/wiki/Least_common_multiple',\n", + " '/wiki/Left_factorials',\n", + " '/wiki/Letter_frequency',\n", + " '/wiki/Levenshtein_distance',\n", + " '/wiki/Linear_congruential_generator',\n", + " '/wiki/List_comprehensions',\n", + " '/wiki/Literals/Floating_point',\n", + " '/wiki/Literals/Integer',\n", + " '/wiki/Literals/String',\n", + " '/wiki/Logical_operations',\n", + " '/wiki/Long_multiplication',\n", + " '/wiki/Longest_common_subsequence',\n", + " '/wiki/Longest_increasing_subsequence',\n", + " '/wiki/Longest_string_challenge',\n", + " '/wiki/Look-and-say_sequence',\n", + " '/wiki/Loop_over_multiple_arrays_simultaneously',\n", + " 
'/wiki/Loops/Break',\n", + " '/wiki/Loops/Continue',\n", + " '/wiki/Loops/Do-while',\n", + " '/wiki/Loops/Downward_for',\n", + " '/wiki/Loops/For',\n", + " '/wiki/Loops/For_with_a_specified_step',\n", + " '/wiki/Loops/Foreach',\n", + " '/wiki/Loops/Infinite',\n", + " '/wiki/Loops/N_plus_one_half',\n", + " '/wiki/Loops/Nested',\n", + " '/wiki/Loops/While',\n", + " '/wiki/LU_decomposition',\n", + " '/wiki/Lucas-Lehmer_test',\n", + " '/wiki/Ludic_numbers',\n", + " '/wiki/Luhn_test_of_credit_card_numbers',\n", + " '/wiki/LZW_compression',\n", + " '/wiki/Machine_code',\n", + " '/wiki/Mad_Libs',\n", + " '/wiki/Magic_squares_of_odd_order',\n", + " '/wiki/Main_step_of_GOST_28147-89',\n", + " '/wiki/Make_directory_path',\n", + " '/wiki/Man_or_boy_test',\n", + " '/wiki/Mandelbrot_set',\n", + " '/wiki/Map_range',\n", + " '/wiki/Matrix_arithmetic',\n", + " '/wiki/Matrix_multiplication',\n", + " '/wiki/Matrix_transposition',\n", + " '/wiki/Matrix-exponentiation_operator',\n", + " '/wiki/Maximum_triangle_path_sum',\n", + " '/wiki/Maze_generation',\n", + " '/wiki/Maze_solving',\n", + " '/wiki/MD4',\n", + " '/wiki/MD5',\n", + " '/wiki/MD5/Implementation',\n", + " '/wiki/Median_filter',\n", + " '/wiki/Memory_allocation',\n", + " '/wiki/Memory_layout_of_a_data_structure',\n", + " '/wiki/Menu',\n", + " '/wiki/Metaprogramming',\n", + " '/wiki/Metered_concurrency',\n", + " '/wiki/Metronome',\n", + " '/wiki/Middle_three_digits',\n", + " '/wiki/Miller-Rabin_primality_test',\n", + " '/wiki/Minesweeper_game',\n", + " '/wiki/Modular_exponentiation',\n", + " '/wiki/Modular_inverse',\n", + " '/wiki/Monte_Carlo_methods',\n", + " '/wiki/Monty_Hall_problem',\n", + " '/wiki/Morse_code',\n", + " '/wiki/Mouse_position',\n", + " '/wiki/Move-to-front_algorithm',\n", + " '/wiki/Multifactorial',\n", + " '/wiki/Multiple_distinct_objects',\n", + " '/wiki/Multiple_regression',\n", + " '/wiki/Multiplication_tables',\n", + " '/wiki/Multiplicative_order',\n", + " '/wiki/Multisplit',\n", + " 
'/wiki/Munching_squares',\n", + " '/wiki/Mutual_recursion',\n", + " '/wiki/N%27th',\n", + " '/wiki/N-queens_problem',\n", + " '/wiki/Named_parameters',\n", + " '/wiki/Narcissist',\n", + " '/wiki/Narcissistic_decimal_number',\n", + " '/wiki/Natural_sorting',\n", + " '/wiki/Nautical_bell',\n", + " '/wiki/Non-continuous_subsequences',\n", + " '/wiki/Non-decimal_radices/Convert',\n", + " '/wiki/Non-decimal_radices/Input',\n", + " '/wiki/Non-decimal_radices/Output',\n", + " '/wiki/Nth_root',\n", + " '/wiki/Null_object',\n", + " '/wiki/Number_names',\n", + " '/wiki/Number_reversal_game',\n", + " '/wiki/Numeric_error_propagation',\n", + " '/wiki/Numerical_integration',\n", + " '/wiki/Numerical_integration/Gauss-Legendre_Quadrature',\n", + " '/wiki/Object_serialization',\n", + " '/wiki/Odd_word_problem',\n", + " '/wiki/Old_lady_swallowed_a_fly',\n", + " '/wiki/OLE_Automation',\n", + " '/wiki/One_of_n_lines_in_a_file',\n", + " '/wiki/One-dimensional_cellular_automata',\n", + " '/wiki/OpenGL',\n", + " '/wiki/Operator_precedence',\n", + " '/wiki/Optional_parameters',\n", + " '/wiki/Order_disjoint_list_items',\n", + " '/wiki/Order_two_numerical_lists',\n", + " '/wiki/Ordered_Partitions',\n", + " '/wiki/Ordered_words',\n", + " '/wiki/Palindrome_detection',\n", + " '/wiki/Pangram_checker',\n", + " '/wiki/Paraffins',\n", + " '/wiki/Parallel_calculations',\n", + " '/wiki/Parametric_polymorphism',\n", + " '/wiki/Parametrized_SQL_statement',\n", + " '/wiki/Parse_an_IP_Address',\n", + " '/wiki/Parsing/RPN_calculator_algorithm',\n", + " '/wiki/Parsing/RPN_to_infix_conversion',\n", + " '/wiki/Parsing/Shunting-yard_algorithm',\n", + " '/wiki/Partial_function_application',\n", + " '/wiki/Pascal_matrix_generation',\n", + " '/wiki/Pascal%27s_triangle',\n", + " '/wiki/Pascal%27s_triangle/Puzzle',\n", + " '/wiki/Pattern_matching',\n", + " '/wiki/Penney%27s_game',\n", + " '/wiki/Percentage_difference_between_images',\n", + " '/wiki/Percolation/Bond_percolation',\n", + " 
'/wiki/Percolation/Mean_cluster_density',\n", + " '/wiki/Percolation/Mean_run_density',\n", + " '/wiki/Percolation/Site_percolation',\n", + " '/wiki/Perfect_numbers',\n", + " '/wiki/Permutation_test',\n", + " '/wiki/Permutations',\n", + " '/wiki/Permutations_by_swapping',\n", + " '/wiki/Permutations/Derangements',\n", + " '/wiki/Permutations/Rank_of_a_permutation',\n", + " '/wiki/Pernicious_numbers',\n", + " '/wiki/Phrase_reversals',\n", + " '/wiki/Pi',\n", + " '/wiki/Pick_random_element',\n", + " '/wiki/Pig_the_dice_game',\n", + " '/wiki/Pig_the_dice_game/Player',\n", + " '/wiki/Pinstripe/Display',\n", + " '/wiki/Pinstripe/Printer',\n", + " '/wiki/Play_recorded_sounds',\n", + " '/wiki/Playing_cards',\n", + " '/wiki/Plot_coordinate_pairs',\n", + " '/wiki/Pointers_and_references',\n", + " '/wiki/Polymorphic_copy',\n", + " '/wiki/Polymorphism',\n", + " '/wiki/Polynomial_long_division',\n", + " '/wiki/Polynomial_regression',\n", + " '/wiki/Power_set',\n", + " '/wiki/Pragmatic_directives',\n", + " '/wiki/Price_fraction',\n", + " '/wiki/Primality_by_trial_division',\n", + " '/wiki/Prime_decomposition',\n", + " '/wiki/Primes_-_allocate_descendants_to_their_ancestors',\n", + " '/wiki/Priority_queue',\n", + " '/wiki/Probabilistic_choice',\n", + " '/wiki/Problem_of_Apollonius',\n", + " '/wiki/Program_name',\n", + " '/wiki/Program_termination',\n", + " '/wiki/Pythagorean_triples',\n", + " '/wiki/QR_decomposition',\n", + " '/wiki/Quaternion_type',\n", + " '/wiki/Queue/Definition',\n", + " '/wiki/Queue/Usage',\n", + " '/wiki/Quickselect_algorithm',\n", + " '/wiki/Quine',\n", + " '/wiki/Random_number_generator_(device)',\n", + " '/wiki/Random_number_generator_(included)',\n", + " '/wiki/Random_numbers',\n", + " '/wiki/Range_expansion',\n", + " '/wiki/Range_extraction',\n", + " '/wiki/Ranking_methods',\n", + " '/wiki/Rate_counter',\n", + " '/wiki/Ray-casting_algorithm',\n", + " '/wiki/RCRPG',\n", + " '/wiki/Read_a_configuration_file',\n", + " 
'/wiki/Read_a_file_line_by_line',\n", + " '/wiki/Read_a_specific_line_from_a_file',\n", + " '/wiki/Read_entire_file',\n", + " '/wiki/Real_constants_and_functions',\n", + " '/wiki/Record_sound',\n", + " '/wiki/Reduced_row_echelon_form',\n", + " '/wiki/Regular_expressions',\n", + " '/wiki/Remove_duplicate_elements',\n", + " '/wiki/Remove_lines_from_a_file',\n", + " '/wiki/Rename_a_file',\n", + " '/wiki/Rendezvous',\n", + " '/wiki/Rep-string',\n", + " '/wiki/Repeat_a_string',\n", + " '/wiki/Resistor_mesh',\n", + " '/wiki/Respond_to_an_unknown_method_call',\n", + " '/wiki/Return_multiple_values',\n", + " '/wiki/Reverse_a_string',\n", + " '/wiki/Reverse_words_in_a_string',\n", + " '/wiki/RIPEMD-160',\n", + " '/wiki/Rock-paper-scissors',\n", + " '/wiki/Roman_numerals/Decode',\n", + " '/wiki/Roman_numerals/Encode',\n", + " '/wiki/Roots_of_a_function',\n", + " '/wiki/Roots_of_a_quadratic_function',\n", + " '/wiki/Roots_of_unity',\n", + " '/wiki/Rosetta_Code/Count_examples',\n", + " '/wiki/Rosetta_Code/Find_bare_lang_tags',\n", + " '/wiki/Rosetta_Code/Find_unimplemented_tasks',\n", + " '/wiki/Rosetta_Code/Fix_code_tags',\n", + " '/wiki/Rosetta_Code/Rank_languages_by_popularity',\n", + " '/wiki/Rot-13',\n", + " '/wiki/RSA_code',\n", + " '/wiki/Run-length_encoding',\n", + " '/wiki/Runge-Kutta_method',\n", + " '/wiki/Runtime_evaluation',\n", + " '/wiki/Runtime_evaluation/In_an_environment',\n", + " '/wiki/S-Expressions',\n", + " '/wiki/Safe_addition',\n", + " '/wiki/Sailors,_coconuts_and_a_monkey_problem',\n", + " '/wiki/Same_Fringe',\n", + " '/wiki/Scope_modifiers',\n", + " '/wiki/Scope/Function_names_and_labels',\n", + " '/wiki/Search_a_list',\n", + " '/wiki/Secure_temporary_file',\n", + " '/wiki/SEDOLs',\n", + " '/wiki/Self-describing_numbers',\n", + " '/wiki/Self-referential_sequence',\n", + " '/wiki/Semiprime',\n", + " '/wiki/Semordnilap',\n", + " '/wiki/Send_an_unknown_method_call',\n", + " '/wiki/Send_email',\n", + " '/wiki/Sequence_of_non-squares',\n", + " 
'/wiki/Sequence_of_primes_by_Trial_Division',\n", + " '/wiki/Set',\n", + " '/wiki/Set_consolidation',\n", + " '/wiki/Set_of_real_numbers',\n", + " '/wiki/Set_puzzle',\n", + " '/wiki/Seven-sided_dice_from_five-sided_dice',\n", + " '/wiki/SHA-1',\n", + " '/wiki/SHA-256',\n", + " '/wiki/Shell_one-liner',\n", + " '/wiki/Short-circuit_evaluation',\n", + " '/wiki/Show_the_epoch',\n", + " '/wiki/Sierpinski_carpet',\n", + " '/wiki/Sierpinski_triangle',\n", + " '/wiki/Sierpinski_triangle/Graphical',\n", + " '/wiki/Sieve_of_Eratosthenes',\n", + " '/wiki/Simple_database',\n", + " '/wiki/Simple_windowed_application',\n", + " '/wiki/Simulate_input/Keyboard',\n", + " '/wiki/Simulate_input/Mouse',\n", + " '/wiki/Singleton',\n", + " '/wiki/Singly-linked_list/Element_definition',\n", + " '/wiki/Singly-linked_list/Element_insertion',\n", + " '/wiki/Singly-linked_list/Traversal',\n", + " '/wiki/Sleep',\n", + " '/wiki/SOAP',\n", + " '/wiki/Sockets',\n", + " '/wiki/Sokoban',\n", + " '/wiki/Solve_a_Hidato_puzzle',\n", + " '/wiki/Solve_a_Holy_Knight%27s_tour',\n", + " '/wiki/Solve_a_Hopido_puzzle',\n", + " '/wiki/Solve_a_Numbrix_puzzle',\n", + " '/wiki/Solve_the_no_connection_puzzle',\n", + " '/wiki/Sort_an_array_of_composite_structures',\n", + " '/wiki/Sort_an_integer_array',\n", + " '/wiki/Sort_disjoint_sublist',\n", + " '/wiki/Sort_stability',\n", + " '/wiki/Sort_using_a_custom_comparator',\n", + " '/wiki/Sorting_algorithms/Bead_sort',\n", + " '/wiki/Sorting_algorithms/Bogosort',\n", + " '/wiki/Sorting_algorithms/Bubble_sort',\n", + " '/wiki/Sorting_algorithms/Cocktail_sort',\n", + " '/wiki/Sorting_algorithms/Comb_sort',\n", + " '/wiki/Sorting_algorithms/Counting_sort',\n", + " '/wiki/Sorting_algorithms/Gnome_sort',\n", + " '/wiki/Sorting_algorithms/Heapsort',\n", + " '/wiki/Sorting_algorithms/Insertion_sort',\n", + " '/wiki/Sorting_algorithms/Merge_sort',\n", + " '/wiki/Sorting_algorithms/Pancake_sort',\n", + " '/wiki/Sorting_algorithms/Permutation_sort',\n", + " 
'/wiki/Sorting_algorithms/Quicksort',\n", + " '/wiki/Sorting_algorithms/Radix_sort',\n", + " '/wiki/Sorting_algorithms/Selection_sort',\n", + " '/wiki/Sorting_algorithms/Shell_sort',\n", + " '/wiki/Sorting_algorithms/Sleep_sort',\n", + " '/wiki/Sorting_algorithms/Stooge_sort',\n", + " '/wiki/Sorting_algorithms/Strand_sort',\n", + " '/wiki/Soundex',\n", + " '/wiki/Sparkline_in_unicode',\n", + " '/wiki/Special_characters',\n", + " '/wiki/Special_variables',\n", + " '/wiki/Speech_synthesis',\n", + " '/wiki/Spiral_matrix',\n", + " '/wiki/SQL-based_authentication',\n", + " '/wiki/Stable_marriage_problem',\n", + " '/wiki/Stack',\n", + " '/wiki/Stack_traces',\n", + " '/wiki/Stair-climbing_puzzle',\n", + " '/wiki/Standard_deviation',\n", + " '/wiki/Start_from_a_main_routine',\n", + " '/wiki/State_name_puzzle',\n", + " '/wiki/Statistics/Basic',\n", + " '/wiki/Stem-and-leaf_plot',\n", + " '/wiki/Stern-Brocot_sequence',\n", + " '/wiki/String_append',\n", + " '/wiki/String_case',\n", + " '/wiki/String_comparison',\n", + " '/wiki/String_concatenation',\n", + " '/wiki/String_interpolation_(included)',\n", + " '/wiki/String_length',\n", + " '/wiki/String_matching',\n", + " '/wiki/String_prepend',\n", + " '/wiki/Strip_a_set_of_characters_from_a_string',\n", + " '/wiki/Strip_block_comments',\n", + " '/wiki/Strip_comments_from_a_string',\n", + " '/wiki/Strip_control_codes_and_extended_characters_from_a_string',\n", + " '/wiki/Strip_whitespace_from_a_string/Top_and_tail',\n", + " '/wiki/Subleq',\n", + " '/wiki/Substring',\n", + " '/wiki/Substring/Top_and_tail',\n", + " '/wiki/Subtractive_generator',\n", + " '/wiki/Sudoku',\n", + " '/wiki/Sum_and_product_of_an_array',\n", + " '/wiki/Sum_digits_of_an_integer',\n", + " '/wiki/Sum_multiples_of_3_and_5',\n", + " '/wiki/Sum_of_a_series',\n", + " '/wiki/Sum_of_squares',\n", + " '/wiki/Sutherland-Hodgman_polygon_clipping',\n", + " '/wiki/Symmetric_difference',\n", + " '/wiki/Synchronous_concurrency',\n", + " '/wiki/System_time',\n", + " 
'/wiki/Table_creation/Postal_addresses',\n", + " '/wiki/Take_notes_on_the_command_line',\n", + " '/wiki/Temperature_conversion',\n", + " '/wiki/Terminal_control/Clear_the_screen',\n", + " '/wiki/Terminal_control/Coloured_text',\n", + " '/wiki/Terminal_control/Cursor_movement',\n", + " '/wiki/Terminal_control/Cursor_positioning',\n", + " '/wiki/Terminal_control/Dimensions',\n", + " '/wiki/Terminal_control/Display_an_extended_character',\n", + " '/wiki/Terminal_control/Hiding_the_cursor',\n", + " '/wiki/Terminal_control/Inverse_video',\n", + " '/wiki/Terminal_control/Positional_read',\n", + " '/wiki/Terminal_control/Preserve_screen',\n", + " '/wiki/Terminal_control/Ringing_the_terminal_bell',\n", + " '/wiki/Terminal_control/Unicode_output',\n", + " '/wiki/Ternary_logic',\n", + " '/wiki/Test_a_function',\n", + " '/wiki/Text_processing/1',\n", + " '/wiki/Text_processing/2',\n", + " '/wiki/Text_processing/Max_licenses_in_use',\n", + " '/wiki/Textonyms',\n", + " '/wiki/The_ISAAC_Cipher',\n", + " '/wiki/The_Twelve_Days_of_Christmas',\n", + " '/wiki/Thiele%27s_interpolation_formula',\n", + " '/wiki/Tic-tac-toe',\n", + " '/wiki/Time_a_function',\n", + " '/wiki/Tokenize_a_string',\n", + " '/wiki/Top_rank_per_group',\n", + " '/wiki/Topic_variable',\n", + " '/wiki/Topological_sort',\n", + " '/wiki/Topswops',\n", + " '/wiki/Total_circles_area',\n", + " '/wiki/Towers_of_Hanoi',\n", + " '/wiki/Trabb_Pardo%E2%80%93Knuth_algorithm',\n", + " '/wiki/Tree_traversal',\n", + " '/wiki/Trigonometric_functions',\n", + " '/wiki/Truncatable_primes',\n", + " '/wiki/Truncate_a_file',\n", + " '/wiki/Twelve_statements',\n", + " '/wiki/Ulam_spiral_(for_primes)',\n", + " '/wiki/Unbias_a_random_generator',\n", + " '/wiki/Undefined_values',\n", + " '/wiki/Unicode_strings',\n", + " '/wiki/Unicode_variable_names',\n", + " '/wiki/Universal_Turing_machine',\n", + " '/wiki/Unix/ls',\n", + " '/wiki/Update_a_configuration_file',\n", + " '/wiki/URL_decoding',\n", + " '/wiki/URL_encoding',\n", + " 
'/wiki/Use_another_language_to_call_a_function',\n", + " '/wiki/User_input/Graphical',\n", + " '/wiki/User_input/Text',\n", + " '/wiki/Vampire_number',\n", + " '/wiki/Van_der_Corput_sequence',\n", + " '/wiki/Variable_size/Get',\n", + " '/wiki/Variable_size/Set',\n", + " '/wiki/Variable-length_quantity',\n", + " '/wiki/Variables',\n", + " '/wiki/Variadic_function',\n", + " '/wiki/Vector_products',\n", + " '/wiki/Verify_distribution_uniformity/Chi-squared_test',\n", + " '/wiki/Verify_distribution_uniformity/Naive',\n", + " '/wiki/Video_display_modes',\n", + " '/wiki/Vigen%C3%A8re_cipher',\n", + " '/wiki/Vigen%C3%A8re_cipher/Cryptanalysis',\n", + " '/wiki/Visualize_a_tree',\n", + " '/wiki/Vogel%27s_approximation_method',\n", + " '/wiki/Voronoi_diagram',\n", + " '/wiki/Walk_a_directory/Non-recursively',\n", + " '/wiki/Walk_a_directory/Recursively',\n", + " '/wiki/Web_scraping',\n", + " '/wiki/Window_creation',\n", + " '/wiki/Window_creation/X11',\n", + " '/wiki/Window_management',\n", + " '/wiki/Wireworld',\n", + " '/wiki/Word_wrap',\n", + " '/wiki/World_Cup_group_stage',\n", + " '/wiki/Write_float_arrays_to_a_text_file',\n", + " '/wiki/Write_language_name_in_3D_ASCII',\n", + " '/wiki/Write_to_Windows_event_log',\n", + " '/wiki/Xiaolin_Wu%27s_line_algorithm',\n", + " '/wiki/XML/DOM_serialization',\n", + " '/wiki/XML/Input',\n", + " '/wiki/XML/Output',\n", + " '/wiki/XML/XPath',\n", + " '/wiki/Y_combinator',\n", + " '/wiki/Yahoo!_search_interface',\n", + " '/wiki/Yin_and_yang',\n", + " '/wiki/Zebra_puzzle',\n", + " '/wiki/Zeckendorf_arithmetic',\n", + " '/wiki/Zeckendorf_number_representation',\n", + " '/wiki/Zero_to_the_zero_power',\n", + " '/wiki/Zhang-Suen_thinning_algorithm',\n", + " '/wiki/Zig-zag_matrix',\n", + " '/wiki/Category:Programming_Tasks',\n", + " '/wiki/Category_talk:Programming_Tasks',\n", + " '/wiki/Category:Programming_Tasks',\n", + " '/wiki/Rosetta_Code',\n", + " '/wiki/Special:WebChat',\n", + " '/wiki/Rosetta_Code:Village_Pump',\n", + " 
'/wiki/Rosetta_Code:Finances',\n", + " '/wiki/Category:Programming_Languages',\n", + " '/wiki/Category:Programming_Tasks',\n", + " '/wiki/Special:RecentChanges',\n", + " '/wiki/Help:Similar_Sites',\n", + " '/wiki/Special:Random',\n", + " '/wiki/Special:WhatLinksHere/Category:Programming_Tasks',\n", + " '/wiki/Special:RecentChangesLinked/Category:Programming_Tasks',\n", + " '/wiki/Special:SpecialPages',\n", + " '/wiki/Special:Browse/Category:Programming_Tasks',\n", + " '/wiki/Rosetta_Code:Privacy_policy',\n", + " '/wiki/Rosetta_Code:About',\n", + " '/wiki/Rosetta_Code:General_disclaimer']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def scrape_links():\n", + " req = urllib.request.Request('http://rosettacode.org/wiki/Category:Programming_Tasks', headers={'User-Agent': 'Mozilla/5.0'})\n", + " content = urllib.request.urlopen(req).read()\n", + " soup = BeautifulSoup(content)\n", + " link_list = [link.get('href') for link in soup.find_all('a')]\n", + " return [link for link in link_list[1:] if link.startswith('/wiki/')]\n", + "scrape_links()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "web_scraper = make_links_list(500)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['http://www.rosettacode.org/wiki/Sorting_algorithms/Pancake_sort',\n", + " 'http://www.rosettacode.org/wiki/Power_set',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code/Find_unimplemented_tasks',\n", + " 'http://www.rosettacode.org/wiki/Associative_array/Iteration',\n", + " 'http://www.rosettacode.org/wiki/Fibonacci_word',\n", + " 'http://www.rosettacode.org/wiki/Dot_product',\n", + " 'http://www.rosettacode.org/wiki/String_prepend',\n", + " 'http://www.rosettacode.org/wiki/Vampire_number',\n", + " 
'http://www.rosettacode.org/wiki/String_matching',\n", + " 'http://www.rosettacode.org/wiki/Introspection',\n", + " 'http://www.rosettacode.org/wiki/Empty_directory',\n", + " 'http://www.rosettacode.org/wiki/Globally_replace_text_in_several_files',\n", + " 'http://www.rosettacode.org/wiki/Pinstripe/Display',\n", + " 'http://www.rosettacode.org/wiki/Range_extraction',\n", + " 'http://www.rosettacode.org/wiki/Use_another_language_to_call_a_function',\n", + " 'http://www.rosettacode.org/wiki/Echo_server',\n", + " 'http://www.rosettacode.org/wiki/File_size',\n", + " 'http://www.rosettacode.org/wiki/OpenGL',\n", + " 'http://www.rosettacode.org/wiki/Arithmetic-geometric_mean/Calculate_Pi',\n", + " 'http://www.rosettacode.org/wiki/Zhang-Suen_thinning_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Loops/Nested',\n", + " 'http://www.rosettacode.org/wiki/Window_creation/X11',\n", + " 'http://www.rosettacode.org/wiki/Honeycombs',\n", + " 'http://www.rosettacode.org/wiki/Knight%27s_tour',\n", + " 'http://www.rosettacode.org/wiki/Bulls_and_cows/Player',\n", + " 'http://www.rosettacode.org/wiki/Category:Programming_Languages',\n", + " 'http://www.rosettacode.org/wiki/Dining_philosophers',\n", + " 'http://www.rosettacode.org/wiki/Palindrome_detection',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/PPM_conversion_through_a_pipe',\n", + " 'http://www.rosettacode.org/wiki/Symmetric_difference',\n", + " 'http://www.rosettacode.org/wiki/Create_a_file_on_magnetic_tape',\n", + " 'http://www.rosettacode.org/wiki/Sailors,_coconuts_and_a_monkey_problem',\n", + " 'http://www.rosettacode.org/wiki/Pig_the_dice_game',\n", + " 'http://www.rosettacode.org/wiki/Binary_search',\n", + " 'http://www.rosettacode.org/wiki/Queue/Definition',\n", + " 'http://www.rosettacode.org/wiki/Scope/Function_names_and_labels',\n", + " 'http://www.rosettacode.org/wiki/Execute_SNUSP',\n", + " 'http://www.rosettacode.org/wiki/Y_combinator',\n", + " 'http://www.rosettacode.org/wiki/Greyscale_bars/Display',\n", 
+ " 'http://www.rosettacode.org/wiki/Parallel_calculations',\n", + " 'http://www.rosettacode.org/wiki/Doubly-linked_list/Traversal',\n", + " 'http://www.rosettacode.org/wiki/Assertions',\n", + " 'http://www.rosettacode.org/wiki/FizzBuzz',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Stooge_sort',\n", + " 'http://www.rosettacode.org/wiki/Sort_disjoint_sublist',\n", + " 'http://www.rosettacode.org/wiki/Man_or_boy_test',\n", + " 'http://www.rosettacode.org/wiki/Doubly-linked_list/Element_insertion',\n", + " 'http://www.rosettacode.org/wiki/Combinations',\n", + " 'http://www.rosettacode.org/wiki/Find_limit_of_recursion',\n", + " 'http://www.rosettacode.org/wiki/Bitcoin/public_point_to_address',\n", + " 'http://www.rosettacode.org/wiki/Abstract_type',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Histogram',\n", + " 'http://www.rosettacode.org/wiki/Penney%27s_game',\n", + " 'http://www.rosettacode.org/wiki/OLE_Automation',\n", + " 'http://www.rosettacode.org/wiki/File_input/output',\n", + " 'http://www.rosettacode.org/wiki/Fractran',\n", + " 'http://www.rosettacode.org/wiki/Video_display_modes',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Radix_sort',\n", + " 'http://www.rosettacode.org/wiki/Strip_a_set_of_characters_from_a_string',\n", + " 'http://www.rosettacode.org/wiki/Knapsack_problem/0-1',\n", + " 'http://www.rosettacode.org/wiki/Variable_size/Get',\n", + " 'http://www.rosettacode.org/wiki/Binary_digits',\n", + " 'http://www.rosettacode.org/wiki/Read_a_file_line_by_line',\n", + " 'http://www.rosettacode.org/wiki/Create_a_file',\n", + " 'http://www.rosettacode.org/wiki/Command-line_arguments',\n", + " 'http://www.rosettacode.org/wiki/Ordered_Partitions',\n", + " 'http://www.rosettacode.org/wiki/Send_email',\n", + " 'http://www.rosettacode.org/wiki/Look-and-say_sequence',\n", + " 'http://www.rosettacode.org/wiki/Operator_precedence',\n", + " 'http://www.rosettacode.org/wiki/Address_of_a_variable',\n", + " 
'http://www.rosettacode.org/wiki/Happy_numbers',\n", + " 'http://www.rosettacode.org/wiki/Walk_a_directory/Non-recursively',\n", + " 'http://www.rosettacode.org/wiki/Multiplicative_order',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:Finances',\n", + " 'http://www.rosettacode.org/wiki/File_modification_time',\n", + " 'http://www.rosettacode.org/wiki/Executable_library',\n", + " 'http://www.rosettacode.org/wiki/Detect_division_by_zero',\n", + " 'http://www.rosettacode.org/wiki/Delete_a_file',\n", + " 'http://www.rosettacode.org/wiki/Pi',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Ringing_the_terminal_bell',\n", + " 'http://www.rosettacode.org/wiki/Count_the_coins',\n", + " 'http://www.rosettacode.org/wiki/Zero_to_the_zero_power',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:About',\n", + " 'http://www.rosettacode.org/wiki/Van_der_Corput_sequence',\n", + " 'http://www.rosettacode.org/wiki/Solve_a_Hidato_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Random_number_generator_(included)',\n", + " 'http://www.rosettacode.org/wiki/HTTPS/Client-authenticated',\n", + " 'http://www.rosettacode.org/wiki/Mutual_recursion',\n", + " 'http://www.rosettacode.org/wiki/Boolean_values',\n", + " 'http://www.rosettacode.org/wiki/Conjugate_transpose',\n", + " 'http://www.rosettacode.org/wiki/Run-length_encoding',\n", + " 'http://www.rosettacode.org/wiki/Identity_matrix',\n", + " 'http://www.rosettacode.org/wiki/Anagrams',\n", + " 'http://www.rosettacode.org/wiki/Runge-Kutta_method',\n", + " 'http://www.rosettacode.org/wiki/Mandelbrot_set',\n", + " 'http://www.rosettacode.org/wiki/Zeckendorf_number_representation',\n", + " 'http://www.rosettacode.org/wiki/Literals/Integer',\n", + " 'http://www.rosettacode.org/wiki/RSA_code',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Permutation_sort',\n", + " 'http://www.rosettacode.org/wiki/Euler_method',\n", + " 'http://www.rosettacode.org/wiki/Average_loop_length',\n", + " 
'http://www.rosettacode.org/wiki/Sorting_algorithms/Bogosort',\n", + " 'http://www.rosettacode.org/wiki/Floyd%27s_triangle',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Hiding_the_cursor',\n", + " 'http://www.rosettacode.org/wiki/Loop_over_multiple_arrays_simultaneously',\n", + " 'http://www.rosettacode.org/wiki/Break_OO_privacy',\n", + " 'http://www.rosettacode.org/wiki/Aliquot_sequence_classifications',\n", + " 'http://www.rosettacode.org/wiki/Fibonacci_sequence',\n", + " 'http://www.rosettacode.org/wiki/Determine_if_only_one_instance_is_running',\n", + " 'http://www.rosettacode.org/wiki/Roman_numerals/Decode',\n", + " 'http://www.rosettacode.org/wiki/Phrase_reversals',\n", + " 'http://www.rosettacode.org/wiki/Create_an_HTML_table',\n", + " 'http://www.rosettacode.org/wiki/XML/Output',\n", + " 'http://www.rosettacode.org/wiki/Stack',\n", + " 'http://www.rosettacode.org/wiki/Permutations/Derangements',\n", + " 'http://www.rosettacode.org/wiki/Start_from_a_main_routine',\n", + " 'http://www.rosettacode.org/wiki/Integer_sequence',\n", + " 'http://www.rosettacode.org/wiki/Evolutionary_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Array_concatenation',\n", + " 'http://www.rosettacode.org/wiki/Subleq',\n", + " 'http://www.rosettacode.org/wiki/Primality_by_trial_division',\n", + " 'http://www.rosettacode.org/wiki/RIPEMD-160',\n", + " 'http://www.rosettacode.org/wiki/Haversine_formula',\n", + " 'http://www.rosettacode.org/wiki/Jump_anywhere',\n", + " 'http://www.rosettacode.org/wiki/Environment_variables',\n", + " 'http://www.rosettacode.org/wiki/Caesar_cipher',\n", + " 'http://www.rosettacode.org/wiki/Walk_a_directory/Recursively',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Write_a_PPM_file',\n", + " 'http://www.rosettacode.org/wiki/Matrix_arithmetic',\n", + " 'http://www.rosettacode.org/wiki/Enumerations',\n", + " 'http://www.rosettacode.org/wiki/Gamma_function',\n", + " 'http://www.rosettacode.org/wiki/Draw_a_cuboid',\n", + " 
'http://www.rosettacode.org/wiki/Terminal_control/Unicode_output',\n", + " 'http://www.rosettacode.org/wiki/Hash_join',\n", + " 'http://www.rosettacode.org/wiki/Anagrams/Deranged_anagrams',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Preserve_screen',\n", + " 'http://www.rosettacode.org/wiki/Help:Similar_Sites',\n", + " 'http://www.rosettacode.org/wiki/Program_termination',\n", + " 'http://www.rosettacode.org/wiki/ABC_Problem',\n", + " 'http://www.rosettacode.org/wiki/Inverted_syntax',\n", + " 'http://www.rosettacode.org/wiki/SOAP',\n", + " 'http://www.rosettacode.org/wiki/Top_rank_per_group',\n", + " 'http://www.rosettacode.org/wiki/Calendar',\n", + " 'http://www.rosettacode.org/wiki/Sort_an_array_of_composite_structures',\n", + " 'http://www.rosettacode.org/wiki/Doubly-linked_list/Element_definition',\n", + " 'http://www.rosettacode.org/wiki/Multisplit',\n", + " 'http://www.rosettacode.org/wiki/Nth_root',\n", + " 'http://www.rosettacode.org/wiki/Concurrent_computing',\n", + " 'http://www.rosettacode.org/wiki/List_comprehensions',\n", + " 'http://www.rosettacode.org/wiki/Order_two_numerical_lists',\n", + " 'http://www.rosettacode.org/wiki/Towers_of_Hanoi',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Midpoint_circle_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Doubly-linked_list/Definition',\n", + " 'http://www.rosettacode.org/wiki/Pinstripe/Printer',\n", + " 'http://www.rosettacode.org/wiki/Multiple_regression',\n", + " 'http://www.rosettacode.org/wiki/Topological_sort',\n", + " 'http://www.rosettacode.org/wiki/Collections',\n", + " 'http://www.rosettacode.org/wiki/Compound_data_type',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Read_an_image_through_a_pipe',\n", + " 'http://www.rosettacode.org/wiki/Knapsack_problem/Continuous',\n", + " 'http://www.rosettacode.org/wiki/Cholesky_decomposition',\n", + " 'http://www.rosettacode.org/wiki/Monte_Carlo_methods',\n", + " 'http://www.rosettacode.org/wiki/Best_shuffle',\n", + " 
'http://www.rosettacode.org/wiki/Synchronous_concurrency',\n", + " 'http://www.rosettacode.org/wiki/Strip_comments_from_a_string',\n", + " 'http://www.rosettacode.org/wiki/Write_float_arrays_to_a_text_file',\n", + " 'http://www.rosettacode.org/wiki/Sum_of_squares',\n", + " 'http://www.rosettacode.org/wiki/Add_a_variable_to_a_class_instance_at_runtime',\n", + " 'http://www.rosettacode.org/wiki/Singly-linked_list/Element_definition',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Dimensions',\n", + " 'http://www.rosettacode.org/wiki/Call_a_function_in_a_shared_library',\n", + " 'http://www.rosettacode.org/wiki/MD5/Implementation',\n", + " 'http://www.rosettacode.org/wiki/Plot_coordinate_pairs',\n", + " 'http://www.rosettacode.org/wiki/Currying',\n", + " 'http://www.rosettacode.org/wiki/Continued_fraction/Arithmetic/Construct_from_rational_number',\n", + " 'http://www.rosettacode.org/wiki/Generic_swap',\n", + " 'http://www.rosettacode.org/wiki/User_input/Text',\n", + " 'http://www.rosettacode.org/wiki/Pernicious_numbers',\n", + " 'http://www.rosettacode.org/wiki/Knuth_shuffle',\n", + " 'http://www.rosettacode.org/wiki/Keyboard_input/Obtain_a_Y_or_N_response',\n", + " 'http://www.rosettacode.org/wiki/Events',\n", + " 'http://www.rosettacode.org/wiki/Set_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Vigen%C3%A8re_cipher',\n", + " 'http://www.rosettacode.org/wiki/Bitcoin/address_validation',\n", + " 'http://www.rosettacode.org/wiki/Convert_decimal_number_to_rational',\n", + " 'http://www.rosettacode.org/wiki/Variable_size/Set',\n", + " 'http://www.rosettacode.org/wiki/Pick_random_element',\n", + " 'http://www.rosettacode.org/wiki/Five_weekends',\n", + " 'http://www.rosettacode.org/wiki/Interactive_programming',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Cocktail_sort',\n", + " 'http://www.rosettacode.org/wiki/Call_a_function',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Positional_read',\n", + " 
'http://www.rosettacode.org/wiki/SHA-256',\n", + " 'http://www.rosettacode.org/wiki/Ray-casting_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Call_an_object_method',\n", + " 'http://www.rosettacode.org/wiki/Rename_a_file',\n", + " 'http://www.rosettacode.org/wiki/Random_numbers',\n", + " 'http://www.rosettacode.org/wiki/Balanced_brackets',\n", + " 'http://www.rosettacode.org/wiki/Bernoulli_numbers',\n", + " 'http://www.rosettacode.org/wiki/System_time',\n", + " 'http://www.rosettacode.org/wiki/Flow-control_structures',\n", + " 'http://www.rosettacode.org/wiki/Knuth%27s_algorithm_S',\n", + " 'http://www.rosettacode.org/wiki/Generate_Chess960_starting_position',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Display_an_extended_character',\n", + " 'http://www.rosettacode.org/wiki/Continued_fraction/Arithmetic/G(matrix_NG,_Contined_Fraction_N1,_Contined_Fraction_N2)',\n", + " 'http://www.rosettacode.org/wiki/Deepcopy',\n", + " 'http://www.rosettacode.org/wiki/String_comparison',\n", + " 'http://www.rosettacode.org/wiki/Greatest_element_of_a_list',\n", + " 'http://www.rosettacode.org/wiki/Polynomial_regression',\n", + " 'http://www.rosettacode.org/wiki/9_billion_names_of_God_the_integer',\n", + " 'http://www.rosettacode.org/wiki/Odd_word_problem',\n", + " 'http://www.rosettacode.org/wiki/Read_a_configuration_file',\n", + " 'http://www.rosettacode.org/wiki/Yin_and_yang',\n", + " 'http://www.rosettacode.org/wiki/Fast_Fourier_transform',\n", + " 'http://www.rosettacode.org/wiki/Ternary_logic',\n", + " 'http://www.rosettacode.org/wiki/Prime_decomposition',\n", + " 'http://www.rosettacode.org/wiki/Combinations_and_permutations',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:Village_Pump',\n", + " 'http://www.rosettacode.org/wiki/World_Cup_group_stage',\n", + " 'http://www.rosettacode.org/wiki/Enforced_immutability',\n", + " 'http://www.rosettacode.org/wiki/Dutch_national_flag_problem',\n", + " 'http://www.rosettacode.org/wiki/Search_a_list',\n", + 
" 'http://www.rosettacode.org/wiki/Morse_code',\n", + " 'http://www.rosettacode.org/wiki/Semordnilap',\n", + " 'http://www.rosettacode.org/wiki/State_name_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Window_creation',\n", + " 'http://www.rosettacode.org/wiki/Queue/Usage',\n", + " 'http://www.rosettacode.org/wiki/Box_the_compass',\n", + " 'http://www.rosettacode.org/wiki/Magic_squares_of_odd_order',\n", + " 'http://www.rosettacode.org/wiki/Here_document',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Text',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Gnome_sort',\n", + " 'http://www.rosettacode.org/wiki/Loops/Foreach',\n", + " 'http://www.rosettacode.org/wiki/Price_fraction',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Flood_fill',\n", + " 'http://www.rosettacode.org/wiki/Metronome',\n", + " 'http://www.rosettacode.org/wiki/Window_management',\n", + " 'http://www.rosettacode.org/wiki/Checkpoint_synchronization',\n", + " 'http://www.rosettacode.org/wiki/S-Expressions',\n", + " 'http://www.rosettacode.org/wiki/HTTPS/Authenticated',\n", + " 'http://www.rosettacode.org/wiki/Calendar_-_for_%22REAL%22_programmers',\n", + " 'http://www.rosettacode.org/wiki/Averages/Mean_angle',\n", + " 'http://www.rosettacode.org/wiki/Increment_a_numerical_string',\n", + " 'http://www.rosettacode.org/wiki/Hamming_numbers',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Clear_the_screen',\n", + " 'http://www.rosettacode.org/wiki/Count_occurrences_of_a_substring',\n", + " 'http://www.rosettacode.org/wiki/Arrays',\n", + " 'http://www.rosettacode.org/wiki/Chinese_remainder_theorem',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Selection_sort',\n", + " 'http://www.rosettacode.org/wiki/Miller-Rabin_primality_test',\n", + " 'http://www.rosettacode.org/wiki/Stem-and-leaf_plot',\n", + " 'http://www.rosettacode.org/wiki/Sierpinski_triangle',\n", + " 'http://www.rosettacode.org/wiki/Equilibrium_index',\n", + " 
'http://www.rosettacode.org/wiki/Soundex',\n", + " 'http://www.rosettacode.org/wiki/Date_manipulation',\n", + " 'http://www.rosettacode.org/wiki/Lucas-Lehmer_test',\n", + " 'http://www.rosettacode.org/wiki/Catmull%E2%80%93Clark_subdivision_surface',\n", + " 'http://www.rosettacode.org/wiki/Text_processing/1',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:Add_a_Task',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Cursor_movement',\n", + " 'http://www.rosettacode.org/wiki/Sleep',\n", + " 'http://www.rosettacode.org/wiki/Rep-string',\n", + " 'http://www.rosettacode.org/wiki/Documentation',\n", + " 'http://www.rosettacode.org/wiki/Fractal_tree',\n", + " 'http://www.rosettacode.org/wiki/Color_of_a_screen_pixel',\n", + " 'http://www.rosettacode.org/wiki/Maze_solving',\n", + " 'http://www.rosettacode.org/wiki/Sum_digits_of_an_integer',\n", + " 'http://www.rosettacode.org/wiki/Inverted_index',\n", + " 'http://www.rosettacode.org/wiki/Holidays_related_to_Easter',\n", + " 'http://www.rosettacode.org/wiki/Vector_products',\n", + " 'http://www.rosettacode.org/wiki/Special_characters',\n", + " 'http://www.rosettacode.org/wiki/Stern-Brocot_sequence',\n", + " 'http://www.rosettacode.org/wiki/Catalan_numbers',\n", + " 'http://www.rosettacode.org/wiki/Roman_numerals/Encode',\n", + " 'http://www.rosettacode.org/wiki/Semiprime',\n", + " 'http://www.rosettacode.org/wiki/Reverse_a_string',\n", + " 'http://www.rosettacode.org/wiki/Horner%27s_rule_for_polynomial_evaluation',\n", + " 'http://www.rosettacode.org/wiki/Greatest_common_divisor',\n", + " 'http://www.rosettacode.org/wiki/Speech_synthesis',\n", + " 'http://www.rosettacode.org/wiki/Solve_a_Hopido_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Loops/For_with_a_specified_step',\n", + " 'http://www.rosettacode.org/wiki/Shell_one-liner',\n", + " 'http://www.rosettacode.org/wiki/String_case',\n", + " 'http://www.rosettacode.org/wiki/Special:WebChat',\n", + " 
'http://www.rosettacode.org/wiki/Determine_if_a_string_is_numeric',\n", + " 'http://www.rosettacode.org/wiki/Pythagorean_triples',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Insertion_sort',\n", + " 'http://www.rosettacode.org/wiki/Count_in_octal',\n", + " 'http://www.rosettacode.org/wiki/Arena_storage_pool',\n", + " 'http://www.rosettacode.org/wiki/Word_wrap',\n", + " 'http://www.rosettacode.org/wiki/Averages/Median',\n", + " 'http://www.rosettacode.org/wiki/Function_composition',\n", + " 'http://www.rosettacode.org/wiki/Hickerson_series_of_almost_integers',\n", + " 'http://www.rosettacode.org/wiki/Machine_code',\n", + " 'http://www.rosettacode.org/wiki/Active_Directory/Search_for_a_user',\n", + " 'http://www.rosettacode.org/wiki/Stair-climbing_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Modular_inverse',\n", + " 'http://www.rosettacode.org/wiki/Pascal_matrix_generation',\n", + " 'http://www.rosettacode.org/wiki/Time_a_function',\n", + " 'http://www.rosettacode.org/wiki/Matrix_transposition',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Web_server',\n", + " 'http://www.rosettacode.org/wiki/Trigonometric_functions',\n", + " 'http://www.rosettacode.org/wiki/URL_decoding',\n", + " 'http://www.rosettacode.org/wiki/Non-decimal_radices/Input',\n", + " 'http://www.rosettacode.org/wiki/Sierpinski_carpet',\n", + " 'http://www.rosettacode.org/wiki/Inheritance/Multiple',\n", + " 'http://www.rosettacode.org/wiki/Text_processing/Max_licenses_in_use',\n", + " 'http://www.rosettacode.org/wiki/Circles_of_given_radius_through_two_points',\n", + " 'http://www.rosettacode.org/wiki/Web_scraping',\n", + " 'http://www.rosettacode.org/wiki/Simulate_input/Mouse',\n", + " 'http://www.rosettacode.org/wiki/Roots_of_unity',\n", + " 'http://www.rosettacode.org/wiki/Digital_root',\n", + " 'http://www.rosettacode.org/wiki/Sierpinski_triangle/Graphical',\n", + " 'http://www.rosettacode.org/wiki/Sequence_of_non-squares',\n", + " 
'http://www.rosettacode.org/wiki/Spiral_matrix',\n", + " 'http://www.rosettacode.org/wiki/Strip_block_comments',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Newbie',\n", + " 'http://www.rosettacode.org/wiki/Program_name',\n", + " 'http://www.rosettacode.org/wiki/Guess_the_number',\n", + " 'http://www.rosettacode.org/wiki/Infinity',\n", + " 'http://www.rosettacode.org/wiki/Median_filter',\n", + " 'http://www.rosettacode.org/wiki/Josephus_problem',\n", + " 'http://www.rosettacode.org/wiki/Sort_an_integer_array',\n", + " 'http://www.rosettacode.org/wiki/Align_columns',\n", + " 'http://www.rosettacode.org/wiki/Primes_-_allocate_descendants_to_their_ancestors',\n", + " 'http://www.rosettacode.org/wiki/Execute_a_system_command',\n", + " 'http://www.rosettacode.org/wiki/Sequence_of_primes_by_Trial_Division',\n", + " 'http://www.rosettacode.org/wiki/Function_definition',\n", + " 'http://www.rosettacode.org/wiki/Continued_fraction/Arithmetic/G(matrix_NG,_Contined_Fraction_N)',\n", + " 'http://www.rosettacode.org/wiki/Parsing/RPN_to_infix_conversion',\n", + " 'http://www.rosettacode.org/wiki/Hough_transform',\n", + " 'http://www.rosettacode.org/wiki/CSV_to_HTML_translation',\n", + " 'http://www.rosettacode.org/wiki/Extreme_floating_point_values',\n", + " 'http://www.rosettacode.org/wiki/Knapsack_problem/Unbounded',\n", + " 'http://www.rosettacode.org/wiki/AKS_test_for_primes',\n", + " 'http://www.rosettacode.org/wiki/Monty_Hall_problem',\n", + " 'http://www.rosettacode.org/wiki/Old_lady_swallowed_a_fly',\n", + " 'http://www.rosettacode.org/wiki/DNS_query',\n", + " 'http://www.rosettacode.org/wiki/Rot-13',\n", + " 'http://www.rosettacode.org/wiki/Execute_Brain****',\n", + " 'http://www.rosettacode.org/wiki/Almost_prime',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Merge_sort',\n", + " 'http://www.rosettacode.org/wiki/Animate_a_pendulum',\n", + " 'http://www.rosettacode.org/wiki/Pascal%27s_triangle',\n", + " 
'http://www.rosettacode.org/wiki/Image_noise',\n", + " 'http://www.rosettacode.org/wiki/Atomic_updates',\n", + " 'http://www.rosettacode.org/wiki/Numerical_integration',\n", + " 'http://www.rosettacode.org/wiki/Longest_string_challenge',\n", + " 'http://www.rosettacode.org/wiki/Loops/While',\n", + " 'http://www.rosettacode.org/wiki/Total_circles_area',\n", + " 'http://www.rosettacode.org/wiki/Evaluate_binomial_coefficients',\n", + " 'http://www.rosettacode.org/wiki/Regular_expressions',\n", + " 'http://www.rosettacode.org/wiki/Subtractive_generator',\n", + " 'http://www.rosettacode.org/wiki/Real_constants_and_functions',\n", + " 'http://www.rosettacode.org/wiki/Formal_power_series',\n", + " 'http://www.rosettacode.org/wiki/Respond_to_an_unknown_method_call',\n", + " 'http://www.rosettacode.org/wiki/Multiplication_tables',\n", + " 'http://www.rosettacode.org/wiki/Object_serialization',\n", + " 'http://www.rosettacode.org/wiki/Heronian_triangles',\n", + " 'http://www.rosettacode.org/wiki/Sparkline_in_unicode',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Read_a_PPM_file',\n", + " 'http://www.rosettacode.org/wiki/Averages/Simple_moving_average',\n", + " 'http://www.rosettacode.org/wiki/History_variables',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:Privacy_policy',\n", + " 'http://www.rosettacode.org/wiki/Anonymous_recursion',\n", + " 'http://www.rosettacode.org/wiki/Compile-time_calculation',\n", + " 'http://www.rosettacode.org/wiki/Strip_whitespace_from_a_string/Top_and_tail',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code:General_disclaimer',\n", + " 'http://www.rosettacode.org/wiki/Bulls_and_cows',\n", + " 'http://www.rosettacode.org/wiki/Narcissist',\n", + " 'http://www.rosettacode.org/wiki/SEDOLs',\n", + " 'http://www.rosettacode.org/wiki/Gaussian_elimination',\n", + " 'http://www.rosettacode.org/wiki/Associative_array/Creation',\n", + " 'http://www.rosettacode.org/wiki/Deconvolution/2D%2B',\n", + " 
'http://www.rosettacode.org/wiki/Reduced_row_echelon_form',\n", + " 'http://www.rosettacode.org/wiki/Comments',\n", + " 'http://www.rosettacode.org/wiki/Hailstone_sequence',\n", + " 'http://www.rosettacode.org/wiki/Maze_generation',\n", + " 'http://www.rosettacode.org/wiki/Digital_root/Multiplicative_digital_root',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code/Rank_languages_by_popularity',\n", + " 'http://www.rosettacode.org/wiki/XML/XPath',\n", + " 'http://www.rosettacode.org/wiki/Hostname',\n", + " 'http://www.rosettacode.org/wiki/Input_loop',\n", + " 'http://www.rosettacode.org/wiki/Minesweeper_game',\n", + " 'http://www.rosettacode.org/wiki/Canny_edge_detector',\n", + " 'http://www.rosettacode.org/wiki/Read_entire_file',\n", + " 'http://www.rosettacode.org/wiki/Problem_of_Apollonius',\n", + " 'http://www.rosettacode.org/wiki/Call_a_foreign-language_function',\n", + " 'http://www.rosettacode.org/wiki/Rock-paper-scissors',\n", + " 'http://www.rosettacode.org/wiki/Jensen%27s_Device',\n", + " 'http://www.rosettacode.org/wiki/Sokoban',\n", + " 'http://www.rosettacode.org/wiki/Thiele%27s_interpolation_formula',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/B%C3%A9zier_curves/Quadratic',\n", + " 'http://www.rosettacode.org/wiki/Closest-pair_problem',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Bead_sort',\n", + " 'http://www.rosettacode.org/wiki/Percolation/Mean_cluster_density',\n", + " 'http://www.rosettacode.org/wiki/String_concatenation',\n", + " 'http://www.rosettacode.org/wiki/Percentage_difference_between_images',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Strand_sort',\n", + " 'http://www.rosettacode.org/wiki/Polynomial_long_division',\n", + " 'http://www.rosettacode.org/wiki/Truncate_a_file',\n", + " 'http://www.rosettacode.org/wiki/Verify_distribution_uniformity/Chi-squared_test',\n", + " 'http://www.rosettacode.org/wiki/Text_processing/2',\n", + " 'http://www.rosettacode.org/wiki/Null_object',\n", + " 
'http://www.rosettacode.org/wiki/Permutation_test',\n", + " 'http://www.rosettacode.org/wiki/Forest_fire',\n", + " 'http://www.rosettacode.org/wiki/Left_factorials',\n", + " 'http://www.rosettacode.org/wiki/Longest_common_subsequence',\n", + " 'http://www.rosettacode.org/wiki/Move-to-front_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Solve_a_Holy_Knight%27s_tour',\n", + " 'http://www.rosettacode.org/wiki/Empty_program',\n", + " 'http://www.rosettacode.org/wiki/Verify_distribution_uniformity/Naive',\n", + " 'http://www.rosettacode.org/wiki/Write_language_name_in_3D_ASCII',\n", + " 'http://www.rosettacode.org/wiki/Sudoku',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Sleep_sort',\n", + " 'http://www.rosettacode.org/wiki/Luhn_test_of_credit_card_numbers',\n", + " 'http://www.rosettacode.org/wiki/Empty_string',\n", + " 'http://www.rosettacode.org/wiki/CRC-32',\n", + " 'http://www.rosettacode.org/wiki/XML/DOM_serialization',\n", + " 'http://www.rosettacode.org/wiki/Letter_frequency',\n", + " 'http://www.rosettacode.org/wiki/Special:RecentChangesLinked/Category:Programming_Tasks',\n", + " 'http://www.rosettacode.org/wiki/Longest_increasing_subsequence',\n", + " 'http://www.rosettacode.org/wiki/Maximum_triangle_path_sum',\n", + " 'http://www.rosettacode.org/wiki/Forward_difference',\n", + " 'http://www.rosettacode.org/wiki/Rosetta_Code',\n", + " 'http://www.rosettacode.org/wiki/Terminal_control/Coloured_text',\n", + " 'http://www.rosettacode.org/wiki/Carmichael_3_strong_pseudoprimes',\n", + " 'http://www.rosettacode.org/wiki/Element-wise_operations',\n", + " 'http://www.rosettacode.org/wiki/Record_sound',\n", + " 'http://www.rosettacode.org/wiki/Paraffins',\n", + " 'http://www.rosettacode.org/wiki/Grayscale_image',\n", + " 'http://www.rosettacode.org/wiki/Polymorphism',\n", + " 'http://www.rosettacode.org/wiki/Show_the_epoch',\n", + " 'http://www.rosettacode.org/wiki/Deal_cards_for_FreeCell',\n", + " 
'http://www.rosettacode.org/wiki/Conditional_structures',\n", + " 'http://www.rosettacode.org/wiki/Sort_stability',\n", + " 'http://www.rosettacode.org/wiki/Take_notes_on_the_command_line',\n", + " 'http://www.rosettacode.org/wiki/Voronoi_diagram',\n", + " 'http://www.rosettacode.org/wiki/Zebra_puzzle',\n", + " 'http://www.rosettacode.org/wiki/Horizontal_sundial_calculations',\n", + " 'http://www.rosettacode.org/wiki/Tree_traversal',\n", + " 'http://www.rosettacode.org/wiki/Averages/Mean_time_of_day',\n", + " 'http://www.rosettacode.org/wiki/Rate_counter',\n", + " 'http://www.rosettacode.org/wiki/Order_disjoint_list_items',\n", + " 'http://www.rosettacode.org/wiki/Repeat_a_string',\n", + " 'http://www.rosettacode.org/wiki/99_Bottles_of_Beer',\n", + " 'http://www.rosettacode.org/wiki/Iterated_digits_squaring',\n", + " 'http://www.rosettacode.org/wiki/Sorting_algorithms/Heapsort',\n", + " 'http://www.rosettacode.org/wiki/Logical_operations',\n", + " 'http://www.rosettacode.org/wiki/N%27th',\n", + " 'http://www.rosettacode.org/wiki/Inheritance/Single',\n", + " 'http://www.rosettacode.org/wiki/Non-decimal_radices/Convert',\n", + " 'http://www.rosettacode.org/wiki/Vogel%27s_approximation_method',\n", + " 'http://www.rosettacode.org/wiki/Accumulator_factory',\n", + " 'http://www.rosettacode.org/wiki/Hash_from_two_arrays',\n", + " 'http://www.rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string',\n", + " 'http://www.rosettacode.org/wiki/Narcissistic_decimal_number',\n", + " 'http://www.rosettacode.org/wiki/Permutations',\n", + " 'http://www.rosettacode.org/wiki/Read_a_specific_line_from_a_file',\n", + " 'http://www.rosettacode.org/wiki/Quaternion_type',\n", + " 'http://www.rosettacode.org/wiki/Substring',\n", + " 'http://www.rosettacode.org/wiki/Pangram_checker',\n", + " 'http://www.rosettacode.org/wiki/HTTPS',\n", + " 'http://www.rosettacode.org/wiki/Conway%27s_Game_of_Life',\n", + " 'http://www.rosettacode.org/wiki/Literals/Floating_point',\n", 
+ " 'http://www.rosettacode.org/wiki/Resistor_mesh',\n", + " 'http://www.rosettacode.org/wiki/Leap_year',\n", + " 'http://www.rosettacode.org/wiki/K-means%2B%2B_clustering',\n", + " 'http://www.rosettacode.org/wiki/Continued_fraction',\n", + " 'http://www.rosettacode.org/wiki/Active_object',\n", + " 'http://www.rosettacode.org/wiki/Number_names',\n", + " 'http://www.rosettacode.org/wiki/Number_reversal_game',\n", + " 'http://www.rosettacode.org/wiki/Arithmetic/Complex',\n", + " 'http://www.rosettacode.org/wiki/Parsing/RPN_calculator_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Multifactorial',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Newline_omission',\n", + " 'http://www.rosettacode.org/wiki/Unicode_strings',\n", + " 'http://www.rosettacode.org/wiki/Greatest_subsequential_sum',\n", + " 'http://www.rosettacode.org/wiki/Natural_sorting',\n", + " 'http://www.rosettacode.org/wiki/Short-circuit_evaluation',\n", + " 'http://www.rosettacode.org/wiki/Named_parameters',\n", + " 'http://www.rosettacode.org/wiki/Check_Machin-like_formulas',\n", + " 'http://www.rosettacode.org/wiki/Ranking_methods',\n", + " 'http://www.rosettacode.org/wiki/Sieve_of_Eratosthenes',\n", + " 'http://www.rosettacode.org/wiki/Optional_parameters',\n", + " 'http://www.rosettacode.org/wiki/Bitmap/Bresenham%27s_line_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Random_number_generator_(device)',\n", + " 'http://www.rosettacode.org/wiki/Guess_the_number/With_feedback',\n", + " 'http://www.rosettacode.org/wiki/RCRPG',\n", + " 'http://www.rosettacode.org/wiki/Quickselect_algorithm',\n", + " 'http://www.rosettacode.org/wiki/Percolation/Site_percolation',\n", + " 'http://www.rosettacode.org/wiki/Loops/For',\n", + " 'http://www.rosettacode.org/wiki/Bitwise_operations',\n", + " 'http://www.rosettacode.org/wiki/Hello_world/Standard_error',\n", + " 'http://www.rosettacode.org/wiki/Variables',\n", + " 'http://www.rosettacode.org/wiki/Parse_an_IP_Address',\n", + " 
'http://www.rosettacode.org/wiki/One-dimensional_cellular_automata',\n", + " 'http://www.rosettacode.org/wiki/Dinesman%27s_multiple-dwelling_problem',\n", + " 'http://www.rosettacode.org/wiki/Classes',\n", + " 'http://www.rosettacode.org/wiki/Mad_Libs']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "web_scraper" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0autohotkeyBeadSort(data){\\tPole:=[]\\t, TempObj:=[], Resu...
1autohotkeyfor i, val in BeadSort([54,12,87,56,36])\\tres ...
2c#include <stdio.h>#include <stdlib.h> void bea...
3cpp//this algorithm only works with positive, who...
4clojure(defn transpose [xs] (loop [ret [], remain xs...
5lisp(defun transpose (remain &optional (ret '()))...
6dprogram BeadSortTest; {$APPTYPE CONSOLE} uses ...
7dimport std.stdio, std.algorithm, std.range, st...
8eiffelclass\\tBEAD_SORT feature \\tbead_sort (ar: ARR...
9eiffelclass\\tAPPLICATION create\\tmake feature \\tma...
11erlang-module(beadsort). -export([sort/1]). sort(L) ...
12erlang1> beadsort:sort([1,734,24,3,324,324,32,432,42...
13fsharpopen System let removeEmptyLists lists = lists...
16fortranprogram BeadSortTest use iso_fortran_env ! ...
17gopackage main import ( \"fmt\" \"sync\") var ...
18groovydef beadSort = { list -> final nPoles = lis...
19groovydef beadSortVerbose = { list -> final nPole...
20groovyprintln beadSort([23,76,99,58,97,57,35,89,51,3...
21haskellimport Data.List beadSort :: [Int] -> [Int]bea...
22haskell*Main> beadSort [2,4,1,3,3][4,3,3,2,1]
23iconprocedure main() #: demons...
24jbead=: [: +/ #\"0&1
26jbball=: ] (] + [: bead^:2 -) <./ - 1:
28javapublic class BeadSort {\\tpublic static void ...
36netrexx/* NetRexx */options replace format comments j...
38ocamllet rec columns l = match List.filter ((<>) [...
39octavefunction sorted = beadsort(a) sorted = a; m ...
40oorexxin='10 -12 1 0 999 8 2 2 4 4' Do i=1 To words(...
41oorexx/*REXX program sorts a list of integers using ...
42progressFUNCTION beadSort RETURNS CHAR ( i_c AS CHAR...
.........
222algol68# Non-recursive Knight's Tour with Warnsdorff'...
223autohotkey#SingleInstance, Force#NoEnvSetBatchLines, -1;...
224awk# syntax: GAWK -f KNIGHTS_TOUR.AWK [-v sr=x] ...
227c#include <stdio.h>#include <stdlib.h>#include ...
228cpp#include <iostream>#include <iomanip>#include ...
229csharpusing System;using System.Collections.Generic;...
230coffeescriptgraph_tours = (graph, max_num_solutions) -> ...
232dimport std.stdio, std.algorithm, std.random, s...
233dimport std.stdio, std.math, std.algorithm, std...
234erlang-module( knights_tour ). -export( [display/1,...
236go/* Adapted from \"Enumerating Knight's Tours us...
237haskellimport System (getArgs)import Data.Char (ord,...
238iconlink printf procedure main(A)ShowTour(KnightsT...
239iconprocedure DumpBoard(B) #: Dump Board internal...
240jNB. knight moves for each square of a (y,y) bo...
241jktourw 8 NB. solution for an 8 x 8 board...
242javaimport java.util.*; public class KnightsTour {...
244locobasic10 mode 1:defint a-z20 input \"Board size: \",si...
245luaN = 8 moves = { {1,-2},{2,-1},{2,1},{1,2},{-1,...
257perluse strict;use warnings;# Find a knight's tour...
258perl6my @board; my $I = 8;my $J = 8;my $F = $I*$J >...
261prolog% N is the number of lines of the chessboardkn...
262prolog:- initialization(main).  board_size(8).in_boa...
263pythonimport copy boardsize=6_kmoves = ((2,1), (1,2)...
266rexx/*REXX program solves the knight's tour probl...
267rubyclass Board Cell = Struct.new(:value, :adj) d...
268scheme;;/usr/bin/petite;;encoding:utf-8;;Author:Pan...
269tclpackage require Tcl 8.6; # For object suppo...
270tclset kt [KnightsTour new]$kt constructRandom$kt...
271tclset kt [KnightsTour new 7 7]$kt constructFrom ...
\n", + "

172 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 autohotkey BeadSort(data){\\tPole:=[]\\t, TempObj:=[], Resu...\n", + "1 autohotkey for i, val in BeadSort([54,12,87,56,36])\\tres ...\n", + "2 c #include #include  void bea...\n", + "3 cpp //this algorithm only works with positive, who...\n", + "4 clojure (defn transpose [xs] (loop [ret [], remain xs...\n", + "5 lisp  (defun transpose (remain &optional (ret '()))...\n", + "6 d program BeadSortTest; {$APPTYPE CONSOLE} uses ...\n", + "7 d import std.stdio, std.algorithm, std.range, st...\n", + "8 eiffel  class\\tBEAD_SORT feature \\tbead_sort (ar: ARR...\n", + "9 eiffel   class\\tAPPLICATION create\\tmake feature \\tma...\n", + "11 erlang -module(beadsort). -export([sort/1]). sort(L) ...\n", + "12 erlang 1> beadsort:sort([1,734,24,3,324,324,32,432,42...\n", + "13 fsharp open System let removeEmptyLists lists = lists...\n", + "16 fortran program BeadSortTest use iso_fortran_env ! ...\n", + "17 go package main import ( \"fmt\" \"sync\") var ...\n", + "18 groovy def beadSort = { list -> final nPoles = lis...\n", + "19 groovy def beadSortVerbose = { list -> final nPole...\n", + "20 groovy println beadSort([23,76,99,58,97,57,35,89,51,3...\n", + "21 haskell import Data.List beadSort :: [Int] -> [Int]bea...\n", + "22 haskell *Main> beadSort [2,4,1,3,3][4,3,3,2,1]\n", + "23 icon procedure main() #: demons...\n", + "24 j bead=: [: +/ #\"0&1\n", + "26 j bball=: ] (] + [: bead^:2 -) <./ - 1:\n", + "28 java   public class BeadSort {\\tpublic static void ...\n", + "36 netrexx /* NetRexx */options replace format comments j...\n", + "38 ocaml let rec columns l = match List.filter ((<>) [...\n", + "39 octave function sorted = beadsort(a) sorted = a; m ...\n", + "40 oorexx in='10 -12 1 0 999 8 2 2 4 4' Do i=1 To words(...\n", + "41 oorexx /*REXX program sorts a list of integers using ...\n", + "42 progress FUNCTION beadSort RETURNS CHAR ( i_c AS CHAR...\n", + ".. ... 
...\n", + "222 algol68 # Non-recursive Knight's Tour with Warnsdorff'...\n", + "223 autohotkey #SingleInstance, Force#NoEnvSetBatchLines, -1;...\n", + "224 awk  # syntax: GAWK -f KNIGHTS_TOUR.AWK [-v sr=x] ...\n", + "227 c #include #include #include ...\n", + "228 cpp #include #include #include ...\n", + "229 csharp using System;using System.Collections.Generic;...\n", + "230 coffeescript  graph_tours = (graph, max_num_solutions) -> ...\n", + "232 d import std.stdio, std.algorithm, std.random, s...\n", + "233 d import std.stdio, std.math, std.algorithm, std...\n", + "234 erlang  -module( knights_tour ). -export( [display/1,...\n", + "236 go /* Adapted from \"Enumerating Knight's Tours us...\n", + "237 haskell  import System (getArgs)import Data.Char (ord,...\n", + "238 icon link printf procedure main(A)ShowTour(KnightsT...\n", + "239 icon procedure DumpBoard(B) #: Dump Board internal...\n", + "240 j NB. knight moves for each square of a (y,y) bo...\n", + "241 j ktourw 8 NB. solution for an 8 x 8 board...\n", + "242 java import java.util.*; public class KnightsTour {...\n", + "244 locobasic 10 mode 1:defint a-z20 input \"Board size: \",si...\n", + "245 lua N = 8 moves = { {1,-2},{2,-1},{2,1},{1,2},{-1,...\n", + "257 perl use strict;use warnings;# Find a knight's tour...\n", + "258 perl6 my @board; my $I = 8;my $J = 8;my $F = $I*$J >...\n", + "261 prolog % N is the number of lines of the chessboardkn...\n", + "262 prolog :- initialization(main).  
board_size(8).in_boa...\n", + "263 python import copy boardsize=6_kmoves = ((2,1), (1,2)...\n", + "266 rexx /*REXX program solves the knight's tour probl...\n", + "267 ruby class Board Cell = Struct.new(:value, :adj) d...\n", + "268 scheme  ;;/usr/bin/petite;;encoding:utf-8;;Author:Pan...\n", + "269 tcl package require Tcl 8.6; # For object suppo...\n", + "270 tcl set kt [KnightsTour new]$kt constructRandom$kt...\n", + "271 tcl set kt [KnightsTour new 7 7]$kt constructFrom ...\n", + "\n", + "[172 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = scrape_and_clean(5)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0autohotkeyBeadSort(data){\\tPole:=[]\\t, TempObj:=[], Resu...
1autohotkeyfor i, val in BeadSort([54,12,87,56,36])\\tres ...
2c#include <stdio.h>#include <stdlib.h> void bea...
3cpp//this algorithm only works with positive, who...
4clojure(defn transpose [xs] (loop [ret [], remain xs...
5lisp(defun transpose (remain &optional (ret '()))...
6dprogram BeadSortTest; {$APPTYPE CONSOLE} uses ...
7dimport std.stdio, std.algorithm, std.range, st...
8eiffelclass\\tBEAD_SORT feature \\tbead_sort (ar: ARR...
9eiffelclass\\tAPPLICATION create\\tmake feature \\tma...
11erlang-module(beadsort). -export([sort/1]). sort(L) ...
12erlang1> beadsort:sort([1,734,24,3,324,324,32,432,42...
13fsharpopen System let removeEmptyLists lists = lists...
16fortranprogram BeadSortTest use iso_fortran_env ! ...
17gopackage main import ( \"fmt\" \"sync\") var ...
18groovydef beadSort = { list -> final nPoles = lis...
19groovydef beadSortVerbose = { list -> final nPole...
20groovyprintln beadSort([23,76,99,58,97,57,35,89,51,3...
21haskellimport Data.List beadSort :: [Int] -> [Int]bea...
22haskell*Main> beadSort [2,4,1,3,3][4,3,3,2,1]
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 autohotkey BeadSort(data){\\tPole:=[]\\t, TempObj:=[], Resu...\n", + "1 autohotkey for i, val in BeadSort([54,12,87,56,36])\\tres ...\n", + "2 c #include #include  void bea...\n", + "3 cpp //this algorithm only works with positive, who...\n", + "4 clojure (defn transpose [xs] (loop [ret [], remain xs...\n", + "5 lisp  (defun transpose (remain &optional (ret '()))...\n", + "6 d program BeadSortTest; {$APPTYPE CONSOLE} uses ...\n", + "7 d import std.stdio, std.algorithm, std.range, st...\n", + "8 eiffel  class\\tBEAD_SORT feature \\tbead_sort (ar: ARR...\n", + "9 eiffel   class\\tAPPLICATION create\\tmake feature \\tma...\n", + "11 erlang -module(beadsort). -export([sort/1]). sort(L) ...\n", + "12 erlang 1> beadsort:sort([1,734,24,3,324,324,32,432,42...\n", + "13 fsharp open System let removeEmptyLists lists = lists...\n", + "16 fortran program BeadSortTest use iso_fortran_env ! ...\n", + "17 go package main import ( \"fmt\" \"sync\") var ...\n", + "18 groovy def beadSort = { list -> final nPoles = lis...\n", + "19 groovy def beadSortVerbose = { list -> final nPole...\n", + "20 groovy println beadSort([23,76,99,58,97,57,35,89,51,3...\n", + "21 haskell import Data.List beadSort :: [Int] -> [Int]bea...\n", + "22 haskell *Main> beadSort [2,4,1,3,3][4,3,3,2,1]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "new_df = df[df[0]!='text']" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0adawith Glib; use Glib;with Cairo;...
1adawith Display; use Display;with Display.Basic;...
2ahk#NoEnvSetBatchLines, -1#SingleInstance, Force ...
6lbWindowWidth =420WindowHeight =460 nomainwin ...
7purebasic; Original by Comtois @ 28/03/06;; Updated/For...
8qbasicSCREEN 13 ' enter high-color graphic mode ' se...
13c#include <stdio.h>#include <stdlib.h>#include ...
14c#include <stdio.h>#include <stdlib.h>#include ...
15javausing System; namespace Sphere { internal c...
16clojure(use 'quil.core) (def w 500)(def h 400) (defn...
17dimport std.stdio, std.math, std.algorithm, std...
18delphiprogram DrawASphere; {$APPTYPE CONSOLE} uses ...
19delphitype TFloat3 = array[0..2] of Float; var ...
21gopackage main import ( \"fmt\" \"image\" \"...
22haskellimport Graphics.Rendering.OpenGL.GLimport Grap...
23uniconprocedure main()W := open(\"Demo\", \"gl\", \"size=...
24jload 'system/examples/graphics/opengl/simple/s...
25j'R k ambient' =. 10 2 0.4light =. (% +/&.:*:) ...
26javapublic class Sphere{ static char[] shades =...
27javascript<!DOCTYPE html><html><head><meta charset=\"utf-...
32luarequire (\"math\") shades = {'.', ':', '!', '*',...
37perluse strict;use warnings; my $x = my $y = 255;$...
38perl6my $x = my $y = 255;$x +|= 1; # must be odd my...
42povraycamera { location <0.0 , .8 ,-3.0> look_at 0...
44pythonimport math shades = ('.',':','!','*','o','e',...
45pythonimport pygamefrom pygame.locals import *impor...
46pythonclass Tricubic: def __init__(self,pts): ...
47pythonfrom visual import *scene.title = \"VPython: Dr...
48pythonfrom __future__ import print_function, divisi...
50rexx/*REXX program expresses a lighted sphere with...
.........
151ozdeclare [HTTPClient] = {Module.link ['x-ozlib...
152perlpackage YahooSearch; use Encode;use HTTP::Cook...
154pythonimport urllibimport re def fix(x): p = re.c...
157rubyrequire 'open-uri'require 'hpricot' SearchResu...
159tclpackage require http proc fix s { string ma...
160tclpackage require Tcl 8.6 oo::class create WebSe...
161tclpackage require Tcl 8.6 proc yahoo! term { ...
162tclpackage require Tcl 8.6package require httppac...
165c#include <stdio.h>#include <stdlib.h>#include ...
166lisp;; Assemble the mxn matrix A from the 2D row v...
167lisp(setf f #2A((-3 -6 -1 8 -6 3 -1 -9 -9 3 -2 5 2...
168dT[] deconv(T)(in T[] g, in T[] f) pure nothrow...
169fortran! Build! Windows: ifort /I \"%IFORT_COMPILER11...
170fortrandeconv(f, g) = -8, -9, -3, -1, -6, 7deconv(h,...
171gopackage main import \"fmt\" func main() { h :...
172gopackage main import ( \"fmt\" \"math\" \"m...
173haskellimport Data.List h, f, g :: [Double]h = [-8,-9...
174haskell*Main> h == deconv1d g fTrue *Main> f == decon...
175jAi=: (i.@] =/ i.@[ -/ i.@>:@-)&#divide=: [ +/...
176jh=: _8 _9 _3 _1 _6 7f=: _3 _6 _1 8 _6 3 _1 _9 ...
177jg divide f_8 _9 _3 _1 _6 7 g divide h_3 _...
178jdivide=: [ +/ .*~ [:%. ] +/ .* Ai
179javaimport java.util.Arrays; public class Deconvol...
180luafunction deconvolve(f, g) local h = setmetat...
181lualocal f = {-3,-6,-1,8,-6,3,-1,-9,-9,3,-2,5,2,...
183matlab>> h = [-8,-9,-3,-1,-6,7];>> g = [24,75,71,-34...
184perl6sub deconvolve (@g, @f) { my $h = 1 + @g - ...
187pythondef ToReducedRowEchelonForm( M ): if not M:...
194tclpackage require Tcl 8.5namespace eval 1D { ...
195tcl# Simple pretty-printerproc pp {name nlist} { ...
\n", + "

126 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 ada with Glib; use Glib;with Cairo;...\n", + "1 ada  with Display; use Display;with Display.Basic;...\n", + "2 ahk #NoEnvSetBatchLines, -1#SingleInstance, Force ...\n", + "6 lb  WindowWidth =420WindowHeight =460 nomainwin ...\n", + "7 purebasic ; Original by Comtois @ 28/03/06;; Updated/For...\n", + "8 qbasic SCREEN 13 ' enter high-color graphic mode ' se...\n", + "13 c #include #include #include ...\n", + "14 c #include #include #include ...\n", + "15 java using System; namespace Sphere { internal c...\n", + "16 clojure  (use 'quil.core) (def w 500)(def h 400) (defn...\n", + "17 d import std.stdio, std.math, std.algorithm, std...\n", + "18 delphi  program DrawASphere; {$APPTYPE CONSOLE} uses ...\n", + "19 delphi  type TFloat3 = array[0..2] of Float; var ...\n", + "21 go package main import ( \"fmt\" \"image\" \"...\n", + "22 haskell import Graphics.Rendering.OpenGL.GLimport Grap...\n", + "23 unicon procedure main()W := open(\"Demo\", \"gl\", \"size=...\n", + "24 j load 'system/examples/graphics/opengl/simple/s...\n", + "25 j 'R k ambient' =. 10 2 0.4light =. (% +/&.:*:) ...\n", + "26 java public class Sphere{ static char[] shades =...\n", + "27 javascript look_at 0...\n", + "44 python import math shades = ('.',':','!','*','o','e',...\n", + "45 python  import pygamefrom pygame.locals import *impor...\n", + "46 python  class Tricubic: def __init__(self,pts): ...\n", + "47 python from visual import *scene.title = \"VPython: Dr...\n", + "48 python  from __future__ import print_function, divisi...\n", + "50 rexx /*REXX program expresses a lighted sphere with...\n", + ".. ... 
...\n", + "151 oz declare [HTTPClient] = {Module.link ['x-ozlib...\n", + "152 perl package YahooSearch; use Encode;use HTTP::Cook...\n", + "154 python import urllibimport re def fix(x): p = re.c...\n", + "157 ruby require 'open-uri'require 'hpricot' SearchResu...\n", + "159 tcl package require http proc fix s { string ma...\n", + "160 tcl package require Tcl 8.6 oo::class create WebSe...\n", + "161 tcl package require Tcl 8.6 proc yahoo! term { ...\n", + "162 tcl package require Tcl 8.6package require httppac...\n", + "165 c #include #include #include ...\n", + "166 lisp ;; Assemble the mxn matrix A from the 2D row v...\n", + "167 lisp (setf f #2A((-3 -6 -1 8 -6 3 -1 -9 -9 3 -2 5 2...\n", + "168 d T[] deconv(T)(in T[] g, in T[] f) pure nothrow...\n", + "169 fortran  ! Build! Windows: ifort /I \"%IFORT_COMPILER11...\n", + "170 fortran  deconv(f, g) = -8, -9, -3, -1, -6, 7deconv(h,...\n", + "171 go package main import \"fmt\" func main() { h :...\n", + "172 go package main import ( \"fmt\" \"math\" \"m...\n", + "173 haskell import Data.List h, f, g :: [Double]h = [-8,-9...\n", + "174 haskell *Main> h == deconv1d g fTrue *Main> f == decon...\n", + "175 j Ai=: (i.@] =/ i.@[ -/ i.@>:@-)&#divide=: [ +/...\n", + "176 j h=: _8 _9 _3 _1 _6 7f=: _3 _6 _1 8 _6 3 _1 _9 ...\n", + "177 j g divide f_8 _9 _3 _1 _6 7 g divide h_3 _...\n", + "178 j divide=: [ +/ .*~ [:%. 
] +/ .* Ai\n", + "179 java import java.util.Arrays; public class Deconvol...\n", + "180 lua function deconvolve(f, g) local h = setmetat...\n", + "181 lua  local f = {-3,-6,-1,8,-6,3,-1,-9,-9,3,-2,5,2,...\n", + "183 matlab >> h = [-8,-9,-3,-1,-6,7];>> g = [24,75,71,-34...\n", + "184 perl6 sub deconvolve (@g, @f) { my $h = 1 + @g - ...\n", + "187 python def ToReducedRowEchelonForm( M ): if not M:...\n", + "194 tcl package require Tcl 8.5namespace eval 1D { ...\n", + "195 tcl # Simple pretty-printerproc pp {name nlist} { ...\n", + "\n", + "[126 rows x 2 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/requirements.txt b/requirements.txt index 473a3b2..894d818 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ scipy pandas numpy matplotlib +beautifulsoup4 diff --git a/scraper.py b/scraper.py new file mode 100644 index 0000000..0d7ab68 --- /dev/null +++ b/scraper.py @@ -0,0 +1,118 @@ +from bs4 import BeautifulSoup +import urllib +from re import findall +import pandas as pd +import random +from sklearn.naive_bayes import MultinomialNB +from sklearn.ensemble import RandomForestClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.cross_validation import train_test_split +from sklearn.pipeline import Pipeline +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.metrics 
import classification_report +from sklearn.cross_validation import cross_val_score +import pickle + + +def scrape_data(url): + """Must provide url. Returns a list of soup objects""" + req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'}) + content = urllib.request.urlopen(req).read() + soup = BeautifulSoup(content) + return soup.find_all( "pre", class_="highlighted_source") + #pre is an html tag. We want all text from pre with class highlighted_source + #returns a list of soup objects + + +def pull_code_from_soup(soup_list): + """Takes list of soup objects and returns list of code as string.""" + return [[soup_list[i]['class'][0], soup_list[i].get_text()] for i in range(len(soup_list))] + + +def make_data(url_list): + """Makes dataframe with code examples""" + code_snippets = pd.DataFrame(columns=([0, 1])) + for url in url_list: + soup_list = scrape_data(url) + code_snippets = code_snippets.append(pd.DataFrame(pull_code_from_soup(soup_list)), ignore_index=True) + return code_snippets + + +def scrape_links(): + """There are over 700 example tasks on Rosetta Code. 
Returns a list of the web addresses for all tasks.""" + req = urllib.request.Request('http://rosettacode.org/wiki/Category:Programming_Tasks', headers={'User-Agent': 'Mozilla/5.0'}) + content = urllib.request.urlopen(req).read() + soup = BeautifulSoup(content) + link_list = [link.get('href') for link in soup.find_all('a')] + return ["http://www.rosettacode.org{}".format(link) for link in link_list[1:] if link.startswith('/wiki/')] + + +def make_links_list(num_links=30): + """Randomly selects which examples tasks to include.""" + return random.sample(scrape_links(), num_links) + + +def scrape_and_clean(num_links=30): + df = make_data(make_links_list(num_links)) + new_df = df[df[0]!='text'] + return new_df + +def scrape_clean_cut(num_links=100, min_examples=40, save=False): + """Scrapes the data using previous functions, + cleans the data by removing all the text files that are not code, and + cuts the data to only the number of links desired. Returns a dataframe.""" + df = make_data(make_links_list(num_links)) + new_df = df[df[0]!='text'] + new_df = new_df.groupby(0).filter(lambda x: len(x) >= min_examples) + if save: + new_df.to_pickle("scraper_{}x{}.pkl".format(num_links, min_examples)) + return new_df + + +def scrape_clean_cut_filter(num_links=100, min_examples=40, save=False): + """Scrapes the data using previous functions, + cleans the data by removing all the text files that are not code, + cuts the data to only the number of links desired, + and filters to only the languages in the test file. 
Returns a dataframe.""" + df = make_data(make_links_list(num_links)) + df = df[df[0]!='text'] + new_df = df[(df[0] == 'clojure') | (df[0] == 'haskell') | (df[0] == 'java') | (df[0] == 'javascript') + | (df[0] == 'ocaml') | (df[0] == 'php') | (df[0] == 'python') | (df[0] == 'ruby') + | (df[0] == 'scala') | (df[0] == 'scheme') | (df[0] == 'tcl')] + + new_df = new_df.groupby(0).filter(lambda x: len(x) >= min_examples) + if save: + new_df.to_pickle("scraper_filtered_{}x{}.pkl".format(num_links, min_examples)) + return new_df + +def pipeline_runner(dataframe, estimator='Multinomial', report=False): + """Runs train_test_split and pipline function and returns pipe.score. + Must provide the dataframe, and estimator. + If report=True the classification report will print instead of the pipe.score.""" + + #Re-testing with MultinomialNB + y = dataframe.loc[:, 0] + X = dataframe.loc[:, 1] + #splitting data + X_train, X_test, y_train, y_test = train_test_split(X, y) + #running pipe to vectorize and run estimator + if estimator == 'Multinomial': + estimator_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('mnb', MultinomialNB())]) + elif estimator == 'KNeighbors': + estimator_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('knn', KNeighborsClassifier())]) + elif estimator == 'Forest': + estimator_pipe = Pipeline([('bag_of_words', CountVectorizer()), + ('forest', RandomForestClassifier())]) + else: + return pipeline_runner(dataframe, estimator) + #fitting + estimator_pipe.fit(X_train, y_train) + #checking score + if report: + return (classification_report(estimator_pipe.predict(X_test), y_test)) + else: + return estimator_pipe.score(X_train, y_train), estimator_pipe.score(X_test, y_test) + + diff --git a/scraper_700x200.pkl b/scraper_700x200.pkl new file mode 100644 index 0000000..bcd3cd1 Binary files /dev/null and b/scraper_700x200.pkl differ diff --git a/scraper_filtered_700x1.pkl b/scraper_filtered_700x1.pkl new file mode 100644 index 0000000..dd537cf Binary 
files /dev/null and b/scraper_filtered_700x1.pkl differ