diff --git a/scrape.py b/scrape.py
index aad16da..112e14a 100644
--- a/scrape.py
+++ b/scrape.py
@@ -1,7 +1,8 @@
+#!/usr/bin/env python
+
 import re
 import os
-import passwords
-import sys
+import getpass
 from subprocess import Popen
 from mechanize import Browser
 from BeautifulSoup import BeautifulSoup
@@ -29,12 +30,12 @@ def download(work):
 
-# Import from a module outside of version control your SUNET id and password
+# Prompt interactively for your SUNET id and password instead of importing them from a module
 br.select_form(name="login")
-br["username"] = passwords.my_username
-br["password"] = passwords.my_password
+br["username"] = raw_input('Username: ')
+br["password"] = getpass.getpass()
 
 # Open the course page for the title you're looking for
 response = br.submit()
-response = br.follow_link(text=sys.argv[1])
+response = br.follow_link(text=raw_input('Class Description: '))
 
 # Build up a list of lectures
 links = []
@@ -44,6 +45,7 @@ def download(work):
 videos = []
 # These are fetched serially purely to avoid looking suspicious; we could probably parallelize this as well
 for link in links:
+    print "Reading contents of %s" % link
     response = br.open(link)
     soup = BeautifulSoup(response.read())
     video = soup.find('object', id='WMPlayer')['data']
@@ -51,6 +53,8 @@ def download(work):
     output_name = re.search(r"[a-z]+[0-9]+[a-z]?/[0-9]+",video).group(0).replace("/","_") + ".wmv"
     videos.append((video, output_name))
 
+print "Downloading %d videos..." % len(videos)
+
 # Make a process pool and download 5 files at a time
 p = Pool(processes=5)
 p.map(download, videos)
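
For anyone following along, here is a minimal, self-contained sketch of the interactive prompt pattern the diff switches to. It assumes Python 2, matching the script's raw_input and print statements; getpass.getpass reads the password without echoing it to the terminal:

    #!/usr/bin/env python
    # Minimal sketch of the credential prompt used above (Python 2).
    import getpass

    username = raw_input('Username: ')   # echoed normally, fine for a username
    password = getpass.getpass()         # prompts 'Password: ' with echo suppressed

    # The credentials now live only in memory for the lifetime of the process,
    # instead of sitting in a passwords.py that could leak into version control.
    print "Got credentials for %s" % username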
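And since the unchanged context at the bottom of the diff depends on it, a quick sketch of the multiprocessing.Pool fan-out that consumes the (video, output_name) tuples. Note that despite what the old comment said, Pool spawns worker processes, not threads; the URLs and filenames below are made up for illustration:

    from multiprocessing import Pool

    def download(work):
        # Each worker receives one (url, output_name) tuple from p.map.
        url, output_name = work
        print "Would fetch %s -> %s" % (url, output_name)

    if __name__ == '__main__':
        videos = [('http://example.com/100.wmv', 'cs229_100.wmv'),
                  ('http://example.com/101.wmv', 'cs229_101.wmv')]
        p = Pool(processes=5)     # at most 5 downloads in flight at once
        p.map(download, videos)   # blocks until every download returns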