This repository was archived by the owner on Dec 12, 2021. It is now read-only.
crawlic.py: 61 changes (32 additions, 29 deletions)
@@ -35,31 +35,12 @@ def crawl(self, data):
Load configuration files
"""

-def loadDorks(dorks_file):
-    """ Load dorks from dorks file """
-    dorks_list = []
-    for line in [line.strip() for line in open(dorks_file)]:
-        dorks_list.append(line)
-    return dorks_list
-
-def loadExtensions(extensions_file):
-    """ Load extensions from extensions file """
-    extensions_list = []
-    for line in [line.strip() for line in open(extensions_file)]:
-        extensions_list.append("(.*)%s" % line)
-    return extensions_list
-
-def loadUserAgents(user_agent_file):
-    """ Load user agents from user_agent file """
-    for line in [line.strip() for line in open(user_agent_file)]:
-        user_agent_list.append(line)
-
-def loadGoogleDorks(google_dorks_file):
-    """ Load google dorks from google_dorks_file """
-    google_dorks_list = []
-    for line in [line.strip() for line in open(google_dorks_file)]:
-        google_dorks_list.append(line)
-    return google_dorks_list

+def loadList(filepath, callback=lambda s: s):
+    """Load a list file. Apply a callback on each value if more processing
+    is needed.
+    """
+    return [callback(l.strip()) for l in open(filepath)]

"""
Usefull methods
@@ -223,26 +204,48 @@ def main():

    # Make sure the host is up
    print "[*] Probe host %s" % args.url

+    try:
+        requests.head(args.url)
+    except requests.exceptions.ConnectionError:
+        print '[!] Url %s not reachable or is down. Aborting' % args.url
+        return

    # Load configuration from files
-    loadUserAgents(args.user_agent)
-    Crawlic.extension_list = loadDorks(args.dorks)
+    try:
+        user_agent_list.extend(loadList(args.user_agent))
+    except IOError:
+        print '[!] User agent list %s doesn\'t exist' % args.user_agent
+        return

+    try:
+        Crawlic.extension_list = loadList(args.dorks)
+    except IOError:
+        print '[!] Dorks list %s doesn\'t exist' % args.dorks
+        return

    page_not_found_pattern = getPageNotFoundPattern(args.url)
-    google_dorks = loadGoogleDorks(args.google_dorks)

+    try:
+        google_dorks = loadList(args.google_dorks)
+    except IOError:
+        print '[!] Google dorks list %s doesn\'t exist' % args.google_dorks
+        return

    # Configure crawler
    Crawlic.page_not_found_pattern = page_not_found_pattern
+    try:
+        valid_links = loadList(args.extensions, lambda s: '(.*%s)' % s)
+    except IOError:
+        print '[!] Extension list %s doesn\'t exist' % args.extensions
+        return

    Crawlic.settings = {
        'domain': domain,
        'start_page': '/',
        'stay_in_domain' : True,
        'protocol': protocol + "://",
-        'valid_links': loadExtensions(args.extensions),
+        'valid_links': valid_links,
        'headers' : {
            'Referer': domain,
            'User-Agent': getRandomUserAgent()
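
For reference, a minimal, self-contained sketch of the loadList pattern this change introduces, showing both the plain call and the callback form used for the extensions list; the file names below are placeholders for illustration, not files shipped with the repository.

def loadList(filepath, callback=lambda s: s):
    """Load a list file, applying a callback to each stripped line."""
    return [callback(l.strip()) for l in open(filepath)]

try:
    # One user agent per line, used as-is (placeholder file name).
    user_agents = loadList('user_agents.txt')
    # Each extension wrapped into a regex, mirroring the callback main() passes.
    valid_links = loadList('extensions.txt', lambda s: '(.*%s)' % s)
except IOError:
    # A missing configuration file aborts early, as main() does after this change.
    raise SystemExit('[!] Configuration file missing')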