diff --git a/pcapscanner/analyzers/conversations.py b/pcapscanner/analyzers/conversations.py index d7e5cae..0617b83 100644 --- a/pcapscanner/analyzers/conversations.py +++ b/pcapscanner/analyzers/conversations.py @@ -1,10 +1,10 @@ -from multiprocessing import Manager +from multiprocessing import Lock import csv import os CSVFN = "conversations.csv" -manager = Manager() +lock = Lock() def __add_protocol(storage, pkt): @@ -20,7 +20,7 @@ def __add_port(storage, pkt): port = str(pkt.port_dst) if port not in storage.keys(): - storage[port] = manager.dict() + storage[port] = dict() __add_protocol(storage[port], pkt) @@ -28,12 +28,12 @@ def __add_dst_addr(storage, pkt): dst_addr = str(pkt.ip_dst) if dst_addr not in storage.keys(): - storage[dst_addr] = manager.dict() + storage[dst_addr] = dict() __add_port(storage[dst_addr], pkt) def init(): - setattr(analyze, 'storage', manager.dict()) + setattr(analyze, 'storage', dict()) def log(outputdir): @@ -52,15 +52,17 @@ def log(outputdir): def analyze(pkt): """ Count conversations between hosts. """ - + lock.acquire() conversations = analyze.storage try: src_addr = str(pkt.ip_src) if src_addr not in conversations.keys(): - conversations[src_addr] = manager.dict() + conversations[src_addr] = dict() __add_dst_addr(conversations[src_addr], pkt) except AttributeError as e: # ignore packets that aren't TCP/UDP or IPv4 pass + finally: + lock.release() diff --git a/pcapscanner/analyzers/hosts.py b/pcapscanner/analyzers/hosts.py index e70b3e6..2bf1e6d 100644 --- a/pcapscanner/analyzers/hosts.py +++ b/pcapscanner/analyzers/hosts.py @@ -1,14 +1,14 @@ -from multiprocessing import Manager +from multiprocessing import Lock import csv import os CSVFN = "hostcounter.csv" -manager = Manager() +lock = Lock() def init(): - setattr(analyze, 'storage', manager.dict()) + setattr(analyze, 'storage', dict()) def log(outputdir): @@ -20,7 +20,7 @@ def log(outputdir): def analyze(pkt): """ Count the occurences of all host either as src or dest. """ - + lock.acquire() hosts = analyze.storage try: src_addr = str(pkt.ip_src) @@ -39,3 +39,5 @@ def analyze(pkt): except AttributeError as e: # ignore packets that aren't TCP/UDP or IPv4 pass + finally: + lock.release() diff --git a/pcapscanner/main.py b/pcapscanner/main.py index 6cf9570..948a86c 100755 --- a/pcapscanner/main.py +++ b/pcapscanner/main.py @@ -11,7 +11,7 @@ import os import csv import time -from multiprocessing import Pool +from multiprocessing.dummy import Pool from analyzers import hosts, conversations import pcap @@ -38,6 +38,7 @@ """ + class Main: def __init__(self, outputdir, inputdir, parser): @@ -98,7 +99,8 @@ def start(self): # asynchronously pool.apply_async( pcap.process_pcap, - (fn, [a.analyze for a in ANALYZERS], progressbar_position, self.parser) + (fn, [a.analyze for a in ANALYZERS], + progressbar_position, self.parser) ) # close pool diff --git a/pcapscanner/pcap.py b/pcapscanner/pcap.py index 3a232ba..0d3dd40 100644 --- a/pcapscanner/pcap.py +++ b/pcapscanner/pcap.py @@ -4,7 +4,6 @@ import gzip import dpkt from enum import Enum -from dpkt.compat import compat_ord import pyshark import socket @@ -18,6 +17,7 @@ from datetime import datetime as dt from collections import namedtuple + """ This is the destination format of parsed pcap packages to decouple PCAP parser data structures from analysers code @@ -34,6 +34,7 @@ 'timestamp' ]) + class Parser(Enum): DPKT = 'dpkt' PYPACKER = 'pypacker' @@ -67,7 +68,8 @@ def sort_by_date(a, b): # in case we have no valid timestamp return 0 if aDateStr is None or bDateStr is None: print( - "sort_by_date: Was not able to extract timestamp comparing {} to {}". + "sort_by_date: Was not able to extract timestamp " + "comparing {} to {}". format(aBase, bBase) ) return 0 @@ -128,12 +130,12 @@ def parser_dpkt(pcapfile, progressbar_position): Parsing the RawIP encapsulated PCAPs using dpkt. Expects an unpacked file ref. https://pypi.python.org/pypi/dpkt """ - out=[] + out = [] try: pcap = dpkt.pcap.Reader(pcapfile) print("SUCCESS ", pcapfile.name) - for ts,buf in tqdm( + for ts, buf in tqdm( pcap, position=progressbar_position, unit=" packages", @@ -146,7 +148,7 @@ def parser_dpkt(pcapfile, progressbar_position): # fetch the infos we need # we use socket to convert inet IPv4 IP to human readable IP # socket.inet_ntop(socket.AF_INET, inet) - #FIXME: get MAC adress + # FIXME: get MAC adress parsedPkg = ParsedPackage( protocol=ip.p, ip_src=socket.inet_ntop(socket.AF_INET, ip.src), @@ -164,9 +166,10 @@ def parser_dpkt(pcapfile, progressbar_position): pass except ValueError: print( - "ValueError happend as packages where parsed. We expect RawIP " - "encapsulated PCAPs, maybe now we have a Ethernet encapsulated " - "one. Abort.") + "ValueError happend as packages where parsed. " + "We expect RawIP encapsulated PCAPs, maybe now " + "we have a Ethernet encapsulated one. Abort." + ) raise except KeyboardInterrupt: raise @@ -180,12 +183,12 @@ def parser_dpkt(pcapfile, progressbar_position): def parser_pyshark(pcapfile, progressbar_position): """ - Uses tshark CLI in a bash subprocess, parses stdout. Slow but works well with - pcap.gz and pcap files. + Uses tshark CLI in a bash subprocess, parses stdout. Slow but + works well with pcap.gz and pcap files. https://github.com/KimiNewt/pyshark """ out = [] - cap = pyshark.FileCapture(os.path.abspath(pcapfile.name), only_summaries=False) + cap = pyshark.FileCapture(os.path.abspath(pcapfile.name)) # read array (to resolve futures) and return only the information # we need to decouple data structures from analysers code @@ -219,9 +222,11 @@ def parser_pyshark(pcapfile, progressbar_position): def parser_pypacker(pcapfile, progressbar_position): """ Does not work! - Very fast, reads only .pcap (no .gz). Problem is it reads PCAPs with LinkType - Ethernet, but our dumps are RawIP. We can iterate and print the raw package - details, but parsing the packages does not work out of the box (because of RawIP). + Very fast, reads only .pcap (no .gz). Problem is it reads PCAPs + with LinkType Ethernet, but our dumps are RawIP. We can iterate and + print the raw package details, but parsing the packages does not + work out of the box (because of RawIP). + https://github.com/mike01/pypacker for encapsulation RawIP or Ethernet see here: @@ -232,7 +237,7 @@ def parser_pypacker(pcapfile, progressbar_position): # read array (to resolve futures) and return only the information # we need (to reduce memory needed) - for ts,buf in tqdm( + for ts, buf in tqdm( cap, position=progressbar_position, unit=" packages", @@ -272,9 +277,9 @@ def parser_scapy(pcapfile, progressbar_position): """ out = [] with PcapReader(pcapfile.name) as pcap_reader: - for pkt in pcap_reader: - #do something with the packet - pass + for pkt in pcap_reader: + # TODO + pass return out @@ -293,21 +298,16 @@ def process_pcap(pcapfilename, analysers, progressbar_position, parser): g = gzip.open(f, 'rb') # test if this is really GZIP, raises exception if not g.peek(1) - # if it is a gzipped files pass the unpacked file reference to the parser + # if it is a gzipped files pass the unpacked file + # reference to the parser f = g except: - #TODO: remove! just for debug - #print("THIS IS NOT A GZIP FILE: ",pcapfilename) pass if parser == Parser.PYSHARK.name: - # Pyshark CLI is slow but works (single thread ~1.200pkg/s, - # with 8 threads ~4.500pkg/s) parsed_packets = parser_pyshark(f, progressbar_position) elif parser == Parser.DPKT.name: - # DPKT works for pcap and pcap.gz and is fast (single thread ~50.000pkg/s, - # with 8 threads ~240.000pkg/s) parsed_packets = parser_dpkt(f, progressbar_position) elif parser == Parser.PYPACKER.name: @@ -322,7 +322,7 @@ def process_pcap(pcapfilename, analysers, progressbar_position, parser): print("illegal parser") return - #TODO: remove! just for debug + # TODO: remove! just for debug print( "FETCHED {amount} PACKAGES FROM PCAP {dir}.\n Example: {pkt} ". format( @@ -333,7 +333,8 @@ def process_pcap(pcapfilename, analysers, progressbar_position, parser): ) # process the stats we need - for p in tqdm(parsed_packets, + for p in tqdm( + parsed_packets, position=progressbar_position, ascii=True, unit=" packages", @@ -341,7 +342,6 @@ def process_pcap(pcapfilename, analysers, progressbar_position, parser): for analyser in analysers: analyser(p) - except KeyboardInterrupt: print("Bye") sys.exit()