Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions pcapscanner/analyzers/conversations.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from multiprocessing import Manager
from multiprocessing import Lock
import csv
import os

CSVFN = "conversations.csv"

manager = Manager()
lock = Lock()


def __add_protocol(storage, pkt):
Expand All @@ -20,20 +20,20 @@ def __add_port(storage, pkt):
port = str(pkt.port_dst)

if port not in storage.keys():
storage[port] = manager.dict()
storage[port] = dict()
__add_protocol(storage[port], pkt)


def __add_dst_addr(storage, pkt):
dst_addr = str(pkt.ip_dst)

if dst_addr not in storage.keys():
storage[dst_addr] = manager.dict()
storage[dst_addr] = dict()
__add_port(storage[dst_addr], pkt)


def init():
setattr(analyze, 'storage', manager.dict())
setattr(analyze, 'storage', dict())


def log(outputdir):
Expand All @@ -52,15 +52,17 @@ def log(outputdir):

def analyze(pkt):
""" Count conversations between hosts. """

lock.acquire()
conversations = analyze.storage
try:
src_addr = str(pkt.ip_src)

if src_addr not in conversations.keys():
conversations[src_addr] = manager.dict()
conversations[src_addr] = dict()
__add_dst_addr(conversations[src_addr], pkt)

except AttributeError as e:
# ignore packets that aren't TCP/UDP or IPv4
pass
finally:
lock.release()
10 changes: 6 additions & 4 deletions pcapscanner/analyzers/hosts.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from multiprocessing import Manager
from multiprocessing import Lock
import csv
import os

CSVFN = "hostcounter.csv"

manager = Manager()
lock = Lock()


def init():
setattr(analyze, 'storage', manager.dict())
setattr(analyze, 'storage', dict())


def log(outputdir):
Expand All @@ -20,7 +20,7 @@ def log(outputdir):

def analyze(pkt):
""" Count the occurences of all host either as src or dest. """

lock.acquire()
hosts = analyze.storage
try:
src_addr = str(pkt.ip_src)
Expand All @@ -39,3 +39,5 @@ def analyze(pkt):
except AttributeError as e:
# ignore packets that aren't TCP/UDP or IPv4
pass
finally:
lock.release()
6 changes: 4 additions & 2 deletions pcapscanner/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import os
import csv
import time
from multiprocessing import Pool
from multiprocessing.dummy import Pool

from analyzers import hosts, conversations
import pcap
Expand All @@ -38,6 +38,7 @@

"""


class Main:

def __init__(self, outputdir, inputdir, parser):
Expand Down Expand Up @@ -98,7 +99,8 @@ def start(self):
# asynchronously
pool.apply_async(
pcap.process_pcap,
(fn, [a.analyze for a in ANALYZERS], progressbar_position, self.parser)
(fn, [a.analyze for a in ANALYZERS],
progressbar_position, self.parser)
)

# close pool
Expand Down
56 changes: 28 additions & 28 deletions pcapscanner/pcap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import gzip
import dpkt
from enum import Enum
from dpkt.compat import compat_ord
import pyshark
import socket

Expand All @@ -18,6 +17,7 @@
from datetime import datetime as dt
from collections import namedtuple


"""
This is the destination format of parsed pcap packages
to decouple PCAP parser data structures from analysers code
Expand All @@ -34,6 +34,7 @@
'timestamp'
])


class Parser(Enum):
DPKT = 'dpkt'
PYPACKER = 'pypacker'
Expand Down Expand Up @@ -67,7 +68,8 @@ def sort_by_date(a, b):
# in case we have no valid timestamp return 0
if aDateStr is None or bDateStr is None:
print(
"sort_by_date: Was not able to extract timestamp comparing {} to {}".
"sort_by_date: Was not able to extract timestamp "
"comparing {} to {}".
format(aBase, bBase)
)
return 0
Expand Down Expand Up @@ -128,12 +130,12 @@ def parser_dpkt(pcapfile, progressbar_position):
Parsing the RawIP encapsulated PCAPs using dpkt. Expects an unpacked file ref.
https://pypi.python.org/pypi/dpkt
"""
out=[]
out = []
try:
pcap = dpkt.pcap.Reader(pcapfile)

print("SUCCESS ", pcapfile.name)
for ts,buf in tqdm(
for ts, buf in tqdm(
pcap,
position=progressbar_position,
unit=" packages",
Expand All @@ -146,7 +148,7 @@ def parser_dpkt(pcapfile, progressbar_position):
# fetch the infos we need
# we use socket to convert inet IPv4 IP to human readable IP
# socket.inet_ntop(socket.AF_INET, inet)
#FIXME: get MAC adress
# FIXME: get MAC adress
parsedPkg = ParsedPackage(
protocol=ip.p,
ip_src=socket.inet_ntop(socket.AF_INET, ip.src),
Expand All @@ -164,9 +166,10 @@ def parser_dpkt(pcapfile, progressbar_position):
pass
except ValueError:
print(
"ValueError happend as packages where parsed. We expect RawIP "
"encapsulated PCAPs, maybe now we have a Ethernet encapsulated "
"one. Abort.")
"ValueError happend as packages where parsed. "
"We expect RawIP encapsulated PCAPs, maybe now "
"we have a Ethernet encapsulated one. Abort."
)
raise
except KeyboardInterrupt:
raise
Expand All @@ -180,12 +183,12 @@ def parser_dpkt(pcapfile, progressbar_position):

def parser_pyshark(pcapfile, progressbar_position):
"""
Uses tshark CLI in a bash subprocess, parses stdout. Slow but works well with
pcap.gz and pcap files.
Uses tshark CLI in a bash subprocess, parses stdout. Slow but
works well with pcap.gz and pcap files.
https://github.com/KimiNewt/pyshark
"""
out = []
cap = pyshark.FileCapture(os.path.abspath(pcapfile.name), only_summaries=False)
cap = pyshark.FileCapture(os.path.abspath(pcapfile.name))

# read array (to resolve futures) and return only the information
# we need to decouple data structures from analysers code
Expand Down Expand Up @@ -219,9 +222,11 @@ def parser_pyshark(pcapfile, progressbar_position):
def parser_pypacker(pcapfile, progressbar_position):
"""
Does not work!
Very fast, reads only .pcap (no .gz). Problem is it reads PCAPs with LinkType
Ethernet, but our dumps are RawIP. We can iterate and print the raw package
details, but parsing the packages does not work out of the box (because of RawIP).
Very fast, reads only .pcap (no .gz). Problem is it reads PCAPs
with LinkType Ethernet, but our dumps are RawIP. We can iterate and
print the raw package details, but parsing the packages does not
work out of the box (because of RawIP).

https://github.com/mike01/pypacker

for encapsulation RawIP or Ethernet see here:
Expand All @@ -232,7 +237,7 @@ def parser_pypacker(pcapfile, progressbar_position):

# read array (to resolve futures) and return only the information
# we need (to reduce memory needed)
for ts,buf in tqdm(
for ts, buf in tqdm(
cap,
position=progressbar_position,
unit=" packages",
Expand Down Expand Up @@ -272,9 +277,9 @@ def parser_scapy(pcapfile, progressbar_position):
"""
out = []
with PcapReader(pcapfile.name) as pcap_reader:
for pkt in pcap_reader:
#do something with the packet
pass
for pkt in pcap_reader:
# TODO
pass
return out


Expand All @@ -293,21 +298,16 @@ def process_pcap(pcapfilename, analysers, progressbar_position, parser):
g = gzip.open(f, 'rb')
# test if this is really GZIP, raises exception if not
g.peek(1)
# if it is a gzipped files pass the unpacked file reference to the parser
# if it is a gzipped files pass the unpacked file
# reference to the parser
f = g
except:
#TODO: remove! just for debug
#print("THIS IS NOT A GZIP FILE: ",pcapfilename)
pass

if parser == Parser.PYSHARK.name:
# Pyshark CLI is slow but works (single thread ~1.200pkg/s,
# with 8 threads ~4.500pkg/s)
parsed_packets = parser_pyshark(f, progressbar_position)

elif parser == Parser.DPKT.name:
# DPKT works for pcap and pcap.gz and is fast (single thread ~50.000pkg/s,
# with 8 threads ~240.000pkg/s)
parsed_packets = parser_dpkt(f, progressbar_position)

elif parser == Parser.PYPACKER.name:
Expand All @@ -322,7 +322,7 @@ def process_pcap(pcapfilename, analysers, progressbar_position, parser):
print("illegal parser")
return

#TODO: remove! just for debug
# TODO: remove! just for debug
print(
"FETCHED {amount} PACKAGES FROM PCAP {dir}.\n Example: {pkt} ".
format(
Expand All @@ -333,15 +333,15 @@ def process_pcap(pcapfilename, analysers, progressbar_position, parser):
)

# process the stats we need
for p in tqdm(parsed_packets,
for p in tqdm(
parsed_packets,
position=progressbar_position,
ascii=True,
unit=" packages",
):
for analyser in analysers:
analyser(p)


except KeyboardInterrupt:
print("Bye")
sys.exit()
Expand Down