From 442341c792ae1c02c931edb5477074c3ab928852 Mon Sep 17 00:00:00 2001 From: yampelo Date: Wed, 13 Nov 2019 19:49:06 -0500 Subject: [PATCH 01/25] analyzer skeleton --- beagle/analyzers/base_analyzer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py index e69de29b..fd1f6119 100644 --- a/beagle/analyzers/base_analyzer.py +++ b/beagle/analyzers/base_analyzer.py @@ -0,0 +1,3 @@ +class Analyzer(object): + def __init__(self, backend): + pass From 1b7f7059d3143318a7253fb71fbf3cd9f2913a5a Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 00:02:17 -0500 Subject: [PATCH 02/25] Begins working on rule system, adds field lookups. --- beagle/analyzers/base_analyzer.py | 6 +- beagle/analyzers/statements/base_statement.py | 29 +++++++ beagle/analyzers/statements/conditionals.py | 0 beagle/analyzers/statements/lookups.py | 84 +++++++++++++++++++ .../analyzers/test_statements/test_lookups.py | 48 +++++++++++ 5 files changed, 165 insertions(+), 2 deletions(-) create mode 100644 beagle/analyzers/statements/base_statement.py create mode 100644 beagle/analyzers/statements/conditionals.py create mode 100644 beagle/analyzers/statements/lookups.py create mode 100644 tests/analyzers/test_statements/test_lookups.py diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py index fd1f6119..790a006c 100644 --- a/beagle/analyzers/base_analyzer.py +++ b/beagle/analyzers/base_analyzer.py @@ -1,3 +1,5 @@ class Analyzer(object): - def __init__(self, backend): - pass + def __init__(self, name: str, description: str, score: int): + self.name = name + self.description = description + self.score = score diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py new file mode 100644 index 00000000..df321838 --- /dev/null +++ b/beagle/analyzers/statements/base_statement.py @@ -0,0 +1,29 @@ +from typing import Dict, Type, List + +from 
beagle.backends import Backend, NetworkX +from beagle.nodes import Node + +from .lookups import FieldLookup + + +class Statement(object): + def execute(self, backend: Type[Backend]): + if isinstance(backend, NetworkX): + return self.execute_networkx(backend) + + def execute_networkx(self, backend: NetworkX): + raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") + + +class NodeByProps(Statement): + def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]): + self.node_type = node_type + self.props = props + + def execute_networkx(self, backend: NetworkX) -> List[Node]: + result = [] + for node_id, node in backend.G.nodes(data=True): + if isinstance(node, self.node_type): + if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]): + result.append(node) + return result diff --git a/beagle/analyzers/statements/conditionals.py b/beagle/analyzers/statements/conditionals.py new file mode 100644 index 00000000..e69de29b diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py new file mode 100644 index 00000000..b78fe356 --- /dev/null +++ b/beagle/analyzers/statements/lookups.py @@ -0,0 +1,84 @@ +import re +from typing import Pattern, Union, cast +from abc import ABCMeta, abstractmethod +import functools + + +def not_null(f): + @functools.wraps(f) + def wrapper(prop, *args, **kwargs): + if prop is None: + return False + else: + return f(prop, *args, **kwargs) + + return wrapper + + +class FieldLookup(object, metaclass=ABCMeta): + def __init__(self, value): + self.value = value + + @abstractmethod + def test(self, prop) -> bool: + pass + + +class Contains(FieldLookup): + """Case sensitve contains""" + + @not_null + def test(self, prop: str): + return self.value in prop + + +class IContains(FieldLookup): + """Case insensitve Contains""" + + @not_null + def test(self, prop: str): + return str(self.value).lower() in str(prop).lower() + + +class Exact(FieldLookup): + 
"""Exact match""" + + @not_null + def test(self, prop: str): + return self.value == prop + + +class IExact(FieldLookup): + """Insensitive Exact match""" + + @not_null + def test(self, prop: str): + return str(self.value).lower() == str(prop).lower() + + +class StartsWith(FieldLookup): + """Property begins with""" + + @not_null + def test(self, prop: str): + return prop.startswith(self.value) + + +class EndsWith(FieldLookup): + """Property begins endswith""" + + @not_null + def test(self, prop: str): + return prop.endswith(self.value) + + +class Regex(FieldLookup): + def __init__(self, value: Union[str, Pattern]): + if isinstance(value, str): + self.value: Pattern = re.compile(value) + else: + self.value = value + + @not_null + def test(self, prop: str): + return self.value.search(prop) is not None diff --git a/tests/analyzers/test_statements/test_lookups.py b/tests/analyzers/test_statements/test_lookups.py new file mode 100644 index 00000000..530fa1ad --- /dev/null +++ b/tests/analyzers/test_statements/test_lookups.py @@ -0,0 +1,48 @@ +import re +import pytest +from beagle.analyzers.statements.lookups import ( + FieldLookup, + Contains, + IContains, + Exact, + IExact, + StartsWith, + EndsWith, + Regex, +) + + +@pytest.mark.parametrize( + "cls,value,prop,result", + [ + (Contains, "test", "test", True), + (Contains, "test", "worst", False), + (Contains, "test", "he is the test", True), + (Contains, "test", "the test was bad", True), + (Contains, "test", "the TEST was bad", False), + (IContains, "test", "TEST", True), + (IContains, "test", "tEsT", True), + (IContains, "test", "worst", False), + (IContains, "test", "he is the test", True), + (IContains, "test", "the test was bad", True), + (Exact, "test", "test", True), + (Exact, "test", " test ", False), + (Exact, "test", "some test a", False), + (IExact, "test", "test", True), + (IExact, "test", "TEST", True), + (IExact, "test", "tEst", True), + (StartsWith, "test", "test", True), + (StartsWith, "test", "not a test", 
False), + (StartsWith, "test", "test is the best", True), + (EndsWith, "test", "test", True), + (EndsWith, "test", "not test but a nest", False), + (EndsWith, "test", "the best test", True), + (Regex, r"\d", "test 1 test", True), + (Regex, re.compile(r"\d"), "test 1 test", True), + (Regex, r"\d", "test test", False), + (Regex, re.compile(r"\d"), "test test", False), + ], +) +def test_lookups(cls: FieldLookup, value: str, prop: str, result: str): + # prop -> value being tested again, value -> the thing we're looking up + assert cls(value).test(prop) == result From 47cd6adec667a1a07138b2d94cf77aaadc577bfc Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 00:21:44 -0500 Subject: [PATCH 03/25] Adds tests for selecting a NodeWithProps in NetworkX --- beagle/analyzers/statements/base_statement.py | 6 +- beagle/analyzers/statements/conditionals.py | 6 ++ beagle/analyzers/statements/lookups.py | 2 +- .../statements/test_base_statement.py | 58 +++++++++++++++++++ .../test_lookups.py | 0 5 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 tests/analyzers/statements/test_base_statement.py rename tests/analyzers/{test_statements => statements}/test_lookups.py (100%) diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index df321838..e1ac1e4b 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -11,7 +11,7 @@ def execute(self, backend: Type[Backend]): if isinstance(backend, NetworkX): return self.execute_networkx(backend) - def execute_networkx(self, backend: NetworkX): + def execute_networkx(self, backend: NetworkX): # pragma: no cover raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") @@ -22,7 +22,9 @@ def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]): def execute_networkx(self, backend: NetworkX) -> List[Node]: result = [] - for node_id, node in backend.G.nodes(data=True): + for 
node_id, data in backend.G.nodes(data=True): + node = data["data"] + if isinstance(node, self.node_type): if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]): result.append(node) diff --git a/beagle/analyzers/statements/conditionals.py b/beagle/analyzers/statements/conditionals.py index e69de29b..8b56c444 100644 --- a/beagle/analyzers/statements/conditionals.py +++ b/beagle/analyzers/statements/conditionals.py @@ -0,0 +1,6 @@ +class Conditional(object): + pass + + +class Not(object): + pass diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py index b78fe356..a0c679f1 100644 --- a/beagle/analyzers/statements/lookups.py +++ b/beagle/analyzers/statements/lookups.py @@ -15,7 +15,7 @@ def wrapper(prop, *args, **kwargs): return wrapper -class FieldLookup(object, metaclass=ABCMeta): +class FieldLookup(object, metaclass=ABCMeta): # pragma: no cover def __init__(self, value): self.value = value diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py new file mode 100644 index 00000000..b0a7c086 --- /dev/null +++ b/tests/analyzers/statements/test_base_statement.py @@ -0,0 +1,58 @@ +import pytest +from beagle.backends.networkx import NetworkX +from beagle.analyzers.statements.base_statement import NodeByProps +from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith +from beagle.nodes.process import Process + + +@pytest.fixture +def G1(): + # A basic graph, with two nodes an an edge + proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + other_proc = Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456") + + proc.launched[other_proc].append(timestamp=1) + + backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc]) + + backend.graph() + + return backend + + +def test_one_prop_test(G1): + statement = NodeByProps(node_type=Process, 
props={"command_line": Contains("test.exe")}) + + # Should match on `proc` from G1 + assert statement.execute(G1) == [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + ] + + # should mathc on other proc + statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")}) + assert statement.execute(G1) == [ + Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456") + ] + + # should match on both + statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")}) + assert statement.execute(G1) == [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"), + ] + + # should match neither + statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")}) + assert statement.execute(G1) == [] + + +def test_multiple_prop_test(G1): + statement = NodeByProps( + node_type=Process, + props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, + ) + + # Should match on `proc` from G1 + assert statement.execute(G1) == [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + ] diff --git a/tests/analyzers/test_statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py similarity index 100% rename from tests/analyzers/test_statements/test_lookups.py rename to tests/analyzers/statements/test_lookups.py From 63e24b13fc3b4665971330e14941b9a7845af509 Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 00:28:36 -0500 Subject: [PATCH 04/25] Adds conditionals for matching --- beagle/analyzers/statements/conditionals.py | 6 ---- beagle/analyzers/statements/lookups.py | 32 +++++++++++++++++++++ tests/analyzers/statements/test_lookups.py | 16 +++++++++++ 3 files changed, 48 insertions(+), 6 deletions(-) delete mode 100644 beagle/analyzers/statements/conditionals.py diff --git 
a/beagle/analyzers/statements/conditionals.py b/beagle/analyzers/statements/conditionals.py deleted file mode 100644 index 8b56c444..00000000 --- a/beagle/analyzers/statements/conditionals.py +++ /dev/null @@ -1,6 +0,0 @@ -class Conditional(object): - pass - - -class Not(object): - pass diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py index a0c679f1..ab97d837 100644 --- a/beagle/analyzers/statements/lookups.py +++ b/beagle/analyzers/statements/lookups.py @@ -24,6 +24,38 @@ def test(self, prop) -> bool: pass +class Or(FieldLookup): + """Boolean OR, Meant to be used with other lookups: + >>> Or(Contains("foo"), StartsWith("bar")) + """ + + def __init__(self, *args: FieldLookup): + self.lookups = args + + def test(self, prop: str): + for lookup in self.lookups: + if lookup.test(prop): + return True + + return False + + +class And(FieldLookup): + """Boolean And, Meant to be used with other lookups: + >>> And(Contains("foo"), StartsWith("bar"), EndsWith("zar")) + """ + + def __init__(self, *args: FieldLookup): + self.lookups = args + + def test(self, prop: str): + for lookup in self.lookups: + if not lookup.test(prop): + return False + + return True + + class Contains(FieldLookup): """Case sensitve contains""" diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py index 530fa1ad..94fd87d2 100644 --- a/tests/analyzers/statements/test_lookups.py +++ b/tests/analyzers/statements/test_lookups.py @@ -9,6 +9,8 @@ StartsWith, EndsWith, Regex, + And, + Or, ) @@ -46,3 +48,17 @@ def test_lookups(cls: FieldLookup, value: str, prop: str, result: str): # prop -> value being tested again, value -> the thing we're looking up assert cls(value).test(prop) == result + + +def test_and(): + assert And(StartsWith("foo"), EndsWith("bar")).test("foo bar") is True + assert And(StartsWith("foo"), EndsWith("bar")).test("foo nar bar") is True + assert And(StartsWith("foo"), EndsWith("bar")).test("bar foo") 
is False + + +def test_or(): + assert Or(StartsWith("foo"), EndsWith("bar")).test("foo bar") is True + assert Or(StartsWith("foo"), EndsWith("bar")).test("foo") is True + assert Or(StartsWith("foo"), EndsWith("bar")).test("bar") is True + assert Or(StartsWith("foo"), EndsWith("bar")).test("foo nar bar") is True + assert Or(StartsWith("foo"), EndsWith("bar")).test("bar foo") is False From 7f784777f075fd060262fa12597c4761c79aed02 Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 00:32:29 -0500 Subject: [PATCH 05/25] Fixes not_null wrapper --- beagle/analyzers/statements/lookups.py | 6 +++--- tests/analyzers/statements/test_lookups.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py index ab97d837..aa649182 100644 --- a/beagle/analyzers/statements/lookups.py +++ b/beagle/analyzers/statements/lookups.py @@ -1,16 +1,16 @@ import re -from typing import Pattern, Union, cast +from typing import Pattern, Union from abc import ABCMeta, abstractmethod import functools def not_null(f): @functools.wraps(f) - def wrapper(prop, *args, **kwargs): + def wrapper(self, prop, *args, **kwargs): if prop is None: return False else: - return f(prop, *args, **kwargs) + return f(self, prop, *args, **kwargs) return wrapper diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py index 94fd87d2..40785653 100644 --- a/tests/analyzers/statements/test_lookups.py +++ b/tests/analyzers/statements/test_lookups.py @@ -25,6 +25,8 @@ (IContains, "test", "TEST", True), (IContains, "test", "tEsT", True), (IContains, "test", "worst", False), + # Test we reject null value. 
+ (IContains, "test", None, False), (IContains, "test", "he is the test", True), (IContains, "test", "the test was bad", True), (Exact, "test", "test", True), From f82339e8eeba3feecf907c899bfa61ea6fc38dd7 Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 00:47:55 -0500 Subject: [PATCH 06/25] Adds operator overloading to lookups --- beagle/analyzers/statements/lookups.py | 22 +++++++++++++++++++ .../statements/test_base_statement.py | 16 ++++++++++++-- tests/analyzers/statements/test_lookups.py | 12 ++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py index aa649182..89a8703c 100644 --- a/beagle/analyzers/statements/lookups.py +++ b/beagle/analyzers/statements/lookups.py @@ -23,6 +23,16 @@ def __init__(self, value): def test(self, prop) -> bool: pass + def __and__(self, other) -> "FieldLookup": + # Contains("test.exe") & Contains("fest.exe") -> And(Contains("test.exe"), Contains("fest.exe")) + return And(self, other) + + def __or__(self, other) -> "FieldLookup": + return Or(self, other) + + def __invert__(self) -> "FieldLookup": + return Not(self) + class Or(FieldLookup): """Boolean OR, Meant to be used with other lookups: @@ -56,6 +66,18 @@ def test(self, prop: str): return True +class Not(FieldLookup): + """Boolean And, Meant to be used with other lookups: + >>> And(Contains("foo"), StartsWith("bar"), EndsWith("zar")) + """ + + def __init__(self, arg: FieldLookup): + self.lookup = arg + + def test(self, prop: str): + return not self.lookup.test(prop) + + class Contains(FieldLookup): """Case sensitve contains""" diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index b0a7c086..8a6ab555 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -20,7 +20,7 @@ def G1(): return backend -def test_one_prop_test(G1): +def 
test_one_node_prop_test(G1): statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) # Should match on `proc` from G1 @@ -46,7 +46,19 @@ def test_one_prop_test(G1): assert statement.execute(G1) == [] -def test_multiple_prop_test(G1): +def test_multiple_node_prop_test(G1): + statement = NodeByProps( + node_type=Process, + props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, + ) + + # Should match on `proc` from G1 + assert statement.execute(G1) == [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + ] + + +def test_node_conditional(G1): statement = NodeByProps( node_type=Process, props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py index 40785653..c97d161a 100644 --- a/tests/analyzers/statements/test_lookups.py +++ b/tests/analyzers/statements/test_lookups.py @@ -11,6 +11,7 @@ Regex, And, Or, + Not, ) @@ -64,3 +65,14 @@ def test_or(): assert Or(StartsWith("foo"), EndsWith("bar")).test("bar") is True assert Or(StartsWith("foo"), EndsWith("bar")).test("foo nar bar") is True assert Or(StartsWith("foo"), EndsWith("bar")).test("bar foo") is False + + +def test_not(): + assert Not(Contains("test")).test("hello") is True + assert Not(Not(Contains("test"))).test("hello") is False + + +def test_operator_overloading(): + assert (~Contains("test")).test("hello") is True + assert (Contains("test") & EndsWith("hello")).test("test my hello") is True + assert (Contains("test") | EndsWith("hello")).test("hello") is True From 39c3dcf2c17e81acb926c35a3292898482aad152 Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 01:07:52 -0500 Subject: [PATCH 07/25] Moves statements to work on nx.Graph objects instead of NetworkX Backends --- beagle/analyzers/statements/base_statement.py | 25 ++++++------ beagle/analyzers/statements/lookups.py | 40 
+++++++++++++++++-- .../statements/test_base_statement.py | 20 +++++----- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index e1ac1e4b..14ccea69 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -1,17 +1,13 @@ -from typing import Dict, Type, List +from typing import Dict, Type -from beagle.backends import Backend, NetworkX from beagle.nodes import Node from .lookups import FieldLookup +import networkx as nx class Statement(object): - def execute(self, backend: Type[Backend]): - if isinstance(backend, NetworkX): - return self.execute_networkx(backend) - - def execute_networkx(self, backend: NetworkX): # pragma: no cover + def execute_networkx(self, G: nx.Graph): # pragma: no cover raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") @@ -20,12 +16,17 @@ def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]): self.node_type = node_type self.props = props - def execute_networkx(self, backend: NetworkX) -> List[Node]: - result = [] - for node_id, data in backend.G.nodes(data=True): + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + subgraph_nodes = [] + + # For each node + for node_id, data in G.nodes(data=True): node = data["data"] + # If node matches the desired instance. 
if isinstance(node, self.node_type): + # Test the node if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]): - result.append(node) - return result + subgraph_nodes.append(node_id) + + return G.subgraph(subgraph_nodes) diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py index 89a8703c..76c764e3 100644 --- a/beagle/analyzers/statements/lookups.py +++ b/beagle/analyzers/statements/lookups.py @@ -5,6 +5,7 @@ def not_null(f): + # Ensures the passed in prop is not null @functools.wraps(f) def wrapper(self, prop, *args, **kwargs): if prop is None: @@ -23,14 +24,43 @@ def __init__(self, value): def test(self, prop) -> bool: pass - def __and__(self, other) -> "FieldLookup": - # Contains("test.exe") & Contains("fest.exe") -> And(Contains("test.exe"), Contains("fest.exe")) + def __and__(self, other) -> "And": + """Combines two FieldLookups to works as a logical and + + >>> Contains("test.exe") & Contains("fest.exe") + And(Contains("test.exe"), Contains("fest.exe")) + + Returns + ------- + And + And FieldLookup Object + """ return And(self, other) - def __or__(self, other) -> "FieldLookup": + def __or__(self, other) -> "Or": + """Combines two FieldLookup objects to work as a logical Or + + >>> Contains("test.exe") | Contains("fest.exe") + Or(Contains("test.exe"), Contains("fest.exe")) + + Returns + ------- + Or + Or FieldLookupObject + """ return Or(self, other) - def __invert__(self) -> "FieldLookup": + def __invert__(self) -> "Not": + """Negates a field lookup + + >>> ~Contains("test.exe") + Not(Contains("test.exe")) + + Returns + ------- + Not + Not FieldLookupObject + """ return Not(self) @@ -127,6 +157,8 @@ def test(self, prop: str): class Regex(FieldLookup): + """Regex Match""" + def __init__(self, value: Union[str, Pattern]): if isinstance(value, str): self.value: Pattern = re.compile(value) diff --git a/tests/analyzers/statements/test_base_statement.py 
b/tests/analyzers/statements/test_base_statement.py index 8a6ab555..19753f4c 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -15,35 +15,35 @@ def G1(): backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc]) - backend.graph() - - return backend + return backend.graph() def test_one_node_prop_test(G1): statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) # Should match on `proc` from G1 - assert statement.execute(G1) == [ - Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + nodes = statement.execute_networkx(G1).nodes(data=True) + assert len(nodes) == 1 + assert Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") in [ + n["data"] for _, n in nodes ] # should mathc on other proc statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")}) - assert statement.execute(G1) == [ + assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [ Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456") ] # should match on both statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")}) - assert statement.execute(G1) == [ + assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"), ] # should match neither statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")}) - assert statement.execute(G1) == [] + assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [] def test_multiple_node_prop_test(G1): @@ -53,7 +53,7 @@ def test_multiple_node_prop_test(G1): ) # Should match on `proc` from G1 - assert statement.execute(G1) == [ + assert [n["data"] for _, 
n in statement.execute_networkx(G1).nodes(data=True)] == [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") ] @@ -65,6 +65,6 @@ def test_node_conditional(G1): ) # Should match on `proc` from G1 - assert statement.execute(G1) == [ + assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") ] From 3cbae4f254f532b274f880e8d49da0660632f944 Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 01:53:48 -0500 Subject: [PATCH 08/25] EdgeByProps: Adds statement to return subgraph that contains a matching edge. --- beagle/analyzers/statements/base_statement.py | 69 +++++++++++++++++++ beagle/backends/networkx.py | 5 +- .../statements/test_base_statement.py | 53 +++++++++++++- 3 files changed, 120 insertions(+), 7 deletions(-) diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index 14ccea69..0281de0a 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -13,10 +13,28 @@ def execute_networkx(self, G: nx.Graph): # pragma: no cover class NodeByProps(Statement): def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]): + """Searches the graph for a node of type `node_type` with properties matching `props` + + Parameters + ---------- + node_type : Type[Node] + The type of node to look for. e.g. Process + props : Dict[str, FieldLookup] + The set of props to filter the resulting nodes by. + + Examples + ---------- + Filter for Process nodes, with command lines that contain `text.exe` + >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) + + """ self.node_type = node_type self.props = props def execute_networkx(self, G: nx.Graph) -> nx.Graph: + """Searches a `nx.Graph` object for nodes that match type `node_type` and contains + props matching `props`. This is O(V). 
+ """ subgraph_nodes = [] # For each node @@ -30,3 +48,54 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: subgraph_nodes.append(node_id) return G.subgraph(subgraph_nodes) + + +class EdgeByProps(Statement): + def __init__(self, edge_type: str, props: Dict[str, FieldLookup]): + """Searches the graph for an edge of type `edge_type` with properties matching `props` + + Parameters + ---------- + edge_type : str + The type of edge to look for. e.g. Wrote + props : Dict[str, FieldLookup] + The set of props to filter the resulting edges by. + + Examples + ---------- + Filter for TCP edges, with contents that match ".pdf" + >>> EdgeByProps(edge_type="TCP", props={"payload": Contains(".pdf")}) + + """ + self.edge_type = edge_type + self.props = props + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + """Searches a `nx.Graph` object for edges that match type `edge_type` and contains + props matching `props`. This is O(E). + + Returns a subgraph with all nodes contained in match edges + """ + subgraph_edges = [] + + # For each edge + for u, v, k, e_data in G.edges(data=True, keys=True): + + # pull out the data field from NX + data = e_data["data"] # edge data + e_type = e_data["edge_name"] # edge type + + # If edge matches the desired instance. 
+ if e_type == self.edge_type: + + # Test the edge + if not isinstance(data, list): + data = [data] + + for entry in data: + if any([lookup.test(entry.get(prop)) for prop, lookup in self.props.items()]): + subgraph_edges.append((u, v, k)) + # can stop on first match + break + + return G.edge_subgraph(subgraph_edges) diff --git a/beagle/backends/networkx.py b/beagle/backends/networkx.py index 6ddaa55d..ac59b2ca 100644 --- a/beagle/backends/networkx.py +++ b/beagle/backends/networkx.py @@ -190,10 +190,7 @@ def insert_edges(self, u: Node, v: Node, edge_name: str, instances: List[dict]) # Otherwise, they key is assigned from NetworkX, and we add the edge type as a label: else: self.G.add_edges_from( - [ - (u_id, v_id, {"key": edge_name, "data": entry, "edge_name": edge_name}) - for entry in instances - ] + [(u_id, v_id, {"data": entry, "edge_name": edge_name}) for entry in instances] ) def update_node(self, node: Node, node_id: int) -> None: # pragma: no cover diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index 19753f4c..043ffbc5 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -1,8 +1,8 @@ import pytest from beagle.backends.networkx import NetworkX -from beagle.analyzers.statements.base_statement import NodeByProps -from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith -from beagle.nodes.process import Process +from beagle.analyzers.statements.base_statement import NodeByProps, EdgeByProps +from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith, Exact +from beagle.nodes.process import Process, File @pytest.fixture @@ -18,6 +18,33 @@ def G1(): return backend.graph() +@pytest.fixture +def G2(): + # A basic graph, with two nodes an an edge + proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + f = File(file_name="foo", file_path="bar") + + 
proc.wrote[f].append(contents="foo") + + backend = NetworkX(consolidate_edges=True, nodes=[proc, f]) + + return backend.graph() + + +@pytest.fixture +def G3(): + # *no consolidating* + proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + f = File(file_name="foo", file_path="bar") + + proc.wrote[f].append(contents="foo") + proc.wrote[f].append(contents="bar") + + backend = NetworkX(consolidate_edges=False, nodes=[proc, f]) + + return backend.graph() + + def test_one_node_prop_test(G1): statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) @@ -68,3 +95,23 @@ def test_node_conditional(G1): assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") ] + + +def test_one_edge_prop_test(G2, G3): + statement = EdgeByProps(edge_type="Wrote", props={"contents": Exact("foo")}) + + assert [n["data"] for _, n in statement.execute_networkx(G2).nodes(data=True)] == [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ] + + # Should work on the non-conslidating graph too. + assert [n["data"] for _, n in statement.execute_networkx(G3).nodes(data=True)] == [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ] + + statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")}) + + # Should match on `proc` from G1 + assert [n["data"] for _, n in statement.execute_networkx(G2).nodes(data=True)] == [] From f2dc414fe96827007d978a8759e48314d7bf283e Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 11:53:29 -0500 Subject: [PATCH 09/25] Filter node and return ancestors/descendants/all reachable. 
NodeByPropsDescendents: Get a node and its descendants NodeByPropsAncestors: Get a node and its ancestors NodeByPropsReachable: Get a node and all reachable nodes from it (ancestors or descendants) --- beagle/analyzers/statements/base_statement.py | 69 +++++- .../statements/test_base_statement.py | 222 +++++++++++++++--- 2 files changed, 256 insertions(+), 35 deletions(-) diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index 0281de0a..cf0c083a 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -1,12 +1,20 @@ -from typing import Dict, Type +from typing import Dict, List, Set, Tuple, Type + +import networkx as nx from beagle.nodes import Node from .lookups import FieldLookup -import networkx as nx class Statement(object): + def __init__(self): + # The resulting node IDs + self.result_nodes: List[int] = [] + + # The resulting edge IDs + self.result_edges: List[Tuple[int, int, int]] = [] + def execute_networkx(self, G: nx.Graph): # pragma: no cover raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") @@ -30,6 +38,7 @@ def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]): """ self.node_type = node_type self.props = props + super().__init__() def execute_networkx(self, G: nx.Graph) -> nx.Graph: """Searches a `nx.Graph` object for nodes that match type `node_type` and contains @@ -99,3 +108,59 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: break return G.edge_subgraph(subgraph_edges) + + +class NodeByPropsDescendents(NodeByProps): + """Executes a `NodeByProps` query, and returns all descendants of the matching nodes. 
+ see py:meth:`NodeByProps`""" + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + + # Get the next graph + next_graph = super().execute_networkx(G) + + subgraph_nodes: Set[int] = set() + + # For every node that matched `NodeByProps` + for node_id in next_graph.nodes(): + # Get the nodes descendants in the original graph, and add make a subgraph from those. + subgraph_nodes |= nx.descendants(G, node_id) | {node_id} + + return G.subgraph(subgraph_nodes) + + +class NodeByPropsAncestors(NodeByProps): + """Executes a `NodeByProps` query, and returns all ascendants of the matching nodes. + see py:meth:`NodeByProps`""" + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + + # Get the next graph + next_graph = super().execute_networkx(G) + + subgraph_nodes: Set[int] = set() + + # For every node that matched `NodeByProps` + for node_id in next_graph.nodes(): + # Get the nodes ancestors in the original graph, and add make a subgraph from those. + subgraph_nodes |= nx.ancestors(G, node_id) | {node_id} + + return G.subgraph(subgraph_nodes) + + +class NodeByPropsReachable(NodeByProps): + """Executes a `NodeByProps` query, and returns all ancestors and descendants of the matching nodes. 
+ see py:meth:`NodeByProps`""" + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + + # Get the next graph + next_graph = super().execute_networkx(G) + + subgraph_nodes: Set[int] = set() + + # For every node that matched `NodeByProps` + for node_id in next_graph.nodes(): + subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id} + + return G.subgraph(subgraph_nodes) diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index 043ffbc5..dbc6d5fe 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -1,8 +1,18 @@ +from typing import List +import networkx as nx + import pytest from beagle.backends.networkx import NetworkX -from beagle.analyzers.statements.base_statement import NodeByProps, EdgeByProps +from beagle.analyzers.statements.base_statement import ( + NodeByProps, + EdgeByProps, + NodeByPropsDescendents, + NodeByPropsAncestors, + NodeByPropsReachable, +) from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith, Exact -from beagle.nodes.process import Process, File + +from beagle.nodes import Node, File, Process @pytest.fixture @@ -45,32 +55,85 @@ def G3(): return backend.graph() +@pytest.fixture +def G4(): + # A graph with a four process tree: + # A -> B -> C -> D + A = Process(process_id=10, process_image="A", command_line="A") + B = Process(process_id=12, process_image="B", command_line="B") + C = Process(process_id=12, process_image="C", command_line="C") + D = Process(process_id=12, process_image="D", command_line="D") + + A.launched[B] + B.launched[C] + C.launched[D] + + backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C]) + + return backend.graph() + + +@pytest.fixture +def G5(): + # A graph with two, *disconnected* four process tree: + # A -> B -> C -> D + # E -> F -> G -> H + A = Process(process_id=10, process_image="A", command_line="A") + B = 
Process(process_id=12, process_image="B", command_line="B") + C = Process(process_id=12, process_image="C", command_line="C") + D = Process(process_id=12, process_image="D", command_line="D") + + E = Process(process_id=10, process_image="E", command_line="E") + F = Process(process_id=12, process_image="F", command_line="F") + G = Process(process_id=12, process_image="G", command_line="G") + H = Process(process_id=12, process_image="H", command_line="H") + + A.launched[B] + B.launched[C] + C.launched[D] + + E.launched[F] + F.launched[G] + G.launched[H] + + backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H]) + + return backend.graph() + + +def graph_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool: + return [n["data"] for _, n in graph.nodes(data=True)] == nodes + + def test_one_node_prop_test(G1): statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) - # Should match on `proc` from G1 - nodes = statement.execute_networkx(G1).nodes(data=True) - assert len(nodes) == 1 - assert Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") in [ - n["data"] for _, n in nodes - ] + assert graph_nodes_match( + statement.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) # should mathc on other proc statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")}) - assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [ - Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456") - ] + + assert graph_nodes_match( + statement.execute_networkx(G1), + [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")], + ) # should match on both statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")}) - assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [ - Process(process_id=10, 
process_image="test.exe", command_line="test.exe /c foobar"), - Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"), - ] - # should match neither + assert graph_nodes_match( + statement.execute_networkx(G1), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"), + ], + ) statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")}) - assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [] + + assert graph_nodes_match(statement.execute_networkx(G1), []) def test_multiple_node_prop_test(G1): @@ -80,9 +143,10 @@ def test_multiple_node_prop_test(G1): ) # Should match on `proc` from G1 - assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [ - Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") - ] + assert graph_nodes_match( + statement.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) def test_node_conditional(G1): @@ -91,27 +155,119 @@ def test_node_conditional(G1): props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, ) - # Should match on `proc` from G1 - assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [ - Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") - ] + assert graph_nodes_match( + statement.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) def test_one_edge_prop_test(G2, G3): statement = EdgeByProps(edge_type="Wrote", props={"contents": Exact("foo")}) - assert [n["data"] for _, n in statement.execute_networkx(G2).nodes(data=True)] == [ - Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), - File(file_name="foo", file_path="bar"), - ] + 
assert graph_nodes_match( + statement.execute_networkx(G2), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) # Should work on the non-conslidating graph too. - assert [n["data"] for _, n in statement.execute_networkx(G3).nodes(data=True)] == [ - Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), - File(file_name="foo", file_path="bar"), - ] + assert graph_nodes_match( + statement.execute_networkx(G3), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")}) # Should match on `proc` from G1 - assert [n["data"] for _, n in statement.execute_networkx(G2).nodes(data=True)] == [] + assert graph_nodes_match(statement.execute_networkx(G2), []) + + +def test_node_with_descendants(G4): + + # A should return A->B->C->D + statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + # B should return B->C->D + statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + +def test_node_with_ancestors(G4): + + # A should return A + statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")}) + assert graph_nodes_match( 
+ statement.execute_networkx(G4), + [Process(process_id=10, process_image="A", command_line="A")], + ) + + # B should return A->B + statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + ], + ) + + # D should return A->B->C->D + statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + +def test_nodes_reachable(G5): + + # All queries will return the full path. + # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H + + statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + statement.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) + assert graph_nodes_match( + statement.execute_networkx(G5), + [ + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + Process(process_id=12, process_image="H", command_line="H"), + ], + ) From 024b7b58a24164511895a68201eaae35f085e6d9 Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 
Nov 2019 12:22:27 -0500 Subject: [PATCH 10/25] Moves test graphs to fixture files --- beagle/analyzers/__init__.py | 0 beagle/analyzers/statements/__init__.py | 0 tests/analyzers/conftest.py | 93 +++++++++++++++++++ .../statements/test_base_statement.py | 88 ------------------ 4 files changed, 93 insertions(+), 88 deletions(-) create mode 100644 beagle/analyzers/__init__.py create mode 100644 beagle/analyzers/statements/__init__.py create mode 100644 tests/analyzers/conftest.py diff --git a/beagle/analyzers/__init__.py b/beagle/analyzers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/beagle/analyzers/statements/__init__.py b/beagle/analyzers/statements/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py new file mode 100644 index 00000000..08ad5efc --- /dev/null +++ b/tests/analyzers/conftest.py @@ -0,0 +1,93 @@ +import networkx as nx + +import pytest + +from beagle.backends.networkx import NetworkX + +from beagle.nodes import Node, File, Process + + +@pytest.fixture +def G1(): + # A basic graph, with two nodes an an edge + proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + other_proc = Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456") + + proc.launched[other_proc].append(timestamp=1) + + backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc]) + + return backend.graph() + + +@pytest.fixture +def G2(): + # A basic graph, with two nodes an an edge + proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + f = File(file_name="foo", file_path="bar") + + proc.wrote[f].append(contents="foo") + + backend = NetworkX(consolidate_edges=True, nodes=[proc, f]) + + return backend.graph() + + +@pytest.fixture +def G3(): + # *no consolidating* + proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") + f = 
File(file_name="foo", file_path="bar") + + proc.wrote[f].append(contents="foo") + proc.wrote[f].append(contents="bar") + + backend = NetworkX(consolidate_edges=False, nodes=[proc, f]) + + return backend.graph() + + +@pytest.fixture +def G4(): + # A graph with a four process tree: + # A -> B -> C -> D + A = Process(process_id=10, process_image="A", command_line="A") + B = Process(process_id=12, process_image="B", command_line="B") + C = Process(process_id=12, process_image="C", command_line="C") + D = Process(process_id=12, process_image="D", command_line="D") + + A.launched[B] + B.launched[C] + C.launched[D] + + backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C]) + + return backend.graph() + + +@pytest.fixture +def G5(): + # A graph with two, *disconnected* four process tree: + # A -> B -> C -> D + # E -> F -> G -> H + A = Process(process_id=10, process_image="A", command_line="A") + B = Process(process_id=12, process_image="B", command_line="B") + C = Process(process_id=12, process_image="C", command_line="C") + D = Process(process_id=12, process_image="D", command_line="D") + + E = Process(process_id=10, process_image="E", command_line="E") + F = Process(process_id=12, process_image="F", command_line="F") + G = Process(process_id=12, process_image="G", command_line="G") + H = Process(process_id=12, process_image="H", command_line="H") + + A.launched[B] + B.launched[C] + C.launched[D] + + E.launched[F] + F.launched[G] + G.launched[H] + + backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H]) + + return backend.graph() diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index dbc6d5fe..d170c5c2 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -1,8 +1,6 @@ from typing import List import networkx as nx -import pytest -from beagle.backends.networkx import NetworkX from 
beagle.analyzers.statements.base_statement import ( NodeByProps, EdgeByProps, @@ -15,92 +13,6 @@ from beagle.nodes import Node, File, Process -@pytest.fixture -def G1(): - # A basic graph, with two nodes an an edge - proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") - other_proc = Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456") - - proc.launched[other_proc].append(timestamp=1) - - backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc]) - - return backend.graph() - - -@pytest.fixture -def G2(): - # A basic graph, with two nodes an an edge - proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") - f = File(file_name="foo", file_path="bar") - - proc.wrote[f].append(contents="foo") - - backend = NetworkX(consolidate_edges=True, nodes=[proc, f]) - - return backend.graph() - - -@pytest.fixture -def G3(): - # *no consolidating* - proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") - f = File(file_name="foo", file_path="bar") - - proc.wrote[f].append(contents="foo") - proc.wrote[f].append(contents="bar") - - backend = NetworkX(consolidate_edges=False, nodes=[proc, f]) - - return backend.graph() - - -@pytest.fixture -def G4(): - # A graph with a four process tree: - # A -> B -> C -> D - A = Process(process_id=10, process_image="A", command_line="A") - B = Process(process_id=12, process_image="B", command_line="B") - C = Process(process_id=12, process_image="C", command_line="C") - D = Process(process_id=12, process_image="D", command_line="D") - - A.launched[B] - B.launched[C] - C.launched[D] - - backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C]) - - return backend.graph() - - -@pytest.fixture -def G5(): - # A graph with two, *disconnected* four process tree: - # A -> B -> C -> D - # E -> F -> G -> H - A = Process(process_id=10, process_image="A", command_line="A") - B = Process(process_id=12, 
process_image="B", command_line="B") - C = Process(process_id=12, process_image="C", command_line="C") - D = Process(process_id=12, process_image="D", command_line="D") - - E = Process(process_id=10, process_image="E", command_line="E") - F = Process(process_id=12, process_image="F", command_line="F") - G = Process(process_id=12, process_image="G", command_line="G") - H = Process(process_id=12, process_image="H", command_line="H") - - A.launched[B] - B.launched[C] - C.launched[D] - - E.launched[F] - F.launched[G] - G.launched[H] - - backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H]) - - return backend.graph() - - def graph_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool: return [n["data"] for _, n in graph.nodes(data=True)] == nodes From 6adb2e15b92e708a9e4aae86a23d7ca91b468a0e Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 13:27:25 -0500 Subject: [PATCH 11/25] ChainedStatement: Adds ability to perform statement1 | statement2 --- beagle/analyzers/statements/base_statement.py | 39 +++++++++++++++++-- beagle/analyzers/statements/process.py | 34 ++++++++++++++++ .../statements/test_base_statement.py | 23 +++++++++++ 3 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 beagle/analyzers/statements/process.py diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index cf0c083a..0daa39ac 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Set, Tuple, Type +from typing import Dict, Set, Tuple, Type import networkx as nx @@ -10,14 +10,42 @@ class Statement(object): def __init__(self): # The resulting node IDs - self.result_nodes: List[int] = [] + self.result_nodes: Set[int] = set() # The resulting edge IDs - self.result_edges: List[Tuple[int, int, int]] = [] + self.result_edges: Set[Tuple[int, int, int]] = set() def execute_networkx(self, G: nx.Graph): # 
pragma: no cover raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") + def __or__(self, other): + return ChainedStatement(self, other) + + +class ChainedStatement(Statement): + def __init__(self, *args: Statement): + self.statements = args + super().__init__() + + def execute_networkx(self, G: nx.Graph): + # Get the subgraphs + + subgraphs = [] + for statement in self.statements: + # Get the subgraphs + subgraphs.append(statement.execute_networkx(G)) + + # add the reuslt_nodes, result_edges. + self.result_edges |= statement.result_edges + self.result_nodes |= statement.result_nodes + + # Compose the subgraphs + H = subgraphs[0] + for subgraph in subgraphs[1:]: + H = nx.compose(H, subgraph) + + return H + class NodeByProps(Statement): def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]): @@ -56,6 +84,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]): subgraph_nodes.append(node_id) + self.result_nodes = set(subgraph_nodes) return G.subgraph(subgraph_nodes) @@ -107,6 +136,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: # can stop on first match break + self.result_edges = set(subgraph_edges) return G.edge_subgraph(subgraph_edges) @@ -126,6 +156,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: # Get the nodes descendants in the original graph, and add make a subgraph from those. subgraph_nodes |= nx.descendants(G, node_id) | {node_id} + self.result_nodes = set(subgraph_nodes) return G.subgraph(subgraph_nodes) @@ -145,6 +176,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: # Get the nodes ancestors in the original graph, and add make a subgraph from those. 
subgraph_nodes |= nx.ancestors(G, node_id) | {node_id} + self.result_nodes = set(subgraph_nodes) return G.subgraph(subgraph_nodes) @@ -163,4 +195,5 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: for node_id in next_graph.nodes(): subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id} + self.result_nodes = set(subgraph_nodes) return G.subgraph(subgraph_nodes) diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py new file mode 100644 index 00000000..f3ede113 --- /dev/null +++ b/beagle/analyzers/statements/process.py @@ -0,0 +1,34 @@ +from typing import Union, Type + +from beagle.nodes import Process + +from .base_statement import NodeByPropsReachable +from .lookups import Exact, FieldLookup + + +class FindProcess(NodeByPropsReachable): + """Finds statements relevant to a Process + + Parameters + ---------- + NodeByPropsReachable : [type] + [description] + """ + + @classmethod + def with_command_line( + cls: Type["FindProcess"], command_line: Union[str, FieldLookup] + ) -> "FindProcess": + + if isinstance(command_line, str): + command_line = Exact(command_line) + return cls(node_type=Process, props={"command_line": command_line}) + + @classmethod + def with_process_name( + cls: Type["FindProcess"], process_image: Union[str, FieldLookup] + ) -> "FindProcess": + + if isinstance(process_image, str): + process_image = Exact(process_image) + return cls(node_type=Process, props={"process_image": process_image}) diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index d170c5c2..950c6339 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -183,3 +183,26 @@ def test_nodes_reachable(G5): Process(process_id=12, process_image="H", command_line="H"), ], ) + + +def test_chained_statement(G5): + # Both paths should show up because we use a chained statement that returns both. 
+ + Bstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) + Gstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) + + chained = Bstatement | Gstatement + + assert graph_nodes_match( + chained.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + Process(process_id=12, process_image="H", command_line="H"), + ], + ) From 26ac070b9c609eb38b5f9383ee1a0ac066801e73 Mon Sep 17 00:00:00 2001 From: yampelo Date: Fri, 15 Nov 2019 18:22:37 -0500 Subject: [PATCH 12/25] FindProcess: adds processs queries NodeByProps / EdgeByProps: adds support for nested queries: ``` NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": Exact("1234")}}) ``` --- beagle/analyzers/statements/base_statement.py | 154 ++++++++++++++++-- beagle/analyzers/statements/process.py | 64 ++++++-- tests/analyzers/conftest.py | 37 ++++- .../statements/test_base_statement.py | 85 ++++++++-- tests/analyzers/statements/test_process.py | 105 ++++++++++++ 5 files changed, 392 insertions(+), 53 deletions(-) create mode 100644 tests/analyzers/statements/test_process.py diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index 0daa39ac..dbcb7334 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -1,33 +1,150 @@ -from typing import Dict, Set, Tuple, Type +from typing import Any, Dict, List, Set, Tuple, Type, Union, cast import networkx as nx from beagle.nodes import Node -from .lookups import FieldLookup +from 
.lookups import Exact, FieldLookup + + +def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]: + # Ensures strings become Exact, Works on nested dicts + for k, v in props.items(): + if isinstance(v, str): + props[k] = Exact(v) + elif isinstance(v, dict): + props[k] = _str_to_exact(v) + + return props class Statement(object): def __init__(self): + """A statement is the base building block of a query. A statement takes as input a graph, executes, + and returns the next graph. + + >>> G2 = statement.execute_networkx(G) + + Attributes + ---------- + result_nodes: Set[int]: + The set of node IDs which create the subgraph returned by the statement. + result_edges: Set[Tuple[int, int, int]]: + The set of (u, v, k) tuples representing the edges which created the subgraph. + """ # The resulting node IDs self.result_nodes: Set[int] = set() # The resulting edge IDs self.result_edges: Set[Tuple[int, int, int]] = set() + def __or__(self, other: "Statement") -> "ChainedStatement": + """Allows statements to be combined through the `|` operator. + The result of execution is the union of both substatements. + + >>> statement1 = Statement(...) + >>> statement2 = Statement(...) + >>> chained = statement1 | statement2 + + + Parameters + ---------- + other: Statement + The statement to chain with. + + Returns + ------- + ChainedStatement + A chained statement compromised of all three. + """ + return ChainedStatement(self, other) + def execute_networkx(self, G: nx.Graph): # pragma: no cover + """Execute a statement against a `networkx` graph.""" raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") - def __or__(self, other): - return ChainedStatement(self, other) + def _test_values_with_lookups( + self, + value_to_test: Union[Node, Dict[str, Any]], + lookup_tests: Dict[str, Union[FieldLookup, Dict]], + ) -> bool: + """Tests a node or dictionay against a configuration of lookup_tests. 
+ + Parameters + ---------- + value_to_test : Union[Node, Dict[str, Any]] + The node or dict to test. + lookup_tests : Dict[str, FieldLookup] + The set of lookup_tests to test. + + Returns + ------- + bool + Did all of the tests pass? + """ + + if not value_to_test: + return False + + results: List[bool] = [] + + for attr_name, lookup in lookup_tests.items(): + if isinstance(lookup, dict): + # recursivly check props against nested entrys (e.g is hashes dict in Process) + if isinstance(value_to_test, Node): + results.append( + self._test_values_with_lookups( + value_to_test=getattr(value_to_test, attr_name), lookup_tests=lookup + ) + ) + else: + results.append( + self._test_values_with_lookups( + value_to_test=value_to_test.get(attr_name, {}), lookup_tests=lookup + ) + ) + else: + if isinstance(value_to_test, Node): + results.append(lookup.test(getattr(value_to_test, attr_name))) + else: + results.append(lookup.test(value_to_test.get(attr_name))) + + return any(results) + + +class FactoryMixin(object): + """Mixin to prevent Statement Factories from calling execute methods. + """ + + def execute_networkx(self, G: nx.graph): + raise UserWarning("Statement factories cannot be called directly") class ChainedStatement(Statement): def __init__(self, *args: Statement): + """Executes multiple Statements, combining their outputs. + + Parameters + ---------- + args: Statement + One ore more statements + """ self.statements = args super().__init__() - def execute_networkx(self, G: nx.Graph): + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + """Executes multiple statements against a `nx.Graph` object, combining their outputs into one subgraph. + + Parameters + ---------- + G : nx.Graph + Graph to execute statements against + + Returns + ------- + nx.Graph + Graph composed from the output graphs of the executed statements. 
+ """ # Get the subgraphs subgraphs = [] @@ -48,24 +165,30 @@ def execute_networkx(self, G: nx.Graph): class NodeByProps(Statement): - def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]): + def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]]): """Searches the graph for a node of type `node_type` with properties matching `props` Parameters ---------- node_type : Type[Node] The type of node to look for. e.g. Process - props : Dict[str, FieldLookup] - The set of props to filter the resulting nodes by. + props : Dict[str, Union[str, FieldLookup, Dict]] + The set of props to filter the resulting nodes by. Any string is transformed to `Exact` lookups. Examples ---------- Filter for Process nodes, with command lines that contain `text.exe` >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) + This may also be a nested dict. + >>> NodeByProps(node_type=Process, props={"hashes": {"md5": Contains("test.exe")}}) + """ self.node_type = node_type - self.props = props + + self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) + + # Cast and assign. super().__init__() def execute_networkx(self, G: nx.Graph) -> nx.Graph: @@ -81,7 +204,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: # If node matches the desired instance. 
if isinstance(node, self.node_type): # Test the node - if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]): + if self._test_values_with_lookups(node, self.props): subgraph_nodes.append(node_id) self.result_nodes = set(subgraph_nodes) @@ -89,15 +212,15 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: class EdgeByProps(Statement): - def __init__(self, edge_type: str, props: Dict[str, FieldLookup]): + def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]): """Searches the graph for an edge of type `edge_type` with properties matching `props` Parameters ---------- edge_type : str The type of edge to look for. e.g. Wrote - props : Dict[str, FieldLookup] - The set of props to filter the resulting edges by. + props : Dict[str, Union[str, FieldLookup]] + The set of props to filter the resulting edges by. Any string is transformed to `Exact` lookups. Examples ---------- @@ -106,7 +229,8 @@ def __init__(self, edge_type: str, props: Dict[str, FieldLookup]): """ self.edge_type = edge_type - self.props = props + + self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) def execute_networkx(self, G: nx.Graph) -> nx.Graph: """Searches a `nx.Graph` object for edges that match type `edge_type` and contains @@ -131,7 +255,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: data = [data] for entry in data: - if any([lookup.test(entry.get(prop)) for prop, lookup in self.props.items()]): + if self._test_values_with_lookups(entry, self.props): subgraph_edges.append((u, v, k)) # can stop on first match break diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py index f3ede113..3216466c 100644 --- a/beagle/analyzers/statements/process.py +++ b/beagle/analyzers/statements/process.py @@ -2,33 +2,61 @@ from beagle.nodes import Process -from .base_statement import NodeByPropsReachable -from .lookups import Exact, FieldLookup +from .base_statement import NodeByPropsReachable, 
FactoryMixin +from .lookups import FieldLookup -class FindProcess(NodeByPropsReachable): - """Finds statements relevant to a Process - - Parameters - ---------- - NodeByPropsReachable : [type] - [description] - """ +class FindProcess(FactoryMixin, NodeByPropsReachable): + """Executes statements relevant to a Process""" @classmethod def with_command_line( cls: Type["FindProcess"], command_line: Union[str, FieldLookup] - ) -> "FindProcess": + ) -> NodeByPropsReachable: # pragma: no cover - if isinstance(command_line, str): - command_line = Exact(command_line) - return cls(node_type=Process, props={"command_line": command_line}) + return NodeByPropsReachable(node_type=Process, props={"command_line": command_line}) @classmethod def with_process_name( cls: Type["FindProcess"], process_image: Union[str, FieldLookup] - ) -> "FindProcess": + ) -> NodeByPropsReachable: # pragma: no cover + + return NodeByPropsReachable(node_type=Process, props={"process_image": process_image}) + + @classmethod + def with_process_path( + cls: Type["FindProcess"], process_path: Union[str, FieldLookup] + ) -> NodeByPropsReachable: # pragma: no cover + + return NodeByPropsReachable(node_type=Process, props={"process_path": process_path}) + + @classmethod + def with_process_image_path( + cls: Type["FindProcess"], process_image_path: Union[str, FieldLookup] + ) -> NodeByPropsReachable: # pragma: no cover + + return NodeByPropsReachable( + node_type=Process, props={"process_image_path": process_image_path} + ) - if isinstance(process_image, str): - process_image = Exact(process_image) - return cls(node_type=Process, props={"process_image": process_image}) + @classmethod + def with_user(cls: Type["FindProcess"], user: Union[str, FieldLookup]) -> NodeByPropsReachable: + return NodeByPropsReachable(node_type=Process, props={"user": user}) + + @classmethod + def with_md5_hash( + cls: Type["FindProcess"], md5hash: Union[str, FieldLookup] + ) -> NodeByPropsReachable: # pragma: no cover + return 
NodeByPropsReachable(node_type=Process, props={"hashes": {"md5": md5hash}}) + + @classmethod + def with_sha256_hash( + cls: Type["FindProcess"], md5hash: Union[str, FieldLookup] + ) -> NodeByPropsReachable: # pragma: no cover + return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": md5hash}}) + + @classmethod + def with_sha1_hash( + cls: Type["FindProcess"], md5hash: Union[str, FieldLookup] + ) -> NodeByPropsReachable: # pragma: no cover + return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": md5hash}}) diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py index 08ad5efc..e8fecd67 100644 --- a/tests/analyzers/conftest.py +++ b/tests/analyzers/conftest.py @@ -1,10 +1,18 @@ -import networkx as nx +from typing import List +import networkx as nx import pytest from beagle.backends.networkx import NetworkX +from beagle.nodes import File, Node, Process + + +@pytest.fixture +def graph_nodes_match(): + def validate_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool: + return [n["data"] for _, n in graph.nodes(data=True)] == nodes -from beagle.nodes import Node, File, Process + return validate_nodes_match @pytest.fixture @@ -91,3 +99,28 @@ def G5(): backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H]) return backend.graph() + + +@pytest.fixture +def G6(): + parent = Process( + process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer" + ) + child = Process( + process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer" + ) + + parent2 = Process( + process_id=4, process_image_path="c:\\", process_image="parent.exe", user="admin" + ) + child2 = Process( + process_id=3, process_image_path="c:\\users", process_image="child.exe", user="admin" + ) + + parent.launched[child].append(timestamp=12456) + + parent2.launched[child2].append(timestamp=2) + + backend = NetworkX(consolidate_edges=True, nodes=[parent, parent2, child, child2]) + + return 
backend.graph() diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index 950c6339..08d7e56f 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -1,23 +1,51 @@ -from typing import List -import networkx as nx - +import pytest from beagle.analyzers.statements.base_statement import ( - NodeByProps, + Statement, + FactoryMixin, EdgeByProps, - NodeByPropsDescendents, + NodeByProps, NodeByPropsAncestors, + NodeByPropsDescendents, NodeByPropsReachable, ) -from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith, Exact +from beagle.analyzers.statements.lookups import Contains, EndsWith, Exact, StartsWith +from beagle.nodes import File, Process + + +def test_factory_mixin(): + class MyFactory(FactoryMixin): + pass -from beagle.nodes import Node, File, Process + with pytest.raises(UserWarning): + obj = MyFactory() + obj.execute_networkx(None) -def graph_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool: - return [n["data"] for _, n in graph.nodes(data=True)] == nodes +def test_test_props_nested_dict(): + s = Statement() + assert ( + s._test_values_with_lookups( + value_to_test={"hashes": {"md5": "1234"}}, + lookup_tests={"hashes": {"md5": Exact("1234")}}, + ) + is True + ) + + assert ( + s._test_values_with_lookups(value_to_test={"hashes": {}}, lookup_tests={"hashes": {"md5": Exact("1234")}}) + is False + ) -def test_one_node_prop_test(G1): + assert ( + s._test_values_with_lookups( + value_to_test={"hashes": None}, lookup_tests={"hashes": {"md5": Exact("1234")}} + ) + is False + ) + + +def test_one_node_prop_test(G1, graph_nodes_match): statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) assert graph_nodes_match( @@ -48,7 +76,7 @@ def test_one_node_prop_test(G1): assert graph_nodes_match(statement.execute_networkx(G1), []) -def test_multiple_node_prop_test(G1): +def 
test_multiple_node_prop_test(G1, graph_nodes_match): statement = NodeByProps( node_type=Process, props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, @@ -61,7 +89,7 @@ def test_multiple_node_prop_test(G1): ) -def test_node_conditional(G1): +def test_node_conditional(G1, graph_nodes_match): statement = NodeByProps( node_type=Process, props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, @@ -73,8 +101,10 @@ def test_node_conditional(G1): ) -def test_one_edge_prop_test(G2, G3): - statement = EdgeByProps(edge_type="Wrote", props={"contents": Exact("foo")}) +def test_one_edge_prop_test(G2, G3, graph_nodes_match): + + # String should get mapped to Exact("foo") + statement = EdgeByProps(edge_type="Wrote", props={"contents": "foo"}) assert graph_nodes_match( statement.execute_networkx(G2), @@ -99,7 +129,7 @@ def test_one_edge_prop_test(G2, G3): assert graph_nodes_match(statement.execute_networkx(G2), []) -def test_node_with_descendants(G4): +def test_node_with_descendants(G4, graph_nodes_match): # A should return A->B->C->D statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")}) @@ -125,7 +155,7 @@ def test_node_with_descendants(G4): ) -def test_node_with_ancestors(G4): +def test_node_with_ancestors(G4, graph_nodes_match): # A should return A statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")}) @@ -157,7 +187,7 @@ def test_node_with_ancestors(G4): ) -def test_nodes_reachable(G5): +def test_nodes_reachable(G5, graph_nodes_match): # All queries will return the full path. # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H @@ -185,7 +215,7 @@ def test_nodes_reachable(G5): ) -def test_chained_statement(G5): +def test_chained_statement(G5, graph_nodes_match): # Both paths should show up because we use a chained statement that returns both. 
Bstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) @@ -206,3 +236,22 @@ def test_chained_statement(G5): Process(process_id=12, process_image="H", command_line="H"), ], ) + + +def test_multiple_chained_statement(G5, graph_nodes_match): + # Should properly execute all three. + + Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) + Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) + Astatement = NodeByProps(node_type=Process, props={"process_image": Exact("A")}) + + chained = Bstatement | Gstatement | Astatement + + assert graph_nodes_match( + chained.execute_networkx(G5), + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="G", command_line="G"), + Process(process_id=10, process_image="A", command_line="A"), + ], + ) diff --git a/tests/analyzers/statements/test_process.py b/tests/analyzers/statements/test_process.py new file mode 100644 index 00000000..49602beb --- /dev/null +++ b/tests/analyzers/statements/test_process.py @@ -0,0 +1,105 @@ +from beagle.analyzers.statements.process import FindProcess +from beagle.nodes import Process, File +from beagle.analyzers.statements.lookups import EndsWith + + +def test_get_by_command_line_no_lookup(G5, graph_nodes_match): + + # Should return all nodes reachable from A + statement = FindProcess.with_command_line("A") + + assert graph_nodes_match( + statement.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + +def test_get_by_command_line_with_lookup(G5, graph_nodes_match): + + # Should return all nodes reachable from A Or G, (so all nodes) + statement = FindProcess.with_command_line(EndsWith("A") | EndsWith("G")) + + assert 
graph_nodes_match( + statement.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + Process(process_id=12, process_image="H", command_line="H"), + ], + ) + + +def test_get_process_name_no_lookup(G2, graph_nodes_match): + + # No match, since defaults to exact. + statement = FindProcess.with_process_name("exe") + assert graph_nodes_match(statement.execute_networkx(G2), []) + + statement = FindProcess.with_process_name("test.exe") + assert graph_nodes_match( + statement.execute_networkx(G2), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_get_process_name_lookup(G2, graph_nodes_match): + + # Should return test.exe because it ends with exe + statement = FindProcess.with_process_name(EndsWith("exe")) + + assert graph_nodes_match( + statement.execute_networkx(G2), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_get_process_user(G6, graph_nodes_match): + + # Should return test.exe because it ends with exe + statement = FindProcess.with_user("omer") + + assert graph_nodes_match( + statement.execute_networkx(G6), + [ + Process( + process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer" + ), + Process( + process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer" + ), + ], + ) + + +def test_get_process_image_path(G6, graph_nodes_match): + + # Should return test.exe because it ends with exe + statement = 
FindProcess.with_process_image_path("d:\\") + + assert graph_nodes_match( + statement.execute_networkx(G6), + [ + Process( + process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer" + ), + Process( + process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer" + ), + ], + ) From 46c3dad2ca9515dd896bf84832d3509ef85e0043 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sat, 16 Nov 2019 09:45:30 -0500 Subject: [PATCH 13/25] Splits Node/Edge statements into seperate files --- beagle/analyzers/statements/base_statement.py | 161 +------------- beagle/analyzers/statements/edge.py | 62 ++++++ beagle/analyzers/statements/node.py | 115 ++++++++++ beagle/analyzers/statements/process.py | 3 +- tests/analyzers/statements/__init__.py | 0 .../statements/test_base_statement.py | 209 +----------------- tests/analyzers/statements/test_edge.py | 31 +++ tests/analyzers/statements/test_node.py | 177 +++++++++++++++ tests/edges/__init__.py | 0 9 files changed, 392 insertions(+), 366 deletions(-) create mode 100644 beagle/analyzers/statements/edge.py create mode 100644 beagle/analyzers/statements/node.py create mode 100644 tests/analyzers/statements/__init__.py create mode 100644 tests/analyzers/statements/test_edge.py create mode 100644 tests/analyzers/statements/test_node.py create mode 100644 tests/edges/__init__.py diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index dbcb7334..753c4acf 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Set, Tuple, Type, Union, cast +from typing import Any, Dict, List, Set, Tuple, Union import networkx as nx @@ -162,162 +162,3 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: H = nx.compose(H, subgraph) return H - - -class NodeByProps(Statement): - def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, 
Dict]]): - """Searches the graph for a node of type `node_type` with properties matching `props` - - Parameters - ---------- - node_type : Type[Node] - The type of node to look for. e.g. Process - props : Dict[str, Union[str, FieldLookup, Dict]] - The set of props to filter the resulting nodes by. Any string is transformed to `Exact` lookups. - - Examples - ---------- - Filter for Process nodes, with command lines that contain `text.exe` - >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) - - This may also be a nested dict. - >>> NodeByProps(node_type=Process, props={"hashes": {"md5": Contains("test.exe")}}) - - """ - self.node_type = node_type - - self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) - - # Cast and assign. - super().__init__() - - def execute_networkx(self, G: nx.Graph) -> nx.Graph: - """Searches a `nx.Graph` object for nodes that match type `node_type` and contains - props matching `props`. This is O(V). - """ - subgraph_nodes = [] - - # For each node - for node_id, data in G.nodes(data=True): - node = data["data"] - - # If node matches the desired instance. - if isinstance(node, self.node_type): - # Test the node - if self._test_values_with_lookups(node, self.props): - subgraph_nodes.append(node_id) - - self.result_nodes = set(subgraph_nodes) - return G.subgraph(subgraph_nodes) - - -class EdgeByProps(Statement): - def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]): - """Searches the graph for an edge of type `edge_type` with properties matching `props` - - Parameters - ---------- - edge_type : str - The type of edge to look for. e.g. Wrote - props : Dict[str, Union[str, FieldLookup]] - The set of props to filter the resulting edges by. Any string is transformed to `Exact` lookups. 
- - Examples - ---------- - Filter for TCP edges, with contents that match ".pdf" - >>> EdgeByProps(edge_type="TCP", props={"payload": Contains(".pdf")}) - - """ - self.edge_type = edge_type - - self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) - - def execute_networkx(self, G: nx.Graph) -> nx.Graph: - """Searches a `nx.Graph` object for edges that match type `edge_type` and contains - props matching `props`. This is O(E). - - Returns a subgraph with all nodes contained in match edges - """ - subgraph_edges = [] - - # For each edge - for u, v, k, e_data in G.edges(data=True, keys=True): - - # pull out the data field from NX - data = e_data["data"] # edge data - e_type = e_data["edge_name"] # edge type - - # If edge matches the desired instance. - if e_type == self.edge_type: - - # Test the edge - if not isinstance(data, list): - data = [data] - - for entry in data: - if self._test_values_with_lookups(entry, self.props): - subgraph_edges.append((u, v, k)) - # can stop on first match - break - - self.result_edges = set(subgraph_edges) - return G.edge_subgraph(subgraph_edges) - - -class NodeByPropsDescendents(NodeByProps): - """Executes a `NodeByProps` query, and returns all descendants of the matching nodes. - see py:meth:`NodeByProps`""" - - def execute_networkx(self, G: nx.Graph) -> nx.Graph: - - # Get the next graph - next_graph = super().execute_networkx(G) - - subgraph_nodes: Set[int] = set() - - # For every node that matched `NodeByProps` - for node_id in next_graph.nodes(): - # Get the nodes descendants in the original graph, and add make a subgraph from those. - subgraph_nodes |= nx.descendants(G, node_id) | {node_id} - - self.result_nodes = set(subgraph_nodes) - return G.subgraph(subgraph_nodes) - - -class NodeByPropsAncestors(NodeByProps): - """Executes a `NodeByProps` query, and returns all ascendants of the matching nodes. 
- see py:meth:`NodeByProps`""" - - def execute_networkx(self, G: nx.Graph) -> nx.Graph: - - # Get the next graph - next_graph = super().execute_networkx(G) - - subgraph_nodes: Set[int] = set() - - # For every node that matched `NodeByProps` - for node_id in next_graph.nodes(): - # Get the nodes ancestors in the original graph, and add make a subgraph from those. - subgraph_nodes |= nx.ancestors(G, node_id) | {node_id} - - self.result_nodes = set(subgraph_nodes) - return G.subgraph(subgraph_nodes) - - -class NodeByPropsReachable(NodeByProps): - """Executes a `NodeByProps` query, and returns all ancestors and descendants of the matching nodes. - see py:meth:`NodeByProps`""" - - def execute_networkx(self, G: nx.Graph) -> nx.Graph: - - # Get the next graph - next_graph = super().execute_networkx(G) - - subgraph_nodes: Set[int] = set() - - # For every node that matched `NodeByProps` - for node_id in next_graph.nodes(): - subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id} - - self.result_nodes = set(subgraph_nodes) - return G.subgraph(subgraph_nodes) diff --git a/beagle/analyzers/statements/edge.py b/beagle/analyzers/statements/edge.py new file mode 100644 index 00000000..eddf277d --- /dev/null +++ b/beagle/analyzers/statements/edge.py @@ -0,0 +1,62 @@ +from typing import Dict, Union + +import networkx as nx + +from .base_statement import Statement, _str_to_exact +from .lookups import FieldLookup + + +class EdgeByProps(Statement): + def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]): + """Searches the graph for an edge of type `edge_type` with properties matching `props` + + Parameters + ---------- + edge_type : str + The type of edge to look for. e.g. Wrote + props : Dict[str, Union[str, FieldLookup]] + The set of props to filter the resulting edges by. Any string is transformed to `Exact` lookups. 
+ + Examples + ---------- + Filter for TCP edges, with contents that match ".pdf" + >>> EdgeByProps(edge_type="TCP", props={"payload": Contains(".pdf")}) + + """ + self.edge_type = edge_type + + self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) + + super().__init__() + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + """Searches a `nx.Graph` object for edges that match type `edge_type` and contains + props matching `props`. This is O(E). + + Returns a subgraph with all nodes contained in match edges + """ + subgraph_edges = [] + + # For each edge + for u, v, k, e_data in G.edges(data=True, keys=True): + + # pull out the data field from NX + data = e_data["data"] # edge data + e_type = e_data["edge_name"] # edge type + + # If edge matches the desired instance. + if e_type == self.edge_type: + + # Test the edge + if not isinstance(data, list): + data = [data] + + for entry in data: + if self._test_values_with_lookups(entry, self.props): + subgraph_edges.append((u, v, k)) + # can stop on first match + self.result_edges |= {(u, v, k)} + self.result_nodes |= {u, v} + break + + return G.edge_subgraph(subgraph_edges) diff --git a/beagle/analyzers/statements/node.py b/beagle/analyzers/statements/node.py new file mode 100644 index 00000000..cc314e2a --- /dev/null +++ b/beagle/analyzers/statements/node.py @@ -0,0 +1,115 @@ +from typing import Dict, Set, Type, Union + +import networkx as nx + +from beagle.nodes import Node + +from .base_statement import Statement, _str_to_exact +from .lookups import FieldLookup + + +class NodeByProps(Statement): + def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]]): + """Searches the graph for a node of type `node_type` with properties matching `props` + + Parameters + ---------- + node_type : Type[Node] + The type of node to look for. e.g. Process + props : Dict[str, Union[str, FieldLookup, Dict]] + The set of props to filter the resulting nodes by. 
Any string is transformed to `Exact` lookups. + + Examples + ---------- + Filter for Process nodes, with command lines that contain `text.exe` + >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) + + This may also be a nested dict. + >>> NodeByProps(node_type=Process, props={"hashes": {"md5": Contains("test.exe")}}) + + """ + self.node_type = node_type + + self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) + + # Cast and assign. + super().__init__() + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + """Searches a `nx.Graph` object for nodes that match type `node_type` and contains + props matching `props`. This is O(V). + """ + subgraph_nodes = [] + + # For each node + for node_id, data in G.nodes(data=True): + node = data["data"] + + # If node matches the desired instance. + if isinstance(node, self.node_type): + # Test the node + if self._test_values_with_lookups(node, self.props): + subgraph_nodes.append(node_id) + self.result_nodes |= {node_id} + + return G.subgraph(subgraph_nodes) + + +class NodeByPropsDescendents(NodeByProps): + """Executes a `NodeByProps` query, and returns all descendants of the matching nodes. + see py:meth:`NodeByProps`""" + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + + # Get the next graph + next_graph = super().execute_networkx(G) + + subgraph_nodes: Set[int] = set() + + # For every node that matched `NodeByProps` + for node_id in next_graph.nodes(): + # Get the nodes descendants in the original graph, and add make a subgraph from those. + subgraph_nodes |= nx.descendants(G, node_id) | {node_id} + + self.result_nodes |= {node_id} + + return G.subgraph(subgraph_nodes) + + +class NodeByPropsAncestors(NodeByProps): + """Executes a `NodeByProps` query, and returns all ascendants of the matching nodes. 
+ see py:meth:`NodeByProps`""" + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + + # Get the next graph + next_graph = super().execute_networkx(G) + + subgraph_nodes: Set[int] = set() + + # For every node that matched `NodeByProps` + for node_id in next_graph.nodes(): + # Get the nodes ancestors in the original graph, and add make a subgraph from those. + subgraph_nodes |= nx.ancestors(G, node_id) | {node_id} + self.result_nodes |= {node_id} + + return G.subgraph(subgraph_nodes) + + +class NodeByPropsReachable(NodeByProps): + """Executes a `NodeByProps` query, and returns all ancestors and descendants of the matching nodes. + see py:meth:`NodeByProps`""" + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + + # Get the next graph + next_graph = super().execute_networkx(G) + + subgraph_nodes: Set[int] = set() + + # For every node that matched `NodeByProps` + for node_id in next_graph.nodes(): + subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id} + self.result_nodes |= {node_id} + + return G.subgraph(subgraph_nodes) diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py index 3216466c..7054ae95 100644 --- a/beagle/analyzers/statements/process.py +++ b/beagle/analyzers/statements/process.py @@ -2,7 +2,8 @@ from beagle.nodes import Process -from .base_statement import NodeByPropsReachable, FactoryMixin +from .node import NodeByPropsReachable +from .base_statement import FactoryMixin from .lookups import FieldLookup diff --git a/tests/analyzers/statements/__init__.py b/tests/analyzers/statements/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index 08d7e56f..9e201e5c 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -1,15 +1,8 @@ import pytest -from beagle.analyzers.statements.base_statement 
import ( - Statement, - FactoryMixin, - EdgeByProps, - NodeByProps, - NodeByPropsAncestors, - NodeByPropsDescendents, - NodeByPropsReachable, -) -from beagle.analyzers.statements.lookups import Contains, EndsWith, Exact, StartsWith -from beagle.nodes import File, Process +from beagle.analyzers.statements.base_statement import FactoryMixin +from beagle.analyzers.statements.node import NodeByPropsReachable, NodeByProps +from beagle.analyzers.statements.lookups import Exact +from beagle.nodes import Process def test_factory_mixin(): @@ -21,200 +14,6 @@ class MyFactory(FactoryMixin): obj.execute_networkx(None) -def test_test_props_nested_dict(): - s = Statement() - - assert ( - s._test_values_with_lookups( - value_to_test={"hashes": {"md5": "1234"}}, - lookup_tests={"hashes": {"md5": Exact("1234")}}, - ) - is True - ) - - assert ( - s._test_values_with_lookups(value_to_test={"hashes": {}}, lookup_tests={"hashes": {"md5": Exact("1234")}}) - is False - ) - - assert ( - s._test_values_with_lookups( - value_to_test={"hashes": None}, lookup_tests={"hashes": {"md5": Exact("1234")}} - ) - is False - ) - - -def test_one_node_prop_test(G1, graph_nodes_match): - statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) - - assert graph_nodes_match( - statement.execute_networkx(G1), - [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], - ) - - # should mathc on other proc - statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")}) - - assert graph_nodes_match( - statement.execute_networkx(G1), - [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")], - ) - - # should match on both - statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")}) - - assert graph_nodes_match( - statement.execute_networkx(G1), - [ - Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), - Process(process_id=12, 
process_image="best.exe", command_line="best.exe /c 123456"), - ], - ) - statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")}) - - assert graph_nodes_match(statement.execute_networkx(G1), []) - - -def test_multiple_node_prop_test(G1, graph_nodes_match): - statement = NodeByProps( - node_type=Process, - props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, - ) - - # Should match on `proc` from G1 - assert graph_nodes_match( - statement.execute_networkx(G1), - [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], - ) - - -def test_node_conditional(G1, graph_nodes_match): - statement = NodeByProps( - node_type=Process, - props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, - ) - - assert graph_nodes_match( - statement.execute_networkx(G1), - [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], - ) - - -def test_one_edge_prop_test(G2, G3, graph_nodes_match): - - # String should get mapped to Exact("foo") - statement = EdgeByProps(edge_type="Wrote", props={"contents": "foo"}) - - assert graph_nodes_match( - statement.execute_networkx(G2), - [ - Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), - File(file_name="foo", file_path="bar"), - ], - ) - - # Should work on the non-conslidating graph too. 
- assert graph_nodes_match( - statement.execute_networkx(G3), - [ - Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), - File(file_name="foo", file_path="bar"), - ], - ) - - statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")}) - - # Should match on `proc` from G1 - assert graph_nodes_match(statement.execute_networkx(G2), []) - - -def test_node_with_descendants(G4, graph_nodes_match): - - # A should return A->B->C->D - statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")}) - assert graph_nodes_match( - statement.execute_networkx(G4), - [ - Process(process_id=10, process_image="A", command_line="A"), - Process(process_id=12, process_image="B", command_line="B"), - Process(process_id=12, process_image="C", command_line="C"), - Process(process_id=12, process_image="D", command_line="D"), - ], - ) - - # B should return B->C->D - statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")}) - assert graph_nodes_match( - statement.execute_networkx(G4), - [ - Process(process_id=12, process_image="B", command_line="B"), - Process(process_id=12, process_image="C", command_line="C"), - Process(process_id=12, process_image="D", command_line="D"), - ], - ) - - -def test_node_with_ancestors(G4, graph_nodes_match): - - # A should return A - statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")}) - assert graph_nodes_match( - statement.execute_networkx(G4), - [Process(process_id=10, process_image="A", command_line="A")], - ) - - # B should return A->B - statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")}) - assert graph_nodes_match( - statement.execute_networkx(G4), - [ - Process(process_id=10, process_image="A", command_line="A"), - Process(process_id=12, process_image="B", command_line="B"), - ], - ) - - # D should return A->B->C->D - statement = NodeByPropsAncestors(node_type=Process, 
props={"process_image": Exact("D")}) - assert graph_nodes_match( - statement.execute_networkx(G4), - [ - Process(process_id=10, process_image="A", command_line="A"), - Process(process_id=12, process_image="B", command_line="B"), - Process(process_id=12, process_image="C", command_line="C"), - Process(process_id=12, process_image="D", command_line="D"), - ], - ) - - -def test_nodes_reachable(G5, graph_nodes_match): - - # All queries will return the full path. - # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H - - statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) - assert graph_nodes_match( - statement.execute_networkx(G5), - [ - Process(process_id=10, process_image="A", command_line="A"), - Process(process_id=12, process_image="B", command_line="B"), - Process(process_id=12, process_image="C", command_line="C"), - Process(process_id=12, process_image="D", command_line="D"), - ], - ) - - statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) - assert graph_nodes_match( - statement.execute_networkx(G5), - [ - Process(process_id=10, process_image="E", command_line="E"), - Process(process_id=12, process_image="F", command_line="F"), - Process(process_id=12, process_image="G", command_line="G"), - Process(process_id=12, process_image="H", command_line="H"), - ], - ) - - def test_chained_statement(G5, graph_nodes_match): # Both paths should show up because we use a chained statement that returns both. 
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py new file mode 100644 index 00000000..4faefec2 --- /dev/null +++ b/tests/analyzers/statements/test_edge.py @@ -0,0 +1,31 @@ +from beagle.analyzers.statements.edge import EdgeByProps +from beagle.analyzers.statements.lookups import Exact +from beagle.nodes import File, Process + + +def test_one_edge_prop_test(G2, G3, graph_nodes_match): + + # String should get mapped to Exact("foo") + statement = EdgeByProps(edge_type="Wrote", props={"contents": "foo"}) + + assert graph_nodes_match( + statement.execute_networkx(G2), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + # Should work on the non-conslidating graph too. + assert graph_nodes_match( + statement.execute_networkx(G3), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")}) + + # Should match on `proc` from G1 + assert graph_nodes_match(statement.execute_networkx(G2), []) diff --git a/tests/analyzers/statements/test_node.py b/tests/analyzers/statements/test_node.py new file mode 100644 index 00000000..58aefa86 --- /dev/null +++ b/tests/analyzers/statements/test_node.py @@ -0,0 +1,177 @@ +from beagle.analyzers.statements.base_statement import Statement +from beagle.analyzers.statements.lookups import Contains, EndsWith, Exact, StartsWith +from beagle.analyzers.statements.node import ( + NodeByProps, + NodeByPropsAncestors, + NodeByPropsDescendents, + NodeByPropsReachable, +) +from beagle.nodes import Process + + +def test_test_props_nested_dict(): + s = Statement() + + assert ( + s._test_values_with_lookups( + value_to_test={"hashes": {"md5": "1234"}}, + lookup_tests={"hashes": {"md5": Exact("1234")}}, + ) + is True + ) + + assert ( + 
s._test_values_with_lookups( + value_to_test={"hashes": {}}, lookup_tests={"hashes": {"md5": Exact("1234")}} + ) + is False + ) + + assert ( + s._test_values_with_lookups( + value_to_test={"hashes": None}, lookup_tests={"hashes": {"md5": Exact("1234")}} + ) + is False + ) + + +def test_one_node_prop_test(G1, graph_nodes_match): + statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) + + assert graph_nodes_match( + statement.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) + + # should mathc on other proc + statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")}) + + assert graph_nodes_match( + statement.execute_networkx(G1), + [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")], + ) + + # should match on both + statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")}) + + assert graph_nodes_match( + statement.execute_networkx(G1), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"), + ], + ) + statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")}) + + assert graph_nodes_match(statement.execute_networkx(G1), []) + + +def test_multiple_node_prop_test(G1, graph_nodes_match): + statement = NodeByProps( + node_type=Process, + props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, + ) + + # Should match on `proc` from G1 + assert graph_nodes_match( + statement.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) + + +def test_node_conditional(G1, graph_nodes_match): + statement = NodeByProps( + node_type=Process, + props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, + ) + + assert graph_nodes_match( + 
statement.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) + + +def test_node_with_descendants(G4, graph_nodes_match): + + # A should return A->B->C->D + statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + # B should return B->C->D + statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + +def test_node_with_ancestors(G4, graph_nodes_match): + + # A should return A + statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [Process(process_id=10, process_image="A", command_line="A")], + ) + + # B should return A->B + statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + ], + ) + + # D should return A->B->C->D + statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")}) + assert graph_nodes_match( + statement.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, 
process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + +def test_nodes_reachable(G5, graph_nodes_match): + + # All queries will return the full path. + # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H + + statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + statement.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) + assert graph_nodes_match( + statement.execute_networkx(G5), + [ + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + Process(process_id=12, process_image="H", command_line="H"), + ], + ) diff --git a/tests/edges/__init__.py b/tests/edges/__init__.py new file mode 100644 index 00000000..e69de29b From 56d1e2c149eae10ec12813fac12921dbb45cd580 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sat, 16 Nov 2019 14:06:26 -0500 Subject: [PATCH 14/25] Adds statement chaining using >> or << operators --- beagle/analyzers/statements/base_statement.py | 33 +++++++++++++++++++ .../statements/test_base_statement.py | 25 ++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index 753c4acf..fa9865b9 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -38,6 +38,34 @@ def __init__(self): # The resulting edge IDs self.result_edges: Set[Tuple[int, int, int]] = set() + # Set of 
statements that came before or after it. + self.downstream_statements: List[Statement] = [] + self.upstream_statements: List[Statement] = [] + + def __rshift__(self, other: "Statement") -> "Statement": + """Implements Self >> Other == self.downstream_statements.append(other) + + Parameters + ---------- + other : Statement + The other statement to add. + """ + self.downstream_statements.append(other) + other.upstream_statements.append(self) + return other + + def __lshift__(self, other: "Statement") -> "Statement": + """Implements Self << Other == self.upstream_statements.append(other) + + Parameters + ---------- + other : Statement + The other statement to add. + """ + other.downstream_statements.append(self) + self.upstream_statements.append(other) + return other + def __or__(self, other: "Statement") -> "ChainedStatement": """Allows statements to be combined through the `|` operator. The result of execution is the union of both substatements. @@ -162,3 +190,8 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: H = nx.compose(H, subgraph) return H + + +class InteremediateStatement(Statement): + def __init__(self): + pass diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index 9e201e5c..59358ca4 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -54,3 +54,28 @@ def test_multiple_chained_statement(G5, graph_nodes_match): Process(process_id=10, process_image="A", command_line="A"), ], ) + + +def test_shift_operators(): + Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) + Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) + + Bstatement >> Gstatement + + assert Bstatement.downstream_statements == [Gstatement] + + Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) + Gstatement = NodeByProps(node_type=Process, props={"process_image": 
Exact("G")}) + + Bstatement << Gstatement + + assert Gstatement.downstream_statements == [Bstatement] + + Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) + Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) + Astatement = NodeByProps(node_type=Process, props={"process_image": Exact("A")}) + + Bstatement >> Gstatement + Bstatement >> Astatement + + assert Bstatement.downstream_statements == [Gstatement, Astatement] From e01203de96221231827cdcc19f334611ffe94f63 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sat, 16 Nov 2019 15:10:37 -0500 Subject: [PATCH 15/25] adds intermediate statements, allowing to chain actions --- beagle/analyzers/statements/base_statement.py | 22 +++++++-- beagle/analyzers/statements/edge.py | 49 +++++++++++++++++-- beagle/analyzers/statements/node.py | 2 +- beagle/analyzers/statements/process.py | 2 +- tests/analyzers/statements/test_edge.py | 25 +++++++++- 5 files changed, 91 insertions(+), 9 deletions(-) diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index fa9865b9..0c7cd49c 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -111,6 +111,11 @@ def _test_values_with_lookups( Did all of the tests pass? """ + # Auto pass if no tests.s + if not lookup_tests: + return True + + # Auto fail on empty value (given we have tests) if not value_to_test: return False @@ -192,6 +197,17 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: return H -class InteremediateStatement(Statement): - def __init__(self): - pass +class IntermediateStatement(Statement): + """An IntermediateStatement is a statement which depends on a previous initial Statement to run. + + For example, you may only want to find edges connected to one of the nodes identifed in `NodeByProps`. 
+ """ + + def __init__(self, *args, **kwargs): + self.upstream_nodes: Set[int] = set() + self.upstream_edges: Set[Tuple[int, int, int]] = set() + super().__init__(*args, **kwargs) + + def set_upstream_nodes(self, upstream_statement: Statement): + self.upstream_nodes |= upstream_statement.result_nodes + self.upstream_edges |= upstream_statement.result_edges diff --git a/beagle/analyzers/statements/edge.py b/beagle/analyzers/statements/edge.py index eddf277d..4ff9a907 100644 --- a/beagle/analyzers/statements/edge.py +++ b/beagle/analyzers/statements/edge.py @@ -2,12 +2,14 @@ import networkx as nx -from .base_statement import Statement, _str_to_exact +from .base_statement import Statement, _str_to_exact, IntermediateStatement from .lookups import FieldLookup class EdgeByProps(Statement): - def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]): + def __init__( + self, edge_type: str, props: Dict[str, Union[str, FieldLookup]] = {}, *args, **kwargs + ): """Searches the graph for an edge of type `edge_type` with properties matching `props` Parameters @@ -27,7 +29,7 @@ def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]): self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) - super().__init__() + super().__init__(*args, **kwargs) def execute_networkx(self, G: nx.Graph) -> nx.Graph: """Searches a `nx.Graph` object for edges that match type `edge_type` and contains @@ -60,3 +62,44 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: break return G.edge_subgraph(subgraph_edges) + + +class IntermediateEdgeByProps(EdgeByProps, IntermediateStatement): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + """Searches a `nx.Graph` object for edges that match type `edge_type` and contains + props matching `props`. This is O(E). 
+ + Returns a subgraph with all nodes contained in match edges + """ + subgraph_edges = [] + + for u, v, k, e_data in G.edges( + # Only get the edges associate with nodes from the previous step. + self.upstream_nodes, + data=True, + keys=True, + ): + + # pull out the data field from NX + data = e_data["data"] # edge data + e_type = e_data["edge_name"] # edge type + + # If edge matches the desired instance. + if e_type == self.edge_type: + + # Test the edge + if not isinstance(data, list): + data = [data] + + for entry in data: + if self._test_values_with_lookups(entry, self.props): + subgraph_edges.append((u, v, k)) + # can stop on first match + self.result_edges |= {(u, v, k)} + self.result_nodes |= {u, v} + break + + return G.edge_subgraph(subgraph_edges) diff --git a/beagle/analyzers/statements/node.py b/beagle/analyzers/statements/node.py index cc314e2a..58d44fb2 100644 --- a/beagle/analyzers/statements/node.py +++ b/beagle/analyzers/statements/node.py @@ -9,7 +9,7 @@ class NodeByProps(Statement): - def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]]): + def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]] = {}): """Searches the graph for a node of type `node_type` with properties matching `props` Parameters diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py index 7054ae95..f6357505 100644 --- a/beagle/analyzers/statements/process.py +++ b/beagle/analyzers/statements/process.py @@ -7,7 +7,7 @@ from .lookups import FieldLookup -class FindProcess(FactoryMixin, NodeByPropsReachable): +class FindProcess(FactoryMixin): """Executes statements relevant to a Process""" @classmethod diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py index 4faefec2..aa4964e0 100644 --- a/tests/analyzers/statements/test_edge.py +++ b/tests/analyzers/statements/test_edge.py @@ -1,5 +1,6 @@ -from beagle.analyzers.statements.edge 
import EdgeByProps +from beagle.analyzers.statements.edge import EdgeByProps, IntermediateEdgeByProps from beagle.analyzers.statements.lookups import Exact +from beagle.analyzers.statements.process import FindProcess from beagle.nodes import File, Process @@ -29,3 +30,25 @@ def test_one_edge_prop_test(G2, G3, graph_nodes_match): # Should match on `proc` from G1 assert graph_nodes_match(statement.execute_networkx(G2), []) + + +def test_intermediate_edge_by_props(G5, graph_nodes_match): + + # Run the first statement. + statement1 = FindProcess.with_command_line("B") + statement2 = IntermediateEdgeByProps(edge_type="Launched") + + # get the subgraph. + G_s = statement1.execute_networkx(G5) + + # Set the upstream nodes of our next statement + statement2.set_upstream_nodes(statement1) + + # running statement two should only give us B->C + assert graph_nodes_match( + statement2.execute_networkx(G_s), + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + ], + ) From 349d2da67043cf9a0204a8c2e8bffb88be1379c2 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sat, 16 Nov 2019 19:46:31 -0500 Subject: [PATCH 16/25] classmethod -> staticmethod --- beagle/analyzers/statements/process.py | 42 +++++++++++++++----------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py index f6357505..a1c6d47b 100644 --- a/beagle/analyzers/statements/process.py +++ b/beagle/analyzers/statements/process.py @@ -10,54 +10,60 @@ class FindProcess(FactoryMixin): """Executes statements relevant to a Process""" - @classmethod + @staticmethod def with_command_line( - cls: Type["FindProcess"], command_line: Union[str, FieldLookup] + command_line: Union[str, FieldLookup] ) -> NodeByPropsReachable: # pragma: no cover return NodeByPropsReachable(node_type=Process, props={"command_line": command_line}) - @classmethod + @staticmethod def 
with_process_name( - cls: Type["FindProcess"], process_image: Union[str, FieldLookup] + process_image: Union[str, FieldLookup] ) -> NodeByPropsReachable: # pragma: no cover return NodeByPropsReachable(node_type=Process, props={"process_image": process_image}) - @classmethod + @staticmethod def with_process_path( - cls: Type["FindProcess"], process_path: Union[str, FieldLookup] + process_path: Union[str, FieldLookup] ) -> NodeByPropsReachable: # pragma: no cover return NodeByPropsReachable(node_type=Process, props={"process_path": process_path}) - @classmethod + @staticmethod def with_process_image_path( - cls: Type["FindProcess"], process_image_path: Union[str, FieldLookup] + process_image_path: Union[str, FieldLookup] ) -> NodeByPropsReachable: # pragma: no cover return NodeByPropsReachable( node_type=Process, props={"process_image_path": process_image_path} ) - @classmethod - def with_user(cls: Type["FindProcess"], user: Union[str, FieldLookup]) -> NodeByPropsReachable: + @staticmethod + def with_user(user: Union[str, FieldLookup]) -> NodeByPropsReachable: + return NodeByPropsReachable(node_type=Process, props={"user": user}) - @classmethod + @staticmethod def with_md5_hash( - cls: Type["FindProcess"], md5hash: Union[str, FieldLookup] + md5hash: Union[str, FieldLookup] ) -> NodeByPropsReachable: # pragma: no cover + return NodeByPropsReachable(node_type=Process, props={"hashes": {"md5": md5hash}}) - @classmethod + @staticmethod def with_sha256_hash( - cls: Type["FindProcess"], md5hash: Union[str, FieldLookup] + sha256hash: Union[str, FieldLookup] ) -> NodeByPropsReachable: # pragma: no cover - return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": md5hash}}) - @classmethod + return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": sha256hash}}) + + @staticmethod def with_sha1_hash( - cls: Type["FindProcess"], md5hash: Union[str, FieldLookup] + sha1hash: Union[str, FieldLookup] ) -> NodeByPropsReachable: # pragma: no cover - 
return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": md5hash}}) + + return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}}) + + def launched_by(): From 7a71a65a299aa6ef493b4da24a80627ec8ea290a Mon Sep 17 00:00:00 2001 From: yampelo Date: Sat, 16 Nov 2019 19:53:58 -0500 Subject: [PATCH 17/25] Analyzer: Class to execute statements --- beagle/analyzers/base_analyzer.py | 37 ++++++++++++++++++- beagle/analyzers/statements/base_statement.py | 25 +++++++------ beagle/analyzers/statements/edge.py | 8 +++- beagle/analyzers/statements/process.py | 13 ++++--- tests/analyzers/test_base_analyzer.py | 25 +++++++++++++ 5 files changed, 88 insertions(+), 20 deletions(-) create mode 100644 tests/analyzers/test_base_analyzer.py diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py index 790a006c..a1eff422 100644 --- a/beagle/analyzers/base_analyzer.py +++ b/beagle/analyzers/base_analyzer.py @@ -1,5 +1,40 @@ +from typing import Type, cast + +import networkx as nx + +from beagle.analyzers.statements.base_statement import Statement +from beagle.backends import Backend, NetworkX + + class Analyzer(object): - def __init__(self, name: str, description: str, score: int): + def __init__(self, name: str, description: str, score: int, statement: Statement): self.name = name self.description = description self.score = score + + # Make sure we get the start. + while statement.upstream_statement is not None: + statement = statement.upstream_statement + + self.statement: Statement = statement + + def run(self, backend: Type[Backend]): + if isinstance(backend, NetworkX): + backend = cast(NetworkX, backend) + self.run_networkx(backend.G) + + def run_networkx(self, G: nx.Graph) -> nx.Graph: + + # H is a copy of our original graph. + H = G.copy() + + current_statement = self.statement + + while current_statement is not None: + # Run the statement. 
+ H = current_statement.execute_networkx(H) + + # Get the next statement, and execute + current_statement = current_statement.downstream_statement + + return H diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index 0c7cd49c..a069d5a1 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -39,31 +39,31 @@ def __init__(self): self.result_edges: Set[Tuple[int, int, int]] = set() # Set of statements that came before or after it. - self.downstream_statements: List[Statement] = [] - self.upstream_statements: List[Statement] = [] + self.downstream_statement: Statement = None + self.upstream_statement: Statement = None def __rshift__(self, other: "Statement") -> "Statement": - """Implements Self >> Other == self.downstream_statements.append(other) + """Implements Self >> Other == self.downstream_statements = other Parameters ---------- other : Statement The other statement to add. """ - self.downstream_statements.append(other) - other.upstream_statements.append(self) + self.downstream_statement = other + other.upstream_statement = self return other def __lshift__(self, other: "Statement") -> "Statement": - """Implements Self << Other == self.upstream_statements.append(other) + """Implements Self << Other == self.upstream_statements = other Parameters ---------- other : Statement The other statement to add. 
""" - other.downstream_statements.append(self) - self.upstream_statements.append(other) + other.downstream_statement = self + self.upstream_statement = other return other def __or__(self, other: "Statement") -> "ChainedStatement": @@ -208,6 +208,9 @@ def __init__(self, *args, **kwargs): self.upstream_edges: Set[Tuple[int, int, int]] = set() super().__init__(*args, **kwargs) - def set_upstream_nodes(self, upstream_statement: Statement): - self.upstream_nodes |= upstream_statement.result_nodes - self.upstream_edges |= upstream_statement.result_edges + def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]: + return self.upstream_statement.result_nodes, self.upstream_statement.result_edges + + def set_upstream_nodes(self): + self.upstream_nodes |= self.upstream_statement.result_nodes + self.upstream_edges |= self.upstream_statement.result_edges diff --git a/beagle/analyzers/statements/edge.py b/beagle/analyzers/statements/edge.py index 4ff9a907..444879bf 100644 --- a/beagle/analyzers/statements/edge.py +++ b/beagle/analyzers/statements/edge.py @@ -29,7 +29,7 @@ def __init__( self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) - super().__init__(*args, **kwargs) + super().__init__() def execute_networkx(self, G: nx.Graph) -> nx.Graph: """Searches a `nx.Graph` object for edges that match type `edge_type` and contains @@ -74,11 +74,15 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: Returns a subgraph with all nodes contained in match edges """ + + # Grab upstream information + upstream_nodes, _ = self.get_upstream_results() + subgraph_edges = [] for u, v, k, e_data in G.edges( # Only get the edges associate with nodes from the previous step. 
- self.upstream_nodes, + upstream_nodes, data=True, keys=True, ): diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py index a1c6d47b..52758584 100644 --- a/beagle/analyzers/statements/process.py +++ b/beagle/analyzers/statements/process.py @@ -1,10 +1,11 @@ -from typing import Union, Type +from typing import Union from beagle.nodes import Process -from .node import NodeByPropsReachable from .base_statement import FactoryMixin +from .edge import IntermediateEdgeByProps from .lookups import FieldLookup +from .node import NodeByPropsReachable class FindProcess(FactoryMixin): @@ -46,9 +47,7 @@ def with_user(user: Union[str, FieldLookup]) -> NodeByPropsReachable: return NodeByPropsReachable(node_type=Process, props={"user": user}) @staticmethod - def with_md5_hash( - md5hash: Union[str, FieldLookup] - ) -> NodeByPropsReachable: # pragma: no cover + def with_md5_hash(md5hash: Union[str, FieldLookup]) -> NodeByPropsReachable: # pragma: no cover return NodeByPropsReachable(node_type=Process, props={"hashes": {"md5": md5hash}}) @@ -66,4 +65,6 @@ def with_sha1_hash( return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}}) - def launched_by(): + @staticmethod + def that_was_launched(): + return IntermediateEdgeByProps(edge_type="Launched") diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py new file mode 100644 index 00000000..461a9f8c --- /dev/null +++ b/tests/analyzers/test_base_analyzer.py @@ -0,0 +1,25 @@ +from beagle.analyzers.base_analyzer import Analyzer +from beagle.analyzers.statements.edge import IntermediateEdgeByProps + +from beagle.analyzers.statements.process import FindProcess +from beagle.nodes import Process + + +def test_analyzer_two_statements(G5, graph_nodes_match): + + analyzer = Analyzer( + name="test_analyzer_two_statements", + description="test_analyzer_two_statements", + score=0, + statement=FindProcess.with_command_line("B") >> 
FindProcess.that_was_launched(), + ) + + G = analyzer.run_networkx(G5) + + assert graph_nodes_match( + G, + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + ], + ) From 1fc6e3d3e5f99ea2c624c42a6528e365bdc8e8b4 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sat, 16 Nov 2019 20:15:16 -0500 Subject: [PATCH 18/25] Fixes unit tests --- beagle/analyzers/statements/base_statement.py | 4 +-- tests/analyzers/conftest.py | 21 ++++++++++++++- .../statements/test_base_statement.py | 13 ++------- tests/analyzers/statements/test_edge.py | 5 ++-- tests/analyzers/test_base_analyzer.py | 27 +++++++++++++++++-- 5 files changed, 51 insertions(+), 19 deletions(-) diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py index a069d5a1..f6fe36b0 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/statements/base_statement.py @@ -43,7 +43,7 @@ def __init__(self): self.upstream_statement: Statement = None def __rshift__(self, other: "Statement") -> "Statement": - """Implements Self >> Other == self.downstream_statements = other + """Implements Self >> Other == self.downstream_statement = other Parameters ---------- @@ -55,7 +55,7 @@ def __rshift__(self, other: "Statement") -> "Statement": return other def __lshift__(self, other: "Statement") -> "Statement": - """Implements Self << Other == self.upstream_statements = other + """Implements Self << Other == self.upstream_statement = other Parameters ---------- diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py index e8fecd67..cfd10c58 100644 --- a/tests/analyzers/conftest.py +++ b/tests/analyzers/conftest.py @@ -96,7 +96,7 @@ def G5(): F.launched[G] G.launched[H] - backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H]) + backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D, E, F, G, H]) return backend.graph() @@ -124,3 +124,22 @@ def 
G6(): backend = NetworkX(consolidate_edges=True, nodes=[parent, parent2, child, child2]) return backend.graph() + + +@pytest.fixture +def G7(): + # A graph with two, *disconnected* four process tree: + # A -> B -> C -> D + # E -> F -> G -> H + A = Process(process_id=10, process_image="A", command_line="A") + B = Process(process_id=12, process_image="B", command_line="B") + C = Process(process_id=12, process_image="C", command_line="C") + D = Process(process_id=12, process_image="D", command_line="D") + + A.launched[B] + B.launched[C] + C.launched[D] + + backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D]) + + return backend.graph() diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index 59358ca4..586c772c 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -62,20 +62,11 @@ def test_shift_operators(): Bstatement >> Gstatement - assert Bstatement.downstream_statements == [Gstatement] + assert Bstatement.downstream_statement == Gstatement Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) Bstatement << Gstatement - assert Gstatement.downstream_statements == [Bstatement] - - Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) - Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) - Astatement = NodeByProps(node_type=Process, props={"process_image": Exact("A")}) - - Bstatement >> Gstatement - Bstatement >> Astatement - - assert Bstatement.downstream_statements == [Gstatement, Astatement] + assert Gstatement.downstream_statement == Bstatement diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py index aa4964e0..6440aae1 100644 --- a/tests/analyzers/statements/test_edge.py +++ b/tests/analyzers/statements/test_edge.py @@ 
-38,12 +38,11 @@ def test_intermediate_edge_by_props(G5, graph_nodes_match): statement1 = FindProcess.with_command_line("B") statement2 = IntermediateEdgeByProps(edge_type="Launched") + statement1 >> statement2 + # get the subgraph. G_s = statement1.execute_networkx(G5) - # Set the upstream nodes of our next statement - statement2.set_upstream_nodes(statement1) - # running statement two should only give us B->C assert graph_nodes_match( statement2.execute_networkx(G_s), diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py index 461a9f8c..de68b1d9 100644 --- a/tests/analyzers/test_base_analyzer.py +++ b/tests/analyzers/test_base_analyzer.py @@ -1,6 +1,4 @@ from beagle.analyzers.base_analyzer import Analyzer -from beagle.analyzers.statements.edge import IntermediateEdgeByProps - from beagle.analyzers.statements.process import FindProcess from beagle.nodes import Process @@ -23,3 +21,28 @@ def test_analyzer_two_statements(G5, graph_nodes_match): Process(process_id=12, process_image="C", command_line="C"), ], ) + + +def test_analyzer_or_statement_statements(G5, graph_nodes_match): + + query = ( + FindProcess.with_command_line("B") | FindProcess.with_command_line("A") + ) >> FindProcess.that_was_launched() + + analyzer = Analyzer( + name="test_analyzer_two_statements", + description="test_analyzer_two_statements", + score=0, + statement=query, + ) + + G = analyzer.run_networkx(G5) + + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + ], + ) From db98490451ee753006cc28aa2e6f4824eb936d2d Mon Sep 17 00:00:00 2001 From: yampelo Date: Sat, 16 Nov 2019 20:28:16 -0500 Subject: [PATCH 19/25] Tests edges with tree structures graphs --- tests/analyzers/conftest.py | 24 ++++++++++++++++------ tests/analyzers/statements/test_edge.py | 27 +++++++++++++++++++++++++ 2 files 
changed, 45 insertions(+), 6 deletions(-) diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py index cfd10c58..654bf74f 100644 --- a/tests/analyzers/conftest.py +++ b/tests/analyzers/conftest.py @@ -128,18 +128,30 @@ def G6(): @pytest.fixture def G7(): - # A graph with two, *disconnected* four process tree: - # A -> B -> C -> D - # E -> F -> G -> H + # A graph that's a tree of process launches + # A + # / \ + # B C + # / \ / \ + # D E F G + A = Process(process_id=10, process_image="A", command_line="A") B = Process(process_id=12, process_image="B", command_line="B") C = Process(process_id=12, process_image="C", command_line="C") D = Process(process_id=12, process_image="D", command_line="D") + E = Process(process_id=10, process_image="E", command_line="E") + F = Process(process_id=12, process_image="F", command_line="F") + G = Process(process_id=12, process_image="G", command_line="G") A.launched[B] - B.launched[C] - C.launched[D] + A.launched[C] + + B.launched[D] + B.launched[E] + + C.launched[F] + C.launched[G] - backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D]) + backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D, E, F, G]) return backend.graph() diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py index 6440aae1..5abdbf06 100644 --- a/tests/analyzers/statements/test_edge.py +++ b/tests/analyzers/statements/test_edge.py @@ -2,6 +2,7 @@ from beagle.analyzers.statements.lookups import Exact from beagle.analyzers.statements.process import FindProcess from beagle.nodes import File, Process +from beagle.analyzers.base_analyzer import Analyzer def test_one_edge_prop_test(G2, G3, graph_nodes_match): @@ -51,3 +52,29 @@ def test_intermediate_edge_by_props(G5, graph_nodes_match): Process(process_id=12, process_image="C", command_line="C"), ], ) + + +def test_intermediate_edge_all_candidates_found(G7, graph_nodes_match): + + analyzer = Analyzer( + 
name="test_intermediate_edge_all_candidates_found", + description="test_intermediate_edge_all_candidates_found", + score=0, + statement=FindProcess.with_command_line("C") >> FindProcess.that_was_launched(), + ) + + G = analyzer.run_networkx(G7) + + # should return + # C + # / \ + # F G + + assert graph_nodes_match( + G, + [ + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + ], + ) From f6b602721e1993205cac5f7d091022d351fc579a Mon Sep 17 00:00:00 2001 From: yampelo Date: Sun, 17 Nov 2019 15:01:09 -0500 Subject: [PATCH 20/25] Renames Statement as Query --- beagle/analyzers/base_analyzer.py | 22 ++-- .../{statements => queries}/__init__.py | 0 .../base_query.py} | 102 +++++++++--------- .../analyzers/{statements => queries}/edge.py | 6 +- .../{statements => queries}/lookups.py | 0 .../analyzers/{statements => queries}/node.py | 4 +- .../{statements => queries}/process.py | 4 +- .../statements/test_base_statement.py | 42 ++++---- tests/analyzers/statements/test_edge.py | 32 +++--- tests/analyzers/statements/test_lookups.py | 2 +- tests/analyzers/statements/test_node.py | 60 +++++------ tests/analyzers/statements/test_process.py | 32 +++--- tests/analyzers/test_base_analyzer.py | 18 ++-- 13 files changed, 162 insertions(+), 162 deletions(-) rename beagle/analyzers/{statements => queries}/__init__.py (100%) rename beagle/analyzers/{statements/base_statement.py => queries/base_query.py} (62%) rename beagle/analyzers/{statements => queries}/edge.py (95%) rename beagle/analyzers/{statements => queries}/lookups.py (100%) rename beagle/analyzers/{statements => queries}/node.py (97%) rename beagle/analyzers/{statements => queries}/process.py (95%) diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py index a1eff422..5107d967 100644 --- a/beagle/analyzers/base_analyzer.py +++ b/beagle/analyzers/base_analyzer.py 
@@ -2,21 +2,21 @@ import networkx as nx -from beagle.analyzers.statements.base_statement import Statement +from beagle.analyzers.queries.base_query import Query from beagle.backends import Backend, NetworkX class Analyzer(object): - def __init__(self, name: str, description: str, score: int, statement: Statement): + def __init__(self, name: str, description: str, score: int, query: Query): self.name = name self.description = description self.score = score # Make sure we get the start. - while statement.upstream_statement is not None: - statement = statement.upstream_statement + while query.upstream_query is not None: + query = query.upstream_query - self.statement: Statement = statement + self.query: Query = query def run(self, backend: Type[Backend]): if isinstance(backend, NetworkX): @@ -28,13 +28,13 @@ def run_networkx(self, G: nx.Graph) -> nx.Graph: # H is a copy of our original graph. H = G.copy() - current_statement = self.statement + current_query = self.query - while current_statement is not None: - # Run the statement. - H = current_statement.execute_networkx(H) + while current_query is not None: + # Run the query. 
+ H = current_query.execute_networkx(H) - # Get the next statement, and execute - current_statement = current_statement.downstream_statement + # Get the next query, and execute + current_query = current_query.downstream_query return H diff --git a/beagle/analyzers/statements/__init__.py b/beagle/analyzers/queries/__init__.py similarity index 100% rename from beagle/analyzers/statements/__init__.py rename to beagle/analyzers/queries/__init__.py diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/queries/base_query.py similarity index 62% rename from beagle/analyzers/statements/base_statement.py rename to beagle/analyzers/queries/base_query.py index f6fe36b0..26f85199 100644 --- a/beagle/analyzers/statements/base_statement.py +++ b/beagle/analyzers/queries/base_query.py @@ -18,17 +18,17 @@ def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]: return props -class Statement(object): +class Query(object): def __init__(self): - """A statement is the base building block of a query. A statement takes as input a graph, executes, + """A query is the base building block of a query. A query takes as input a graph, executes, and returns the next graph. - >>> G2 = statement.execute_networkx(G) + >>> G2 = query.execute_networkx(G) Attributes ---------- result_nodes: Set[int]: - The set of node IDs which create the subgraph returned by the statement. + The set of node IDs which create the subgraph returned by the query. result_edges: Set[Tuple[int, int, int]]: The set of (u, v, k) tuples representing the edges which created the subgraph. """ @@ -38,57 +38,57 @@ def __init__(self): # The resulting edge IDs self.result_edges: Set[Tuple[int, int, int]] = set() - # Set of statements that came before or after it. - self.downstream_statement: Statement = None - self.upstream_statement: Statement = None + # Set of queries that came before or after it. 
+ self.downstream_query: Query = None + self.upstream_query: Query = None - def __rshift__(self, other: "Statement") -> "Statement": - """Implements Self >> Other == self.downstream_statement = other + def __rshift__(self, other: "Query") -> "Query": + """Implements Self >> Other == self.downstream_query = other Parameters ---------- - other : Statement - The other statement to add. + other : Query + The other query to add. """ - self.downstream_statement = other - other.upstream_statement = self + self.downstream_query = other + other.upstream_query = self return other - def __lshift__(self, other: "Statement") -> "Statement": - """Implements Self << Other == self.upstream_statement = other + def __lshift__(self, other: "Query") -> "Query": + """Implements Self << Other == self.upstream_query = other Parameters ---------- - other : Statement - The other statement to add. + other : Query + The other query to add. """ - other.downstream_statement = self - self.upstream_statement = other + other.downstream_query = self + self.upstream_query = other return other - def __or__(self, other: "Statement") -> "ChainedStatement": - """Allows statements to be combined through the `|` operator. - The result of execution is the union of both substatements. + def __or__(self, other: "Query") -> "ChainedQuery": + """Allows queries to be combined through the `|` operator. + The result of execution is the union of both subqueries. - >>> statement1 = Statement(...) - >>> statement2 = Statement(...) - >>> chained = statement1 | statement2 + >>> query1 = Query(...) + >>> query2 = Query(...) + >>> chained = query1 | query2 Parameters ---------- - other: Statement - The statement to chain with. + other: Query + The query to chain with. Returns ------- - ChainedStatement - A chained statement compromised of all three. + ChainedQuery + A chained query compromised of all three. 
""" - return ChainedStatement(self, other) + return ChainedQuery(self, other) def execute_networkx(self, G: nx.Graph): # pragma: no cover - """Execute a statement against a `networkx` graph.""" + """Execute a query against a `networkx` graph.""" raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") def _test_values_with_lookups( @@ -146,48 +146,48 @@ def _test_values_with_lookups( class FactoryMixin(object): - """Mixin to prevent Statement Factories from calling execute methods. + """Mixin to prevent Query Factories from calling execute methods. """ def execute_networkx(self, G: nx.graph): - raise UserWarning("Statement factories cannot be called directly") + raise UserWarning("Query factories cannot be called directly") -class ChainedStatement(Statement): - def __init__(self, *args: Statement): - """Executes multiple Statements, combining their outputs. +class ChainedQuery(Query): + def __init__(self, *args: Query): + """Executes multiple Querys, combining their outputs. Parameters ---------- - args: Statement - One ore more statements + args: Query + One ore more queries """ - self.statements = args + self.queries = args super().__init__() def execute_networkx(self, G: nx.Graph) -> nx.Graph: - """Executes multiple statements against a `nx.Graph` object, combining their outputs into one subgraph. + """Executes multiple queries against a `nx.Graph` object, combining their outputs into one subgraph. Parameters ---------- G : nx.Graph - Graph to execute statements against + Graph to execute queries against Returns ------- nx.Graph - Graph composed from the output graphs of the executed statements. + Graph composed from the output graphs of the executed queries. """ # Get the subgraphs subgraphs = [] - for statement in self.statements: + for query in self.queries: # Get the subgraphs - subgraphs.append(statement.execute_networkx(G)) + subgraphs.append(query.execute_networkx(G)) # add the reuslt_nodes, result_edges. 
- self.result_edges |= statement.result_edges - self.result_nodes |= statement.result_nodes + self.result_edges |= query.result_edges + self.result_nodes |= query.result_nodes # Compose the subgraphs H = subgraphs[0] @@ -197,8 +197,8 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: return H -class IntermediateStatement(Statement): - """An IntermediateStatement is a statement which depends on a previous initial Statement to run. +class IntermediateQuery(Query): + """An IntermediateQuery is a query which depends on a previous initial Query to run. For example, you may only want to find edges connected to one of the nodes identifed in `NodeByProps`. """ @@ -209,8 +209,8 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]: - return self.upstream_statement.result_nodes, self.upstream_statement.result_edges + return self.upstream_query.result_nodes, self.upstream_query.result_edges def set_upstream_nodes(self): - self.upstream_nodes |= self.upstream_statement.result_nodes - self.upstream_edges |= self.upstream_statement.result_edges + self.upstream_nodes |= self.upstream_query.result_nodes + self.upstream_edges |= self.upstream_query.result_edges diff --git a/beagle/analyzers/statements/edge.py b/beagle/analyzers/queries/edge.py similarity index 95% rename from beagle/analyzers/statements/edge.py rename to beagle/analyzers/queries/edge.py index 444879bf..5190ee67 100644 --- a/beagle/analyzers/statements/edge.py +++ b/beagle/analyzers/queries/edge.py @@ -2,11 +2,11 @@ import networkx as nx -from .base_statement import Statement, _str_to_exact, IntermediateStatement +from .base_query import Query, _str_to_exact, IntermediateQuery from .lookups import FieldLookup -class EdgeByProps(Statement): +class EdgeByProps(Query): def __init__( self, edge_type: str, props: Dict[str, Union[str, FieldLookup]] = {}, *args, **kwargs ): @@ -64,7 +64,7 @@ def execute_networkx(self, G: 
nx.Graph) -> nx.Graph: return G.edge_subgraph(subgraph_edges) -class IntermediateEdgeByProps(EdgeByProps, IntermediateStatement): +class IntermediateEdgeByProps(EdgeByProps, IntermediateQuery): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/queries/lookups.py similarity index 100% rename from beagle/analyzers/statements/lookups.py rename to beagle/analyzers/queries/lookups.py diff --git a/beagle/analyzers/statements/node.py b/beagle/analyzers/queries/node.py similarity index 97% rename from beagle/analyzers/statements/node.py rename to beagle/analyzers/queries/node.py index 58d44fb2..138e4177 100644 --- a/beagle/analyzers/statements/node.py +++ b/beagle/analyzers/queries/node.py @@ -4,11 +4,11 @@ from beagle.nodes import Node -from .base_statement import Statement, _str_to_exact +from .base_query import Query, _str_to_exact from .lookups import FieldLookup -class NodeByProps(Statement): +class NodeByProps(Query): def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]] = {}): """Searches the graph for a node of type `node_type` with properties matching `props` diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/queries/process.py similarity index 95% rename from beagle/analyzers/statements/process.py rename to beagle/analyzers/queries/process.py index 52758584..77de2ea1 100644 --- a/beagle/analyzers/statements/process.py +++ b/beagle/analyzers/queries/process.py @@ -2,14 +2,14 @@ from beagle.nodes import Process -from .base_statement import FactoryMixin +from .base_query import FactoryMixin from .edge import IntermediateEdgeByProps from .lookups import FieldLookup from .node import NodeByPropsReachable class FindProcess(FactoryMixin): - """Executes statements relevant to a Process""" + """Executes queries relevant to a Process""" @staticmethod def with_command_line( diff --git 
a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py index 586c772c..58419677 100644 --- a/tests/analyzers/statements/test_base_statement.py +++ b/tests/analyzers/statements/test_base_statement.py @@ -1,7 +1,7 @@ import pytest -from beagle.analyzers.statements.base_statement import FactoryMixin -from beagle.analyzers.statements.node import NodeByPropsReachable, NodeByProps -from beagle.analyzers.statements.lookups import Exact +from beagle.analyzers.queries.base_query import FactoryMixin +from beagle.analyzers.queries.node import NodeByPropsReachable, NodeByProps +from beagle.analyzers.queries.lookups import Exact from beagle.nodes import Process @@ -14,13 +14,13 @@ class MyFactory(FactoryMixin): obj.execute_networkx(None) -def test_chained_statement(G5, graph_nodes_match): - # Both paths should show up because we use a chained statement that returns both. +def test_chained_query(G5, graph_nodes_match): + # Both paths should show up because we use a chained query that returns both. - Bstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) - Gstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) + Bquery = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) + Gquery = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) - chained = Bstatement | Gstatement + chained = Bquery | Gquery assert graph_nodes_match( chained.execute_networkx(G5), @@ -37,14 +37,14 @@ def test_chained_statement(G5, graph_nodes_match): ) -def test_multiple_chained_statement(G5, graph_nodes_match): +def test_multiple_chained_query(G5, graph_nodes_match): # Should properly execute all three. 
- Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) - Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) - Astatement = NodeByProps(node_type=Process, props={"process_image": Exact("A")}) + Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) + Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) + Aquery = NodeByProps(node_type=Process, props={"process_image": Exact("A")}) - chained = Bstatement | Gstatement | Astatement + chained = Bquery | Gquery | Aquery assert graph_nodes_match( chained.execute_networkx(G5), @@ -57,16 +57,16 @@ def test_multiple_chained_statement(G5, graph_nodes_match): def test_shift_operators(): - Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) - Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) + Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) + Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) - Bstatement >> Gstatement + Bquery >> Gquery - assert Bstatement.downstream_statement == Gstatement + assert Bquery.downstream_query == Gquery - Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) - Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) + Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")}) + Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")}) - Bstatement << Gstatement + Bquery << Gquery - assert Gstatement.downstream_statement == Bstatement + assert Gquery.downstream_query == Bquery diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py index 5abdbf06..c7b51412 100644 --- a/tests/analyzers/statements/test_edge.py +++ b/tests/analyzers/statements/test_edge.py @@ -1,6 +1,6 @@ -from beagle.analyzers.statements.edge import EdgeByProps, IntermediateEdgeByProps -from 
beagle.analyzers.statements.lookups import Exact -from beagle.analyzers.statements.process import FindProcess +from beagle.analyzers.queries.edge import EdgeByProps, IntermediateEdgeByProps +from beagle.analyzers.queries.lookups import Exact +from beagle.analyzers.queries.process import FindProcess from beagle.nodes import File, Process from beagle.analyzers.base_analyzer import Analyzer @@ -8,10 +8,10 @@ def test_one_edge_prop_test(G2, G3, graph_nodes_match): # String should get mapped to Exact("foo") - statement = EdgeByProps(edge_type="Wrote", props={"contents": "foo"}) + query = EdgeByProps(edge_type="Wrote", props={"contents": "foo"}) assert graph_nodes_match( - statement.execute_networkx(G2), + query.execute_networkx(G2), [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), File(file_name="foo", file_path="bar"), @@ -20,33 +20,33 @@ def test_one_edge_prop_test(G2, G3, graph_nodes_match): # Should work on the non-conslidating graph too. assert graph_nodes_match( - statement.execute_networkx(G3), + query.execute_networkx(G3), [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), File(file_name="foo", file_path="bar"), ], ) - statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")}) + query = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")}) # Should match on `proc` from G1 - assert graph_nodes_match(statement.execute_networkx(G2), []) + assert graph_nodes_match(query.execute_networkx(G2), []) def test_intermediate_edge_by_props(G5, graph_nodes_match): - # Run the first statement. - statement1 = FindProcess.with_command_line("B") - statement2 = IntermediateEdgeByProps(edge_type="Launched") + # Run the first query. + query1 = FindProcess.with_command_line("B") + query2 = IntermediateEdgeByProps(edge_type="Launched") - statement1 >> statement2 + query1 >> query2 # get the subgraph. 
- G_s = statement1.execute_networkx(G5) + G_s = query1.execute_networkx(G5) - # running statement two should only give us B->C + # running query two should only give us B->C assert graph_nodes_match( - statement2.execute_networkx(G_s), + query2.execute_networkx(G_s), [ Process(process_id=12, process_image="B", command_line="B"), Process(process_id=12, process_image="C", command_line="C"), @@ -60,7 +60,7 @@ def test_intermediate_edge_all_candidates_found(G7, graph_nodes_match): name="test_intermediate_edge_all_candidates_found", description="test_intermediate_edge_all_candidates_found", score=0, - statement=FindProcess.with_command_line("C") >> FindProcess.that_was_launched(), + query=FindProcess.with_command_line("C") >> FindProcess.that_was_launched(), ) G = analyzer.run_networkx(G7) diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py index c97d161a..b9ef687a 100644 --- a/tests/analyzers/statements/test_lookups.py +++ b/tests/analyzers/statements/test_lookups.py @@ -1,6 +1,6 @@ import re import pytest -from beagle.analyzers.statements.lookups import ( +from beagle.analyzers.queries.lookups import ( FieldLookup, Contains, IContains, diff --git a/tests/analyzers/statements/test_node.py b/tests/analyzers/statements/test_node.py index 58aefa86..97bcee3a 100644 --- a/tests/analyzers/statements/test_node.py +++ b/tests/analyzers/statements/test_node.py @@ -1,6 +1,6 @@ -from beagle.analyzers.statements.base_statement import Statement -from beagle.analyzers.statements.lookups import Contains, EndsWith, Exact, StartsWith -from beagle.analyzers.statements.node import ( +from beagle.analyzers.queries.base_query import Query +from beagle.analyzers.queries.lookups import Contains, EndsWith, Exact, StartsWith +from beagle.analyzers.queries.node import ( NodeByProps, NodeByPropsAncestors, NodeByPropsDescendents, @@ -10,7 +10,7 @@ def test_test_props_nested_dict(): - s = Statement() + s = Query() assert ( 
s._test_values_with_lookups( @@ -36,57 +36,57 @@ def test_test_props_nested_dict(): def test_one_node_prop_test(G1, graph_nodes_match): - statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) + query = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) assert graph_nodes_match( - statement.execute_networkx(G1), + query.execute_networkx(G1), [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], ) # should mathc on other proc - statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")}) + query = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")}) assert graph_nodes_match( - statement.execute_networkx(G1), + query.execute_networkx(G1), [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")], ) # should match on both - statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")}) + query = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")}) assert graph_nodes_match( - statement.execute_networkx(G1), + query.execute_networkx(G1), [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"), ], ) - statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")}) + query = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")}) - assert graph_nodes_match(statement.execute_networkx(G1), []) + assert graph_nodes_match(query.execute_networkx(G1), []) def test_multiple_node_prop_test(G1, graph_nodes_match): - statement = NodeByProps( + query = NodeByProps( node_type=Process, props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, ) # Should match on `proc` from G1 assert graph_nodes_match( - statement.execute_networkx(G1), + query.execute_networkx(G1), [Process(process_id=10, 
process_image="test.exe", command_line="test.exe /c foobar")], ) def test_node_conditional(G1, graph_nodes_match): - statement = NodeByProps( + query = NodeByProps( node_type=Process, props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, ) assert graph_nodes_match( - statement.execute_networkx(G1), + query.execute_networkx(G1), [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], ) @@ -94,9 +94,9 @@ def test_node_conditional(G1, graph_nodes_match): def test_node_with_descendants(G4, graph_nodes_match): # A should return A->B->C->D - statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")}) + query = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")}) assert graph_nodes_match( - statement.execute_networkx(G4), + query.execute_networkx(G4), [ Process(process_id=10, process_image="A", command_line="A"), Process(process_id=12, process_image="B", command_line="B"), @@ -106,9 +106,9 @@ def test_node_with_descendants(G4, graph_nodes_match): ) # B should return B->C->D - statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")}) + query = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")}) assert graph_nodes_match( - statement.execute_networkx(G4), + query.execute_networkx(G4), [ Process(process_id=12, process_image="B", command_line="B"), Process(process_id=12, process_image="C", command_line="C"), @@ -120,16 +120,16 @@ def test_node_with_descendants(G4, graph_nodes_match): def test_node_with_ancestors(G4, graph_nodes_match): # A should return A - statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")}) + query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")}) assert graph_nodes_match( - statement.execute_networkx(G4), + query.execute_networkx(G4), [Process(process_id=10, process_image="A", command_line="A")], ) # B should return A->B 
- statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")}) + query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")}) assert graph_nodes_match( - statement.execute_networkx(G4), + query.execute_networkx(G4), [ Process(process_id=10, process_image="A", command_line="A"), Process(process_id=12, process_image="B", command_line="B"), @@ -137,9 +137,9 @@ def test_node_with_ancestors(G4, graph_nodes_match): ) # D should return A->B->C->D - statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")}) + query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")}) assert graph_nodes_match( - statement.execute_networkx(G4), + query.execute_networkx(G4), [ Process(process_id=10, process_image="A", command_line="A"), Process(process_id=12, process_image="B", command_line="B"), @@ -154,9 +154,9 @@ def test_nodes_reachable(G5, graph_nodes_match): # All queries will return the full path. # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H - statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) + query = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) assert graph_nodes_match( - statement.execute_networkx(G5), + query.execute_networkx(G5), [ Process(process_id=10, process_image="A", command_line="A"), Process(process_id=12, process_image="B", command_line="B"), @@ -165,9 +165,9 @@ def test_nodes_reachable(G5, graph_nodes_match): ], ) - statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) + query = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) assert graph_nodes_match( - statement.execute_networkx(G5), + query.execute_networkx(G5), [ Process(process_id=10, process_image="E", command_line="E"), Process(process_id=12, process_image="F", command_line="F"), diff --git 
a/tests/analyzers/statements/test_process.py b/tests/analyzers/statements/test_process.py index 49602beb..7e7175b8 100644 --- a/tests/analyzers/statements/test_process.py +++ b/tests/analyzers/statements/test_process.py @@ -1,15 +1,15 @@ -from beagle.analyzers.statements.process import FindProcess +from beagle.analyzers.queries.process import FindProcess from beagle.nodes import Process, File -from beagle.analyzers.statements.lookups import EndsWith +from beagle.analyzers.queries.lookups import EndsWith def test_get_by_command_line_no_lookup(G5, graph_nodes_match): # Should return all nodes reachable from A - statement = FindProcess.with_command_line("A") + query = FindProcess.with_command_line("A") assert graph_nodes_match( - statement.execute_networkx(G5), + query.execute_networkx(G5), [ Process(process_id=10, process_image="A", command_line="A"), Process(process_id=12, process_image="B", command_line="B"), @@ -22,10 +22,10 @@ def test_get_by_command_line_no_lookup(G5, graph_nodes_match): def test_get_by_command_line_with_lookup(G5, graph_nodes_match): # Should return all nodes reachable from A Or G, (so all nodes) - statement = FindProcess.with_command_line(EndsWith("A") | EndsWith("G")) + query = FindProcess.with_command_line(EndsWith("A") | EndsWith("G")) assert graph_nodes_match( - statement.execute_networkx(G5), + query.execute_networkx(G5), [ Process(process_id=10, process_image="A", command_line="A"), Process(process_id=12, process_image="B", command_line="B"), @@ -42,12 +42,12 @@ def test_get_by_command_line_with_lookup(G5, graph_nodes_match): def test_get_process_name_no_lookup(G2, graph_nodes_match): # No match, since defaults to exact. 
- statement = FindProcess.with_process_name("exe") - assert graph_nodes_match(statement.execute_networkx(G2), []) + query = FindProcess.with_process_name("exe") + assert graph_nodes_match(query.execute_networkx(G2), []) - statement = FindProcess.with_process_name("test.exe") + query = FindProcess.with_process_name("test.exe") assert graph_nodes_match( - statement.execute_networkx(G2), + query.execute_networkx(G2), [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), File(file_name="foo", file_path="bar"), @@ -58,10 +58,10 @@ def test_get_process_name_no_lookup(G2, graph_nodes_match): def test_get_process_name_lookup(G2, graph_nodes_match): # Should return test.exe because it ends with exe - statement = FindProcess.with_process_name(EndsWith("exe")) + query = FindProcess.with_process_name(EndsWith("exe")) assert graph_nodes_match( - statement.execute_networkx(G2), + query.execute_networkx(G2), [ Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), File(file_name="foo", file_path="bar"), @@ -72,10 +72,10 @@ def test_get_process_name_lookup(G2, graph_nodes_match): def test_get_process_user(G6, graph_nodes_match): # Should return test.exe because it ends with exe - statement = FindProcess.with_user("omer") + query = FindProcess.with_user("omer") assert graph_nodes_match( - statement.execute_networkx(G6), + query.execute_networkx(G6), [ Process( process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer" @@ -90,10 +90,10 @@ def test_get_process_user(G6, graph_nodes_match): def test_get_process_image_path(G6, graph_nodes_match): # Should return test.exe because it ends with exe - statement = FindProcess.with_process_image_path("d:\\") + query = FindProcess.with_process_image_path("d:\\") assert graph_nodes_match( - statement.execute_networkx(G6), + query.execute_networkx(G6), [ Process( process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer" diff --git 
a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py index de68b1d9..67762784 100644 --- a/tests/analyzers/test_base_analyzer.py +++ b/tests/analyzers/test_base_analyzer.py @@ -1,15 +1,15 @@ from beagle.analyzers.base_analyzer import Analyzer -from beagle.analyzers.statements.process import FindProcess +from beagle.analyzers.queries.process import FindProcess from beagle.nodes import Process -def test_analyzer_two_statements(G5, graph_nodes_match): +def test_analyzer_two_queries(G5, graph_nodes_match): analyzer = Analyzer( - name="test_analyzer_two_statements", - description="test_analyzer_two_statements", + name="test_analyzer_two_queries", + description="test_analyzer_two_queries", score=0, - statement=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(), + query=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(), ) G = analyzer.run_networkx(G5) @@ -23,17 +23,17 @@ def test_analyzer_two_statements(G5, graph_nodes_match): ) -def test_analyzer_or_statement_statements(G5, graph_nodes_match): +def test_analyzer_or_query_queries(G5, graph_nodes_match): query = ( FindProcess.with_command_line("B") | FindProcess.with_command_line("A") ) >> FindProcess.that_was_launched() analyzer = Analyzer( - name="test_analyzer_two_statements", - description="test_analyzer_two_statements", + name="test_analyzer_two_queries", + description="test_analyzer_two_queries", score=0, - statement=query, + query=query, ) G = analyzer.run_networkx(G5) From e2205a5509ac02ce6c0143b8309f10d24e0c47e7 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sun, 17 Nov 2019 16:14:00 -0500 Subject: [PATCH 21/25] Adds FindProcess.that_was_launched --- beagle/analyzers/base_analyzer.py | 2 +- beagle/analyzers/queries/base_query.py | 5 +- beagle/analyzers/queries/edge.py | 23 +++++++- beagle/analyzers/queries/node.py | 2 +- beagle/analyzers/queries/process.py | 9 ++-- tests/analyzers/conftest.py | 14 ++++- ...t_base_statement.py => test_base_query.py} | 
0 tests/analyzers/statements/test_edge.py | 26 --------- tests/analyzers/statements/test_lookups.py | 19 ++++--- tests/analyzers/statements/test_node.py | 3 +- tests/analyzers/statements/test_process.py | 53 ++++++++++++++++++- tests/analyzers/test_base_analyzer.py | 4 +- 12 files changed, 109 insertions(+), 51 deletions(-) rename tests/analyzers/statements/{test_base_statement.py => test_base_query.py} (100%) diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py index 5107d967..08c7d930 100644 --- a/beagle/analyzers/base_analyzer.py +++ b/beagle/analyzers/base_analyzer.py @@ -7,7 +7,7 @@ class Analyzer(object): - def __init__(self, name: str, description: str, score: int, query: Query): + def __init__(self, name: str, query: Query, description: str = None, score: int = None): self.name = name self.description = description self.score = score diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py index 26f85199..0ea7aac8 100644 --- a/beagle/analyzers/queries/base_query.py +++ b/beagle/analyzers/queries/base_query.py @@ -20,8 +20,7 @@ def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]: class Query(object): def __init__(self): - """A query is the base building block of a query. A query takes as input a graph, executes, - and returns the next graph. + """A query takes as input a graph, executes, and returns the next graph. >>> G2 = query.execute_networkx(G) @@ -96,7 +95,7 @@ def _test_values_with_lookups( value_to_test: Union[Node, Dict[str, Any]], lookup_tests: Dict[str, Union[FieldLookup, Dict]], ) -> bool: - """Tests a node or dictionay against a configuration of lookup_tests. + """Tests a node or dictionary against a configuration of lookup_tests. 
Parameters ---------- diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py index 5190ee67..ba95e16a 100644 --- a/beagle/analyzers/queries/edge.py +++ b/beagle/analyzers/queries/edge.py @@ -1,4 +1,4 @@ -from typing import Dict, Union +from typing import Dict, Union, Set import networkx as nx @@ -101,9 +101,28 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: for entry in data: if self._test_values_with_lookups(entry, self.props): subgraph_edges.append((u, v, k)) - # can stop on first match self.result_edges |= {(u, v, k)} self.result_nodes |= {u, v} + + # can stop on first match break return G.edge_subgraph(subgraph_edges) + + +class IntermediateEdgeByPropsDescendants(IntermediateEdgeByProps): + """Perform a `IntermediateEdgeByProps` query, expanding the descendants of the found edges.""" + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + next_graph = super().execute_networkx(G) + + # get the nodes from the previous graph. + subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()} + + # For every node that matched `in IntermediateEdgeByProps` + for _, v, _ in self.result_edges: + subgraph_nodes |= nx.descendants(G, v) | {v} + + self.result_nodes |= subgraph_nodes + + return G.subgraph(subgraph_nodes) diff --git a/beagle/analyzers/queries/node.py b/beagle/analyzers/queries/node.py index 138e4177..ed72550c 100644 --- a/beagle/analyzers/queries/node.py +++ b/beagle/analyzers/queries/node.py @@ -4,7 +4,7 @@ from beagle.nodes import Node -from .base_query import Query, _str_to_exact +from .base_query import IntermediateQuery, Query, _str_to_exact from .lookups import FieldLookup diff --git a/beagle/analyzers/queries/process.py b/beagle/analyzers/queries/process.py index 77de2ea1..c5a3a8a9 100644 --- a/beagle/analyzers/queries/process.py +++ b/beagle/analyzers/queries/process.py @@ -3,7 +3,7 @@ from beagle.nodes import Process from .base_query import FactoryMixin -from .edge import IntermediateEdgeByProps +from .edge 
import IntermediateEdgeByProps, IntermediateEdgeByPropsDescendants from .lookups import FieldLookup from .node import NodeByPropsReachable @@ -66,5 +66,8 @@ def with_sha1_hash( return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}}) @staticmethod - def that_was_launched(): - return IntermediateEdgeByProps(edge_type="Launched") + def that_was_launched(descendants: bool = True): + if descendants: + return IntermediateEdgeByPropsDescendants(edge_type="Launched") + else: + return IntermediateEdgeByProps(edge_type="Launched") diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py index 654bf74f..2d97a929 100644 --- a/tests/analyzers/conftest.py +++ b/tests/analyzers/conftest.py @@ -10,7 +10,19 @@ @pytest.fixture def graph_nodes_match(): def validate_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool: - return [n["data"] for _, n in graph.nodes(data=True)] == nodes + + node_objs = [n["data"] for _, n in graph.nodes(data=True)] + + length_match = len(graph.nodes()) == len(nodes) + + node_match = all([n in node_objs for n in nodes]) + + if length_match and node_match: + return True + + else: + print(f"Expected {nodes} got {node_objs}") + return False return validate_nodes_match diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_query.py similarity index 100% rename from tests/analyzers/statements/test_base_statement.py rename to tests/analyzers/statements/test_base_query.py diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py index c7b51412..74cb0dbf 100644 --- a/tests/analyzers/statements/test_edge.py +++ b/tests/analyzers/statements/test_edge.py @@ -52,29 +52,3 @@ def test_intermediate_edge_by_props(G5, graph_nodes_match): Process(process_id=12, process_image="C", command_line="C"), ], ) - - -def test_intermediate_edge_all_candidates_found(G7, graph_nodes_match): - - analyzer = Analyzer( - 
name="test_intermediate_edge_all_candidates_found", - description="test_intermediate_edge_all_candidates_found", - score=0, - query=FindProcess.with_command_line("C") >> FindProcess.that_was_launched(), - ) - - G = analyzer.run_networkx(G7) - - # should return - # C - # / \ - # F G - - assert graph_nodes_match( - G, - [ - Process(process_id=12, process_image="C", command_line="C"), - Process(process_id=12, process_image="F", command_line="F"), - Process(process_id=12, process_image="G", command_line="G"), - ], - ) diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py index b9ef687a..72b235b4 100644 --- a/tests/analyzers/statements/test_lookups.py +++ b/tests/analyzers/statements/test_lookups.py @@ -1,17 +1,20 @@ import re +from typing import Type + import pytest + from beagle.analyzers.queries.lookups import ( - FieldLookup, + And, Contains, - IContains, + EndsWith, Exact, + FieldLookup, + IContains, IExact, - StartsWith, - EndsWith, - Regex, - And, - Or, Not, + Or, + Regex, + StartsWith, ) @@ -48,7 +51,7 @@ (Regex, re.compile(r"\d"), "test test", False), ], ) -def test_lookups(cls: FieldLookup, value: str, prop: str, result: str): +def test_lookups(cls: Type[FieldLookup], value: str, prop: str, result: str): # prop -> value being tested again, value -> the thing we're looking up assert cls(value).test(prop) == result diff --git a/tests/analyzers/statements/test_node.py b/tests/analyzers/statements/test_node.py index 97bcee3a..0020e269 100644 --- a/tests/analyzers/statements/test_node.py +++ b/tests/analyzers/statements/test_node.py @@ -122,8 +122,7 @@ def test_node_with_ancestors(G4, graph_nodes_match): # A should return A query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")}) assert graph_nodes_match( - query.execute_networkx(G4), - [Process(process_id=10, process_image="A", command_line="A")], + query.execute_networkx(G4), [Process(process_id=10, process_image="A", command_line="A")] ) 
# B should return A->B diff --git a/tests/analyzers/statements/test_process.py b/tests/analyzers/statements/test_process.py index 7e7175b8..8d7d6dc1 100644 --- a/tests/analyzers/statements/test_process.py +++ b/tests/analyzers/statements/test_process.py @@ -1,6 +1,7 @@ -from beagle.analyzers.queries.process import FindProcess -from beagle.nodes import Process, File +from beagle.analyzers.base_analyzer import Analyzer from beagle.analyzers.queries.lookups import EndsWith +from beagle.analyzers.queries.process import FindProcess +from beagle.nodes import File, Process def test_get_by_command_line_no_lookup(G5, graph_nodes_match): @@ -103,3 +104,51 @@ def test_get_process_image_path(G6, graph_nodes_match): ), ], ) + + +def test_process_launched_no_descendants(G7, graph_nodes_match): + analyzer = Analyzer( + name="test_process_launched_descendants", + query=FindProcess.with_command_line("C") + >> FindProcess.that_was_launched(descendants=False), + ) + + G = analyzer.run_networkx(G7) + + # should return + # C + # / \ + # F G + + assert graph_nodes_match( + G, + [ + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + ], + ) + + +def test_process_launched_descendants(G7, graph_nodes_match): + analyzer = Analyzer( + name="test_process_launched_descendants", + query=FindProcess.with_command_line("A") >> FindProcess.that_was_launched(), + ) + + G = analyzer.run_networkx(G7) + + # Should return the full graph. + # since it should find B and C which are children of A, then expand their children. 
+ assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + ], + ) diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py index 67762784..120b8ed0 100644 --- a/tests/analyzers/test_base_analyzer.py +++ b/tests/analyzers/test_base_analyzer.py @@ -9,7 +9,7 @@ def test_analyzer_two_queries(G5, graph_nodes_match): name="test_analyzer_two_queries", description="test_analyzer_two_queries", score=0, - query=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(), + query=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(descendants=False), ) G = analyzer.run_networkx(G5) @@ -27,7 +27,7 @@ def test_analyzer_or_query_queries(G5, graph_nodes_match): query = ( FindProcess.with_command_line("B") | FindProcess.with_command_line("A") - ) >> FindProcess.that_was_launched() + ) >> FindProcess.that_was_launched(descendants=False) analyzer = Analyzer( name="test_analyzer_two_queries", From 22e7043cad84283ca3aa7aca65ab46dd4db01819 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sun, 17 Nov 2019 17:07:59 -0500 Subject: [PATCH 22/25] Adds query factory for Files --- beagle/analyzers/base_analyzer.py | 6 +- beagle/analyzers/queries/base_query.py | 7 ++- beagle/analyzers/queries/edge.py | 6 +- beagle/analyzers/queries/file.py | 58 +++++++++++++++++++ beagle/analyzers/queries/lookups.py | 3 + beagle/analyzers/queries/node.py | 4 +- beagle/nodes/file.py | 2 +- tests/analyzers/statements/test_base_query.py | 14 ++++- tests/analyzers/statements/test_file.py | 53 +++++++++++++++++ 
tests/analyzers/test_base_analyzer.py | 26 ++++++++- 10 files changed, 165 insertions(+), 14 deletions(-) create mode 100644 beagle/analyzers/queries/file.py create mode 100644 tests/analyzers/statements/test_file.py diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py index 08c7d930..4388b651 100644 --- a/beagle/analyzers/base_analyzer.py +++ b/beagle/analyzers/base_analyzer.py @@ -1,4 +1,4 @@ -from typing import Type, cast +from typing import Type, cast, Any import networkx as nx @@ -18,10 +18,10 @@ def __init__(self, name: str, query: Query, description: str = None, score: int self.query: Query = query - def run(self, backend: Type[Backend]): + def run(self, backend: Type[Backend]) -> Any: if isinstance(backend, NetworkX): backend = cast(NetworkX, backend) - self.run_networkx(backend.G) + return self.run_networkx(backend.G) def run_networkx(self, G: nx.Graph) -> nx.Graph: diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py index 0ea7aac8..965bac7f 100644 --- a/beagle/analyzers/queries/base_query.py +++ b/beagle/analyzers/queries/base_query.py @@ -7,6 +7,9 @@ from .lookups import Exact, FieldLookup +PropsDict = Dict[str, Union[str, FieldLookup, Dict]] + + def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]: # Ensures strings become Exact, Works on nested dicts for k, v in props.items(): @@ -123,7 +126,7 @@ def _test_values_with_lookups( for attr_name, lookup in lookup_tests.items(): if isinstance(lookup, dict): # recursivly check props against nested entrys (e.g is hashes dict in Process) - if isinstance(value_to_test, Node): + if isinstance(value_to_test, Node): # pragma: no cover results.append( self._test_values_with_lookups( value_to_test=getattr(value_to_test, attr_name), lookup_tests=lookup @@ -210,6 +213,6 @@ def __init__(self, *args, **kwargs): def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]: return self.upstream_query.result_nodes, 
self.upstream_query.result_edges - def set_upstream_nodes(self): + def set_upstream_nodes(self): # pragma: no cover self.upstream_nodes |= self.upstream_query.result_nodes self.upstream_edges |= self.upstream_query.result_edges diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py index ba95e16a..5693d89b 100644 --- a/beagle/analyzers/queries/edge.py +++ b/beagle/analyzers/queries/edge.py @@ -2,14 +2,12 @@ import networkx as nx -from .base_query import Query, _str_to_exact, IntermediateQuery +from .base_query import Query, _str_to_exact, IntermediateQuery, PropsDict from .lookups import FieldLookup class EdgeByProps(Query): - def __init__( - self, edge_type: str, props: Dict[str, Union[str, FieldLookup]] = {}, *args, **kwargs - ): + def __init__(self, edge_type: str, props: PropsDict = {}, *args, **kwargs): """Searches the graph for an edge of type `edge_type` with properties matching `props` Parameters diff --git a/beagle/analyzers/queries/file.py b/beagle/analyzers/queries/file.py new file mode 100644 index 00000000..02961b92 --- /dev/null +++ b/beagle/analyzers/queries/file.py @@ -0,0 +1,58 @@ +from typing import Union + +from beagle.nodes import File + +from .base_query import FactoryMixin, PropsDict +from .edge import IntermediateEdgeByProps, IntermediateEdgeByPropsDescendants +from .lookups import FieldLookup +from .node import NodeByPropsReachable + + +class FindFile(FactoryMixin): + """Executes queries relevant to a File""" + + @staticmethod + def with_full_path(full_path: Union[str, FieldLookup]) -> NodeByPropsReachable: + return NodeByPropsReachable(node_type=File, props={"full_path": full_path}) + + @staticmethod + def with_file_path(file_path: Union[str, FieldLookup]) -> NodeByPropsReachable: + return NodeByPropsReachable(node_type=File, props={"file_path": file_path}) + + @staticmethod + def with_file_name(file_name: Union[str, FieldLookup]) -> NodeByPropsReachable: + return NodeByPropsReachable(node_type=File, 
props={"file_name": file_name}) + + @staticmethod + def with_extension( + extension: Union[str, FieldLookup] + ) -> NodeByPropsReachable: # pragma: no cover + return NodeByPropsReachable(node_type=File, props={"extension": extension}) + + @staticmethod + def with_timestamp( + timestamp: Union[str, FieldLookup] + ) -> NodeByPropsReachable: # pragma: no cover + return NodeByPropsReachable(node_type=File, props={"timestamp": timestamp}) + + @staticmethod + def with_hashes(hashes: Union[str, FieldLookup]) -> NodeByPropsReachable: # pragma: no cover + return NodeByPropsReachable(node_type=File, props={"hashes": hashes}) + + @staticmethod + def with_props(props: PropsDict) -> NodeByPropsReachable: # pragma: no cover + return NodeByPropsReachable(node_type=File, props=props) + + @staticmethod + def that_was_written(descendants: bool = False): + if descendants: + return IntermediateEdgeByPropsDescendants(edge_type="Wrote") + else: + return IntermediateEdgeByProps(edge_type="Wrote") + + @staticmethod + def that_was_copied(descendants: bool = False): + if descendants: + return IntermediateEdgeByPropsDescendants(edge_type="Copied To") + else: + return IntermediateEdgeByProps(edge_type="Copied To") diff --git a/beagle/analyzers/queries/lookups.py b/beagle/analyzers/queries/lookups.py index 76c764e3..917468eb 100644 --- a/beagle/analyzers/queries/lookups.py +++ b/beagle/analyzers/queries/lookups.py @@ -63,6 +63,9 @@ def __invert__(self) -> "Not": """ return Not(self) + def __eq__(self, other): + return (type(self) == type(other)) and (self.value == other.value) + class Or(FieldLookup): """Boolean OR, Meant to be used with other lookups: diff --git a/beagle/analyzers/queries/node.py b/beagle/analyzers/queries/node.py index ed72550c..d4fb7274 100644 --- a/beagle/analyzers/queries/node.py +++ b/beagle/analyzers/queries/node.py @@ -4,12 +4,12 @@ from beagle.nodes import Node -from .base_query import IntermediateQuery, Query, _str_to_exact +from .base_query import Query, 
_str_to_exact, PropsDict from .lookups import FieldLookup class NodeByProps(Query): - def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]] = {}): + def __init__(self, node_type: Type[Node], props: PropsDict = {}): """Searches the graph for a node of type `node_type` with properties matching `props` Parameters diff --git a/beagle/nodes/file.py b/beagle/nodes/file.py index a8fbcb76..4c795c75 100644 --- a/beagle/nodes/file.py +++ b/beagle/nodes/file.py @@ -5,7 +5,7 @@ from beagle.edges import FileOf, CopiedTo # mypy type hinting -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from beagle.nodes import Process # noqa: F401 diff --git a/tests/analyzers/statements/test_base_query.py b/tests/analyzers/statements/test_base_query.py index 58419677..940a046a 100644 --- a/tests/analyzers/statements/test_base_query.py +++ b/tests/analyzers/statements/test_base_query.py @@ -1,5 +1,5 @@ import pytest -from beagle.analyzers.queries.base_query import FactoryMixin +from beagle.analyzers.queries.base_query import FactoryMixin, _str_to_exact from beagle.analyzers.queries.node import NodeByPropsReachable, NodeByProps from beagle.analyzers.queries.lookups import Exact from beagle.nodes import Process @@ -14,6 +14,18 @@ class MyFactory(FactoryMixin): obj.execute_networkx(None) +@pytest.mark.parametrize( + "props,expected", + [ + ({"process_image": "A"}, {"process_image": Exact("A")}), + ({"hashes": {"md5": "A"}}, {"hashes": {"md5": Exact("A")}}), + ({"hashes": {"md5": "A", "baz": {"foo": "bar"}}}, {"hashes": {"md5": Exact("A"), "baz": {"foo": Exact("bar")}}}), + ], +) +def test_str_to_exact(props, expected): + assert _str_to_exact(props) == expected + + def test_chained_query(G5, graph_nodes_match): # Both paths should show up because we use a chained query that returns both. 
diff --git a/tests/analyzers/statements/test_file.py b/tests/analyzers/statements/test_file.py new file mode 100644 index 00000000..25250d6e --- /dev/null +++ b/tests/analyzers/statements/test_file.py @@ -0,0 +1,53 @@ +from beagle.analyzers.base_analyzer import Analyzer +from beagle.analyzers.queries.file import FindFile +from beagle.nodes import File, Process + + +def test_file_with_name(G3, graph_nodes_match): + analyzer = Analyzer(name="test_file_with_name", query=FindFile.with_file_name("foo")) + + G = analyzer.run_networkx(G3) + + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_file_with_path(G3, graph_nodes_match): + analyzer = Analyzer(name="test_file_with_path", query=FindFile.with_file_path("bar")) + + G = analyzer.run_networkx(G3) + + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_file_with_full_path(G3, graph_nodes_match): + analyzer = Analyzer(name="test_file_with_full_path", query=FindFile.with_full_path("bar\\foo")) + + G = analyzer.run_networkx(G3) + + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_file_that_was_written(G3, graph_nodes_match): + analyzer = Analyzer(name="test_file_that_was_written", query=FindFile.that_was_written()) + + G = analyzer.run_networkx(G3) + + assert graph_nodes_match(G, [File(file_name="foo", file_path="bar")]) diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py index 120b8ed0..2ac411e0 100644 --- a/tests/analyzers/test_base_analyzer.py +++ b/tests/analyzers/test_base_analyzer.py @@ -2,6 +2,29 @@ from beagle.analyzers.queries.process import FindProcess from beagle.nodes 
import Process +from beagle.backends import NetworkX + + +def test_analyzer_from_networx_backed(G5, graph_nodes_match): + analyzer = Analyzer( + name="test_analyzer_two_queries", + description="test_analyzer_two_queries", + score=0, + query=FindProcess.with_command_line("B") + >> FindProcess.that_was_launched(descendants=False), + ) + + backend = NetworkX(nodes=[]) + backend.G = G5 + + assert graph_nodes_match( + analyzer.run(backend), + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + ], + ) + def test_analyzer_two_queries(G5, graph_nodes_match): @@ -9,7 +32,8 @@ def test_analyzer_two_queries(G5, graph_nodes_match): name="test_analyzer_two_queries", description="test_analyzer_two_queries", score=0, - query=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(descendants=False), + query=FindProcess.with_command_line("B") + >> FindProcess.that_was_launched(descendants=False), ) G = analyzer.run_networkx(G5) From 460e1d9d91500c473058f085ffa8e8ee1028af12 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sun, 17 Nov 2019 21:56:13 -0500 Subject: [PATCH 23/25] All queries can now be intermediary by default. 
--- beagle/analyzers/queries/__init__.py | 24 ++++ beagle/analyzers/queries/base_query.py | 123 ++++++++---------- beagle/analyzers/queries/edge.py | 72 +++++----- beagle/analyzers/queries/file.py | 22 ++-- beagle/analyzers/queries/process.py | 12 +- tests/analyzers/conftest.py | 17 +++ tests/analyzers/statements/test_base_query.py | 8 +- tests/analyzers/statements/test_edge.py | 5 +- tests/analyzers/statements/test_file.py | 16 ++- 9 files changed, 163 insertions(+), 136 deletions(-) diff --git a/beagle/analyzers/queries/__init__.py b/beagle/analyzers/queries/__init__.py index e69de29b..f2d8da77 100644 --- a/beagle/analyzers/queries/__init__.py +++ b/beagle/analyzers/queries/__init__.py @@ -0,0 +1,24 @@ +from networkx import nx +from .base_query import Query, PropsDict +from .edge import EdgeByProps, EdgeByPropsAncestors, EdgeByPropsDescendants, EdgeByPropsReachable + + +def make_edge_query( + edge_type: str, descendants=True, ancestors=False, reachable=False, edge_props: PropsDict = {} +) -> Query: + if reachable or (descendants and reachable): + return EdgeByPropsReachable(edge_type=edge_type, edge_props=edge_props) + elif descendants: + return EdgeByPropsDescendants(edge_type=edge_type, edge_props=edge_props) + elif ancestors: + return EdgeByPropsAncestors(edge_type=edge_type, edge_props=edge_props) + else: + return EdgeByProps(edge_type=edge_type, edge_props=edge_props) + + +class FactoryMixin(object): + """Mixin to prevent Query Factories from calling execute methods. 
+ """ + + def execute_networkx(self, G: nx.graph): + raise UserWarning("Query factories cannot be called directly") diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py index 965bac7f..a10b2250 100644 --- a/beagle/analyzers/queries/base_query.py +++ b/beagle/analyzers/queries/base_query.py @@ -44,54 +44,15 @@ def __init__(self): self.downstream_query: Query = None self.upstream_query: Query = None - def __rshift__(self, other: "Query") -> "Query": - """Implements Self >> Other == self.downstream_query = other - - Parameters - ---------- - other : Query - The other query to add. - """ - self.downstream_query = other - other.upstream_query = self - return other - - def __lshift__(self, other: "Query") -> "Query": - """Implements Self << Other == self.upstream_query = other - - Parameters - ---------- - other : Query - The other query to add. - """ - other.downstream_query = self - self.upstream_query = other - return other - - def __or__(self, other: "Query") -> "ChainedQuery": - """Allows queries to be combined through the `|` operator. - The result of execution is the union of both subqueries. - - >>> query1 = Query(...) - >>> query2 = Query(...) - >>> chained = query1 | query2 - - - Parameters - ---------- - other: Query - The query to chain with. + self.upstream_nodes: Set[int] = set() + self.upstream_edges: Set[Tuple[int, int, int]] = set() - Returns - ------- - ChainedQuery - A chained query compromised of all three. 
- """ - return ChainedQuery(self, other) + def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]: + return self.upstream_query.result_nodes, self.upstream_query.result_edges - def execute_networkx(self, G: nx.Graph): # pragma: no cover - """Execute a query against a `networkx` graph.""" - raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") + def set_upstream_nodes(self): # pragma: no cover + self.upstream_nodes |= self.upstream_query.result_nodes + self.upstream_edges |= self.upstream_query.result_edges def _test_values_with_lookups( self, @@ -146,13 +107,54 @@ def _test_values_with_lookups( return any(results) + def execute_networkx(self, G: nx.Graph): # pragma: no cover + """Execute a query against a `networkx` graph.""" + raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") + + def __rshift__(self, other: "Query") -> "Query": + """Implements Self >> Other == self.downstream_query = other + + Parameters + ---------- + other : Query + The other query to add. + """ + self.downstream_query = other + other.upstream_query = self + return other + + def __lshift__(self, other: "Query") -> "Query": + """Implements Self << Other == self.upstream_query = other + + Parameters + ---------- + other : Query + The other query to add. + """ + other.downstream_query = self + self.upstream_query = other + return other + + def __or__(self, other: "Query") -> "ChainedQuery": + """Allows queries to be combined through the `|` operator. + The result of execution is the union of both subqueries. + + >>> query1 = Query(...) + >>> query2 = Query(...) + >>> chained = query1 | query2 -class FactoryMixin(object): - """Mixin to prevent Query Factories from calling execute methods. - """ - def execute_networkx(self, G: nx.graph): - raise UserWarning("Query factories cannot be called directly") + Parameters + ---------- + other: Query + The query to chain with. 
+ + Returns + ------- + ChainedQuery + A chained query compromised of all three. + """ + return ChainedQuery(self, other) class ChainedQuery(Query): @@ -197,22 +199,3 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: H = nx.compose(H, subgraph) return H - - -class IntermediateQuery(Query): - """An IntermediateQuery is a query which depends on a previous initial Query to run. - - For example, you may only want to find edges connected to one of the nodes identifed in `NodeByProps`. - """ - - def __init__(self, *args, **kwargs): - self.upstream_nodes: Set[int] = set() - self.upstream_edges: Set[Tuple[int, int, int]] = set() - super().__init__(*args, **kwargs) - - def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]: - return self.upstream_query.result_nodes, self.upstream_query.result_edges - - def set_upstream_nodes(self): # pragma: no cover - self.upstream_nodes |= self.upstream_query.result_nodes - self.upstream_edges |= self.upstream_query.result_edges diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py index 5693d89b..e0ce9724 100644 --- a/beagle/analyzers/queries/edge.py +++ b/beagle/analyzers/queries/edge.py @@ -2,7 +2,7 @@ import networkx as nx -from .base_query import Query, _str_to_exact, IntermediateQuery, PropsDict +from .base_query import Query, _str_to_exact, PropsDict from .lookups import FieldLookup @@ -37,8 +37,14 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: """ subgraph_edges = [] + if self.upstream_query is not None: + upstream_nodes, _ = self.get_upstream_results() + edges = G.edges(upstream_nodes, data=True, keys=True) + else: + edges = G.edges(data=True, keys=True) + # For each edge - for u, v, k, e_data in G.edges(data=True, keys=True): + for u, v, k, e_data in edges: # pull out the data field from NX data = e_data["data"] # edge data @@ -62,54 +68,44 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: return G.edge_subgraph(subgraph_edges) -class 
IntermediateEdgeByProps(EdgeByProps, IntermediateQuery): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) +class EdgeByPropsDescendants(EdgeByProps): + """Perform a `EdgeByProps` query, expanding the descendants of the found edges.""" def execute_networkx(self, G: nx.Graph) -> nx.Graph: - """Searches a `nx.Graph` object for edges that match type `edge_type` and contains - props matching `props`. This is O(E). + next_graph = super().execute_networkx(G) - Returns a subgraph with all nodes contained in match edges - """ + # get the nodes from the previous graph. + subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()} - # Grab upstream information - upstream_nodes, _ = self.get_upstream_results() + # For every node that matched `in EdgeByProps` + for _, v, _ in self.result_edges: + subgraph_nodes |= nx.descendants(G, v) | {v} - subgraph_edges = [] + self.result_nodes |= subgraph_nodes - for u, v, k, e_data in G.edges( - # Only get the edges associate with nodes from the previous step. - upstream_nodes, - data=True, - keys=True, - ): + return G.subgraph(subgraph_nodes) - # pull out the data field from NX - data = e_data["data"] # edge data - e_type = e_data["edge_name"] # edge type - # If edge matches the desired instance. - if e_type == self.edge_type: +class EdgeByPropsAncestors(EdgeByProps): + """Perform a `EdgeByProps` query, expanding the ancestors of the found edges.""" - # Test the edge - if not isinstance(data, list): - data = [data] + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + next_graph = super().execute_networkx(G) - for entry in data: - if self._test_values_with_lookups(entry, self.props): - subgraph_edges.append((u, v, k)) - self.result_edges |= {(u, v, k)} - self.result_nodes |= {u, v} + # get the nodes from the previous graph. 
+ subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()} - # can stop on first match - break + # For every node that matched `in EdgeByProps` + for _, v, _ in self.result_edges: + subgraph_nodes |= nx.ancestors(G, v) | {v} - return G.edge_subgraph(subgraph_edges) + self.result_nodes |= subgraph_nodes + + return G.subgraph(subgraph_nodes) -class IntermediateEdgeByPropsDescendants(IntermediateEdgeByProps): - """Perform a `IntermediateEdgeByProps` query, expanding the descendants of the found edges.""" +class EdgeByPropsReachable(EdgeByProps): + """Perform a `EdgeByProps` query, including all reachable nodes.""" def execute_networkx(self, G: nx.Graph) -> nx.Graph: next_graph = super().execute_networkx(G) @@ -117,9 +113,9 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: # get the nodes from the previous graph. subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()} - # For every node that matched `in IntermediateEdgeByProps` + # For every node that matched `in EdgeByProps` for _, v, _ in self.result_edges: - subgraph_nodes |= nx.descendants(G, v) | {v} + subgraph_nodes |= nx.ancestors(G, v) | nx.descendants(G, v) | {v} self.result_nodes |= subgraph_nodes diff --git a/beagle/analyzers/queries/file.py b/beagle/analyzers/queries/file.py index 02961b92..7004a755 100644 --- a/beagle/analyzers/queries/file.py +++ b/beagle/analyzers/queries/file.py @@ -2,8 +2,8 @@ from beagle.nodes import File -from .base_query import FactoryMixin, PropsDict -from .edge import IntermediateEdgeByProps, IntermediateEdgeByPropsDescendants +from . 
import FactoryMixin, make_edge_query +from .base_query import PropsDict from .lookups import FieldLookup from .node import NodeByPropsReachable @@ -44,15 +44,13 @@ def with_props(props: PropsDict) -> NodeByPropsReachable: # pragma: no cover return NodeByPropsReachable(node_type=File, props=props) @staticmethod - def that_was_written(descendants: bool = False): - if descendants: - return IntermediateEdgeByPropsDescendants(edge_type="Wrote") - else: - return IntermediateEdgeByProps(edge_type="Wrote") + def that_was_written(descendants=True, ancestors=False, reachable=False): + return make_edge_query( + edge_type="Wrote", descendants=descendants, ancestors=ancestors, reachable=reachable + ) @staticmethod - def that_was_copied(descendants: bool = False): - if descendants: - return IntermediateEdgeByPropsDescendants(edge_type="Copied To") - else: - return IntermediateEdgeByProps(edge_type="Copied To") + def that_was_copied(descendants=True, ancestors=False, reachable=False): + return make_edge_query( + edge_type="Copied To", descendants=descendants, ancestors=ancestors, reachable=reachable + ) diff --git a/beagle/analyzers/queries/process.py b/beagle/analyzers/queries/process.py index c5a3a8a9..0019fdff 100644 --- a/beagle/analyzers/queries/process.py +++ b/beagle/analyzers/queries/process.py @@ -2,8 +2,7 @@ from beagle.nodes import Process -from .base_query import FactoryMixin -from .edge import IntermediateEdgeByProps, IntermediateEdgeByPropsDescendants +from . 
import FactoryMixin, make_edge_query from .lookups import FieldLookup from .node import NodeByPropsReachable @@ -66,8 +65,7 @@ def with_sha1_hash( return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}}) @staticmethod - def that_was_launched(descendants: bool = True): - if descendants: - return IntermediateEdgeByPropsDescendants(edge_type="Launched") - else: - return IntermediateEdgeByProps(edge_type="Launched") + def that_was_launched(descendants=True, ancestors=False, reachable=False): + return make_edge_query( + edge_type="Launched", descendants=descendants, ancestors=ancestors, reachable=reachable + ) diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py index 2d97a929..ee4b68c6 100644 --- a/tests/analyzers/conftest.py +++ b/tests/analyzers/conftest.py @@ -167,3 +167,20 @@ def G7(): backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D, E, F, G]) return backend.graph() + + +@pytest.fixture +def G8(): + # A launches B, B writes to F1 + + A = Process(process_id=10, process_image="A", command_line="A") + + B = Process(process_id=12, process_image="B", command_line="B") + F1 = File(file_name="bar", file_path="bar") + + A.launched[B] + B.wrote[F1].append(contents="bar") + + backend = NetworkX(consolidate_edges=True, nodes=[A, B, F1]) + + return backend.graph() diff --git a/tests/analyzers/statements/test_base_query.py b/tests/analyzers/statements/test_base_query.py index 940a046a..e8bf28ef 100644 --- a/tests/analyzers/statements/test_base_query.py +++ b/tests/analyzers/statements/test_base_query.py @@ -1,5 +1,6 @@ import pytest -from beagle.analyzers.queries.base_query import FactoryMixin, _str_to_exact +from beagle.analyzers.queries import FactoryMixin +from beagle.analyzers.queries.base_query import _str_to_exact from beagle.analyzers.queries.node import NodeByPropsReachable, NodeByProps from beagle.analyzers.queries.lookups import Exact from beagle.nodes import Process @@ -19,7 +20,10 @@ class
MyFactory(FactoryMixin): [ ({"process_image": "A"}, {"process_image": Exact("A")}), ({"hashes": {"md5": "A"}}, {"hashes": {"md5": Exact("A")}}), - ({"hashes": {"md5": "A", "baz": {"foo": "bar"}}}, {"hashes": {"md5": Exact("A"), "baz": {"foo": Exact("bar")}}}), + ( + {"hashes": {"md5": "A", "baz": {"foo": "bar"}}}, + {"hashes": {"md5": Exact("A"), "baz": {"foo": Exact("bar")}}}, + ), ], ) def test_str_to_exact(props, expected): diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py index 74cb0dbf..5c2d1932 100644 --- a/tests/analyzers/statements/test_edge.py +++ b/tests/analyzers/statements/test_edge.py @@ -1,8 +1,7 @@ -from beagle.analyzers.queries.edge import EdgeByProps, IntermediateEdgeByProps +from beagle.analyzers.queries.edge import EdgeByProps from beagle.analyzers.queries.lookups import Exact from beagle.analyzers.queries.process import FindProcess from beagle.nodes import File, Process -from beagle.analyzers.base_analyzer import Analyzer def test_one_edge_prop_test(G2, G3, graph_nodes_match): @@ -37,7 +36,7 @@ def test_intermediate_edge_by_props(G5, graph_nodes_match): # Run the first query. 
query1 = FindProcess.with_command_line("B") - query2 = IntermediateEdgeByProps(edge_type="Launched") + query2 = EdgeByProps(edge_type="Launched") query1 >> query2 diff --git a/tests/analyzers/statements/test_file.py b/tests/analyzers/statements/test_file.py index 25250d6e..3f3d15dd 100644 --- a/tests/analyzers/statements/test_file.py +++ b/tests/analyzers/statements/test_file.py @@ -45,9 +45,17 @@ def test_file_with_full_path(G3, graph_nodes_match): ) -def test_file_that_was_written(G3, graph_nodes_match): - analyzer = Analyzer(name="test_file_that_was_written", query=FindFile.that_was_written()) +def test_file_that_was_written(G8, graph_nodes_match): + analyzer = Analyzer( + name="test_file_that_was_written", query=FindFile.that_was_written(descendants=False) + ) - G = analyzer.run_networkx(G3) + G = analyzer.run_networkx(G8) - assert graph_nodes_match(G, [File(file_name="foo", file_path="bar")]) + assert graph_nodes_match( + G, + [ + Process(process_id=12, process_image="B", command_line="B"), + File(file_name="bar", file_path="bar"), + ], + ) From cfea7bd86b4e03e08a10ce6790ff9c48c77dbee8 Mon Sep 17 00:00:00 2001 From: yampelo Date: Sun, 17 Nov 2019 22:06:53 -0500 Subject: [PATCH 24/25] FindFile: finishes file queries --- beagle/analyzers/queries/base_query.py | 4 +++- beagle/analyzers/queries/file.py | 32 +++++++++++++++++++++++--- beagle/analyzers/queries/process.py | 5 ++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py index a10b2250..927b9571 100644 --- a/beagle/analyzers/queries/base_query.py +++ b/beagle/analyzers/queries/base_query.py @@ -7,7 +7,7 @@ from .lookups import Exact, FieldLookup -PropsDict = Dict[str, Union[str, FieldLookup, Dict]] +PropsDict = Dict[str, Union[str, FieldLookup, Dict, None]] def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]: @@ -17,6 +17,8 @@ def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]: 
props[k] = Exact(v) elif isinstance(v, dict): props[k] = _str_to_exact(v) + elif v is None: + del props[k] return props diff --git a/beagle/analyzers/queries/file.py b/beagle/analyzers/queries/file.py index 7004a755..d40f6c65 100644 --- a/beagle/analyzers/queries/file.py +++ b/beagle/analyzers/queries/file.py @@ -43,14 +43,40 @@ def with_hashes(hashes: Union[str, FieldLookup]) -> NodeByPropsReachable: # pra def with_props(props: PropsDict) -> NodeByPropsReachable: # pragma: no cover return NodeByPropsReachable(node_type=File, props=props) + # ---- Edge methods ----- # + @staticmethod - def that_was_written(descendants=True, ancestors=False, reachable=False): + def that_was_written( + contents: str = None, descendants=True, ancestors=False, reachable=False + ): # pragma: no cover return make_edge_query( - edge_type="Wrote", descendants=descendants, ancestors=ancestors, reachable=reachable + edge_type="Wrote", + edge_props={"contents": contents}, + descendants=descendants, + ancestors=ancestors, + reachable=reachable, ) @staticmethod - def that_was_copied(descendants=True, ancestors=False, reachable=False): + def that_was_copied(descendants=True, ancestors=False, reachable=False): # pragma: no cover return make_edge_query( edge_type="Copied To", descendants=descendants, ancestors=ancestors, reachable=reachable ) + + @staticmethod + def that_was_loaded(descendants=True, ancestors=False, reachable=False): # pragma: no cover + return make_edge_query( + edge_type="Loaded", descendants=descendants, ancestors=ancestors, reachable=reachable + ) + + @staticmethod + def that_was_accessed(descendants=True, ancestors=False, reachable=False): # pragma: no cover + return make_edge_query( + edge_type="Accessed", descendants=descendants, ancestors=ancestors, reachable=reachable + ) + + @staticmethod + def that_was_deleted(descendants=True, ancestors=False, reachable=False): # pragma: no cover + return make_edge_query( + edge_type="Deleted", descendants=descendants, 
ancestors=ancestors, reachable=reachable + ) diff --git a/beagle/analyzers/queries/process.py b/beagle/analyzers/queries/process.py index 0019fdff..2f475519 100644 --- a/beagle/analyzers/queries/process.py +++ b/beagle/analyzers/queries/process.py @@ -3,6 +3,7 @@ from beagle.nodes import Process from . import FactoryMixin, make_edge_query +from .base_query import PropsDict from .lookups import FieldLookup from .node import NodeByPropsReachable @@ -64,6 +65,10 @@ def with_sha1_hash( return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}}) + @staticmethod + def with_props(props: PropsDict) -> NodeByPropsReachable: # pragma: no cover + return NodeByPropsReachable(node_type=Process, props=props) + @staticmethod def that_was_launched(descendants=True, ancestors=False, reachable=False): return make_edge_query( From 55404f38a791fbc88defb6bbcb8354d21c931202 Mon Sep 17 00:00:00 2001 From: yampelo Date: Mon, 18 Nov 2019 00:36:49 -0500 Subject: [PATCH 25/25] SummaryQuery: adds ability to summarize information gathered --- beagle/analyzers/base_analyzer.py | 18 ++++++++++--- beagle/analyzers/queries/edge.py | 2 +- beagle/analyzers/queries/file.py | 8 +++--- beagle/analyzers/queries/process.py | 2 +- beagle/analyzers/queries/summary.py | 35 +++++++++++++++++++++++++ beagle/datasources/base_datasource.py | 22 ++++++++++++++++ beagle/transformers/base_transformer.py | 22 ++++++++++++++++ 7 files changed, 100 insertions(+), 9 deletions(-) create mode 100644 beagle/analyzers/queries/summary.py diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py index 4388b651..9433808a 100644 --- a/beagle/analyzers/base_analyzer.py +++ b/beagle/analyzers/base_analyzer.py @@ -1,9 +1,12 @@ -from typing import Type, cast, Any +from typing import Any, Type, cast import networkx as nx -from beagle.analyzers.queries.base_query import Query from beagle.backends import Backend, NetworkX +from beagle.common import logger + +from .queries.base_query 
import Query +from .queries.summary import SummaryQuery class Analyzer(object): @@ -19,11 +22,13 @@ def __init__(self, name: str, query: Query, description: str = None, score: int self.query: Query = query def run(self, backend: Type[Backend]) -> Any: + if isinstance(backend, NetworkX): backend = cast(NetworkX, backend) return self.run_networkx(backend.G) def run_networkx(self, G: nx.Graph) -> nx.Graph: + logger.info(f"Running analyzer {self.name}") # H is a copy of our original graph. H = G.copy() @@ -32,9 +37,16 @@ def run_networkx(self, G: nx.Graph) -> nx.Graph: while current_query is not None: # Run the query. - H = current_query.execute_networkx(H) + if isinstance(current_query, SummaryQuery): + # SummaryQueries get the original graph. + H = current_query.execute_networkx(G.copy()) + else: + H = current_query.execute_networkx(H) # Get the next query, and execute current_query = current_query.downstream_query + if len(H.nodes()) > 0: + logger.info(f"Analyzer query returned a matching subgraph.") + return H diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py index e0ce9724..098b5f71 100644 --- a/beagle/analyzers/queries/edge.py +++ b/beagle/analyzers/queries/edge.py @@ -62,7 +62,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph: subgraph_edges.append((u, v, k)) # can stop on first match self.result_edges |= {(u, v, k)} - self.result_nodes |= {u, v} + self.result_nodes |= {v} break return G.edge_subgraph(subgraph_edges) diff --git a/beagle/analyzers/queries/file.py b/beagle/analyzers/queries/file.py index d40f6c65..4d716cfe 100644 --- a/beagle/analyzers/queries/file.py +++ b/beagle/analyzers/queries/file.py @@ -58,25 +58,25 @@ def that_was_written( ) @staticmethod - def that_was_copied(descendants=True, ancestors=False, reachable=False): # pragma: no cover + def that_was_copied(descendants=False, ancestors=False, reachable=False): # pragma: no cover return make_edge_query( edge_type="Copied To", descendants=descendants, 
ancestors=ancestors, reachable=reachable ) @staticmethod - def that_was_loaded(descendants=True, ancestors=False, reachable=False): # pragma: no cover + def that_was_loaded(descendants=False, ancestors=False, reachable=False): # pragma: no cover return make_edge_query( edge_type="Loaded", descendants=descendants, ancestors=ancestors, reachable=reachable ) @staticmethod - def that_was_accessed(descendants=True, ancestors=False, reachable=False): # pragma: no cover + def that_was_accessed(descendants=False, ancestors=False, reachable=False): # pragma: no cover return make_edge_query( edge_type="Accessed", descendants=descendants, ancestors=ancestors, reachable=reachable ) @staticmethod - def that_was_deleted(descendants=True, ancestors=False, reachable=False): # pragma: no cover + def that_was_deleted(descendants=False, ancestors=False, reachable=False): # pragma: no cover return make_edge_query( edge_type="Deleted", descendants=descendants, ancestors=ancestors, reachable=reachable ) diff --git a/beagle/analyzers/queries/process.py b/beagle/analyzers/queries/process.py index 2f475519..adc5af6f 100644 --- a/beagle/analyzers/queries/process.py +++ b/beagle/analyzers/queries/process.py @@ -70,7 +70,7 @@ def with_props(props: PropsDict) -> NodeByPropsReachable: # pragma: no cover return NodeByPropsReachable(node_type=Process, props=props) @staticmethod - def that_was_launched(descendants=True, ancestors=False, reachable=False): + def that_was_launched(descendants=False, ancestors=False, reachable=False): return make_edge_query( edge_type="Launched", descendants=descendants, ancestors=ancestors, reachable=reachable ) diff --git a/beagle/analyzers/queries/summary.py b/beagle/analyzers/queries/summary.py new file mode 100644 index 00000000..ae89cb15 --- /dev/null +++ b/beagle/analyzers/queries/summary.py @@ -0,0 +1,35 @@ +from typing import List, Set, Type + +import networkx as nx + +from beagle.analyzers.queries import Query +from beagle.nodes import Node + + +class 
SummaryQuery(Query): + # Nothing special, just a type for detecting when we reach a summary operator. + pass + + +class CollectDetectedNodes(SummaryQuery): + def __init__(self, node_types: List[Type[Node]] = []): + self.node_types = tuple(node_types) + super().__init__() + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + + all_resulting_nodes: Set[int] = set() + + # Get the upstream nodes. + upstream_query = self.upstream_query + while upstream_query is not None: + all_resulting_nodes |= upstream_query.result_nodes + upstream_query = upstream_query.upstream_query + + if self.node_types: + node_attrs = nx.get_node_attributes(G, "data") + all_resulting_nodes = filter( + lambda node: isinstance(node_attrs[node], self.node_types), all_resulting_nodes + ) + + return G.subgraph(all_resulting_nodes) diff --git a/beagle/datasources/base_datasource.py b/beagle/datasources/base_datasource.py index 83ca6ef3..94bd092d 100644 --- a/beagle/datasources/base_datasource.py +++ b/beagle/datasources/base_datasource.py @@ -112,6 +112,28 @@ def to_graph(self, *args, **kwargs) -> Any: return self.to_transformer(self.transformers[0]).to_graph(*args, **kwargs) # type: ignore + def to_backend(self, graph=False, *args, **kwargs) -> Any: + """Allows to hop immediately from a datasource to a backend. + + Supports parameters for the to_graph() function of the transformer. + + see :py:meth:`beagle.transformers.base_transformer.Transformer.to_graph` + + Examples + -------- + >>> SysmonEVTX('data/sysmon/autoruns-sysmon.evtx').to_backend(Graphistry, render=True) + + + Returns + ------- + Any + Returns a backend, prior to being graphed. + """ + + return self.to_transformer(self.transformers[0]).to_backend( + graph=graph, *args, **kwargs + ) # type: ignore + def _convert_to_parent_fields(self, process: dict) -> dict: """Converts a process to represent a child process.
diff --git a/beagle/transformers/base_transformer.py b/beagle/transformers/base_transformer.py index f3fe7dce..efb92af1 100644 --- a/beagle/transformers/base_transformer.py +++ b/beagle/transformers/base_transformer.py @@ -60,6 +60,28 @@ def to_graph(self, backend: "Backend" = NetworkX, *args, **kwargs) -> Any: backend = backend(nodes=nodes, metadata=self.datasource.metadata(), *args, **kwargs) return backend.graph() + def to_backend(self, backend: "Backend" = NetworkX, graph=False, *args, **kwargs) -> Any: + """Graphs the nodes created by :py:meth:`run`. If no backend is specified, + the default used is NetworkX. + + Parameters + ---------- + backend : [type], optional + [description] (the default is NetworkX, which [default_description]) + + Returns + ------- + [type] + [description] + """ + + nodes = self.run() + + backend = backend(nodes=nodes, metadata=self.datasource.metadata(), *args, **kwargs) + if graph: + backend.graph() + return backend + def run(self) -> List[Node]: """Generates the list of nodes from the datasource.