From 442341c792ae1c02c931edb5477074c3ab928852 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Wed, 13 Nov 2019 19:49:06 -0500
Subject: [PATCH 01/25] analyzer skeleton

---
 beagle/analyzers/base_analyzer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py
index e69de29b..fd1f6119 100644
--- a/beagle/analyzers/base_analyzer.py
+++ b/beagle/analyzers/base_analyzer.py
@@ -0,0 +1,3 @@
+class Analyzer(object):
+    def __init__(self, backend):
+        pass

From 1b7f7059d3143318a7253fb71fbf3cd9f2913a5a Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 00:02:17 -0500
Subject: [PATCH 02/25] Begins working on rule system, adds field lookups.

---
 beagle/analyzers/base_analyzer.py             |  6 +-
 beagle/analyzers/statements/base_statement.py | 29 +++++++
 beagle/analyzers/statements/conditionals.py   |  0
 beagle/analyzers/statements/lookups.py        | 84 +++++++++++++++++++
 .../analyzers/test_statements/test_lookups.py | 48 +++++++++++
 5 files changed, 165 insertions(+), 2 deletions(-)
 create mode 100644 beagle/analyzers/statements/base_statement.py
 create mode 100644 beagle/analyzers/statements/conditionals.py
 create mode 100644 beagle/analyzers/statements/lookups.py
 create mode 100644 tests/analyzers/test_statements/test_lookups.py

diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py
index fd1f6119..790a006c 100644
--- a/beagle/analyzers/base_analyzer.py
+++ b/beagle/analyzers/base_analyzer.py
@@ -1,3 +1,5 @@
 class Analyzer(object):
-    def __init__(self, backend):
-        pass
+    def __init__(self, name: str, description: str, score: int):
+        self.name = name
+        self.description = description
+        self.score = score
diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
new file mode 100644
index 00000000..df321838
--- /dev/null
+++ b/beagle/analyzers/statements/base_statement.py
@@ -0,0 +1,29 @@
+from typing import Dict, Type, List
+
+from beagle.backends import Backend, NetworkX
+from beagle.nodes import Node
+
+from .lookups import FieldLookup
+
+
+class Statement(object):
+    def execute(self, backend: Type[Backend]):
+        if isinstance(backend, NetworkX):
+            return self.execute_networkx(backend)
+
+    def execute_networkx(self, backend: NetworkX):
+        raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
+
+
+class NodeByProps(Statement):
+    def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]):
+        self.node_type = node_type
+        self.props = props
+
+    def execute_networkx(self, backend: NetworkX) -> List[Node]:
+        result = []
+        for node_id, node in backend.G.nodes(data=True):
+            if isinstance(node, self.node_type):
+                if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]):
+                    result.append(node)
+        return result
diff --git a/beagle/analyzers/statements/conditionals.py b/beagle/analyzers/statements/conditionals.py
new file mode 100644
index 00000000..e69de29b
diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py
new file mode 100644
index 00000000..b78fe356
--- /dev/null
+++ b/beagle/analyzers/statements/lookups.py
@@ -0,0 +1,84 @@
+import re
+from typing import Pattern, Union, cast
+from abc import ABCMeta, abstractmethod
+import functools
+
+
+def not_null(f):
+    @functools.wraps(f)
+    def wrapper(prop, *args, **kwargs):
+        if prop is None:
+            return False
+        else:
+            return f(prop, *args, **kwargs)
+
+    return wrapper
+
+
+class FieldLookup(object, metaclass=ABCMeta):
+    def __init__(self, value):
+        self.value = value
+
+    @abstractmethod
+    def test(self, prop) -> bool:
+        pass
+
+
+class Contains(FieldLookup):
+    """Case sensitve contains"""
+
+    @not_null
+    def test(self, prop: str):
+        return self.value in prop
+
+
+class IContains(FieldLookup):
+    """Case insensitve Contains"""
+
+    @not_null
+    def test(self, prop: str):
+        return str(self.value).lower() in str(prop).lower()
+
+
+class Exact(FieldLookup):
+    """Exact match"""
+
+    @not_null
+    def test(self, prop: str):
+        return self.value == prop
+
+
+class IExact(FieldLookup):
+    """Insensitive Exact match"""
+
+    @not_null
+    def test(self, prop: str):
+        return str(self.value).lower() == str(prop).lower()
+
+
+class StartsWith(FieldLookup):
+    """Property begins with"""
+
+    @not_null
+    def test(self, prop: str):
+        return prop.startswith(self.value)
+
+
+class EndsWith(FieldLookup):
+    """Property begins endswith"""
+
+    @not_null
+    def test(self, prop: str):
+        return prop.endswith(self.value)
+
+
+class Regex(FieldLookup):
+    def __init__(self, value: Union[str, Pattern]):
+        if isinstance(value, str):
+            self.value: Pattern = re.compile(value)
+        else:
+            self.value = value
+
+    @not_null
+    def test(self, prop: str):
+        return self.value.search(prop) is not None
diff --git a/tests/analyzers/test_statements/test_lookups.py b/tests/analyzers/test_statements/test_lookups.py
new file mode 100644
index 00000000..530fa1ad
--- /dev/null
+++ b/tests/analyzers/test_statements/test_lookups.py
@@ -0,0 +1,48 @@
+import re
+import pytest
+from beagle.analyzers.statements.lookups import (
+    FieldLookup,
+    Contains,
+    IContains,
+    Exact,
+    IExact,
+    StartsWith,
+    EndsWith,
+    Regex,
+)
+
+
+@pytest.mark.parametrize(
+    "cls,value,prop,result",
+    [
+        (Contains, "test", "test", True),
+        (Contains, "test", "worst", False),
+        (Contains, "test", "he is the test", True),
+        (Contains, "test", "the test was bad", True),
+        (Contains, "test", "the TEST was bad", False),
+        (IContains, "test", "TEST", True),
+        (IContains, "test", "tEsT", True),
+        (IContains, "test", "worst", False),
+        (IContains, "test", "he is the test", True),
+        (IContains, "test", "the test was bad", True),
+        (Exact, "test", "test", True),
+        (Exact, "test", " test ", False),
+        (Exact, "test", "some test a", False),
+        (IExact, "test", "test", True),
+        (IExact, "test", "TEST", True),
+        (IExact, "test", "tEst", True),
+        (StartsWith, "test", "test", True),
+        (StartsWith, "test", "not a test", False),
+        (StartsWith, "test", "test is the best", True),
+        (EndsWith, "test", "test", True),
+        (EndsWith, "test", "not test but a nest", False),
+        (EndsWith, "test", "the best test", True),
+        (Regex, r"\d", "test 1 test", True),
+        (Regex, re.compile(r"\d"), "test 1 test", True),
+        (Regex, r"\d", "test test", False),
+        (Regex, re.compile(r"\d"), "test test", False),
+    ],
+)
+def test_lookups(cls: FieldLookup, value: str, prop: str, result: str):
+    # prop -> value being tested again, value -> the thing we're looking up
+    assert cls(value).test(prop) == result

From 47cd6adec667a1a07138b2d94cf77aaadc577bfc Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 00:21:44 -0500
Subject: [PATCH 03/25] Adds tests for selecting a NodeByProps in NetworkX

---
 beagle/analyzers/statements/base_statement.py |  6 +-
 beagle/analyzers/statements/conditionals.py   |  6 ++
 beagle/analyzers/statements/lookups.py        |  2 +-
 .../statements/test_base_statement.py         | 58 +++++++++++++++++++
 .../test_lookups.py                           |  0
 5 files changed, 69 insertions(+), 3 deletions(-)
 create mode 100644 tests/analyzers/statements/test_base_statement.py
 rename tests/analyzers/{test_statements => statements}/test_lookups.py (100%)

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index df321838..e1ac1e4b 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -11,7 +11,7 @@ def execute(self, backend: Type[Backend]):
         if isinstance(backend, NetworkX):
             return self.execute_networkx(backend)
 
-    def execute_networkx(self, backend: NetworkX):
+    def execute_networkx(self, backend: NetworkX):  # pragma: no cover
         raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
 
 
@@ -22,7 +22,9 @@ def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]):
 
     def execute_networkx(self, backend: NetworkX) -> List[Node]:
         result = []
-        for node_id, node in backend.G.nodes(data=True):
+        for node_id, data in backend.G.nodes(data=True):
+            node = data["data"]
+
             if isinstance(node, self.node_type):
                 if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]):
                     result.append(node)
diff --git a/beagle/analyzers/statements/conditionals.py b/beagle/analyzers/statements/conditionals.py
index e69de29b..8b56c444 100644
--- a/beagle/analyzers/statements/conditionals.py
+++ b/beagle/analyzers/statements/conditionals.py
@@ -0,0 +1,6 @@
+class Conditional(object):
+    pass
+
+
+class Not(object):
+    pass
diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py
index b78fe356..a0c679f1 100644
--- a/beagle/analyzers/statements/lookups.py
+++ b/beagle/analyzers/statements/lookups.py
@@ -15,7 +15,7 @@ def wrapper(prop, *args, **kwargs):
     return wrapper
 
 
-class FieldLookup(object, metaclass=ABCMeta):
+class FieldLookup(object, metaclass=ABCMeta):  # pragma: no cover
     def __init__(self, value):
         self.value = value
 
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
new file mode 100644
index 00000000..b0a7c086
--- /dev/null
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -0,0 +1,58 @@
+import pytest
+from beagle.backends.networkx import NetworkX
+from beagle.analyzers.statements.base_statement import NodeByProps
+from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith
+from beagle.nodes.process import Process
+
+
+@pytest.fixture
+def G1():
+    # A basic graph, with two nodes an an edge
+    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    other_proc = Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")
+
+    proc.launched[other_proc].append(timestamp=1)
+
+    backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc])
+
+    backend.graph()
+
+    return backend
+
+
+def test_one_prop_test(G1):
+    statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
+
+    # Should match on `proc` from G1
+    assert statement.execute(G1) == [
+        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    ]
+
+    # should mathc on other proc
+    statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")})
+    assert statement.execute(G1) == [
+        Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")
+    ]
+
+    # should match on both
+    statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")})
+    assert statement.execute(G1) == [
+        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+        Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"),
+    ]
+
+    # should match neither
+    statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")})
+    assert statement.execute(G1) == []
+
+
+def test_multiple_prop_test(G1):
+    statement = NodeByProps(
+        node_type=Process,
+        props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
+    )
+
+    # Should match on `proc` from G1
+    assert statement.execute(G1) == [
+        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    ]
diff --git a/tests/analyzers/test_statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py
similarity index 100%
rename from tests/analyzers/test_statements/test_lookups.py
rename to tests/analyzers/statements/test_lookups.py

From 63e24b13fc3b4665971330e14941b9a7845af509 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 00:28:36 -0500
Subject: [PATCH 04/25] Adds conditionals for matching

---
 beagle/analyzers/statements/conditionals.py |  6 ----
 beagle/analyzers/statements/lookups.py      | 32 +++++++++++++++++++++
 tests/analyzers/statements/test_lookups.py  | 16 +++++++++++
 3 files changed, 48 insertions(+), 6 deletions(-)
 delete mode 100644 beagle/analyzers/statements/conditionals.py

diff --git a/beagle/analyzers/statements/conditionals.py b/beagle/analyzers/statements/conditionals.py
deleted file mode 100644
index 8b56c444..00000000
--- a/beagle/analyzers/statements/conditionals.py
+++ /dev/null
@@ -1,6 +0,0 @@
-class Conditional(object):
-    pass
-
-
-class Not(object):
-    pass
diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py
index a0c679f1..ab97d837 100644
--- a/beagle/analyzers/statements/lookups.py
+++ b/beagle/analyzers/statements/lookups.py
@@ -24,6 +24,38 @@ def test(self, prop) -> bool:
         pass
 
 
+class Or(FieldLookup):
+    """Boolean OR, Meant to be used with other lookups:
+    >>> Or(Contains("foo"), StartsWith("bar"))
+    """
+
+    def __init__(self, *args: FieldLookup):
+        self.lookups = args
+
+    def test(self, prop: str):
+        for lookup in self.lookups:
+            if lookup.test(prop):
+                return True
+
+        return False
+
+
+class And(FieldLookup):
+    """Boolean And, Meant to be used with other lookups:
+    >>> And(Contains("foo"), StartsWith("bar"), EndsWith("zar"))
+    """
+
+    def __init__(self, *args: FieldLookup):
+        self.lookups = args
+
+    def test(self, prop: str):
+        for lookup in self.lookups:
+            if not lookup.test(prop):
+                return False
+
+        return True
+
+
 class Contains(FieldLookup):
     """Case sensitve contains"""
 
diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py
index 530fa1ad..94fd87d2 100644
--- a/tests/analyzers/statements/test_lookups.py
+++ b/tests/analyzers/statements/test_lookups.py
@@ -9,6 +9,8 @@
     StartsWith,
     EndsWith,
     Regex,
+    And,
+    Or,
 )
 
 
@@ -46,3 +48,17 @@
 def test_lookups(cls: FieldLookup, value: str, prop: str, result: str):
     # prop -> value being tested again, value -> the thing we're looking up
     assert cls(value).test(prop) == result
+
+
+def test_and():
+    assert And(StartsWith("foo"), EndsWith("bar")).test("foo bar") is True
+    assert And(StartsWith("foo"), EndsWith("bar")).test("foo nar bar") is True
+    assert And(StartsWith("foo"), EndsWith("bar")).test("bar foo") is False
+
+
+def test_or():
+    assert Or(StartsWith("foo"), EndsWith("bar")).test("foo bar") is True
+    assert Or(StartsWith("foo"), EndsWith("bar")).test("foo") is True
+    assert Or(StartsWith("foo"), EndsWith("bar")).test("bar") is True
+    assert Or(StartsWith("foo"), EndsWith("bar")).test("foo nar bar") is True
+    assert Or(StartsWith("foo"), EndsWith("bar")).test("bar foo") is False

From 7f784777f075fd060262fa12597c4761c79aed02 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 00:32:29 -0500
Subject: [PATCH 05/25] Fixes not_null wrapper

---
 beagle/analyzers/statements/lookups.py     | 6 +++---
 tests/analyzers/statements/test_lookups.py | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py
index ab97d837..aa649182 100644
--- a/beagle/analyzers/statements/lookups.py
+++ b/beagle/analyzers/statements/lookups.py
@@ -1,16 +1,16 @@
 import re
-from typing import Pattern, Union, cast
+from typing import Pattern, Union
 from abc import ABCMeta, abstractmethod
 import functools
 
 
 def not_null(f):
     @functools.wraps(f)
-    def wrapper(prop, *args, **kwargs):
+    def wrapper(self, prop, *args, **kwargs):
         if prop is None:
             return False
         else:
-            return f(prop, *args, **kwargs)
+            return f(self, prop, *args, **kwargs)
 
     return wrapper
 
diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py
index 94fd87d2..40785653 100644
--- a/tests/analyzers/statements/test_lookups.py
+++ b/tests/analyzers/statements/test_lookups.py
@@ -25,6 +25,8 @@
         (IContains, "test", "TEST", True),
         (IContains, "test", "tEsT", True),
         (IContains, "test", "worst", False),
+        # Test we reject null value.
+        (IContains, "test", None, False),
         (IContains, "test", "he is the test", True),
         (IContains, "test", "the test was bad", True),
         (Exact, "test", "test", True),

From f82339e8eeba3feecf907c899bfa61ea6fc38dd7 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 00:47:55 -0500
Subject: [PATCH 06/25] Adds operator overloading to lookups

---
 beagle/analyzers/statements/lookups.py        | 22 +++++++++++++++++++
 .../statements/test_base_statement.py         | 16 ++++++++++++--
 tests/analyzers/statements/test_lookups.py    | 12 ++++++++++
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py
index aa649182..89a8703c 100644
--- a/beagle/analyzers/statements/lookups.py
+++ b/beagle/analyzers/statements/lookups.py
@@ -23,6 +23,16 @@ def __init__(self, value):
     def test(self, prop) -> bool:
         pass
 
+    def __and__(self, other) -> "FieldLookup":
+        # Contains("test.exe") & Contains("fest.exe") -> And(Contains("test.exe"), Contains("fest.exe"))
+        return And(self, other)
+
+    def __or__(self, other) -> "FieldLookup":
+        return Or(self, other)
+
+    def __invert__(self) -> "FieldLookup":
+        return Not(self)
+
 
 class Or(FieldLookup):
     """Boolean OR, Meant to be used with other lookups:
@@ -56,6 +66,18 @@ def test(self, prop: str):
         return True
 
 
+class Not(FieldLookup):
+    """Boolean And, Meant to be used with other lookups:
+    >>> And(Contains("foo"), StartsWith("bar"), EndsWith("zar"))
+    """
+
+    def __init__(self, arg: FieldLookup):
+        self.lookup = arg
+
+    def test(self, prop: str):
+        return not self.lookup.test(prop)
+
+
 class Contains(FieldLookup):
     """Case sensitve contains"""
 
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index b0a7c086..8a6ab555 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -20,7 +20,7 @@ def G1():
     return backend
 
 
-def test_one_prop_test(G1):
+def test_one_node_prop_test(G1):
     statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
 
     # Should match on `proc` from G1
@@ -46,7 +46,19 @@ def test_one_prop_test(G1):
     assert statement.execute(G1) == []
 
 
-def test_multiple_prop_test(G1):
+def test_multiple_node_prop_test(G1):
+    statement = NodeByProps(
+        node_type=Process,
+        props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
+    )
+
+    # Should match on `proc` from G1
+    assert statement.execute(G1) == [
+        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    ]
+
+
+def test_node_conditional(G1):
     statement = NodeByProps(
         node_type=Process,
         props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py
index 40785653..c97d161a 100644
--- a/tests/analyzers/statements/test_lookups.py
+++ b/tests/analyzers/statements/test_lookups.py
@@ -11,6 +11,7 @@
     Regex,
     And,
     Or,
+    Not,
 )
 
 
@@ -64,3 +65,14 @@ def test_or():
     assert Or(StartsWith("foo"), EndsWith("bar")).test("bar") is True
     assert Or(StartsWith("foo"), EndsWith("bar")).test("foo nar bar") is True
     assert Or(StartsWith("foo"), EndsWith("bar")).test("bar foo") is False
+
+
+def test_not():
+    assert Not(Contains("test")).test("hello") is True
+    assert Not(Not(Contains("test"))).test("hello") is False
+
+
+def test_operator_overloading():
+    assert (~Contains("test")).test("hello") is True
+    assert (Contains("test") & EndsWith("hello")).test("test my hello") is True
+    assert (Contains("test") | EndsWith("hello")).test("hello") is True

From 39c3dcf2c17e81acb926c35a3292898482aad152 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 01:07:52 -0500
Subject: [PATCH 07/25] Moves statements to work on nx.Graph objects instead of
 NetworkX Backends

---
 beagle/analyzers/statements/base_statement.py | 25 ++++++------
 beagle/analyzers/statements/lookups.py        | 40 +++++++++++++++++--
 .../statements/test_base_statement.py         | 20 +++++-----
 3 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index e1ac1e4b..14ccea69 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -1,17 +1,13 @@
-from typing import Dict, Type, List
+from typing import Dict, Type
 
-from beagle.backends import Backend, NetworkX
 from beagle.nodes import Node
 
 from .lookups import FieldLookup
+import networkx as nx
 
 
 class Statement(object):
-    def execute(self, backend: Type[Backend]):
-        if isinstance(backend, NetworkX):
-            return self.execute_networkx(backend)
-
-    def execute_networkx(self, backend: NetworkX):  # pragma: no cover
+    def execute_networkx(self, G: nx.Graph):  # pragma: no cover
         raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
 
 
@@ -20,12 +16,17 @@ def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]):
         self.node_type = node_type
         self.props = props
 
-    def execute_networkx(self, backend: NetworkX) -> List[Node]:
-        result = []
-        for node_id, data in backend.G.nodes(data=True):
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        subgraph_nodes = []
+
+        # For each node
+        for node_id, data in G.nodes(data=True):
             node = data["data"]
 
+            # If node matches the desired instance.
             if isinstance(node, self.node_type):
+                # Test the node
                 if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]):
-                    result.append(node)
-        return result
+                    subgraph_nodes.append(node_id)
+
+        return G.subgraph(subgraph_nodes)
diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/statements/lookups.py
index 89a8703c..76c764e3 100644
--- a/beagle/analyzers/statements/lookups.py
+++ b/beagle/analyzers/statements/lookups.py
@@ -5,6 +5,7 @@
 
 
 def not_null(f):
+    # Ensures the passed in prop is not null
     @functools.wraps(f)
     def wrapper(self, prop, *args, **kwargs):
         if prop is None:
@@ -23,14 +24,43 @@ def __init__(self, value):
     def test(self, prop) -> bool:
         pass
 
-    def __and__(self, other) -> "FieldLookup":
-        # Contains("test.exe") & Contains("fest.exe") -> And(Contains("test.exe"), Contains("fest.exe"))
+    def __and__(self, other) -> "And":
+        """Combines two FieldLookups to works as a logical and
+
+        >>> Contains("test.exe") & Contains("fest.exe")
+        And(Contains("test.exe"), Contains("fest.exe"))
+
+        Returns
+        -------
+        And
+            And FieldLookup Object
+        """
         return And(self, other)
 
-    def __or__(self, other) -> "FieldLookup":
+    def __or__(self, other) -> "Or":
+        """Combines two FieldLookup objects to work as a logical Or
+
+        >>> Contains("test.exe") | Contains("fest.exe")
+        Or(Contains("test.exe"), Contains("fest.exe"))
+
+        Returns
+        -------
+        Or
+            Or FieldLookupObject
+        """
         return Or(self, other)
 
-    def __invert__(self) -> "FieldLookup":
+    def __invert__(self) -> "Not":
+        """Negates a field lookup
+
+        >>> ~Contains("test.exe")
+        Not(Contains("test.exe"))
+
+        Returns
+        -------
+        Not
+            Not FieldLookupObject
+        """
         return Not(self)
 
 
@@ -127,6 +157,8 @@ def test(self, prop: str):
 
 
 class Regex(FieldLookup):
+    """Regex Match"""
+
     def __init__(self, value: Union[str, Pattern]):
         if isinstance(value, str):
             self.value: Pattern = re.compile(value)
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index 8a6ab555..19753f4c 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -15,35 +15,35 @@ def G1():
 
     backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc])
 
-    backend.graph()
-
-    return backend
+    return backend.graph()
 
 
 def test_one_node_prop_test(G1):
     statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
 
     # Should match on `proc` from G1
-    assert statement.execute(G1) == [
-        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    nodes = statement.execute_networkx(G1).nodes(data=True)
+    assert len(nodes) == 1
+    assert Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") in [
+        n["data"] for _, n in nodes
     ]
 
     # should mathc on other proc
     statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")})
-    assert statement.execute(G1) == [
+    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
         Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")
     ]
 
     # should match on both
     statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")})
-    assert statement.execute(G1) == [
+    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
         Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
         Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"),
     ]
 
     # should match neither
     statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")})
-    assert statement.execute(G1) == []
+    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == []
 
 
 def test_multiple_node_prop_test(G1):
@@ -53,7 +53,7 @@ def test_multiple_node_prop_test(G1):
     )
 
     # Should match on `proc` from G1
-    assert statement.execute(G1) == [
+    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
         Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
     ]
 
@@ -65,6 +65,6 @@ def test_node_conditional(G1):
     )
 
     # Should match on `proc` from G1
-    assert statement.execute(G1) == [
+    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
         Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
     ]

From 3cbae4f254f532b274f880e8d49da0660632f944 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 01:53:48 -0500
Subject: [PATCH 08/25] EdgeByProps: Adds statement to return subgraph that
 contains a matching edge.

---
 beagle/analyzers/statements/base_statement.py | 69 +++++++++++++++++++
 beagle/backends/networkx.py                   |  5 +-
 .../statements/test_base_statement.py         | 53 +++++++++++++-
 3 files changed, 120 insertions(+), 7 deletions(-)

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index 14ccea69..0281de0a 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -13,10 +13,28 @@ def execute_networkx(self, G: nx.Graph):  # pragma: no cover
 
 class NodeByProps(Statement):
     def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]):
+        """Searches the graph for a node of type `node_type` with properties matching `props`
+
+        Parameters
+        ----------
+        node_type : Type[Node]
+            The type of node to look for. e.g. Process
+        props : Dict[str, FieldLookup]
+            The set of props to filter the resulting nodes by.
+
+        Examples
+        ----------
+        Filter for Process nodes, with command lines that contain `text.exe`
+        >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
+
+        """
         self.node_type = node_type
         self.props = props
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        """Searches a `nx.Graph` object for nodes that match type `node_type` and contains
+        props matching `props`. This is O(V).
+        """
         subgraph_nodes = []
 
         # For each node
@@ -30,3 +48,54 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
                     subgraph_nodes.append(node_id)
 
         return G.subgraph(subgraph_nodes)
+
+
+class EdgeByProps(Statement):
+    def __init__(self, edge_type: str, props: Dict[str, FieldLookup]):
+        """Searches the graph for an edge of type `edge_type` with properties matching `props`
+
+        Parameters
+        ----------
+        edge_type : str
+            The type of edge to look for. e.g. Wrote
+        props : Dict[str, FieldLookup]
+            The set of props to filter the resulting edges by.
+
+        Examples
+        ----------
+        Filter for TCP edges, with contents that match ".pdf"
+        >>> EdgeByProps(edge_type="TCP", props={"payload": Contains(".pdf")})
+
+        """
+        self.edge_type = edge_type
+        self.props = props
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        """Searches a `nx.Graph` object for edges that match type `edge_type` and contains
+        props matching `props`. This is O(E).
+
+        Returns a subgraph with all nodes contained in match edges
+        """
+        subgraph_edges = []
+
+        # For each edge
+        for u, v, k, e_data in G.edges(data=True, keys=True):
+
+            # pull out the data field from NX
+            data = e_data["data"]  # edge data
+            e_type = e_data["edge_name"]  # edge type
+
+            # If edge matches the desired instance.
+            if e_type == self.edge_type:
+
+                # Test the edge
+                if not isinstance(data, list):
+                    data = [data]
+
+                for entry in data:
+                    if any([lookup.test(entry.get(prop)) for prop, lookup in self.props.items()]):
+                        subgraph_edges.append((u, v, k))
+                        # can stop on first match
+                        break
+
+        return G.edge_subgraph(subgraph_edges)
diff --git a/beagle/backends/networkx.py b/beagle/backends/networkx.py
index 6ddaa55d..ac59b2ca 100644
--- a/beagle/backends/networkx.py
+++ b/beagle/backends/networkx.py
@@ -190,10 +190,7 @@ def insert_edges(self, u: Node, v: Node, edge_name: str, instances: List[dict])
         # Otherwise, they key is assigned from NetworkX, and we add the edge type as a label:
         else:
             self.G.add_edges_from(
-                [
-                    (u_id, v_id, {"key": edge_name, "data": entry, "edge_name": edge_name})
-                    for entry in instances
-                ]
+                [(u_id, v_id, {"data": entry, "edge_name": edge_name}) for entry in instances]
             )
 
     def update_node(self, node: Node, node_id: int) -> None:  # pragma: no cover
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index 19753f4c..043ffbc5 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -1,8 +1,8 @@
 import pytest
 from beagle.backends.networkx import NetworkX
-from beagle.analyzers.statements.base_statement import NodeByProps
-from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith
-from beagle.nodes.process import Process
+from beagle.analyzers.statements.base_statement import NodeByProps, EdgeByProps
+from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith, Exact
+from beagle.nodes.process import Process, File
 
 
 @pytest.fixture
@@ -18,6 +18,33 @@ def G1():
     return backend.graph()
 
 
+@pytest.fixture
+def G2():
+    # A basic graph, with two nodes an an edge
+    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    f = File(file_name="foo", file_path="bar")
+
+    proc.wrote[f].append(contents="foo")
+
+    backend = NetworkX(consolidate_edges=True, nodes=[proc, f])
+
+    return backend.graph()
+
+
+@pytest.fixture
+def G3():
+    # *no consolidating*
+    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    f = File(file_name="foo", file_path="bar")
+
+    proc.wrote[f].append(contents="foo")
+    proc.wrote[f].append(contents="bar")
+
+    backend = NetworkX(consolidate_edges=False, nodes=[proc, f])
+
+    return backend.graph()
+
+
 def test_one_node_prop_test(G1):
     statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
 
@@ -68,3 +95,23 @@ def test_node_conditional(G1):
     assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
         Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
     ]
+
+
+def test_one_edge_prop_test(G2, G3):
+    statement = EdgeByProps(edge_type="Wrote", props={"contents": Exact("foo")})
+
+    assert [n["data"] for _, n in statement.execute_networkx(G2).nodes(data=True)] == [
+        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+        File(file_name="foo", file_path="bar"),
+    ]
+
+    # Should work on the non-conslidating graph too.
+    assert [n["data"] for _, n in statement.execute_networkx(G3).nodes(data=True)] == [
+        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+        File(file_name="foo", file_path="bar"),
+    ]
+
+    statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")})
+
+    # Should match on `proc` from G1
+    assert [n["data"] for _, n in statement.execute_networkx(G2).nodes(data=True)] == []

From f2dc414fe96827007d978a8759e48314d7bf283e Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 11:53:29 -0500
Subject: [PATCH 09/25] Filter node and return ancestors/descendants/all
 reachable.

NodeByPropsDescendents: Get a node and its descendants

NodeByPropsAncestors: Get a node and its ancestors

NodeByPropsReachable: Get a node and all reachable nodes from it (ancestors or descendants)
---
 beagle/analyzers/statements/base_statement.py |  69 +++++-
 .../statements/test_base_statement.py         | 222 +++++++++++++++---
 2 files changed, 256 insertions(+), 35 deletions(-)

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index 0281de0a..cf0c083a 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -1,12 +1,20 @@
-from typing import Dict, Type
+from typing import Dict, List, Set, Tuple, Type
+
+import networkx as nx
 
 from beagle.nodes import Node
 
 from .lookups import FieldLookup
-import networkx as nx
 
 
 class Statement(object):
+    def __init__(self):
+        # The resulting node IDs
+        self.result_nodes: List[int] = []
+
+        # The resulting edge IDs
+        self.result_edges: List[Tuple[int, int, int]] = []
+
     def execute_networkx(self, G: nx.Graph):  # pragma: no cover
         raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
 
@@ -30,6 +38,7 @@ def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]):
         """
         self.node_type = node_type
         self.props = props
+        super().__init__()
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         """Searches a `nx.Graph` object for nodes that match type `node_type` and contains
@@ -99,3 +108,59 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
                         break
 
         return G.edge_subgraph(subgraph_edges)
+
+
+class NodeByPropsDescendents(NodeByProps):
+    """Executes a `NodeByProps` query, and returns all descendants of the matching nodes.
+    see py:meth:`NodeByProps`"""
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+
+        # Get the next graph
+        next_graph = super().execute_networkx(G)
+
+        subgraph_nodes: Set[int] = set()
+
+        # For every node that matched `NodeByProps`
+        for node_id in next_graph.nodes():
+            # Get the nodes descendants in the original graph, and add make a subgraph from those.
+            subgraph_nodes |= nx.descendants(G, node_id) | {node_id}
+
+        return G.subgraph(subgraph_nodes)
+
+
+class NodeByPropsAncestors(NodeByProps):
+    """Executes a `NodeByProps` query, and returns all ascendants of the matching nodes.
+    see py:meth:`NodeByProps`"""
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+
+        # Get the next graph
+        next_graph = super().execute_networkx(G)
+
+        subgraph_nodes: Set[int] = set()
+
+        # For every node that matched `NodeByProps`
+        for node_id in next_graph.nodes():
+            # Get the nodes ancestors in the original graph, and add make a subgraph from those.
+            subgraph_nodes |= nx.ancestors(G, node_id) | {node_id}
+
+        return G.subgraph(subgraph_nodes)
+
+
+class NodeByPropsReachable(NodeByProps):
+    """Executes a `NodeByProps` query, and returns all ancestors and descendants of the matching nodes.
+    see py:meth:`NodeByProps`"""
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+
+        # Get the next graph
+        next_graph = super().execute_networkx(G)
+
+        subgraph_nodes: Set[int] = set()
+
+        # For every node that matched `NodeByProps`
+        for node_id in next_graph.nodes():
+            subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id}
+
+        return G.subgraph(subgraph_nodes)
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index 043ffbc5..dbc6d5fe 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -1,8 +1,18 @@
+from typing import List
+import networkx as nx
+
 import pytest
 from beagle.backends.networkx import NetworkX
-from beagle.analyzers.statements.base_statement import NodeByProps, EdgeByProps
+from beagle.analyzers.statements.base_statement import (
+    NodeByProps,
+    EdgeByProps,
+    NodeByPropsDescendents,
+    NodeByPropsAncestors,
+    NodeByPropsReachable,
+)
 from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith, Exact
-from beagle.nodes.process import Process, File
+
+from beagle.nodes import Node, File, Process
 
 
 @pytest.fixture
@@ -45,32 +55,85 @@ def G3():
     return backend.graph()
 
 
+@pytest.fixture
+def G4():
+    # A graph with a four process tree:
+    # A -> B -> C -> D
+    A = Process(process_id=10, process_image="A", command_line="A")
+    B = Process(process_id=12, process_image="B", command_line="B")
+    C = Process(process_id=12, process_image="C", command_line="C")
+    D = Process(process_id=12, process_image="D", command_line="D")
+
+    A.launched[B]
+    B.launched[C]
+    C.launched[D]
+
+    backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C])
+
+    return backend.graph()
+
+
+@pytest.fixture
+def G5():
+    # A graph with two, *disconnected* four process tree:
+    # A -> B -> C -> D
+    # E -> F -> G -> H
+    A = Process(process_id=10, process_image="A", command_line="A")
+    B = Process(process_id=12, process_image="B", command_line="B")
+    C = Process(process_id=12, process_image="C", command_line="C")
+    D = Process(process_id=12, process_image="D", command_line="D")
+
+    E = Process(process_id=10, process_image="E", command_line="E")
+    F = Process(process_id=12, process_image="F", command_line="F")
+    G = Process(process_id=12, process_image="G", command_line="G")
+    H = Process(process_id=12, process_image="H", command_line="H")
+
+    A.launched[B]
+    B.launched[C]
+    C.launched[D]
+
+    E.launched[F]
+    F.launched[G]
+    G.launched[H]
+
+    backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H])
+
+    return backend.graph()
+
+
+def graph_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool:
+    return [n["data"] for _, n in graph.nodes(data=True)] == nodes
+
+
 def test_one_node_prop_test(G1):
     statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
 
-    # Should match on `proc` from G1
-    nodes = statement.execute_networkx(G1).nodes(data=True)
-    assert len(nodes) == 1
-    assert Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar") in [
-        n["data"] for _, n in nodes
-    ]
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
+    )
 
     # should mathc on other proc
     statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")})
-    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
-        Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")
-    ]
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")],
+    )
 
     # should match on both
     statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")})
-    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
-        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
-        Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"),
-    ]
 
-    # should match neither
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"),
+        ],
+    )
     statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")})
-    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == []
+
+    assert graph_nodes_match(statement.execute_networkx(G1), [])
 
 
 def test_multiple_node_prop_test(G1):
@@ -80,9 +143,10 @@ def test_multiple_node_prop_test(G1):
     )
 
     # Should match on `proc` from G1
-    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
-        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
-    ]
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
+    )
 
 
 def test_node_conditional(G1):
@@ -91,27 +155,119 @@ def test_node_conditional(G1):
         props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
     )
 
-    # Should match on `proc` from G1
-    assert [n["data"] for _, n in statement.execute_networkx(G1).nodes(data=True)] == [
-        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
-    ]
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
+    )
 
 
 def test_one_edge_prop_test(G2, G3):
     statement = EdgeByProps(edge_type="Wrote", props={"contents": Exact("foo")})
 
-    assert [n["data"] for _, n in statement.execute_networkx(G2).nodes(data=True)] == [
-        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
-        File(file_name="foo", file_path="bar"),
-    ]
+    assert graph_nodes_match(
+        statement.execute_networkx(G2),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
 
     # Should work on the non-conslidating graph too.
-    assert [n["data"] for _, n in statement.execute_networkx(G3).nodes(data=True)] == [
-        Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
-        File(file_name="foo", file_path="bar"),
-    ]
+    assert graph_nodes_match(
+        statement.execute_networkx(G3),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
 
     statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")})
 
     # Should match on `proc` from G1
-    assert [n["data"] for _, n in statement.execute_networkx(G2).nodes(data=True)] == []
+    assert graph_nodes_match(statement.execute_networkx(G2), [])
+
+
+def test_node_with_descendants(G4):
+
+    # A should return A->B->C->D
+    statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+    # B should return B->C->D
+    statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+
+def test_node_with_ancestors(G4):
+
+    # A should return A
+    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [Process(process_id=10, process_image="A", command_line="A")],
+    )
+
+    # B should return A->B
+    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+        ],
+    )
+
+    # D should return A->B->C->D
+    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+
+def test_nodes_reachable(G5):
+
+    # All queries will return the full path.
+    # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H
+
+    statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G5),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+    statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G5),
+        [
+            Process(process_id=10, process_image="E", command_line="E"),
+            Process(process_id=12, process_image="F", command_line="F"),
+            Process(process_id=12, process_image="G", command_line="G"),
+            Process(process_id=12, process_image="H", command_line="H"),
+        ],
+    )

From 024b7b58a24164511895a68201eaae35f085e6d9 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 12:22:27 -0500
Subject: [PATCH 10/25] Moves test graphs to fixture files

---
 beagle/analyzers/__init__.py                  |  0
 beagle/analyzers/statements/__init__.py       |  0
 tests/analyzers/conftest.py                   | 93 +++++++++++++++++++
 .../statements/test_base_statement.py         | 88 ------------------
 4 files changed, 93 insertions(+), 88 deletions(-)
 create mode 100644 beagle/analyzers/__init__.py
 create mode 100644 beagle/analyzers/statements/__init__.py
 create mode 100644 tests/analyzers/conftest.py

diff --git a/beagle/analyzers/__init__.py b/beagle/analyzers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/beagle/analyzers/statements/__init__.py b/beagle/analyzers/statements/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py
new file mode 100644
index 00000000..08ad5efc
--- /dev/null
+++ b/tests/analyzers/conftest.py
@@ -0,0 +1,93 @@
+import networkx as nx
+
+import pytest
+
+from beagle.backends.networkx import NetworkX
+
+from beagle.nodes import Node, File, Process
+
+
+@pytest.fixture
+def G1():
+    # A basic graph, with two nodes an an edge
+    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    other_proc = Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")
+
+    proc.launched[other_proc].append(timestamp=1)
+
+    backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc])
+
+    return backend.graph()
+
+
+@pytest.fixture
+def G2():
+    # A basic graph, with two nodes an an edge
+    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    f = File(file_name="foo", file_path="bar")
+
+    proc.wrote[f].append(contents="foo")
+
+    backend = NetworkX(consolidate_edges=True, nodes=[proc, f])
+
+    return backend.graph()
+
+
+@pytest.fixture
+def G3():
+    # *no consolidating*
+    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
+    f = File(file_name="foo", file_path="bar")
+
+    proc.wrote[f].append(contents="foo")
+    proc.wrote[f].append(contents="bar")
+
+    backend = NetworkX(consolidate_edges=False, nodes=[proc, f])
+
+    return backend.graph()
+
+
+@pytest.fixture
+def G4():
+    # A graph with a four process tree:
+    # A -> B -> C -> D
+    A = Process(process_id=10, process_image="A", command_line="A")
+    B = Process(process_id=12, process_image="B", command_line="B")
+    C = Process(process_id=12, process_image="C", command_line="C")
+    D = Process(process_id=12, process_image="D", command_line="D")
+
+    A.launched[B]
+    B.launched[C]
+    C.launched[D]
+
+    backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C])
+
+    return backend.graph()
+
+
+@pytest.fixture
+def G5():
+    # A graph with two, *disconnected* four process tree:
+    # A -> B -> C -> D
+    # E -> F -> G -> H
+    A = Process(process_id=10, process_image="A", command_line="A")
+    B = Process(process_id=12, process_image="B", command_line="B")
+    C = Process(process_id=12, process_image="C", command_line="C")
+    D = Process(process_id=12, process_image="D", command_line="D")
+
+    E = Process(process_id=10, process_image="E", command_line="E")
+    F = Process(process_id=12, process_image="F", command_line="F")
+    G = Process(process_id=12, process_image="G", command_line="G")
+    H = Process(process_id=12, process_image="H", command_line="H")
+
+    A.launched[B]
+    B.launched[C]
+    C.launched[D]
+
+    E.launched[F]
+    F.launched[G]
+    G.launched[H]
+
+    backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H])
+
+    return backend.graph()
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index dbc6d5fe..d170c5c2 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -1,8 +1,6 @@
 from typing import List
 import networkx as nx
 
-import pytest
-from beagle.backends.networkx import NetworkX
 from beagle.analyzers.statements.base_statement import (
     NodeByProps,
     EdgeByProps,
@@ -15,92 +13,6 @@
 from beagle.nodes import Node, File, Process
 
 
-@pytest.fixture
-def G1():
-    # A basic graph, with two nodes an an edge
-    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
-    other_proc = Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")
-
-    proc.launched[other_proc].append(timestamp=1)
-
-    backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc])
-
-    return backend.graph()
-
-
-@pytest.fixture
-def G2():
-    # A basic graph, with two nodes an an edge
-    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
-    f = File(file_name="foo", file_path="bar")
-
-    proc.wrote[f].append(contents="foo")
-
-    backend = NetworkX(consolidate_edges=True, nodes=[proc, f])
-
-    return backend.graph()
-
-
-@pytest.fixture
-def G3():
-    # *no consolidating*
-    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
-    f = File(file_name="foo", file_path="bar")
-
-    proc.wrote[f].append(contents="foo")
-    proc.wrote[f].append(contents="bar")
-
-    backend = NetworkX(consolidate_edges=False, nodes=[proc, f])
-
-    return backend.graph()
-
-
-@pytest.fixture
-def G4():
-    # A graph with a four process tree:
-    # A -> B -> C -> D
-    A = Process(process_id=10, process_image="A", command_line="A")
-    B = Process(process_id=12, process_image="B", command_line="B")
-    C = Process(process_id=12, process_image="C", command_line="C")
-    D = Process(process_id=12, process_image="D", command_line="D")
-
-    A.launched[B]
-    B.launched[C]
-    C.launched[D]
-
-    backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C])
-
-    return backend.graph()
-
-
-@pytest.fixture
-def G5():
-    # A graph with two, *disconnected* four process tree:
-    # A -> B -> C -> D
-    # E -> F -> G -> H
-    A = Process(process_id=10, process_image="A", command_line="A")
-    B = Process(process_id=12, process_image="B", command_line="B")
-    C = Process(process_id=12, process_image="C", command_line="C")
-    D = Process(process_id=12, process_image="D", command_line="D")
-
-    E = Process(process_id=10, process_image="E", command_line="E")
-    F = Process(process_id=12, process_image="F", command_line="F")
-    G = Process(process_id=12, process_image="G", command_line="G")
-    H = Process(process_id=12, process_image="H", command_line="H")
-
-    A.launched[B]
-    B.launched[C]
-    C.launched[D]
-
-    E.launched[F]
-    F.launched[G]
-    G.launched[H]
-
-    backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H])
-
-    return backend.graph()
-
-
 def graph_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool:
     return [n["data"] for _, n in graph.nodes(data=True)] == nodes
 

From 6adb2e15b92e708a9e4aae86a23d7ca91b468a0e Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 13:27:25 -0500
Subject: [PATCH 11/25] ChainedStatement: Adds ability to perform statement1 |
 statement2

---
 beagle/analyzers/statements/base_statement.py | 39 +++++++++++++++++--
 beagle/analyzers/statements/process.py        | 34 ++++++++++++++++
 .../statements/test_base_statement.py         | 23 +++++++++++
 3 files changed, 93 insertions(+), 3 deletions(-)
 create mode 100644 beagle/analyzers/statements/process.py

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index cf0c083a..0daa39ac 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Set, Tuple, Type
+from typing import Dict, Set, Tuple, Type
 
 import networkx as nx
 
@@ -10,14 +10,42 @@
 class Statement(object):
     def __init__(self):
         # The resulting node IDs
-        self.result_nodes: List[int] = []
+        self.result_nodes: Set[int] = set()
 
         # The resulting edge IDs
-        self.result_edges: List[Tuple[int, int, int]] = []
+        self.result_edges: Set[Tuple[int, int, int]] = set()
 
     def execute_networkx(self, G: nx.Graph):  # pragma: no cover
         raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
 
+    def __or__(self, other):
+        return ChainedStatement(self, other)
+
+
+class ChainedStatement(Statement):
+    def __init__(self, *args: Statement):
+        self.statements = args
+        super().__init__()
+
+    def execute_networkx(self, G: nx.Graph):
+        # Get the subgraphs
+
+        subgraphs = []
+        for statement in self.statements:
+            # Get the subgraphs
+            subgraphs.append(statement.execute_networkx(G))
+
+            # add the reuslt_nodes, result_edges.
+            self.result_edges |= statement.result_edges
+            self.result_nodes |= statement.result_nodes
+
+        # Compose the subgraphs
+        H = subgraphs[0]
+        for subgraph in subgraphs[1:]:
+            H = nx.compose(H, subgraph)
+
+        return H
+
 
 class NodeByProps(Statement):
     def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]):
@@ -56,6 +84,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
                 if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]):
                     subgraph_nodes.append(node_id)
 
+        self.result_nodes = set(subgraph_nodes)
         return G.subgraph(subgraph_nodes)
 
 
@@ -107,6 +136,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
                         # can stop on first match
                         break
 
+        self.result_edges = set(subgraph_edges)
         return G.edge_subgraph(subgraph_edges)
 
 
@@ -126,6 +156,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
             # Get the nodes descendants in the original graph, and add make a subgraph from those.
             subgraph_nodes |= nx.descendants(G, node_id) | {node_id}
 
+        self.result_nodes = set(subgraph_nodes)
         return G.subgraph(subgraph_nodes)
 
 
@@ -145,6 +176,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
             # Get the nodes ancestors in the original graph, and add make a subgraph from those.
             subgraph_nodes |= nx.ancestors(G, node_id) | {node_id}
 
+        self.result_nodes = set(subgraph_nodes)
         return G.subgraph(subgraph_nodes)
 
 
@@ -163,4 +195,5 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         for node_id in next_graph.nodes():
             subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id}
 
+        self.result_nodes = set(subgraph_nodes)
         return G.subgraph(subgraph_nodes)
diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py
new file mode 100644
index 00000000..f3ede113
--- /dev/null
+++ b/beagle/analyzers/statements/process.py
@@ -0,0 +1,34 @@
+from typing import Union, Type
+
+from beagle.nodes import Process
+
+from .base_statement import NodeByPropsReachable
+from .lookups import Exact, FieldLookup
+
+
+class FindProcess(NodeByPropsReachable):
+    """Finds statements relevant to a Process
+
+    Parameters
+    ----------
+    NodeByPropsReachable : [type]
+        [description]
+    """
+
+    @classmethod
+    def with_command_line(
+        cls: Type["FindProcess"], command_line: Union[str, FieldLookup]
+    ) -> "FindProcess":
+
+        if isinstance(command_line, str):
+            command_line = Exact(command_line)
+        return cls(node_type=Process, props={"command_line": command_line})
+
+    @classmethod
+    def with_process_name(
+        cls: Type["FindProcess"], process_image: Union[str, FieldLookup]
+    ) -> "FindProcess":
+
+        if isinstance(process_image, str):
+            process_image = Exact(process_image)
+        return cls(node_type=Process, props={"process_image": process_image})
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index d170c5c2..950c6339 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -183,3 +183,26 @@ def test_nodes_reachable(G5):
             Process(process_id=12, process_image="H", command_line="H"),
         ],
     )
+
+
+def test_chained_statement(G5):
+    # Both paths should show up because we use a chained statement that returns both.
+
+    Bstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
+    Gstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")})
+
+    chained = Bstatement | Gstatement
+
+    assert graph_nodes_match(
+        chained.execute_networkx(G5),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+            Process(process_id=10, process_image="E", command_line="E"),
+            Process(process_id=12, process_image="F", command_line="F"),
+            Process(process_id=12, process_image="G", command_line="G"),
+            Process(process_id=12, process_image="H", command_line="H"),
+        ],
+    )

From 26ac070b9c609eb38b5f9383ee1a0ac066801e73 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Fri, 15 Nov 2019 18:22:37 -0500
Subject: [PATCH 12/25] FindProcess: adds process queries

NodeByProps / EdgeByProps: adds support for nested queries:

```
NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": Exact("1234")}})
```
---
 beagle/analyzers/statements/base_statement.py | 154 ++++++++++++++++--
 beagle/analyzers/statements/process.py        |  64 ++++++--
 tests/analyzers/conftest.py                   |  37 ++++-
 .../statements/test_base_statement.py         |  85 ++++++++--
 tests/analyzers/statements/test_process.py    | 105 ++++++++++++
 5 files changed, 392 insertions(+), 53 deletions(-)
 create mode 100644 tests/analyzers/statements/test_process.py

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index 0daa39ac..dbcb7334 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -1,33 +1,150 @@
-from typing import Dict, Set, Tuple, Type
+from typing import Any, Dict, List, Set, Tuple, Type, Union, cast
 
 import networkx as nx
 
 from beagle.nodes import Node
 
-from .lookups import FieldLookup
+from .lookups import Exact, FieldLookup
+
+
+def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]:
+    # Ensures strings become Exact, Works on nested dicts
+    for k, v in props.items():
+        if isinstance(v, str):
+            props[k] = Exact(v)
+        elif isinstance(v, dict):
+            props[k] = _str_to_exact(v)
+
+    return props
 
 
 class Statement(object):
     def __init__(self):
+        """A statement is the base building block of a query. A statement takes as input a graph, executes,
+        and returns the next graph.
+
+        >>> G2 = statement.execute_networkx(G)
+
+        Attributes
+        ----------
+        result_nodes: Set[int]:
+            The set of node IDs which create the subgraph returned by the statement.
+        result_edges: Set[Tuple[int, int, int]]:
+            The set of (u, v, k) tuples representing the edges which created the subgraph.
+        """
         # The resulting node IDs
         self.result_nodes: Set[int] = set()
 
         # The resulting edge IDs
         self.result_edges: Set[Tuple[int, int, int]] = set()
 
+    def __or__(self, other: "Statement") -> "ChainedStatement":
+        """Allows statements to be combined through the `|` operator.
+        The result of execution is the union of both substatements.
+
+        >>> statement1 = Statement(...)
+        >>> statement2 = Statement(...)
+        >>> chained = statement1 | statement2
+
+
+        Parameters
+        ----------
+        other: Statement
+            The statement to chain with.
+
+        Returns
+        -------
+        ChainedStatement
+            A chained statement compromised of all three.
+        """
+        return ChainedStatement(self, other)
+
     def execute_networkx(self, G: nx.Graph):  # pragma: no cover
+        """Execute a statement against a `networkx` graph."""
         raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
 
-    def __or__(self, other):
-        return ChainedStatement(self, other)
+    def _test_values_with_lookups(
+        self,
+        value_to_test: Union[Node, Dict[str, Any]],
+        lookup_tests: Dict[str, Union[FieldLookup, Dict]],
+    ) -> bool:
+        """Tests a node or dictionay against a configuration of lookup_tests.
+
+        Parameters
+        ----------
+        value_to_test : Union[Node, Dict[str, Any]]
+            The node or dict to test.
+        lookup_tests : Dict[str, FieldLookup]
+            The set of lookup_tests to test.
+
+        Returns
+        -------
+        bool
+            Did all of the tests pass?
+        """
+
+        if not value_to_test:
+            return False
+
+        results: List[bool] = []
+
+        for attr_name, lookup in lookup_tests.items():
+            if isinstance(lookup, dict):
+                # recursivly check props against nested entrys (e.g is hashes dict in Process)
+                if isinstance(value_to_test, Node):
+                    results.append(
+                        self._test_values_with_lookups(
+                            value_to_test=getattr(value_to_test, attr_name), lookup_tests=lookup
+                        )
+                    )
+                else:
+                    results.append(
+                        self._test_values_with_lookups(
+                            value_to_test=value_to_test.get(attr_name, {}), lookup_tests=lookup
+                        )
+                    )
+            else:
+                if isinstance(value_to_test, Node):
+                    results.append(lookup.test(getattr(value_to_test, attr_name)))
+                else:
+                    results.append(lookup.test(value_to_test.get(attr_name)))
+
+        return any(results)
+
+
+class FactoryMixin(object):
+    """Mixin to prevent Statement Factories from calling execute methods.
+    """
+
+    def execute_networkx(self, G: nx.graph):
+        raise UserWarning("Statement factories cannot be called directly")
 
 
 class ChainedStatement(Statement):
     def __init__(self, *args: Statement):
+        """Executes multiple Statements, combining their outputs.
+
+        Parameters
+        ----------
+        args: Statement
+            One ore more statements
+        """
         self.statements = args
         super().__init__()
 
-    def execute_networkx(self, G: nx.Graph):
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        """Executes multiple statements against a `nx.Graph` object, combining their outputs into one subgraph.
+
+        Parameters
+        ----------
+        G : nx.Graph
+            Graph to execute statements against
+
+        Returns
+        -------
+        nx.Graph
+            Graph composed from the output graphs of the executed statements.
+        """
         # Get the subgraphs
 
         subgraphs = []
@@ -48,24 +165,30 @@ def execute_networkx(self, G: nx.Graph):
 
 
 class NodeByProps(Statement):
-    def __init__(self, node_type: Type[Node], props: Dict[str, FieldLookup]):
+    def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]]):
         """Searches the graph for a node of type `node_type` with properties matching `props`
 
         Parameters
         ----------
         node_type : Type[Node]
             The type of node to look for. e.g. Process
-        props : Dict[str, FieldLookup]
-            The set of props to filter the resulting nodes by.
+        props : Dict[str, Union[str, FieldLookup, Dict]]
+            The set of props to filter the resulting nodes by. Any string is transformed to `Exact` lookups.
 
         Examples
         ----------
         Filter for Process nodes, with command lines that contain `text.exe`
         >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
 
+        This may also be a nested dict.
+        >>> NodeByProps(node_type=Process, props={"hashes": {"md5": Contains("test.exe")}})
+
         """
         self.node_type = node_type
-        self.props = props
+
+        self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)
+
+        # Cast and assign.
         super().__init__()
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
@@ -81,7 +204,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
             # If node matches the desired instance.
             if isinstance(node, self.node_type):
                 # Test the node
-                if all([lookup.test(getattr(node, prop)) for prop, lookup in self.props.items()]):
+                if self._test_values_with_lookups(node, self.props):
                     subgraph_nodes.append(node_id)
 
         self.result_nodes = set(subgraph_nodes)
@@ -89,15 +212,15 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
 
 
 class EdgeByProps(Statement):
-    def __init__(self, edge_type: str, props: Dict[str, FieldLookup]):
+    def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]):
         """Searches the graph for an edge of type `edge_type` with properties matching `props`
 
         Parameters
         ----------
         edge_type : str
             The type of edge to look for. e.g. Wrote
-        props : Dict[str, FieldLookup]
-            The set of props to filter the resulting edges by.
+        props : Dict[str, Union[str, FieldLookup]]
+            The set of props to filter the resulting edges by. Any string is transformed to `Exact` lookups.
 
         Examples
         ----------
@@ -106,7 +229,8 @@ def __init__(self, edge_type: str, props: Dict[str, FieldLookup]):
 
         """
         self.edge_type = edge_type
-        self.props = props
+
+        self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         """Searches a `nx.Graph` object for edges that match type `edge_type` and contains
@@ -131,7 +255,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
                     data = [data]
 
                 for entry in data:
-                    if any([lookup.test(entry.get(prop)) for prop, lookup in self.props.items()]):
+                    if self._test_values_with_lookups(entry, self.props):
                         subgraph_edges.append((u, v, k))
                         # can stop on first match
                         break
diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py
index f3ede113..3216466c 100644
--- a/beagle/analyzers/statements/process.py
+++ b/beagle/analyzers/statements/process.py
@@ -2,33 +2,61 @@
 
 from beagle.nodes import Process
 
-from .base_statement import NodeByPropsReachable
-from .lookups import Exact, FieldLookup
+from .base_statement import NodeByPropsReachable, FactoryMixin
+from .lookups import FieldLookup
 
 
-class FindProcess(NodeByPropsReachable):
-    """Finds statements relevant to a Process
-
-    Parameters
-    ----------
-    NodeByPropsReachable : [type]
-        [description]
-    """
+class FindProcess(FactoryMixin, NodeByPropsReachable):
+    """Executes statements relevant to a Process"""
 
     @classmethod
     def with_command_line(
         cls: Type["FindProcess"], command_line: Union[str, FieldLookup]
-    ) -> "FindProcess":
+    ) -> NodeByPropsReachable:  # pragma: no cover
 
-        if isinstance(command_line, str):
-            command_line = Exact(command_line)
-        return cls(node_type=Process, props={"command_line": command_line})
+        return NodeByPropsReachable(node_type=Process, props={"command_line": command_line})
 
     @classmethod
     def with_process_name(
         cls: Type["FindProcess"], process_image: Union[str, FieldLookup]
-    ) -> "FindProcess":
+    ) -> NodeByPropsReachable:  # pragma: no cover
+
+        return NodeByPropsReachable(node_type=Process, props={"process_image": process_image})
+
+    @classmethod
+    def with_process_path(
+        cls: Type["FindProcess"], process_path: Union[str, FieldLookup]
+    ) -> NodeByPropsReachable:  # pragma: no cover
+
+        return NodeByPropsReachable(node_type=Process, props={"process_path": process_path})
+
+    @classmethod
+    def with_process_image_path(
+        cls: Type["FindProcess"], process_image_path: Union[str, FieldLookup]
+    ) -> NodeByPropsReachable:  # pragma: no cover
+
+        return NodeByPropsReachable(
+            node_type=Process, props={"process_image_path": process_image_path}
+        )
 
-        if isinstance(process_image, str):
-            process_image = Exact(process_image)
-        return cls(node_type=Process, props={"process_image": process_image})
+    @classmethod
+    def with_user(cls: Type["FindProcess"], user: Union[str, FieldLookup]) -> NodeByPropsReachable:
+        return NodeByPropsReachable(node_type=Process, props={"user": user})
+
+    @classmethod
+    def with_md5_hash(
+        cls: Type["FindProcess"], md5hash: Union[str, FieldLookup]
+    ) -> NodeByPropsReachable:  # pragma: no cover
+        return NodeByPropsReachable(node_type=Process, props={"hashes": {"md5": md5hash}})
+
+    @classmethod
+    def with_sha256_hash(
+        cls: Type["FindProcess"], md5hash: Union[str, FieldLookup]
+    ) -> NodeByPropsReachable:  # pragma: no cover
+        return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": md5hash}})
+
+    @classmethod
+    def with_sha1_hash(
+        cls: Type["FindProcess"], md5hash: Union[str, FieldLookup]
+    ) -> NodeByPropsReachable:  # pragma: no cover
+        return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": md5hash}})
diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py
index 08ad5efc..e8fecd67 100644
--- a/tests/analyzers/conftest.py
+++ b/tests/analyzers/conftest.py
@@ -1,10 +1,18 @@
-import networkx as nx
+from typing import List
 
+import networkx as nx
 import pytest
 
 from beagle.backends.networkx import NetworkX
+from beagle.nodes import File, Node, Process
+
+
+@pytest.fixture
+def graph_nodes_match():
+    def validate_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool:
+        return [n["data"] for _, n in graph.nodes(data=True)] == nodes
 
-from beagle.nodes import Node, File, Process
+    return validate_nodes_match
 
 
 @pytest.fixture
@@ -91,3 +99,28 @@ def G5():
     backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H])
 
     return backend.graph()
+
+
+@pytest.fixture
+def G6():
+    parent = Process(
+        process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer"
+    )
+    child = Process(
+        process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer"
+    )
+
+    parent2 = Process(
+        process_id=4, process_image_path="c:\\", process_image="parent.exe", user="admin"
+    )
+    child2 = Process(
+        process_id=3, process_image_path="c:\\users", process_image="child.exe", user="admin"
+    )
+
+    parent.launched[child].append(timestamp=12456)
+
+    parent2.launched[child2].append(timestamp=2)
+
+    backend = NetworkX(consolidate_edges=True, nodes=[parent, parent2, child, child2])
+
+    return backend.graph()
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index 950c6339..08d7e56f 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -1,23 +1,51 @@
-from typing import List
-import networkx as nx
-
+import pytest
 from beagle.analyzers.statements.base_statement import (
-    NodeByProps,
+    Statement,
+    FactoryMixin,
     EdgeByProps,
-    NodeByPropsDescendents,
+    NodeByProps,
     NodeByPropsAncestors,
+    NodeByPropsDescendents,
     NodeByPropsReachable,
 )
-from beagle.analyzers.statements.lookups import Contains, EndsWith, StartsWith, Exact
+from beagle.analyzers.statements.lookups import Contains, EndsWith, Exact, StartsWith
+from beagle.nodes import File, Process
+
+
+def test_factory_mixin():
+    class MyFactory(FactoryMixin):
+        pass
 
-from beagle.nodes import Node, File, Process
+    with pytest.raises(UserWarning):
+        obj = MyFactory()
+        obj.execute_networkx(None)
 
 
-def graph_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool:
-    return [n["data"] for _, n in graph.nodes(data=True)] == nodes
+def test_test_props_nested_dict():
+    s = Statement()
 
+    assert (
+        s._test_values_with_lookups(
+            value_to_test={"hashes": {"md5": "1234"}},
+            lookup_tests={"hashes": {"md5": Exact("1234")}},
+        )
+        is True
+    )
+
+    assert (
+        s._test_values_with_lookups(value_to_test={"hashes": {}}, lookup_tests={"hashes": {"md5": Exact("1234")}})
+        is False
+    )
 
-def test_one_node_prop_test(G1):
+    assert (
+        s._test_values_with_lookups(
+            value_to_test={"hashes": None}, lookup_tests={"hashes": {"md5": Exact("1234")}}
+        )
+        is False
+    )
+
+
+def test_one_node_prop_test(G1, graph_nodes_match):
     statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
 
     assert graph_nodes_match(
@@ -48,7 +76,7 @@ def test_one_node_prop_test(G1):
     assert graph_nodes_match(statement.execute_networkx(G1), [])
 
 
-def test_multiple_node_prop_test(G1):
+def test_multiple_node_prop_test(G1, graph_nodes_match):
     statement = NodeByProps(
         node_type=Process,
         props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
@@ -61,7 +89,7 @@ def test_multiple_node_prop_test(G1):
     )
 
 
-def test_node_conditional(G1):
+def test_node_conditional(G1, graph_nodes_match):
     statement = NodeByProps(
         node_type=Process,
         props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
@@ -73,8 +101,10 @@ def test_node_conditional(G1):
     )
 
 
-def test_one_edge_prop_test(G2, G3):
-    statement = EdgeByProps(edge_type="Wrote", props={"contents": Exact("foo")})
+def test_one_edge_prop_test(G2, G3, graph_nodes_match):
+
+    # String should get mapped to Exact("foo")
+    statement = EdgeByProps(edge_type="Wrote", props={"contents": "foo"})
 
     assert graph_nodes_match(
         statement.execute_networkx(G2),
@@ -99,7 +129,7 @@ def test_one_edge_prop_test(G2, G3):
     assert graph_nodes_match(statement.execute_networkx(G2), [])
 
 
-def test_node_with_descendants(G4):
+def test_node_with_descendants(G4, graph_nodes_match):
 
     # A should return A->B->C->D
     statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")})
@@ -125,7 +155,7 @@ def test_node_with_descendants(G4):
     )
 
 
-def test_node_with_ancestors(G4):
+def test_node_with_ancestors(G4, graph_nodes_match):
 
     # A should return A
     statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")})
@@ -157,7 +187,7 @@ def test_node_with_ancestors(G4):
     )
 
 
-def test_nodes_reachable(G5):
+def test_nodes_reachable(G5, graph_nodes_match):
 
     # All queries will return the full path.
     # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H
@@ -185,7 +215,7 @@ def test_nodes_reachable(G5):
     )
 
 
-def test_chained_statement(G5):
+def test_chained_statement(G5, graph_nodes_match):
     # Both paths should show up because we use a chained statement that returns both.
 
     Bstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
@@ -206,3 +236,22 @@ def test_chained_statement(G5):
             Process(process_id=12, process_image="H", command_line="H"),
         ],
     )
+
+
+def test_multiple_chained_statement(G5, graph_nodes_match):
+    # Should properly execute all three.
+
+    Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+    Astatement = NodeByProps(node_type=Process, props={"process_image": Exact("A")})
+
+    chained = Bstatement | Gstatement | Astatement
+
+    assert graph_nodes_match(
+        chained.execute_networkx(G5),
+        [
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="G", command_line="G"),
+            Process(process_id=10, process_image="A", command_line="A"),
+        ],
+    )
diff --git a/tests/analyzers/statements/test_process.py b/tests/analyzers/statements/test_process.py
new file mode 100644
index 00000000..49602beb
--- /dev/null
+++ b/tests/analyzers/statements/test_process.py
@@ -0,0 +1,105 @@
+from beagle.analyzers.statements.process import FindProcess
+from beagle.nodes import Process, File
+from beagle.analyzers.statements.lookups import EndsWith
+
+
+def test_get_by_command_line_no_lookup(G5, graph_nodes_match):
+
+    # Should return all nodes reachable from A
+    statement = FindProcess.with_command_line("A")
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G5),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+
+def test_get_by_command_line_with_lookup(G5, graph_nodes_match):
+
+    # Should return all nodes reachable from A Or G, (so all nodes)
+    statement = FindProcess.with_command_line(EndsWith("A") | EndsWith("G"))
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G5),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+            Process(process_id=10, process_image="E", command_line="E"),
+            Process(process_id=12, process_image="F", command_line="F"),
+            Process(process_id=12, process_image="G", command_line="G"),
+            Process(process_id=12, process_image="H", command_line="H"),
+        ],
+    )
+
+
+def test_get_process_name_no_lookup(G2, graph_nodes_match):
+
+    # No match, since defaults to exact.
+    statement = FindProcess.with_process_name("exe")
+    assert graph_nodes_match(statement.execute_networkx(G2), [])
+
+    statement = FindProcess.with_process_name("test.exe")
+    assert graph_nodes_match(
+        statement.execute_networkx(G2),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
+
+
+def test_get_process_name_lookup(G2, graph_nodes_match):
+
+    # Should return test.exe because it ends with exe
+    statement = FindProcess.with_process_name(EndsWith("exe"))
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G2),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
+
+
+def test_get_process_user(G6, graph_nodes_match):
+
+    # Should return test.exe because it ends with exe
+    statement = FindProcess.with_user("omer")
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G6),
+        [
+            Process(
+                process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer"
+            ),
+            Process(
+                process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer"
+            ),
+        ],
+    )
+
+
+def test_get_process_image_path(G6, graph_nodes_match):
+
+    # Should return test.exe because it ends with exe
+    statement = FindProcess.with_process_image_path("d:\\")
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G6),
+        [
+            Process(
+                process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer"
+            ),
+            Process(
+                process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer"
+            ),
+        ],
+    )

From 46c3dad2ca9515dd896bf84832d3509ef85e0043 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sat, 16 Nov 2019 09:45:30 -0500
Subject: [PATCH 13/25] Splits Node/Edge statements into separate files

---
 beagle/analyzers/statements/base_statement.py | 161 +-------------
 beagle/analyzers/statements/edge.py           |  62 ++++++
 beagle/analyzers/statements/node.py           | 115 ++++++++++
 beagle/analyzers/statements/process.py        |   3 +-
 tests/analyzers/statements/__init__.py        |   0
 .../statements/test_base_statement.py         | 209 +-----------------
 tests/analyzers/statements/test_edge.py       |  31 +++
 tests/analyzers/statements/test_node.py       | 177 +++++++++++++++
 tests/edges/__init__.py                       |   0
 9 files changed, 392 insertions(+), 366 deletions(-)
 create mode 100644 beagle/analyzers/statements/edge.py
 create mode 100644 beagle/analyzers/statements/node.py
 create mode 100644 tests/analyzers/statements/__init__.py
 create mode 100644 tests/analyzers/statements/test_edge.py
 create mode 100644 tests/analyzers/statements/test_node.py
 create mode 100644 tests/edges/__init__.py

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index dbcb7334..753c4acf 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Set, Tuple, Type, Union, cast
+from typing import Any, Dict, List, Set, Tuple, Union
 
 import networkx as nx
 
@@ -162,162 +162,3 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
             H = nx.compose(H, subgraph)
 
         return H
-
-
-class NodeByProps(Statement):
-    def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]]):
-        """Searches the graph for a node of type `node_type` with properties matching `props`
-
-        Parameters
-        ----------
-        node_type : Type[Node]
-            The type of node to look for. e.g. Process
-        props : Dict[str, Union[str, FieldLookup, Dict]]
-            The set of props to filter the resulting nodes by. Any string is transformed to `Exact` lookups.
-
-        Examples
-        ----------
-        Filter for Process nodes, with command lines that contain `text.exe`
-        >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
-
-        This may also be a nested dict.
-        >>> NodeByProps(node_type=Process, props={"hashes": {"md5": Contains("test.exe")}})
-
-        """
-        self.node_type = node_type
-
-        self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)
-
-        # Cast and assign.
-        super().__init__()
-
-    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
-        """Searches a `nx.Graph` object for nodes that match type `node_type` and contains
-        props matching `props`. This is O(V).
-        """
-        subgraph_nodes = []
-
-        # For each node
-        for node_id, data in G.nodes(data=True):
-            node = data["data"]
-
-            # If node matches the desired instance.
-            if isinstance(node, self.node_type):
-                # Test the node
-                if self._test_values_with_lookups(node, self.props):
-                    subgraph_nodes.append(node_id)
-
-        self.result_nodes = set(subgraph_nodes)
-        return G.subgraph(subgraph_nodes)
-
-
-class EdgeByProps(Statement):
-    def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]):
-        """Searches the graph for an edge of type `edge_type` with properties matching `props`
-
-        Parameters
-        ----------
-        edge_type : str
-            The type of edge to look for. e.g. Wrote
-        props : Dict[str, Union[str, FieldLookup]]
-            The set of props to filter the resulting edges by. Any string is transformed to `Exact` lookups.
-
-        Examples
-        ----------
-        Filter for TCP edges, with contents that match ".pdf"
-        >>> EdgeByProps(edge_type="TCP", props={"payload": Contains(".pdf")})
-
-        """
-        self.edge_type = edge_type
-
-        self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)
-
-    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
-        """Searches a `nx.Graph` object for edges that match type `edge_type` and contains
-        props matching `props`. This is O(E).
-
-        Returns a subgraph with all nodes contained in match edges
-        """
-        subgraph_edges = []
-
-        # For each edge
-        for u, v, k, e_data in G.edges(data=True, keys=True):
-
-            # pull out the data field from NX
-            data = e_data["data"]  # edge data
-            e_type = e_data["edge_name"]  # edge type
-
-            # If edge matches the desired instance.
-            if e_type == self.edge_type:
-
-                # Test the edge
-                if not isinstance(data, list):
-                    data = [data]
-
-                for entry in data:
-                    if self._test_values_with_lookups(entry, self.props):
-                        subgraph_edges.append((u, v, k))
-                        # can stop on first match
-                        break
-
-        self.result_edges = set(subgraph_edges)
-        return G.edge_subgraph(subgraph_edges)
-
-
-class NodeByPropsDescendents(NodeByProps):
-    """Executes a `NodeByProps` query, and returns all descendants of the matching nodes.
-    see py:meth:`NodeByProps`"""
-
-    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
-
-        # Get the next graph
-        next_graph = super().execute_networkx(G)
-
-        subgraph_nodes: Set[int] = set()
-
-        # For every node that matched `NodeByProps`
-        for node_id in next_graph.nodes():
-            # Get the nodes descendants in the original graph, and add make a subgraph from those.
-            subgraph_nodes |= nx.descendants(G, node_id) | {node_id}
-
-        self.result_nodes = set(subgraph_nodes)
-        return G.subgraph(subgraph_nodes)
-
-
-class NodeByPropsAncestors(NodeByProps):
-    """Executes a `NodeByProps` query, and returns all ascendants of the matching nodes.
-    see py:meth:`NodeByProps`"""
-
-    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
-
-        # Get the next graph
-        next_graph = super().execute_networkx(G)
-
-        subgraph_nodes: Set[int] = set()
-
-        # For every node that matched `NodeByProps`
-        for node_id in next_graph.nodes():
-            # Get the nodes ancestors in the original graph, and add make a subgraph from those.
-            subgraph_nodes |= nx.ancestors(G, node_id) | {node_id}
-
-        self.result_nodes = set(subgraph_nodes)
-        return G.subgraph(subgraph_nodes)
-
-
-class NodeByPropsReachable(NodeByProps):
-    """Executes a `NodeByProps` query, and returns all ancestors and descendants of the matching nodes.
-    see py:meth:`NodeByProps`"""
-
-    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
-
-        # Get the next graph
-        next_graph = super().execute_networkx(G)
-
-        subgraph_nodes: Set[int] = set()
-
-        # For every node that matched `NodeByProps`
-        for node_id in next_graph.nodes():
-            subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id}
-
-        self.result_nodes = set(subgraph_nodes)
-        return G.subgraph(subgraph_nodes)
diff --git a/beagle/analyzers/statements/edge.py b/beagle/analyzers/statements/edge.py
new file mode 100644
index 00000000..eddf277d
--- /dev/null
+++ b/beagle/analyzers/statements/edge.py
@@ -0,0 +1,62 @@
+from typing import Dict, Union
+
+import networkx as nx
+
+from .base_statement import Statement, _str_to_exact
+from .lookups import FieldLookup
+
+
+class EdgeByProps(Statement):
+    def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]):
+        """Searches the graph for an edge of type `edge_type` with properties matching `props`
+
+        Parameters
+        ----------
+        edge_type : str
+            The type of edge to look for. e.g. Wrote
+        props : Dict[str, Union[str, FieldLookup]]
+            The set of props to filter the resulting edges by. Any string is transformed to `Exact` lookups.
+
+        Examples
+        ----------
+        Filter for TCP edges, with contents that match ".pdf"
+        >>> EdgeByProps(edge_type="TCP", props={"payload": Contains(".pdf")})
+
+        """
+        self.edge_type = edge_type
+
+        self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)
+
+        super().__init__()
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        """Searches a `nx.Graph` object for edges that match type `edge_type` and contains
+        props matching `props`. This is O(E).
+
+        Returns a subgraph with all nodes contained in match edges
+        """
+        subgraph_edges = []
+
+        # For each edge
+        for u, v, k, e_data in G.edges(data=True, keys=True):
+
+            # pull out the data field from NX
+            data = e_data["data"]  # edge data
+            e_type = e_data["edge_name"]  # edge type
+
+            # If edge matches the desired instance.
+            if e_type == self.edge_type:
+
+                # Test the edge
+                if not isinstance(data, list):
+                    data = [data]
+
+                for entry in data:
+                    if self._test_values_with_lookups(entry, self.props):
+                        subgraph_edges.append((u, v, k))
+                        # can stop on first match
+                        self.result_edges |= {(u, v, k)}
+                        self.result_nodes |= {u, v}
+                        break
+
+        return G.edge_subgraph(subgraph_edges)
diff --git a/beagle/analyzers/statements/node.py b/beagle/analyzers/statements/node.py
new file mode 100644
index 00000000..cc314e2a
--- /dev/null
+++ b/beagle/analyzers/statements/node.py
@@ -0,0 +1,115 @@
+from typing import Dict, Set, Type, Union
+
+import networkx as nx
+
+from beagle.nodes import Node
+
+from .base_statement import Statement, _str_to_exact
+from .lookups import FieldLookup
+
+
+class NodeByProps(Statement):
+    def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]]):
+        """Searches the graph for a node of type `node_type` with properties matching `props`
+
+        Parameters
+        ----------
+        node_type : Type[Node]
+            The type of node to look for. e.g. Process
+        props : Dict[str, Union[str, FieldLookup, Dict]]
+            The set of props to filter the resulting nodes by. Any string is transformed to `Exact` lookups.
+
+        Examples
+        ----------
+        Filter for Process nodes, with command lines that contain `test.exe`
+        >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
+
+        This may also be a nested dict.
+        >>> NodeByProps(node_type=Process, props={"hashes": {"md5": Contains("test.exe")}})
+
+        """
+        self.node_type = node_type
+
+        self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)
+
+        # Cast and assign.
+        super().__init__()
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        """Searches a `nx.Graph` object for nodes that match type `node_type` and contains
+        props matching `props`. This is O(V).
+        """
+        subgraph_nodes = []
+
+        # For each node
+        for node_id, data in G.nodes(data=True):
+            node = data["data"]
+
+            # If node matches the desired instance.
+            if isinstance(node, self.node_type):
+                # Test the node
+                if self._test_values_with_lookups(node, self.props):
+                    subgraph_nodes.append(node_id)
+                    self.result_nodes |= {node_id}
+
+        return G.subgraph(subgraph_nodes)
+
+
+class NodeByPropsDescendents(NodeByProps):
+    """Executes a `NodeByProps` query, and returns all descendants of the matching nodes.
+    see py:meth:`NodeByProps`"""
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+
+        # Get the next graph
+        next_graph = super().execute_networkx(G)
+
+        subgraph_nodes: Set[int] = set()
+
+        # For every node that matched `NodeByProps`
+        for node_id in next_graph.nodes():
+            # Get the node's descendants in the original graph, and make a subgraph from those.
+            subgraph_nodes |= nx.descendants(G, node_id) | {node_id}
+
+            self.result_nodes |= {node_id}
+
+        return G.subgraph(subgraph_nodes)
+
+
+class NodeByPropsAncestors(NodeByProps):
+    """Executes a `NodeByProps` query, and returns all ancestors of the matching nodes.
+    see py:meth:`NodeByProps`"""
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+
+        # Get the next graph
+        next_graph = super().execute_networkx(G)
+
+        subgraph_nodes: Set[int] = set()
+
+        # For every node that matched `NodeByProps`
+        for node_id in next_graph.nodes():
+            # Get the node's ancestors in the original graph, and make a subgraph from those.
+            subgraph_nodes |= nx.ancestors(G, node_id) | {node_id}
+            self.result_nodes |= {node_id}
+
+        return G.subgraph(subgraph_nodes)
+
+
+class NodeByPropsReachable(NodeByProps):
+    """Executes a `NodeByProps` query, and returns all ancestors and descendants of the matching nodes.
+    see py:meth:`NodeByProps`"""
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+
+        # Get the next graph
+        next_graph = super().execute_networkx(G)
+
+        subgraph_nodes: Set[int] = set()
+
+        # For every node that matched `NodeByProps`
+        for node_id in next_graph.nodes():
+            subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id}
+            self.result_nodes |= {node_id}
+
+        return G.subgraph(subgraph_nodes)
diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py
index 3216466c..7054ae95 100644
--- a/beagle/analyzers/statements/process.py
+++ b/beagle/analyzers/statements/process.py
@@ -2,7 +2,8 @@
 
 from beagle.nodes import Process
 
-from .base_statement import NodeByPropsReachable, FactoryMixin
+from .node import NodeByPropsReachable
+from .base_statement import FactoryMixin
 from .lookups import FieldLookup
 
 
diff --git a/tests/analyzers/statements/__init__.py b/tests/analyzers/statements/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index 08d7e56f..9e201e5c 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -1,15 +1,8 @@
 import pytest
-from beagle.analyzers.statements.base_statement import (
-    Statement,
-    FactoryMixin,
-    EdgeByProps,
-    NodeByProps,
-    NodeByPropsAncestors,
-    NodeByPropsDescendents,
-    NodeByPropsReachable,
-)
-from beagle.analyzers.statements.lookups import Contains, EndsWith, Exact, StartsWith
-from beagle.nodes import File, Process
+from beagle.analyzers.statements.base_statement import FactoryMixin
+from beagle.analyzers.statements.node import NodeByPropsReachable, NodeByProps
+from beagle.analyzers.statements.lookups import Exact
+from beagle.nodes import Process
 
 
 def test_factory_mixin():
@@ -21,200 +14,6 @@ class MyFactory(FactoryMixin):
         obj.execute_networkx(None)
 
 
-def test_test_props_nested_dict():
-    s = Statement()
-
-    assert (
-        s._test_values_with_lookups(
-            value_to_test={"hashes": {"md5": "1234"}},
-            lookup_tests={"hashes": {"md5": Exact("1234")}},
-        )
-        is True
-    )
-
-    assert (
-        s._test_values_with_lookups(value_to_test={"hashes": {}}, lookup_tests={"hashes": {"md5": Exact("1234")}})
-        is False
-    )
-
-    assert (
-        s._test_values_with_lookups(
-            value_to_test={"hashes": None}, lookup_tests={"hashes": {"md5": Exact("1234")}}
-        )
-        is False
-    )
-
-
-def test_one_node_prop_test(G1, graph_nodes_match):
-    statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
-
-    assert graph_nodes_match(
-        statement.execute_networkx(G1),
-        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
-    )
-
-    # should mathc on other proc
-    statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")})
-
-    assert graph_nodes_match(
-        statement.execute_networkx(G1),
-        [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")],
-    )
-
-    # should match on both
-    statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")})
-
-    assert graph_nodes_match(
-        statement.execute_networkx(G1),
-        [
-            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
-            Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"),
-        ],
-    )
-    statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")})
-
-    assert graph_nodes_match(statement.execute_networkx(G1), [])
-
-
-def test_multiple_node_prop_test(G1, graph_nodes_match):
-    statement = NodeByProps(
-        node_type=Process,
-        props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
-    )
-
-    # Should match on `proc` from G1
-    assert graph_nodes_match(
-        statement.execute_networkx(G1),
-        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
-    )
-
-
-def test_node_conditional(G1, graph_nodes_match):
-    statement = NodeByProps(
-        node_type=Process,
-        props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
-    )
-
-    assert graph_nodes_match(
-        statement.execute_networkx(G1),
-        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
-    )
-
-
-def test_one_edge_prop_test(G2, G3, graph_nodes_match):
-
-    # String should get mapped to Exact("foo")
-    statement = EdgeByProps(edge_type="Wrote", props={"contents": "foo"})
-
-    assert graph_nodes_match(
-        statement.execute_networkx(G2),
-        [
-            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
-            File(file_name="foo", file_path="bar"),
-        ],
-    )
-
-    # Should work on the non-conslidating graph too.
-    assert graph_nodes_match(
-        statement.execute_networkx(G3),
-        [
-            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
-            File(file_name="foo", file_path="bar"),
-        ],
-    )
-
-    statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")})
-
-    # Should match on `proc` from G1
-    assert graph_nodes_match(statement.execute_networkx(G2), [])
-
-
-def test_node_with_descendants(G4, graph_nodes_match):
-
-    # A should return A->B->C->D
-    statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")})
-    assert graph_nodes_match(
-        statement.execute_networkx(G4),
-        [
-            Process(process_id=10, process_image="A", command_line="A"),
-            Process(process_id=12, process_image="B", command_line="B"),
-            Process(process_id=12, process_image="C", command_line="C"),
-            Process(process_id=12, process_image="D", command_line="D"),
-        ],
-    )
-
-    # B should return B->C->D
-    statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")})
-    assert graph_nodes_match(
-        statement.execute_networkx(G4),
-        [
-            Process(process_id=12, process_image="B", command_line="B"),
-            Process(process_id=12, process_image="C", command_line="C"),
-            Process(process_id=12, process_image="D", command_line="D"),
-        ],
-    )
-
-
-def test_node_with_ancestors(G4, graph_nodes_match):
-
-    # A should return A
-    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")})
-    assert graph_nodes_match(
-        statement.execute_networkx(G4),
-        [Process(process_id=10, process_image="A", command_line="A")],
-    )
-
-    # B should return A->B
-    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")})
-    assert graph_nodes_match(
-        statement.execute_networkx(G4),
-        [
-            Process(process_id=10, process_image="A", command_line="A"),
-            Process(process_id=12, process_image="B", command_line="B"),
-        ],
-    )
-
-    # D should return A->B->C->D
-    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")})
-    assert graph_nodes_match(
-        statement.execute_networkx(G4),
-        [
-            Process(process_id=10, process_image="A", command_line="A"),
-            Process(process_id=12, process_image="B", command_line="B"),
-            Process(process_id=12, process_image="C", command_line="C"),
-            Process(process_id=12, process_image="D", command_line="D"),
-        ],
-    )
-
-
-def test_nodes_reachable(G5, graph_nodes_match):
-
-    # All queries will return the full path.
-    # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H
-
-    statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
-    assert graph_nodes_match(
-        statement.execute_networkx(G5),
-        [
-            Process(process_id=10, process_image="A", command_line="A"),
-            Process(process_id=12, process_image="B", command_line="B"),
-            Process(process_id=12, process_image="C", command_line="C"),
-            Process(process_id=12, process_image="D", command_line="D"),
-        ],
-    )
-
-    statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")})
-    assert graph_nodes_match(
-        statement.execute_networkx(G5),
-        [
-            Process(process_id=10, process_image="E", command_line="E"),
-            Process(process_id=12, process_image="F", command_line="F"),
-            Process(process_id=12, process_image="G", command_line="G"),
-            Process(process_id=12, process_image="H", command_line="H"),
-        ],
-    )
-
-
 def test_chained_statement(G5, graph_nodes_match):
     # Both paths should show up because we use a chained statement that returns both.
 
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py
new file mode 100644
index 00000000..4faefec2
--- /dev/null
+++ b/tests/analyzers/statements/test_edge.py
@@ -0,0 +1,31 @@
+from beagle.analyzers.statements.edge import EdgeByProps
+from beagle.analyzers.statements.lookups import Exact
+from beagle.nodes import File, Process
+
+
+def test_one_edge_prop_test(G2, G3, graph_nodes_match):
+
+    # String should get mapped to Exact("foo")
+    statement = EdgeByProps(edge_type="Wrote", props={"contents": "foo"})
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G2),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
+
+    # Should work on the non-consolidating graph too.
+    assert graph_nodes_match(
+        statement.execute_networkx(G3),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
+
+    statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")})
+
+    # Should match on `proc` from G1
+    assert graph_nodes_match(statement.execute_networkx(G2), [])
diff --git a/tests/analyzers/statements/test_node.py b/tests/analyzers/statements/test_node.py
new file mode 100644
index 00000000..58aefa86
--- /dev/null
+++ b/tests/analyzers/statements/test_node.py
@@ -0,0 +1,177 @@
+from beagle.analyzers.statements.base_statement import Statement
+from beagle.analyzers.statements.lookups import Contains, EndsWith, Exact, StartsWith
+from beagle.analyzers.statements.node import (
+    NodeByProps,
+    NodeByPropsAncestors,
+    NodeByPropsDescendents,
+    NodeByPropsReachable,
+)
+from beagle.nodes import Process
+
+
+def test_test_props_nested_dict():
+    s = Statement()
+
+    assert (
+        s._test_values_with_lookups(
+            value_to_test={"hashes": {"md5": "1234"}},
+            lookup_tests={"hashes": {"md5": Exact("1234")}},
+        )
+        is True
+    )
+
+    assert (
+        s._test_values_with_lookups(
+            value_to_test={"hashes": {}}, lookup_tests={"hashes": {"md5": Exact("1234")}}
+        )
+        is False
+    )
+
+    assert (
+        s._test_values_with_lookups(
+            value_to_test={"hashes": None}, lookup_tests={"hashes": {"md5": Exact("1234")}}
+        )
+        is False
+    )
+
+
+def test_one_node_prop_test(G1, graph_nodes_match):
+    statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
+    )
+
+    # should match on other proc
+    statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")})
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")],
+    )
+
+    # should match on both
+    statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")})
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"),
+        ],
+    )
+    statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")})
+
+    assert graph_nodes_match(statement.execute_networkx(G1), [])
+
+
+def test_multiple_node_prop_test(G1, graph_nodes_match):
+    statement = NodeByProps(
+        node_type=Process,
+        props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
+    )
+
+    # Should match on `proc` from G1
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
+    )
+
+
+def test_node_conditional(G1, graph_nodes_match):
+    statement = NodeByProps(
+        node_type=Process,
+        props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
+    )
+
+    assert graph_nodes_match(
+        statement.execute_networkx(G1),
+        [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
+    )
+
+
+def test_node_with_descendants(G4, graph_nodes_match):
+
+    # A should return A->B->C->D
+    statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+    # B should return B->C->D
+    statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+
+def test_node_with_ancestors(G4, graph_nodes_match):
+
+    # A should return A
+    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [Process(process_id=10, process_image="A", command_line="A")],
+    )
+
+    # B should return A->B
+    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+        ],
+    )
+
+    # D should return A->B->C->D
+    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G4),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+
+def test_nodes_reachable(G5, graph_nodes_match):
+
+    # All queries will return the full path.
+    # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H
+
+    statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G5),
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+        ],
+    )
+
+    statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")})
+    assert graph_nodes_match(
+        statement.execute_networkx(G5),
+        [
+            Process(process_id=10, process_image="E", command_line="E"),
+            Process(process_id=12, process_image="F", command_line="F"),
+            Process(process_id=12, process_image="G", command_line="G"),
+            Process(process_id=12, process_image="H", command_line="H"),
+        ],
+    )
diff --git a/tests/edges/__init__.py b/tests/edges/__init__.py
new file mode 100644
index 00000000..e69de29b

From 56d1e2c149eae10ec12813fac12921dbb45cd580 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sat, 16 Nov 2019 14:06:26 -0500
Subject: [PATCH 14/25] Adds statement chaining using >> or << operators

---
 beagle/analyzers/statements/base_statement.py | 33 +++++++++++++++++++
 .../statements/test_base_statement.py         | 25 ++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index 753c4acf..fa9865b9 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -38,6 +38,34 @@ def __init__(self):
         # The resulting edge IDs
         self.result_edges: Set[Tuple[int, int, int]] = set()
 
+        # Set of statements that came before or after it.
+        self.downstream_statements: List[Statement] = []
+        self.upstream_statements: List[Statement] = []
+
+    def __rshift__(self, other: "Statement") -> "Statement":
+        """Implements Self >> Other == self.downstream_statements.append(other)
+
+        Parameters
+        ----------
+        other : Statement
+            The other statement to add.
+        """
+        self.downstream_statements.append(other)
+        other.upstream_statements.append(self)
+        return other
+
+    def __lshift__(self, other: "Statement") -> "Statement":
+        """Implements Self << Other == self.upstream_statements.append(other)
+
+        Parameters
+        ----------
+        other : Statement
+            The other statement to add.
+        """
+        other.downstream_statements.append(self)
+        self.upstream_statements.append(other)
+        return other
+
     def __or__(self, other: "Statement") -> "ChainedStatement":
         """Allows statements to be combined through the `|` operator.
         The result of execution is the union of both substatements.
@@ -162,3 +190,8 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
             H = nx.compose(H, subgraph)
 
         return H
+
+
+class InteremediateStatement(Statement):
+    def __init__(self):
+        pass
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index 9e201e5c..59358ca4 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -54,3 +54,28 @@ def test_multiple_chained_statement(G5, graph_nodes_match):
             Process(process_id=10, process_image="A", command_line="A"),
         ],
     )
+
+
+def test_shift_operators():
+    Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+
+    Bstatement >> Gstatement
+
+    assert Bstatement.downstream_statements == [Gstatement]
+
+    Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+
+    Bstatement << Gstatement
+
+    assert Gstatement.downstream_statements == [Bstatement]
+
+    Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+    Astatement = NodeByProps(node_type=Process, props={"process_image": Exact("A")})
+
+    Bstatement >> Gstatement
+    Bstatement >> Astatement
+
+    assert Bstatement.downstream_statements == [Gstatement, Astatement]

From e01203de96221231827cdcc19f334611ffe94f63 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sat, 16 Nov 2019 15:10:37 -0500
Subject: [PATCH 15/25] adds intermediate statements, allowing to chain actions

---
 beagle/analyzers/statements/base_statement.py | 22 +++++++--
 beagle/analyzers/statements/edge.py           | 49 +++++++++++++++++--
 beagle/analyzers/statements/node.py           |  2 +-
 beagle/analyzers/statements/process.py        |  2 +-
 tests/analyzers/statements/test_edge.py       | 25 +++++++++-
 5 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index fa9865b9..0c7cd49c 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -111,6 +111,11 @@ def _test_values_with_lookups(
             Did all of the tests pass?
         """
 
+        # Auto pass if no tests.s
+        if not lookup_tests:
+            return True
+
+        # Auto fail on empty value (given we have tests)
         if not value_to_test:
             return False
 
@@ -192,6 +197,17 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         return H
 
 
-class InteremediateStatement(Statement):
-    def __init__(self):
-        pass
+class IntermediateStatement(Statement):
+    """An IntermediateStatement is a statement which depends on a previous initial Statement to run.
+
+    For example, you may only want to find edges connected to one of the nodes identifed in `NodeByProps`.
+    """
+
+    def __init__(self, *args, **kwargs):
+        self.upstream_nodes: Set[int] = set()
+        self.upstream_edges: Set[Tuple[int, int, int]] = set()
+        super().__init__(*args, **kwargs)
+
+    def set_upstream_nodes(self, upstream_statement: Statement):
+        self.upstream_nodes |= upstream_statement.result_nodes
+        self.upstream_edges |= upstream_statement.result_edges
diff --git a/beagle/analyzers/statements/edge.py b/beagle/analyzers/statements/edge.py
index eddf277d..4ff9a907 100644
--- a/beagle/analyzers/statements/edge.py
+++ b/beagle/analyzers/statements/edge.py
@@ -2,12 +2,14 @@
 
 import networkx as nx
 
-from .base_statement import Statement, _str_to_exact
+from .base_statement import Statement, _str_to_exact, IntermediateStatement
 from .lookups import FieldLookup
 
 
 class EdgeByProps(Statement):
-    def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]):
+    def __init__(
+        self, edge_type: str, props: Dict[str, Union[str, FieldLookup]] = {}, *args, **kwargs
+    ):
         """Searches the graph for an edge of type `edge_type` with properties matching `props`
 
         Parameters
@@ -27,7 +29,7 @@ def __init__(self, edge_type: str, props: Dict[str, Union[str, FieldLookup]]):
 
         self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)
 
-        super().__init__()
+        super().__init__(*args, **kwargs)
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         """Searches a `nx.Graph` object for edges that match type `edge_type` and contains
@@ -60,3 +62,44 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
                         break
 
         return G.edge_subgraph(subgraph_edges)
+
+
+class IntermediateEdgeByProps(EdgeByProps, IntermediateStatement):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        """Searches a `nx.Graph` object for edges that match type `edge_type` and contains
+        props matching `props`. This is O(E).
+
+        Returns a subgraph with all nodes contained in match edges
+        """
+        subgraph_edges = []
+
+        for u, v, k, e_data in G.edges(
+            # Only get the edges associate with nodes from the previous step.
+            self.upstream_nodes,
+            data=True,
+            keys=True,
+        ):
+
+            # pull out the data field from NX
+            data = e_data["data"]  # edge data
+            e_type = e_data["edge_name"]  # edge type
+
+            # If edge matches the desired instance.
+            if e_type == self.edge_type:
+
+                # Test the edge
+                if not isinstance(data, list):
+                    data = [data]
+
+                for entry in data:
+                    if self._test_values_with_lookups(entry, self.props):
+                        subgraph_edges.append((u, v, k))
+                        # can stop on first match
+                        self.result_edges |= {(u, v, k)}
+                        self.result_nodes |= {u, v}
+                        break
+
+        return G.edge_subgraph(subgraph_edges)
diff --git a/beagle/analyzers/statements/node.py b/beagle/analyzers/statements/node.py
index cc314e2a..58d44fb2 100644
--- a/beagle/analyzers/statements/node.py
+++ b/beagle/analyzers/statements/node.py
@@ -9,7 +9,7 @@
 
 
 class NodeByProps(Statement):
-    def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]]):
+    def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]] = {}):
         """Searches the graph for a node of type `node_type` with properties matching `props`
 
         Parameters
diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py
index 7054ae95..f6357505 100644
--- a/beagle/analyzers/statements/process.py
+++ b/beagle/analyzers/statements/process.py
@@ -7,7 +7,7 @@
 from .lookups import FieldLookup
 
 
-class FindProcess(FactoryMixin, NodeByPropsReachable):
+class FindProcess(FactoryMixin):
     """Executes statements relevant to a Process"""
 
     @classmethod
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py
index 4faefec2..aa4964e0 100644
--- a/tests/analyzers/statements/test_edge.py
+++ b/tests/analyzers/statements/test_edge.py
@@ -1,5 +1,6 @@
-from beagle.analyzers.statements.edge import EdgeByProps
+from beagle.analyzers.statements.edge import EdgeByProps, IntermediateEdgeByProps
 from beagle.analyzers.statements.lookups import Exact
+from beagle.analyzers.statements.process import FindProcess
 from beagle.nodes import File, Process
 
 
@@ -29,3 +30,25 @@ def test_one_edge_prop_test(G2, G3, graph_nodes_match):
 
     # Should match on `proc` from G1
     assert graph_nodes_match(statement.execute_networkx(G2), [])
+
+
+def test_intermediate_edge_by_props(G5, graph_nodes_match):
+
+    # Run the first statement.
+    statement1 = FindProcess.with_command_line("B")
+    statement2 = IntermediateEdgeByProps(edge_type="Launched")
+
+    # get the subgraph.
+    G_s = statement1.execute_networkx(G5)
+
+    # Set the upstream nodes of our next statement
+    statement2.set_upstream_nodes(statement1)
+
+    # running statement two should only give us B->C
+    assert graph_nodes_match(
+        statement2.execute_networkx(G_s),
+        [
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+        ],
+    )

From 349d2da67043cf9a0204a8c2e8bffb88be1379c2 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sat, 16 Nov 2019 19:46:31 -0500
Subject: [PATCH 16/25] classmethod -> staticmethod

---
 beagle/analyzers/statements/process.py | 42 +++++++++++++++-----------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py
index f6357505..a1c6d47b 100644
--- a/beagle/analyzers/statements/process.py
+++ b/beagle/analyzers/statements/process.py
@@ -10,54 +10,60 @@
 class FindProcess(FactoryMixin):
     """Executes statements relevant to a Process"""
 
-    @classmethod
+    @staticmethod
     def with_command_line(
-        cls: Type["FindProcess"], command_line: Union[str, FieldLookup]
+        command_line: Union[str, FieldLookup]
     ) -> NodeByPropsReachable:  # pragma: no cover
 
         return NodeByPropsReachable(node_type=Process, props={"command_line": command_line})
 
-    @classmethod
+    @staticmethod
     def with_process_name(
-        cls: Type["FindProcess"], process_image: Union[str, FieldLookup]
+        process_image: Union[str, FieldLookup]
     ) -> NodeByPropsReachable:  # pragma: no cover
 
         return NodeByPropsReachable(node_type=Process, props={"process_image": process_image})
 
-    @classmethod
+    @staticmethod
     def with_process_path(
-        cls: Type["FindProcess"], process_path: Union[str, FieldLookup]
+        process_path: Union[str, FieldLookup]
     ) -> NodeByPropsReachable:  # pragma: no cover
 
         return NodeByPropsReachable(node_type=Process, props={"process_path": process_path})
 
-    @classmethod
+    @staticmethod
     def with_process_image_path(
-        cls: Type["FindProcess"], process_image_path: Union[str, FieldLookup]
+        process_image_path: Union[str, FieldLookup]
     ) -> NodeByPropsReachable:  # pragma: no cover
 
         return NodeByPropsReachable(
             node_type=Process, props={"process_image_path": process_image_path}
         )
 
-    @classmethod
-    def with_user(cls: Type["FindProcess"], user: Union[str, FieldLookup]) -> NodeByPropsReachable:
+    @staticmethod
+    def with_user(user: Union[str, FieldLookup]) -> NodeByPropsReachable:
+
         return NodeByPropsReachable(node_type=Process, props={"user": user})
 
-    @classmethod
+    @staticmethod
     def with_md5_hash(
-        cls: Type["FindProcess"], md5hash: Union[str, FieldLookup]
+        md5hash: Union[str, FieldLookup]
     ) -> NodeByPropsReachable:  # pragma: no cover
+
         return NodeByPropsReachable(node_type=Process, props={"hashes": {"md5": md5hash}})
 
-    @classmethod
+    @staticmethod
     def with_sha256_hash(
-        cls: Type["FindProcess"], md5hash: Union[str, FieldLookup]
+        sha256hash: Union[str, FieldLookup]
     ) -> NodeByPropsReachable:  # pragma: no cover
-        return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": md5hash}})
 
-    @classmethod
+        return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": sha256hash}})
+
+    @staticmethod
     def with_sha1_hash(
-        cls: Type["FindProcess"], md5hash: Union[str, FieldLookup]
+        sha1hash: Union[str, FieldLookup]
     ) -> NodeByPropsReachable:  # pragma: no cover
-        return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": md5hash}})
+
+        return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}})
+
+    def launched_by():

From 7a71a65a299aa6ef493b4da24a80627ec8ea290a Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sat, 16 Nov 2019 19:53:58 -0500
Subject: [PATCH 17/25] Analyzer: Class to execute statements

---
 beagle/analyzers/base_analyzer.py             | 37 ++++++++++++++++++-
 beagle/analyzers/statements/base_statement.py | 25 +++++++------
 beagle/analyzers/statements/edge.py           |  8 +++-
 beagle/analyzers/statements/process.py        | 13 ++++---
 tests/analyzers/test_base_analyzer.py         | 25 +++++++++++++
 5 files changed, 88 insertions(+), 20 deletions(-)
 create mode 100644 tests/analyzers/test_base_analyzer.py

diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py
index 790a006c..a1eff422 100644
--- a/beagle/analyzers/base_analyzer.py
+++ b/beagle/analyzers/base_analyzer.py
@@ -1,5 +1,40 @@
+from typing import Type, cast
+
+import networkx as nx
+
+from beagle.analyzers.statements.base_statement import Statement
+from beagle.backends import Backend, NetworkX
+
+
 class Analyzer(object):
-    def __init__(self, name: str, description: str, score: int):
+    def __init__(self, name: str, description: str, score: int, statement: Statement):
         self.name = name
         self.description = description
         self.score = score
+
+        # Make sure we get the start.
+        while statement.upstream_statement is not None:
+            statement = statement.upstream_statement
+
+        self.statement: Statement = statement
+
+    def run(self, backend: Type[Backend]):
+        if isinstance(backend, NetworkX):
+            backend = cast(NetworkX, backend)
+            self.run_networkx(backend.G)
+
+    def run_networkx(self, G: nx.Graph) -> nx.Graph:
+
+        # H is a copy of our original graph.
+        H = G.copy()
+
+        current_statement = self.statement
+
+        while current_statement is not None:
+            # Run the statement.
+            H = current_statement.execute_networkx(H)
+
+            # Get the next statement, and execute
+            current_statement = current_statement.downstream_statement
+
+        return H
diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index 0c7cd49c..a069d5a1 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -39,31 +39,31 @@ def __init__(self):
         self.result_edges: Set[Tuple[int, int, int]] = set()
 
         # Set of statements that came before or after it.
-        self.downstream_statements: List[Statement] = []
-        self.upstream_statements: List[Statement] = []
+        self.downstream_statement: Statement = None
+        self.upstream_statement: Statement = None
 
     def __rshift__(self, other: "Statement") -> "Statement":
-        """Implements Self >> Other == self.downstream_statements.append(other)
+        """Implements Self >> Other == self.downstream_statements = other
 
         Parameters
         ----------
         other : Statement
             The other statement to add.
         """
-        self.downstream_statements.append(other)
-        other.upstream_statements.append(self)
+        self.downstream_statement = other
+        other.upstream_statement = self
         return other
 
     def __lshift__(self, other: "Statement") -> "Statement":
-        """Implements Self << Other == self.upstream_statements.append(other)
+        """Implements Self << Other == self.upstream_statements = other
 
         Parameters
         ----------
         other : Statement
             The other statement to add.
         """
-        other.downstream_statements.append(self)
-        self.upstream_statements.append(other)
+        other.downstream_statement = self
+        self.upstream_statement = other
         return other
 
     def __or__(self, other: "Statement") -> "ChainedStatement":
@@ -208,6 +208,9 @@ def __init__(self, *args, **kwargs):
         self.upstream_edges: Set[Tuple[int, int, int]] = set()
         super().__init__(*args, **kwargs)
 
-    def set_upstream_nodes(self, upstream_statement: Statement):
-        self.upstream_nodes |= upstream_statement.result_nodes
-        self.upstream_edges |= upstream_statement.result_edges
+    def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]:
+        return self.upstream_statement.result_nodes, self.upstream_statement.result_edges
+
+    def set_upstream_nodes(self):
+        self.upstream_nodes |= self.upstream_statement.result_nodes
+        self.upstream_edges |= self.upstream_statement.result_edges
diff --git a/beagle/analyzers/statements/edge.py b/beagle/analyzers/statements/edge.py
index 4ff9a907..444879bf 100644
--- a/beagle/analyzers/statements/edge.py
+++ b/beagle/analyzers/statements/edge.py
@@ -29,7 +29,7 @@ def __init__(
 
         self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)
 
-        super().__init__(*args, **kwargs)
+        super().__init__()
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         """Searches a `nx.Graph` object for edges that match type `edge_type` and contains
@@ -74,11 +74,15 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
 
         Returns a subgraph with all nodes contained in match edges
         """
+
+        # Grab upstream information
+        upstream_nodes, _ = self.get_upstream_results()
+
         subgraph_edges = []
 
         for u, v, k, e_data in G.edges(
             # Only get the edges associate with nodes from the previous step.
-            self.upstream_nodes,
+            upstream_nodes,
             data=True,
             keys=True,
         ):
diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/statements/process.py
index a1c6d47b..52758584 100644
--- a/beagle/analyzers/statements/process.py
+++ b/beagle/analyzers/statements/process.py
@@ -1,10 +1,11 @@
-from typing import Union, Type
+from typing import Union
 
 from beagle.nodes import Process
 
-from .node import NodeByPropsReachable
 from .base_statement import FactoryMixin
+from .edge import IntermediateEdgeByProps
 from .lookups import FieldLookup
+from .node import NodeByPropsReachable
 
 
 class FindProcess(FactoryMixin):
@@ -46,9 +47,7 @@ def with_user(user: Union[str, FieldLookup]) -> NodeByPropsReachable:
         return NodeByPropsReachable(node_type=Process, props={"user": user})
 
     @staticmethod
-    def with_md5_hash(
-        md5hash: Union[str, FieldLookup]
-    ) -> NodeByPropsReachable:  # pragma: no cover
+    def with_md5_hash(md5hash: Union[str, FieldLookup]) -> NodeByPropsReachable:  # pragma: no cover
 
         return NodeByPropsReachable(node_type=Process, props={"hashes": {"md5": md5hash}})
 
@@ -66,4 +65,6 @@ def with_sha1_hash(
 
         return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}})
 
-    def launched_by():
+    @staticmethod
+    def that_was_launched():
+        return IntermediateEdgeByProps(edge_type="Launched")
diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py
new file mode 100644
index 00000000..461a9f8c
--- /dev/null
+++ b/tests/analyzers/test_base_analyzer.py
@@ -0,0 +1,25 @@
+from beagle.analyzers.base_analyzer import Analyzer
+from beagle.analyzers.statements.edge import IntermediateEdgeByProps
+
+from beagle.analyzers.statements.process import FindProcess
+from beagle.nodes import Process
+
+
+def test_analyzer_two_statements(G5, graph_nodes_match):
+
+    analyzer = Analyzer(
+        name="test_analyzer_two_statements",
+        description="test_analyzer_two_statements",
+        score=0,
+        statement=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(),
+    )
+
+    G = analyzer.run_networkx(G5)
+
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+        ],
+    )

From 1fc6e3d3e5f99ea2c624c42a6528e365bdc8e8b4 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sat, 16 Nov 2019 20:15:16 -0500
Subject: [PATCH 18/25] Fixes unit tests

---
 beagle/analyzers/statements/base_statement.py |  4 +--
 tests/analyzers/conftest.py                   | 21 ++++++++++++++-
 .../statements/test_base_statement.py         | 13 ++-------
 tests/analyzers/statements/test_edge.py       |  5 ++--
 tests/analyzers/test_base_analyzer.py         | 27 +++++++++++++++++--
 5 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/statements/base_statement.py
index a069d5a1..f6fe36b0 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/statements/base_statement.py
@@ -43,7 +43,7 @@ def __init__(self):
         self.upstream_statement: Statement = None
 
     def __rshift__(self, other: "Statement") -> "Statement":
-        """Implements Self >> Other == self.downstream_statements = other
+        """Implements Self >> Other == self.downstream_statement = other
 
         Parameters
         ----------
@@ -55,7 +55,7 @@ def __rshift__(self, other: "Statement") -> "Statement":
         return other
 
     def __lshift__(self, other: "Statement") -> "Statement":
-        """Implements Self << Other == self.upstream_statements = other
+        """Implements Self << Other == self.upstream_statement = other
 
         Parameters
         ----------
diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py
index e8fecd67..cfd10c58 100644
--- a/tests/analyzers/conftest.py
+++ b/tests/analyzers/conftest.py
@@ -96,7 +96,7 @@ def G5():
     F.launched[G]
     G.launched[H]
 
-    backend = NetworkX(consolidate_edges=True, nodes=[A, B, B, C, E, F, G, H])
+    backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D, E, F, G, H])
 
     return backend.graph()
 
@@ -124,3 +124,22 @@ def G6():
     backend = NetworkX(consolidate_edges=True, nodes=[parent, parent2, child, child2])
 
     return backend.graph()
+
+
+@pytest.fixture
+def G7():
+    # A graph with two, *disconnected* four process tree:
+    # A -> B -> C -> D
+    # E -> F -> G -> H
+    A = Process(process_id=10, process_image="A", command_line="A")
+    B = Process(process_id=12, process_image="B", command_line="B")
+    C = Process(process_id=12, process_image="C", command_line="C")
+    D = Process(process_id=12, process_image="D", command_line="D")
+
+    A.launched[B]
+    B.launched[C]
+    C.launched[D]
+
+    backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D])
+
+    return backend.graph()
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index 59358ca4..586c772c 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -62,20 +62,11 @@ def test_shift_operators():
 
     Bstatement >> Gstatement
 
-    assert Bstatement.downstream_statements == [Gstatement]
+    assert Bstatement.downstream_statement == Gstatement
 
     Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
     Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
 
     Bstatement << Gstatement
 
-    assert Gstatement.downstream_statements == [Bstatement]
-
-    Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
-    Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
-    Astatement = NodeByProps(node_type=Process, props={"process_image": Exact("A")})
-
-    Bstatement >> Gstatement
-    Bstatement >> Astatement
-
-    assert Bstatement.downstream_statements == [Gstatement, Astatement]
+    assert Gstatement.downstream_statement == Bstatement
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py
index aa4964e0..6440aae1 100644
--- a/tests/analyzers/statements/test_edge.py
+++ b/tests/analyzers/statements/test_edge.py
@@ -38,12 +38,11 @@ def test_intermediate_edge_by_props(G5, graph_nodes_match):
     statement1 = FindProcess.with_command_line("B")
     statement2 = IntermediateEdgeByProps(edge_type="Launched")
 
+    statement1 >> statement2
+
     # get the subgraph.
     G_s = statement1.execute_networkx(G5)
 
-    # Set the upstream nodes of our next statement
-    statement2.set_upstream_nodes(statement1)
-
     # running statement two should only give us B->C
     assert graph_nodes_match(
         statement2.execute_networkx(G_s),
diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py
index 461a9f8c..de68b1d9 100644
--- a/tests/analyzers/test_base_analyzer.py
+++ b/tests/analyzers/test_base_analyzer.py
@@ -1,6 +1,4 @@
 from beagle.analyzers.base_analyzer import Analyzer
-from beagle.analyzers.statements.edge import IntermediateEdgeByProps
-
 from beagle.analyzers.statements.process import FindProcess
 from beagle.nodes import Process
 
@@ -23,3 +21,28 @@ def test_analyzer_two_statements(G5, graph_nodes_match):
             Process(process_id=12, process_image="C", command_line="C"),
         ],
     )
+
+
+def test_analyzer_or_statement_statements(G5, graph_nodes_match):
+
+    query = (
+        FindProcess.with_command_line("B") | FindProcess.with_command_line("A")
+    ) >> FindProcess.that_was_launched()
+
+    analyzer = Analyzer(
+        name="test_analyzer_two_statements",
+        description="test_analyzer_two_statements",
+        score=0,
+        statement=query,
+    )
+
+    G = analyzer.run_networkx(G5)
+
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+        ],
+    )

From db98490451ee753006cc28aa2e6f4824eb936d2d Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sat, 16 Nov 2019 20:28:16 -0500
Subject: [PATCH 19/25] Tests edges with tree-structured graphs

---
 tests/analyzers/conftest.py             | 24 ++++++++++++++++------
 tests/analyzers/statements/test_edge.py | 27 +++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py
index cfd10c58..654bf74f 100644
--- a/tests/analyzers/conftest.py
+++ b/tests/analyzers/conftest.py
@@ -128,18 +128,30 @@ def G6():
 
 @pytest.fixture
 def G7():
-    # A graph with two, *disconnected* four process tree:
-    # A -> B -> C -> D
-    # E -> F -> G -> H
+    # A graph that's a tree of process launches
+    #               A
+    #              / \
+    #             B   C
+    #           /  \  / \
+    #          D   E  F  G
+
     A = Process(process_id=10, process_image="A", command_line="A")
     B = Process(process_id=12, process_image="B", command_line="B")
     C = Process(process_id=12, process_image="C", command_line="C")
     D = Process(process_id=12, process_image="D", command_line="D")
+    E = Process(process_id=10, process_image="E", command_line="E")
+    F = Process(process_id=12, process_image="F", command_line="F")
+    G = Process(process_id=12, process_image="G", command_line="G")
 
     A.launched[B]
-    B.launched[C]
-    C.launched[D]
+    A.launched[C]
+
+    B.launched[D]
+    B.launched[E]
+
+    C.launched[F]
+    C.launched[G]
 
-    backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D])
+    backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D, E, F, G])
 
     return backend.graph()
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py
index 6440aae1..5abdbf06 100644
--- a/tests/analyzers/statements/test_edge.py
+++ b/tests/analyzers/statements/test_edge.py
@@ -2,6 +2,7 @@
 from beagle.analyzers.statements.lookups import Exact
 from beagle.analyzers.statements.process import FindProcess
 from beagle.nodes import File, Process
+from beagle.analyzers.base_analyzer import Analyzer
 
 
 def test_one_edge_prop_test(G2, G3, graph_nodes_match):
@@ -51,3 +52,29 @@ def test_intermediate_edge_by_props(G5, graph_nodes_match):
             Process(process_id=12, process_image="C", command_line="C"),
         ],
     )
+
+
+def test_intermediate_edge_all_candidates_found(G7, graph_nodes_match):
+
+    analyzer = Analyzer(
+        name="test_intermediate_edge_all_candidates_found",
+        description="test_intermediate_edge_all_candidates_found",
+        score=0,
+        statement=FindProcess.with_command_line("C") >> FindProcess.that_was_launched(),
+    )
+
+    G = analyzer.run_networkx(G7)
+
+    # should return
+    #             C
+    #            / \
+    #           F  G
+
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="F", command_line="F"),
+            Process(process_id=12, process_image="G", command_line="G"),
+        ],
+    )

From f6b602721e1993205cac5f7d091022d351fc579a Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sun, 17 Nov 2019 15:01:09 -0500
Subject: [PATCH 20/25] Renames Statement as Query

---
 beagle/analyzers/base_analyzer.py             |  22 ++--
 .../{statements => queries}/__init__.py       |   0
 .../base_query.py}                            | 102 +++++++++---------
 .../analyzers/{statements => queries}/edge.py |   6 +-
 .../{statements => queries}/lookups.py        |   0
 .../analyzers/{statements => queries}/node.py |   4 +-
 .../{statements => queries}/process.py        |   4 +-
 .../statements/test_base_statement.py         |  42 ++++----
 tests/analyzers/statements/test_edge.py       |  32 +++---
 tests/analyzers/statements/test_lookups.py    |   2 +-
 tests/analyzers/statements/test_node.py       |  60 +++++------
 tests/analyzers/statements/test_process.py    |  32 +++---
 tests/analyzers/test_base_analyzer.py         |  18 ++--
 13 files changed, 162 insertions(+), 162 deletions(-)
 rename beagle/analyzers/{statements => queries}/__init__.py (100%)
 rename beagle/analyzers/{statements/base_statement.py => queries/base_query.py} (62%)
 rename beagle/analyzers/{statements => queries}/edge.py (95%)
 rename beagle/analyzers/{statements => queries}/lookups.py (100%)
 rename beagle/analyzers/{statements => queries}/node.py (97%)
 rename beagle/analyzers/{statements => queries}/process.py (95%)

diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py
index a1eff422..5107d967 100644
--- a/beagle/analyzers/base_analyzer.py
+++ b/beagle/analyzers/base_analyzer.py
@@ -2,21 +2,21 @@
 
 import networkx as nx
 
-from beagle.analyzers.statements.base_statement import Statement
+from beagle.analyzers.queries.base_query import Query
 from beagle.backends import Backend, NetworkX
 
 
 class Analyzer(object):
-    def __init__(self, name: str, description: str, score: int, statement: Statement):
+    def __init__(self, name: str, description: str, score: int, query: Query):
         self.name = name
         self.description = description
         self.score = score
 
         # Make sure we get the start.
-        while statement.upstream_statement is not None:
-            statement = statement.upstream_statement
+        while query.upstream_query is not None:
+            query = query.upstream_query
 
-        self.statement: Statement = statement
+        self.query: Query = query
 
     def run(self, backend: Type[Backend]):
         if isinstance(backend, NetworkX):
@@ -28,13 +28,13 @@ def run_networkx(self, G: nx.Graph) -> nx.Graph:
         # H is a copy of our original graph.
         H = G.copy()
 
-        current_statement = self.statement
+        current_query = self.query
 
-        while current_statement is not None:
-            # Run the statement.
-            H = current_statement.execute_networkx(H)
+        while current_query is not None:
+            # Run the query.
+            H = current_query.execute_networkx(H)
 
-            # Get the next statement, and execute
-            current_statement = current_statement.downstream_statement
+            # Get the next query, and execute
+            current_query = current_query.downstream_query
 
         return H
diff --git a/beagle/analyzers/statements/__init__.py b/beagle/analyzers/queries/__init__.py
similarity index 100%
rename from beagle/analyzers/statements/__init__.py
rename to beagle/analyzers/queries/__init__.py
diff --git a/beagle/analyzers/statements/base_statement.py b/beagle/analyzers/queries/base_query.py
similarity index 62%
rename from beagle/analyzers/statements/base_statement.py
rename to beagle/analyzers/queries/base_query.py
index f6fe36b0..26f85199 100644
--- a/beagle/analyzers/statements/base_statement.py
+++ b/beagle/analyzers/queries/base_query.py
@@ -18,17 +18,17 @@ def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]:
     return props
 
 
-class Statement(object):
+class Query(object):
     def __init__(self):
-        """A statement is the base building block of a query. A statement takes as input a graph, executes,
+        """A query is the base building block of a query. A query takes as input a graph, executes,
         and returns the next graph.
 
-        >>> G2 = statement.execute_networkx(G)
+        >>> G2 = query.execute_networkx(G)
 
         Attributes
         ----------
         result_nodes: Set[int]:
-            The set of node IDs which create the subgraph returned by the statement.
+            The set of node IDs which create the subgraph returned by the query.
         result_edges: Set[Tuple[int, int, int]]:
             The set of (u, v, k) tuples representing the edges which created the subgraph.
         """
@@ -38,57 +38,57 @@ def __init__(self):
         # The resulting edge IDs
         self.result_edges: Set[Tuple[int, int, int]] = set()
 
-        # Set of statements that came before or after it.
-        self.downstream_statement: Statement = None
-        self.upstream_statement: Statement = None
+        # Set of queries that came before or after it.
+        self.downstream_query: Query = None
+        self.upstream_query: Query = None
 
-    def __rshift__(self, other: "Statement") -> "Statement":
-        """Implements Self >> Other == self.downstream_statement = other
+    def __rshift__(self, other: "Query") -> "Query":
+        """Implements Self >> Other == self.downstream_query = other
 
         Parameters
         ----------
-        other : Statement
-            The other statement to add.
+        other : Query
+            The other query to add.
         """
-        self.downstream_statement = other
-        other.upstream_statement = self
+        self.downstream_query = other
+        other.upstream_query = self
         return other
 
-    def __lshift__(self, other: "Statement") -> "Statement":
-        """Implements Self << Other == self.upstream_statement = other
+    def __lshift__(self, other: "Query") -> "Query":
+        """Implements Self << Other == self.upstream_query = other
 
         Parameters
         ----------
-        other : Statement
-            The other statement to add.
+        other : Query
+            The other query to add.
         """
-        other.downstream_statement = self
-        self.upstream_statement = other
+        other.downstream_query = self
+        self.upstream_query = other
         return other
 
-    def __or__(self, other: "Statement") -> "ChainedStatement":
-        """Allows statements to be combined through the `|` operator.
-        The result of execution is the union of both substatements.
+    def __or__(self, other: "Query") -> "ChainedQuery":
+        """Allows queries to be combined through the `|` operator.
+        The result of execution is the union of both subqueries.
 
-        >>> statement1 = Statement(...)
-        >>> statement2 = Statement(...)
-        >>> chained = statement1 | statement2
+        >>> query1 = Query(...)
+        >>> query2 = Query(...)
+        >>> chained = query1 | query2
 
 
         Parameters
         ----------
-        other: Statement
-            The statement to chain with.
+        other: Query
+            The query to chain with.
 
         Returns
         -------
-        ChainedStatement
-            A chained statement compromised of all three.
+        ChainedQuery
+            A chained query compromised of all three.
         """
-        return ChainedStatement(self, other)
+        return ChainedQuery(self, other)
 
     def execute_networkx(self, G: nx.Graph):  # pragma: no cover
-        """Execute a statement against a `networkx` graph."""
+        """Execute a query against a `networkx` graph."""
         raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
 
     def _test_values_with_lookups(
@@ -146,48 +146,48 @@ def _test_values_with_lookups(
 
 
 class FactoryMixin(object):
-    """Mixin to prevent Statement Factories from calling execute methods.
+    """Mixin to prevent Query Factories from calling execute methods.
     """
 
     def execute_networkx(self, G: nx.graph):
-        raise UserWarning("Statement factories cannot be called directly")
+        raise UserWarning("Query factories cannot be called directly")
 
 
-class ChainedStatement(Statement):
-    def __init__(self, *args: Statement):
-        """Executes multiple Statements, combining their outputs.
+class ChainedQuery(Query):
+    def __init__(self, *args: Query):
+        """Executes multiple Querys, combining their outputs.
 
         Parameters
         ----------
-        args: Statement
-            One ore more statements
+        args: Query
+            One ore more queries
         """
-        self.statements = args
+        self.queries = args
         super().__init__()
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
-        """Executes multiple statements against a `nx.Graph` object, combining their outputs into one subgraph.
+        """Executes multiple queries against a `nx.Graph` object, combining their outputs into one subgraph.
 
         Parameters
         ----------
         G : nx.Graph
-            Graph to execute statements against
+            Graph to execute queries against
 
         Returns
         -------
         nx.Graph
-            Graph composed from the output graphs of the executed statements.
+            Graph composed from the output graphs of the executed queries.
         """
         # Get the subgraphs
 
         subgraphs = []
-        for statement in self.statements:
+        for query in self.queries:
             # Get the subgraphs
-            subgraphs.append(statement.execute_networkx(G))
+            subgraphs.append(query.execute_networkx(G))
 
             # add the reuslt_nodes, result_edges.
-            self.result_edges |= statement.result_edges
-            self.result_nodes |= statement.result_nodes
+            self.result_edges |= query.result_edges
+            self.result_nodes |= query.result_nodes
 
         # Compose the subgraphs
         H = subgraphs[0]
@@ -197,8 +197,8 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         return H
 
 
-class IntermediateStatement(Statement):
-    """An IntermediateStatement is a statement which depends on a previous initial Statement to run.
+class IntermediateQuery(Query):
+    """An IntermediateQuery is a query which depends on a previous initial Query to run.
 
     For example, you may only want to find edges connected to one of the nodes identifed in `NodeByProps`.
     """
@@ -209,8 +209,8 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]:
-        return self.upstream_statement.result_nodes, self.upstream_statement.result_edges
+        return self.upstream_query.result_nodes, self.upstream_query.result_edges
 
     def set_upstream_nodes(self):
-        self.upstream_nodes |= self.upstream_statement.result_nodes
-        self.upstream_edges |= self.upstream_statement.result_edges
+        self.upstream_nodes |= self.upstream_query.result_nodes
+        self.upstream_edges |= self.upstream_query.result_edges
diff --git a/beagle/analyzers/statements/edge.py b/beagle/analyzers/queries/edge.py
similarity index 95%
rename from beagle/analyzers/statements/edge.py
rename to beagle/analyzers/queries/edge.py
index 444879bf..5190ee67 100644
--- a/beagle/analyzers/statements/edge.py
+++ b/beagle/analyzers/queries/edge.py
@@ -2,11 +2,11 @@
 
 import networkx as nx
 
-from .base_statement import Statement, _str_to_exact, IntermediateStatement
+from .base_query import Query, _str_to_exact, IntermediateQuery
 from .lookups import FieldLookup
 
 
-class EdgeByProps(Statement):
+class EdgeByProps(Query):
     def __init__(
         self, edge_type: str, props: Dict[str, Union[str, FieldLookup]] = {}, *args, **kwargs
     ):
@@ -64,7 +64,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         return G.edge_subgraph(subgraph_edges)
 
 
-class IntermediateEdgeByProps(EdgeByProps, IntermediateStatement):
+class IntermediateEdgeByProps(EdgeByProps, IntermediateQuery):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
diff --git a/beagle/analyzers/statements/lookups.py b/beagle/analyzers/queries/lookups.py
similarity index 100%
rename from beagle/analyzers/statements/lookups.py
rename to beagle/analyzers/queries/lookups.py
diff --git a/beagle/analyzers/statements/node.py b/beagle/analyzers/queries/node.py
similarity index 97%
rename from beagle/analyzers/statements/node.py
rename to beagle/analyzers/queries/node.py
index 58d44fb2..138e4177 100644
--- a/beagle/analyzers/statements/node.py
+++ b/beagle/analyzers/queries/node.py
@@ -4,11 +4,11 @@
 
 from beagle.nodes import Node
 
-from .base_statement import Statement, _str_to_exact
+from .base_query import Query, _str_to_exact
 from .lookups import FieldLookup
 
 
-class NodeByProps(Statement):
+class NodeByProps(Query):
     def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]] = {}):
         """Searches the graph for a node of type `node_type` with properties matching `props`
 
diff --git a/beagle/analyzers/statements/process.py b/beagle/analyzers/queries/process.py
similarity index 95%
rename from beagle/analyzers/statements/process.py
rename to beagle/analyzers/queries/process.py
index 52758584..77de2ea1 100644
--- a/beagle/analyzers/statements/process.py
+++ b/beagle/analyzers/queries/process.py
@@ -2,14 +2,14 @@
 
 from beagle.nodes import Process
 
-from .base_statement import FactoryMixin
+from .base_query import FactoryMixin
 from .edge import IntermediateEdgeByProps
 from .lookups import FieldLookup
 from .node import NodeByPropsReachable
 
 
 class FindProcess(FactoryMixin):
-    """Executes statements relevant to a Process"""
+    """Executes queries relevant to a Process"""
 
     @staticmethod
     def with_command_line(
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_statement.py
index 586c772c..58419677 100644
--- a/tests/analyzers/statements/test_base_statement.py
+++ b/tests/analyzers/statements/test_base_statement.py
@@ -1,7 +1,7 @@
 import pytest
-from beagle.analyzers.statements.base_statement import FactoryMixin
-from beagle.analyzers.statements.node import NodeByPropsReachable, NodeByProps
-from beagle.analyzers.statements.lookups import Exact
+from beagle.analyzers.queries.base_query import FactoryMixin
+from beagle.analyzers.queries.node import NodeByPropsReachable, NodeByProps
+from beagle.analyzers.queries.lookups import Exact
 from beagle.nodes import Process
 
 
@@ -14,13 +14,13 @@ class MyFactory(FactoryMixin):
         obj.execute_networkx(None)
 
 
-def test_chained_statement(G5, graph_nodes_match):
-    # Both paths should show up because we use a chained statement that returns both.
+def test_chained_query(G5, graph_nodes_match):
+    # Both paths should show up because we use a chained query that returns both.
 
-    Bstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
-    Gstatement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")})
+    Bquery = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
+    Gquery = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")})
 
-    chained = Bstatement | Gstatement
+    chained = Bquery | Gquery
 
     assert graph_nodes_match(
         chained.execute_networkx(G5),
@@ -37,14 +37,14 @@ def test_chained_statement(G5, graph_nodes_match):
     )
 
 
-def test_multiple_chained_statement(G5, graph_nodes_match):
+def test_multiple_chained_query(G5, graph_nodes_match):
     # Should properly execute all three.
 
-    Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
-    Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
-    Astatement = NodeByProps(node_type=Process, props={"process_image": Exact("A")})
+    Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+    Aquery = NodeByProps(node_type=Process, props={"process_image": Exact("A")})
 
-    chained = Bstatement | Gstatement | Astatement
+    chained = Bquery | Gquery | Aquery
 
     assert graph_nodes_match(
         chained.execute_networkx(G5),
@@ -57,16 +57,16 @@ def test_multiple_chained_statement(G5, graph_nodes_match):
 
 
 def test_shift_operators():
-    Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
-    Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+    Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
 
-    Bstatement >> Gstatement
+    Bquery >> Gquery
 
-    assert Bstatement.downstream_statement == Gstatement
+    assert Bquery.downstream_query == Gquery
 
-    Bstatement = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
-    Gstatement = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+    Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
 
-    Bstatement << Gstatement
+    Bquery << Gquery
 
-    assert Gstatement.downstream_statement == Bstatement
+    assert Gquery.downstream_query == Bquery
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py
index 5abdbf06..c7b51412 100644
--- a/tests/analyzers/statements/test_edge.py
+++ b/tests/analyzers/statements/test_edge.py
@@ -1,6 +1,6 @@
-from beagle.analyzers.statements.edge import EdgeByProps, IntermediateEdgeByProps
-from beagle.analyzers.statements.lookups import Exact
-from beagle.analyzers.statements.process import FindProcess
+from beagle.analyzers.queries.edge import EdgeByProps, IntermediateEdgeByProps
+from beagle.analyzers.queries.lookups import Exact
+from beagle.analyzers.queries.process import FindProcess
 from beagle.nodes import File, Process
 from beagle.analyzers.base_analyzer import Analyzer
 
@@ -8,10 +8,10 @@
 def test_one_edge_prop_test(G2, G3, graph_nodes_match):
 
     # String should get mapped to Exact("foo")
-    statement = EdgeByProps(edge_type="Wrote", props={"contents": "foo"})
+    query = EdgeByProps(edge_type="Wrote", props={"contents": "foo"})
 
     assert graph_nodes_match(
-        statement.execute_networkx(G2),
+        query.execute_networkx(G2),
         [
             Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
             File(file_name="foo", file_path="bar"),
@@ -20,33 +20,33 @@ def test_one_edge_prop_test(G2, G3, graph_nodes_match):
 
     # Should work on the non-conslidating graph too.
     assert graph_nodes_match(
-        statement.execute_networkx(G3),
+        query.execute_networkx(G3),
         [
             Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
             File(file_name="foo", file_path="bar"),
         ],
     )
 
-    statement = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")})
+    query = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")})
 
     # Should match on `proc` from G1
-    assert graph_nodes_match(statement.execute_networkx(G2), [])
+    assert graph_nodes_match(query.execute_networkx(G2), [])
 
 
 def test_intermediate_edge_by_props(G5, graph_nodes_match):
 
-    # Run the first statement.
-    statement1 = FindProcess.with_command_line("B")
-    statement2 = IntermediateEdgeByProps(edge_type="Launched")
+    # Run the first query.
+    query1 = FindProcess.with_command_line("B")
+    query2 = IntermediateEdgeByProps(edge_type="Launched")
 
-    statement1 >> statement2
+    query1 >> query2
 
     # get the subgraph.
-    G_s = statement1.execute_networkx(G5)
+    G_s = query1.execute_networkx(G5)
 
-    # running statement two should only give us B->C
+    # running query two should only give us B->C
     assert graph_nodes_match(
-        statement2.execute_networkx(G_s),
+        query2.execute_networkx(G_s),
         [
             Process(process_id=12, process_image="B", command_line="B"),
             Process(process_id=12, process_image="C", command_line="C"),
@@ -60,7 +60,7 @@ def test_intermediate_edge_all_candidates_found(G7, graph_nodes_match):
         name="test_intermediate_edge_all_candidates_found",
         description="test_intermediate_edge_all_candidates_found",
         score=0,
-        statement=FindProcess.with_command_line("C") >> FindProcess.that_was_launched(),
+        query=FindProcess.with_command_line("C") >> FindProcess.that_was_launched(),
     )
 
     G = analyzer.run_networkx(G7)
diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py
index c97d161a..b9ef687a 100644
--- a/tests/analyzers/statements/test_lookups.py
+++ b/tests/analyzers/statements/test_lookups.py
@@ -1,6 +1,6 @@
 import re
 import pytest
-from beagle.analyzers.statements.lookups import (
+from beagle.analyzers.queries.lookups import (
     FieldLookup,
     Contains,
     IContains,
diff --git a/tests/analyzers/statements/test_node.py b/tests/analyzers/statements/test_node.py
index 58aefa86..97bcee3a 100644
--- a/tests/analyzers/statements/test_node.py
+++ b/tests/analyzers/statements/test_node.py
@@ -1,6 +1,6 @@
-from beagle.analyzers.statements.base_statement import Statement
-from beagle.analyzers.statements.lookups import Contains, EndsWith, Exact, StartsWith
-from beagle.analyzers.statements.node import (
+from beagle.analyzers.queries.base_query import Query
+from beagle.analyzers.queries.lookups import Contains, EndsWith, Exact, StartsWith
+from beagle.analyzers.queries.node import (
     NodeByProps,
     NodeByPropsAncestors,
     NodeByPropsDescendents,
@@ -10,7 +10,7 @@
 
 
 def test_test_props_nested_dict():
-    s = Statement()
+    s = Query()
 
     assert (
         s._test_values_with_lookups(
@@ -36,57 +36,57 @@ def test_test_props_nested_dict():
 
 
 def test_one_node_prop_test(G1, graph_nodes_match):
-    statement = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
+    query = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})
 
     assert graph_nodes_match(
-        statement.execute_networkx(G1),
+        query.execute_networkx(G1),
         [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
     )
 
     # should mathc on other proc
-    statement = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")})
+    query = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")})
 
     assert graph_nodes_match(
-        statement.execute_networkx(G1),
+        query.execute_networkx(G1),
         [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")],
     )
 
     # should match on both
-    statement = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")})
+    query = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")})
 
     assert graph_nodes_match(
-        statement.execute_networkx(G1),
+        query.execute_networkx(G1),
         [
             Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
             Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"),
         ],
     )
-    statement = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")})
+    query = NodeByProps(node_type=Process, props={"process_image": StartsWith("exe")})
 
-    assert graph_nodes_match(statement.execute_networkx(G1), [])
+    assert graph_nodes_match(query.execute_networkx(G1), [])
 
 
 def test_multiple_node_prop_test(G1, graph_nodes_match):
-    statement = NodeByProps(
+    query = NodeByProps(
         node_type=Process,
         props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
     )
 
     # Should match on `proc` from G1
     assert graph_nodes_match(
-        statement.execute_networkx(G1),
+        query.execute_networkx(G1),
         [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
     )
 
 
 def test_node_conditional(G1, graph_nodes_match):
-    statement = NodeByProps(
+    query = NodeByProps(
         node_type=Process,
         props={"command_line": Contains("foobar"), "process_image": StartsWith("test")},
     )
 
     assert graph_nodes_match(
-        statement.execute_networkx(G1),
+        query.execute_networkx(G1),
         [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")],
     )
 
@@ -94,9 +94,9 @@ def test_node_conditional(G1, graph_nodes_match):
 def test_node_with_descendants(G4, graph_nodes_match):
 
     # A should return A->B->C->D
-    statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")})
+    query = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")})
     assert graph_nodes_match(
-        statement.execute_networkx(G4),
+        query.execute_networkx(G4),
         [
             Process(process_id=10, process_image="A", command_line="A"),
             Process(process_id=12, process_image="B", command_line="B"),
@@ -106,9 +106,9 @@ def test_node_with_descendants(G4, graph_nodes_match):
     )
 
     # B should return B->C->D
-    statement = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")})
+    query = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")})
     assert graph_nodes_match(
-        statement.execute_networkx(G4),
+        query.execute_networkx(G4),
         [
             Process(process_id=12, process_image="B", command_line="B"),
             Process(process_id=12, process_image="C", command_line="C"),
@@ -120,16 +120,16 @@ def test_node_with_descendants(G4, graph_nodes_match):
 def test_node_with_ancestors(G4, graph_nodes_match):
 
     # A should return A
-    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")})
+    query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")})
     assert graph_nodes_match(
-        statement.execute_networkx(G4),
+        query.execute_networkx(G4),
         [Process(process_id=10, process_image="A", command_line="A")],
     )
 
     # B should return A->B
-    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")})
+    query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")})
     assert graph_nodes_match(
-        statement.execute_networkx(G4),
+        query.execute_networkx(G4),
         [
             Process(process_id=10, process_image="A", command_line="A"),
             Process(process_id=12, process_image="B", command_line="B"),
@@ -137,9 +137,9 @@ def test_node_with_ancestors(G4, graph_nodes_match):
     )
 
     # D should return A->B->C->D
-    statement = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")})
+    query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")})
     assert graph_nodes_match(
-        statement.execute_networkx(G4),
+        query.execute_networkx(G4),
         [
             Process(process_id=10, process_image="A", command_line="A"),
             Process(process_id=12, process_image="B", command_line="B"),
@@ -154,9 +154,9 @@ def test_nodes_reachable(G5, graph_nodes_match):
     # All queries will return the full path.
     # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H
 
-    statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
+    query = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")})
     assert graph_nodes_match(
-        statement.execute_networkx(G5),
+        query.execute_networkx(G5),
         [
             Process(process_id=10, process_image="A", command_line="A"),
             Process(process_id=12, process_image="B", command_line="B"),
@@ -165,9 +165,9 @@ def test_nodes_reachable(G5, graph_nodes_match):
         ],
     )
 
-    statement = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")})
+    query = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")})
     assert graph_nodes_match(
-        statement.execute_networkx(G5),
+        query.execute_networkx(G5),
         [
             Process(process_id=10, process_image="E", command_line="E"),
             Process(process_id=12, process_image="F", command_line="F"),
diff --git a/tests/analyzers/statements/test_process.py b/tests/analyzers/statements/test_process.py
index 49602beb..7e7175b8 100644
--- a/tests/analyzers/statements/test_process.py
+++ b/tests/analyzers/statements/test_process.py
@@ -1,15 +1,15 @@
-from beagle.analyzers.statements.process import FindProcess
+from beagle.analyzers.queries.process import FindProcess
 from beagle.nodes import Process, File
-from beagle.analyzers.statements.lookups import EndsWith
+from beagle.analyzers.queries.lookups import EndsWith
 
 
 def test_get_by_command_line_no_lookup(G5, graph_nodes_match):
 
     # Should return all nodes reachable from A
-    statement = FindProcess.with_command_line("A")
+    query = FindProcess.with_command_line("A")
 
     assert graph_nodes_match(
-        statement.execute_networkx(G5),
+        query.execute_networkx(G5),
         [
             Process(process_id=10, process_image="A", command_line="A"),
             Process(process_id=12, process_image="B", command_line="B"),
@@ -22,10 +22,10 @@ def test_get_by_command_line_no_lookup(G5, graph_nodes_match):
 def test_get_by_command_line_with_lookup(G5, graph_nodes_match):
 
     # Should return all nodes reachable from A Or G, (so all nodes)
-    statement = FindProcess.with_command_line(EndsWith("A") | EndsWith("G"))
+    query = FindProcess.with_command_line(EndsWith("A") | EndsWith("G"))
 
     assert graph_nodes_match(
-        statement.execute_networkx(G5),
+        query.execute_networkx(G5),
         [
             Process(process_id=10, process_image="A", command_line="A"),
             Process(process_id=12, process_image="B", command_line="B"),
@@ -42,12 +42,12 @@ def test_get_by_command_line_with_lookup(G5, graph_nodes_match):
 def test_get_process_name_no_lookup(G2, graph_nodes_match):
 
     # No match, since defaults to exact.
-    statement = FindProcess.with_process_name("exe")
-    assert graph_nodes_match(statement.execute_networkx(G2), [])
+    query = FindProcess.with_process_name("exe")
+    assert graph_nodes_match(query.execute_networkx(G2), [])
 
-    statement = FindProcess.with_process_name("test.exe")
+    query = FindProcess.with_process_name("test.exe")
     assert graph_nodes_match(
-        statement.execute_networkx(G2),
+        query.execute_networkx(G2),
         [
             Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
             File(file_name="foo", file_path="bar"),
@@ -58,10 +58,10 @@ def test_get_process_name_no_lookup(G2, graph_nodes_match):
 def test_get_process_name_lookup(G2, graph_nodes_match):
 
     # Should return test.exe because it ends with exe
-    statement = FindProcess.with_process_name(EndsWith("exe"))
+    query = FindProcess.with_process_name(EndsWith("exe"))
 
     assert graph_nodes_match(
-        statement.execute_networkx(G2),
+        query.execute_networkx(G2),
         [
             Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
             File(file_name="foo", file_path="bar"),
@@ -72,10 +72,10 @@ def test_get_process_name_lookup(G2, graph_nodes_match):
 def test_get_process_user(G6, graph_nodes_match):
 
     # Should return test.exe because it ends with exe
-    statement = FindProcess.with_user("omer")
+    query = FindProcess.with_user("omer")
 
     assert graph_nodes_match(
-        statement.execute_networkx(G6),
+        query.execute_networkx(G6),
         [
             Process(
                 process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer"
@@ -90,10 +90,10 @@ def test_get_process_user(G6, graph_nodes_match):
 def test_get_process_image_path(G6, graph_nodes_match):
 
     # Should return test.exe because it ends with exe
-    statement = FindProcess.with_process_image_path("d:\\")
+    query = FindProcess.with_process_image_path("d:\\")
 
     assert graph_nodes_match(
-        statement.execute_networkx(G6),
+        query.execute_networkx(G6),
         [
             Process(
                 process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer"
diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py
index de68b1d9..67762784 100644
--- a/tests/analyzers/test_base_analyzer.py
+++ b/tests/analyzers/test_base_analyzer.py
@@ -1,15 +1,15 @@
 from beagle.analyzers.base_analyzer import Analyzer
-from beagle.analyzers.statements.process import FindProcess
+from beagle.analyzers.queries.process import FindProcess
 from beagle.nodes import Process
 
 
-def test_analyzer_two_statements(G5, graph_nodes_match):
+def test_analyzer_two_queries(G5, graph_nodes_match):
 
     analyzer = Analyzer(
-        name="test_analyzer_two_statements",
-        description="test_analyzer_two_statements",
+        name="test_analyzer_two_queries",
+        description="test_analyzer_two_queries",
         score=0,
-        statement=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(),
+        query=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(),
     )
 
     G = analyzer.run_networkx(G5)
@@ -23,17 +23,17 @@ def test_analyzer_two_statements(G5, graph_nodes_match):
     )
 
 
-def test_analyzer_or_statement_statements(G5, graph_nodes_match):
+def test_analyzer_or_query_queries(G5, graph_nodes_match):
 
     query = (
         FindProcess.with_command_line("B") | FindProcess.with_command_line("A")
     ) >> FindProcess.that_was_launched()
 
     analyzer = Analyzer(
-        name="test_analyzer_two_statements",
-        description="test_analyzer_two_statements",
+        name="test_analyzer_two_queries",
+        description="test_analyzer_two_queries",
         score=0,
-        statement=query,
+        query=query,
     )
 
     G = analyzer.run_networkx(G5)

From e2205a5509ac02ce6c0143b8309f10d24e0c47e7 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sun, 17 Nov 2019 16:14:00 -0500
Subject: [PATCH 21/25] Adds FindProcess.that_was_launched

---
 beagle/analyzers/base_analyzer.py             |  2 +-
 beagle/analyzers/queries/base_query.py        |  5 +-
 beagle/analyzers/queries/edge.py              | 23 +++++++-
 beagle/analyzers/queries/node.py              |  2 +-
 beagle/analyzers/queries/process.py           |  9 ++--
 tests/analyzers/conftest.py                   | 14 ++++-
 ...t_base_statement.py => test_base_query.py} |  0
 tests/analyzers/statements/test_edge.py       | 26 ---------
 tests/analyzers/statements/test_lookups.py    | 19 ++++---
 tests/analyzers/statements/test_node.py       |  3 +-
 tests/analyzers/statements/test_process.py    | 53 ++++++++++++++++++-
 tests/analyzers/test_base_analyzer.py         |  4 +-
 12 files changed, 109 insertions(+), 51 deletions(-)
 rename tests/analyzers/statements/{test_base_statement.py => test_base_query.py} (100%)

diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py
index 5107d967..08c7d930 100644
--- a/beagle/analyzers/base_analyzer.py
+++ b/beagle/analyzers/base_analyzer.py
@@ -7,7 +7,7 @@
 
 
 class Analyzer(object):
-    def __init__(self, name: str, description: str, score: int, query: Query):
+    def __init__(self, name: str, query: Query, description: str = None, score: int = None):
         self.name = name
         self.description = description
         self.score = score
diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py
index 26f85199..0ea7aac8 100644
--- a/beagle/analyzers/queries/base_query.py
+++ b/beagle/analyzers/queries/base_query.py
@@ -20,8 +20,7 @@ def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]:
 
 class Query(object):
     def __init__(self):
-        """A query is the base building block of a query. A query takes as input a graph, executes,
-        and returns the next graph.
+        """A query takes as input a graph, executes, and returns the next graph.
 
         >>> G2 = query.execute_networkx(G)
 
@@ -96,7 +95,7 @@ def _test_values_with_lookups(
         value_to_test: Union[Node, Dict[str, Any]],
         lookup_tests: Dict[str, Union[FieldLookup, Dict]],
     ) -> bool:
-        """Tests a node or dictionay against a configuration of lookup_tests.
+        """Tests a node or dictionary against a configuration of lookup_tests.
 
         Parameters
         ----------
diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py
index 5190ee67..ba95e16a 100644
--- a/beagle/analyzers/queries/edge.py
+++ b/beagle/analyzers/queries/edge.py
@@ -1,4 +1,4 @@
-from typing import Dict, Union
+from typing import Dict, Union, Set
 
 import networkx as nx
 
@@ -101,9 +101,28 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
                 for entry in data:
                     if self._test_values_with_lookups(entry, self.props):
                         subgraph_edges.append((u, v, k))
-                        # can stop on first match
                         self.result_edges |= {(u, v, k)}
                         self.result_nodes |= {u, v}
+
+                        # can stop on first match
                         break
 
         return G.edge_subgraph(subgraph_edges)
+
+
+class IntermediateEdgeByPropsDescendants(IntermediateEdgeByProps):
+    """Perform an `IntermediateEdgeByProps` query, expanding the descendants of the found edges."""
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        next_graph = super().execute_networkx(G)
+
+        # Start with the nodes of the subgraph returned by the parent query.
+        subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()}
+
+        # For every result edge from `IntermediateEdgeByProps`, add its target node and descendants.
+        for _, v, _ in self.result_edges:
+            subgraph_nodes |= nx.descendants(G, v) | {v}
+
+        self.result_nodes |= subgraph_nodes
+
+        return G.subgraph(subgraph_nodes)
diff --git a/beagle/analyzers/queries/node.py b/beagle/analyzers/queries/node.py
index 138e4177..ed72550c 100644
--- a/beagle/analyzers/queries/node.py
+++ b/beagle/analyzers/queries/node.py
@@ -4,7 +4,7 @@
 
 from beagle.nodes import Node
 
-from .base_query import Query, _str_to_exact
+from .base_query import IntermediateQuery, Query, _str_to_exact
 from .lookups import FieldLookup
 
 
diff --git a/beagle/analyzers/queries/process.py b/beagle/analyzers/queries/process.py
index 77de2ea1..c5a3a8a9 100644
--- a/beagle/analyzers/queries/process.py
+++ b/beagle/analyzers/queries/process.py
@@ -3,7 +3,7 @@
 from beagle.nodes import Process
 
 from .base_query import FactoryMixin
-from .edge import IntermediateEdgeByProps
+from .edge import IntermediateEdgeByProps, IntermediateEdgeByPropsDescendants
 from .lookups import FieldLookup
 from .node import NodeByPropsReachable
 
@@ -66,5 +66,8 @@ def with_sha1_hash(
         return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}})
 
     @staticmethod
-    def that_was_launched():
-        return IntermediateEdgeByProps(edge_type="Launched")
+    def that_was_launched(descendants: bool = True):
+        if descendants:
+            return IntermediateEdgeByPropsDescendants(edge_type="Launched")
+        else:
+            return IntermediateEdgeByProps(edge_type="Launched")
diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py
index 654bf74f..2d97a929 100644
--- a/tests/analyzers/conftest.py
+++ b/tests/analyzers/conftest.py
@@ -10,7 +10,19 @@
 @pytest.fixture
 def graph_nodes_match():
     def validate_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool:
-        return [n["data"] for _, n in graph.nodes(data=True)] == nodes
+
+        node_objs = [n["data"] for _, n in graph.nodes(data=True)]
+
+        length_match = len(graph.nodes()) == len(nodes)
+
+        node_match = all([n in node_objs for n in nodes])
+
+        if length_match and node_match:
+            return True
+
+        else:
+            print(f"Expected {nodes} got {node_objs}")
+            return False
 
     return validate_nodes_match
 
diff --git a/tests/analyzers/statements/test_base_statement.py b/tests/analyzers/statements/test_base_query.py
similarity index 100%
rename from tests/analyzers/statements/test_base_statement.py
rename to tests/analyzers/statements/test_base_query.py
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py
index c7b51412..74cb0dbf 100644
--- a/tests/analyzers/statements/test_edge.py
+++ b/tests/analyzers/statements/test_edge.py
@@ -52,29 +52,3 @@ def test_intermediate_edge_by_props(G5, graph_nodes_match):
             Process(process_id=12, process_image="C", command_line="C"),
         ],
     )
-
-
-def test_intermediate_edge_all_candidates_found(G7, graph_nodes_match):
-
-    analyzer = Analyzer(
-        name="test_intermediate_edge_all_candidates_found",
-        description="test_intermediate_edge_all_candidates_found",
-        score=0,
-        query=FindProcess.with_command_line("C") >> FindProcess.that_was_launched(),
-    )
-
-    G = analyzer.run_networkx(G7)
-
-    # should return
-    #             C
-    #            / \
-    #           F  G
-
-    assert graph_nodes_match(
-        G,
-        [
-            Process(process_id=12, process_image="C", command_line="C"),
-            Process(process_id=12, process_image="F", command_line="F"),
-            Process(process_id=12, process_image="G", command_line="G"),
-        ],
-    )
diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py
index b9ef687a..72b235b4 100644
--- a/tests/analyzers/statements/test_lookups.py
+++ b/tests/analyzers/statements/test_lookups.py
@@ -1,17 +1,20 @@
 import re
+from typing import Type
+
 import pytest
+
 from beagle.analyzers.queries.lookups import (
-    FieldLookup,
+    And,
     Contains,
-    IContains,
+    EndsWith,
     Exact,
+    FieldLookup,
+    IContains,
     IExact,
-    StartsWith,
-    EndsWith,
-    Regex,
-    And,
-    Or,
     Not,
+    Or,
+    Regex,
+    StartsWith,
 )
 
 
@@ -48,7 +51,7 @@
         (Regex, re.compile(r"\d"), "test test", False),
     ],
 )
-def test_lookups(cls: FieldLookup, value: str, prop: str, result: str):
+def test_lookups(cls: Type[FieldLookup], value: str, prop: str, result: str):
     # prop -> value being tested again, value -> the thing we're looking up
     assert cls(value).test(prop) == result
 
diff --git a/tests/analyzers/statements/test_node.py b/tests/analyzers/statements/test_node.py
index 97bcee3a..0020e269 100644
--- a/tests/analyzers/statements/test_node.py
+++ b/tests/analyzers/statements/test_node.py
@@ -122,8 +122,7 @@ def test_node_with_ancestors(G4, graph_nodes_match):
     # A should return A
     query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")})
     assert graph_nodes_match(
-        query.execute_networkx(G4),
-        [Process(process_id=10, process_image="A", command_line="A")],
+        query.execute_networkx(G4), [Process(process_id=10, process_image="A", command_line="A")]
     )
 
     # B should return A->B
diff --git a/tests/analyzers/statements/test_process.py b/tests/analyzers/statements/test_process.py
index 7e7175b8..8d7d6dc1 100644
--- a/tests/analyzers/statements/test_process.py
+++ b/tests/analyzers/statements/test_process.py
@@ -1,6 +1,7 @@
-from beagle.analyzers.queries.process import FindProcess
-from beagle.nodes import Process, File
+from beagle.analyzers.base_analyzer import Analyzer
 from beagle.analyzers.queries.lookups import EndsWith
+from beagle.analyzers.queries.process import FindProcess
+from beagle.nodes import File, Process
 
 
 def test_get_by_command_line_no_lookup(G5, graph_nodes_match):
@@ -103,3 +104,51 @@ def test_get_process_image_path(G6, graph_nodes_match):
             ),
         ],
     )
+
+
+def test_process_launched_no_descendants(G7, graph_nodes_match):
+    analyzer = Analyzer(
+        name="test_process_launched_descendants",
+        query=FindProcess.with_command_line("C")
+        >> FindProcess.that_was_launched(descendants=False),
+    )
+
+    G = analyzer.run_networkx(G7)
+
+    # should return
+    #             C
+    #            / \
+    #           F  G
+
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="F", command_line="F"),
+            Process(process_id=12, process_image="G", command_line="G"),
+        ],
+    )
+
+
+def test_process_launched_descendants(G7, graph_nodes_match):
+    analyzer = Analyzer(
+        name="test_process_launched_descendants",
+        query=FindProcess.with_command_line("A") >> FindProcess.that_was_launched(),
+    )
+
+    G = analyzer.run_networkx(G7)
+
+    # Should return the full graph.
+    # since it should find B and C which are children of A, then expand their children.
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=10, process_image="A", command_line="A"),
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+            Process(process_id=12, process_image="D", command_line="D"),
+            Process(process_id=10, process_image="E", command_line="E"),
+            Process(process_id=12, process_image="F", command_line="F"),
+            Process(process_id=12, process_image="G", command_line="G"),
+        ],
+    )
diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py
index 67762784..120b8ed0 100644
--- a/tests/analyzers/test_base_analyzer.py
+++ b/tests/analyzers/test_base_analyzer.py
@@ -9,7 +9,7 @@ def test_analyzer_two_queries(G5, graph_nodes_match):
         name="test_analyzer_two_queries",
         description="test_analyzer_two_queries",
         score=0,
-        query=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(),
+        query=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(descendants=False),
     )
 
     G = analyzer.run_networkx(G5)
@@ -27,7 +27,7 @@ def test_analyzer_or_query_queries(G5, graph_nodes_match):
 
     query = (
         FindProcess.with_command_line("B") | FindProcess.with_command_line("A")
-    ) >> FindProcess.that_was_launched()
+    ) >> FindProcess.that_was_launched(descendants=False)
 
     analyzer = Analyzer(
         name="test_analyzer_two_queries",

From 22e7043cad84283ca3aa7aca65ab46dd4db01819 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sun, 17 Nov 2019 17:07:59 -0500
Subject: [PATCH 22/25] Adds query factory for Files

---
 beagle/analyzers/base_analyzer.py             |  6 +-
 beagle/analyzers/queries/base_query.py        |  7 ++-
 beagle/analyzers/queries/edge.py              |  6 +-
 beagle/analyzers/queries/file.py              | 58 +++++++++++++++++++
 beagle/analyzers/queries/lookups.py           |  3 +
 beagle/analyzers/queries/node.py              |  4 +-
 beagle/nodes/file.py                          |  2 +-
 tests/analyzers/statements/test_base_query.py | 14 ++++-
 tests/analyzers/statements/test_file.py       | 53 +++++++++++++++++
 tests/analyzers/test_base_analyzer.py         | 26 ++++++++-
 10 files changed, 165 insertions(+), 14 deletions(-)
 create mode 100644 beagle/analyzers/queries/file.py
 create mode 100644 tests/analyzers/statements/test_file.py

diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py
index 08c7d930..4388b651 100644
--- a/beagle/analyzers/base_analyzer.py
+++ b/beagle/analyzers/base_analyzer.py
@@ -1,4 +1,4 @@
-from typing import Type, cast
+from typing import Type, cast, Any
 
 import networkx as nx
 
@@ -18,10 +18,10 @@ def __init__(self, name: str, query: Query, description: str = None, score: int
 
         self.query: Query = query
 
-    def run(self, backend: Type[Backend]):
+    def run(self, backend: Type[Backend]) -> Any:
         if isinstance(backend, NetworkX):
             backend = cast(NetworkX, backend)
-            self.run_networkx(backend.G)
+            return self.run_networkx(backend.G)
 
     def run_networkx(self, G: nx.Graph) -> nx.Graph:
 
diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py
index 0ea7aac8..965bac7f 100644
--- a/beagle/analyzers/queries/base_query.py
+++ b/beagle/analyzers/queries/base_query.py
@@ -7,6 +7,9 @@
 from .lookups import Exact, FieldLookup
 
 
+PropsDict = Dict[str, Union[str, FieldLookup, Dict]]
+
+
 def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]:
     # Ensures strings become Exact, Works on nested dicts
     for k, v in props.items():
@@ -123,7 +126,7 @@ def _test_values_with_lookups(
         for attr_name, lookup in lookup_tests.items():
             if isinstance(lookup, dict):
                 # recursivly check props against nested entrys (e.g is hashes dict in Process)
-                if isinstance(value_to_test, Node):
+                if isinstance(value_to_test, Node):  # pragma: no cover
                     results.append(
                         self._test_values_with_lookups(
                             value_to_test=getattr(value_to_test, attr_name), lookup_tests=lookup
@@ -210,6 +213,6 @@ def __init__(self, *args, **kwargs):
     def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]:
         return self.upstream_query.result_nodes, self.upstream_query.result_edges
 
-    def set_upstream_nodes(self):
+    def set_upstream_nodes(self):  # pragma: no cover
         self.upstream_nodes |= self.upstream_query.result_nodes
         self.upstream_edges |= self.upstream_query.result_edges
diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py
index ba95e16a..5693d89b 100644
--- a/beagle/analyzers/queries/edge.py
+++ b/beagle/analyzers/queries/edge.py
@@ -2,14 +2,12 @@
 
 import networkx as nx
 
-from .base_query import Query, _str_to_exact, IntermediateQuery
+from .base_query import Query, _str_to_exact, IntermediateQuery, PropsDict
 from .lookups import FieldLookup
 
 
 class EdgeByProps(Query):
-    def __init__(
-        self, edge_type: str, props: Dict[str, Union[str, FieldLookup]] = {}, *args, **kwargs
-    ):
+    def __init__(self, edge_type: str, props: PropsDict = {}, *args, **kwargs):
         """Searches the graph for an edge of type `edge_type` with properties matching `props`
 
         Parameters
diff --git a/beagle/analyzers/queries/file.py b/beagle/analyzers/queries/file.py
new file mode 100644
index 00000000..02961b92
--- /dev/null
+++ b/beagle/analyzers/queries/file.py
@@ -0,0 +1,58 @@
+from typing import Union
+
+from beagle.nodes import File
+
+from .base_query import FactoryMixin, PropsDict
+from .edge import IntermediateEdgeByProps, IntermediateEdgeByPropsDescendants
+from .lookups import FieldLookup
+from .node import NodeByPropsReachable
+
+
+class FindFile(FactoryMixin):
+    """Executes queries relevant to a File"""
+
+    @staticmethod
+    def with_full_path(full_path: Union[str, FieldLookup]) -> NodeByPropsReachable:
+        return NodeByPropsReachable(node_type=File, props={"full_path": full_path})
+
+    @staticmethod
+    def with_file_path(file_path: Union[str, FieldLookup]) -> NodeByPropsReachable:
+        return NodeByPropsReachable(node_type=File, props={"file_path": file_path})
+
+    @staticmethod
+    def with_file_name(file_name: Union[str, FieldLookup]) -> NodeByPropsReachable:
+        return NodeByPropsReachable(node_type=File, props={"file_name": file_name})
+
+    @staticmethod
+    def with_extension(
+        extension: Union[str, FieldLookup]
+    ) -> NodeByPropsReachable:  # pragma: no cover
+        return NodeByPropsReachable(node_type=File, props={"extension": extension})
+
+    @staticmethod
+    def with_timestamp(
+        timestamp: Union[str, FieldLookup]
+    ) -> NodeByPropsReachable:  # pragma: no cover
+        return NodeByPropsReachable(node_type=File, props={"timestamp": timestamp})
+
+    @staticmethod
+    def with_hashes(hashes: Union[str, FieldLookup]) -> NodeByPropsReachable:  # pragma: no cover
+        return NodeByPropsReachable(node_type=File, props={"hashes": hashes})
+
+    @staticmethod
+    def with_props(props: PropsDict) -> NodeByPropsReachable:  # pragma: no cover
+        return NodeByPropsReachable(node_type=File, props=props)
+
+    @staticmethod
+    def that_was_written(descendants: bool = False):
+        if descendants:
+            return IntermediateEdgeByPropsDescendants(edge_type="Wrote")
+        else:
+            return IntermediateEdgeByProps(edge_type="Wrote")
+
+    @staticmethod
+    def that_was_copied(descendants: bool = False):
+        if descendants:
+            return IntermediateEdgeByPropsDescendants(edge_type="Copied To")
+        else:
+            return IntermediateEdgeByProps(edge_type="Copied To")
diff --git a/beagle/analyzers/queries/lookups.py b/beagle/analyzers/queries/lookups.py
index 76c764e3..917468eb 100644
--- a/beagle/analyzers/queries/lookups.py
+++ b/beagle/analyzers/queries/lookups.py
@@ -63,6 +63,9 @@ def __invert__(self) -> "Not":
         """
         return Not(self)
 
+    def __eq__(self, other):
+        return (type(self) == type(other)) and (self.value == other.value)
+
 
 class Or(FieldLookup):
     """Boolean OR, Meant to be used with other lookups:
diff --git a/beagle/analyzers/queries/node.py b/beagle/analyzers/queries/node.py
index ed72550c..d4fb7274 100644
--- a/beagle/analyzers/queries/node.py
+++ b/beagle/analyzers/queries/node.py
@@ -4,12 +4,12 @@
 
 from beagle.nodes import Node
 
-from .base_query import IntermediateQuery, Query, _str_to_exact
+from .base_query import Query, _str_to_exact, PropsDict
 from .lookups import FieldLookup
 
 
 class NodeByProps(Query):
-    def __init__(self, node_type: Type[Node], props: Dict[str, Union[str, FieldLookup, Dict]] = {}):
+    def __init__(self, node_type: Type[Node], props: PropsDict = {}):
         """Searches the graph for a node of type `node_type` with properties matching `props`
 
         Parameters
diff --git a/beagle/nodes/file.py b/beagle/nodes/file.py
index a8fbcb76..4c795c75 100644
--- a/beagle/nodes/file.py
+++ b/beagle/nodes/file.py
@@ -5,7 +5,7 @@
 from beagle.edges import FileOf, CopiedTo
 
 # mypy type hinting
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     from beagle.nodes import Process  # noqa: F401
 
 
diff --git a/tests/analyzers/statements/test_base_query.py b/tests/analyzers/statements/test_base_query.py
index 58419677..940a046a 100644
--- a/tests/analyzers/statements/test_base_query.py
+++ b/tests/analyzers/statements/test_base_query.py
@@ -1,5 +1,5 @@
 import pytest
-from beagle.analyzers.queries.base_query import FactoryMixin
+from beagle.analyzers.queries.base_query import FactoryMixin, _str_to_exact
 from beagle.analyzers.queries.node import NodeByPropsReachable, NodeByProps
 from beagle.analyzers.queries.lookups import Exact
 from beagle.nodes import Process
@@ -14,6 +14,18 @@ class MyFactory(FactoryMixin):
         obj.execute_networkx(None)
 
 
+@pytest.mark.parametrize(
+    "props,expected",
+    [
+        ({"process_image": "A"}, {"process_image": Exact("A")}),
+        ({"hashes": {"md5": "A"}}, {"hashes": {"md5": Exact("A")}}),
+        ({"hashes": {"md5": "A", "baz": {"foo": "bar"}}}, {"hashes": {"md5": Exact("A"), "baz": {"foo": Exact("bar")}}}),
+    ],
+)
+def test_str_to_exact(props, expected):
+    assert _str_to_exact(props) == expected
+
+
 def test_chained_query(G5, graph_nodes_match):
     # Both paths should show up because we use a chained query that returns both.
 
diff --git a/tests/analyzers/statements/test_file.py b/tests/analyzers/statements/test_file.py
new file mode 100644
index 00000000..25250d6e
--- /dev/null
+++ b/tests/analyzers/statements/test_file.py
@@ -0,0 +1,53 @@
+from beagle.analyzers.base_analyzer import Analyzer
+from beagle.analyzers.queries.file import FindFile
+from beagle.nodes import File, Process
+
+
+def test_file_with_name(G3, graph_nodes_match):
+    analyzer = Analyzer(name="test_file_with_name", query=FindFile.with_file_name("foo"))
+
+    G = analyzer.run_networkx(G3)
+
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
+
+
+def test_file_with_path(G3, graph_nodes_match):
+    analyzer = Analyzer(name="test_file_with_path", query=FindFile.with_file_path("bar"))
+
+    G = analyzer.run_networkx(G3)
+
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
+
+
+def test_file_with_full_path(G3, graph_nodes_match):
+    analyzer = Analyzer(name="test_file_with_full_path", query=FindFile.with_full_path("bar\\foo"))
+
+    G = analyzer.run_networkx(G3)
+
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
+
+
+def test_file_that_was_written(G3, graph_nodes_match):
+    analyzer = Analyzer(name="test_file_that_was_written", query=FindFile.that_was_written())
+
+    G = analyzer.run_networkx(G3)
+
+    assert graph_nodes_match(G, [File(file_name="foo", file_path="bar")])
diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py
index 120b8ed0..2ac411e0 100644
--- a/tests/analyzers/test_base_analyzer.py
+++ b/tests/analyzers/test_base_analyzer.py
@@ -2,6 +2,29 @@
 from beagle.analyzers.queries.process import FindProcess
 from beagle.nodes import Process
 
+from beagle.backends import NetworkX
+
+
+def test_analyzer_from_networx_backed(G5, graph_nodes_match):
+    analyzer = Analyzer(
+        name="test_analyzer_two_queries",
+        description="test_analyzer_two_queries",
+        score=0,
+        query=FindProcess.with_command_line("B")
+        >> FindProcess.that_was_launched(descendants=False),
+    )
+
+    backend = NetworkX(nodes=[])
+    backend.G = G5
+
+    assert graph_nodes_match(
+        analyzer.run(backend),
+        [
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="C", command_line="C"),
+        ],
+    )
+
 
 def test_analyzer_two_queries(G5, graph_nodes_match):
 
@@ -9,7 +32,8 @@ def test_analyzer_two_queries(G5, graph_nodes_match):
         name="test_analyzer_two_queries",
         description="test_analyzer_two_queries",
         score=0,
-        query=FindProcess.with_command_line("B") >> FindProcess.that_was_launched(descendants=False),
+        query=FindProcess.with_command_line("B")
+        >> FindProcess.that_was_launched(descendants=False),
     )
 
     G = analyzer.run_networkx(G5)

From 460e1d9d91500c473058f085ffa8e8ee1028af12 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sun, 17 Nov 2019 21:56:13 -0500
Subject: [PATCH 23/25] All queries can now be intermediary by default.

---
 beagle/analyzers/queries/__init__.py          |  24 ++++
 beagle/analyzers/queries/base_query.py        | 123 ++++++++----------
 beagle/analyzers/queries/edge.py              |  72 +++++-----
 beagle/analyzers/queries/file.py              |  22 ++--
 beagle/analyzers/queries/process.py           |  12 +-
 tests/analyzers/conftest.py                   |  17 +++
 tests/analyzers/statements/test_base_query.py |   8 +-
 tests/analyzers/statements/test_edge.py       |   5 +-
 tests/analyzers/statements/test_file.py       |  16 ++-
 9 files changed, 163 insertions(+), 136 deletions(-)

diff --git a/beagle/analyzers/queries/__init__.py b/beagle/analyzers/queries/__init__.py
index e69de29b..f2d8da77 100644
--- a/beagle/analyzers/queries/__init__.py
+++ b/beagle/analyzers/queries/__init__.py
@@ -0,0 +1,24 @@
+from networkx import nx
+from .base_query import Query, PropsDict
+from .edge import EdgeByProps, EdgeByPropsAncestors, EdgeByPropsDescendants, EdgeByPropsReachable
+
+
+def make_edge_query(
+    edge_type: str, descendants=True, ancestors=False, reachable=False, edge_props: PropsDict = {}
+) -> Query:
+    if reachable or (descendants and reachable):
+        return EdgeByPropsReachable(edge_type=edge_type, edge_props=edge_props)
+    elif descendants:
+        return EdgeByPropsDescendants(edge_type=edge_type, edge_props=edge_props)
+    elif ancestors:
+        return EdgeByPropsAncestors(edge_type=edge_type, edge_props=edge_props)
+    else:
+        return EdgeByProps(edge_type=edge_type, edge_props=edge_props)
+
+
+class FactoryMixin(object):
+    """Mixin to prevent Query Factories from calling execute methods.
+    """
+
+    def execute_networkx(self, G: nx.graph):
+        raise UserWarning("Query factories cannot be called directly")
diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py
index 965bac7f..a10b2250 100644
--- a/beagle/analyzers/queries/base_query.py
+++ b/beagle/analyzers/queries/base_query.py
@@ -44,54 +44,15 @@ def __init__(self):
         self.downstream_query: Query = None
         self.upstream_query: Query = None
 
-    def __rshift__(self, other: "Query") -> "Query":
-        """Implements Self >> Other == self.downstream_query = other
-
-        Parameters
-        ----------
-        other : Query
-            The other query to add.
-        """
-        self.downstream_query = other
-        other.upstream_query = self
-        return other
-
-    def __lshift__(self, other: "Query") -> "Query":
-        """Implements Self << Other == self.upstream_query = other
-
-        Parameters
-        ----------
-        other : Query
-            The other query to add.
-        """
-        other.downstream_query = self
-        self.upstream_query = other
-        return other
-
-    def __or__(self, other: "Query") -> "ChainedQuery":
-        """Allows queries to be combined through the `|` operator.
-        The result of execution is the union of both subqueries.
-
-        >>> query1 = Query(...)
-        >>> query2 = Query(...)
-        >>> chained = query1 | query2
-
-
-        Parameters
-        ----------
-        other: Query
-            The query to chain with.
+        self.upstream_nodes: Set[int] = set()
+        self.upstream_edges: Set[Tuple[int, int, int]] = set()
 
-        Returns
-        -------
-        ChainedQuery
-            A chained query compromised of all three.
-        """
-        return ChainedQuery(self, other)
+    def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]:
+        return self.upstream_query.result_nodes, self.upstream_query.result_edges
 
-    def execute_networkx(self, G: nx.Graph):  # pragma: no cover
-        """Execute a query against a `networkx` graph."""
-        raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
+    def set_upstream_nodes(self):  # pragma: no cover
+        self.upstream_nodes |= self.upstream_query.result_nodes
+        self.upstream_edges |= self.upstream_query.result_edges
 
     def _test_values_with_lookups(
         self,
@@ -146,13 +107,54 @@ def _test_values_with_lookups(
 
         return any(results)
 
+    def execute_networkx(self, G: nx.Graph):  # pragma: no cover
+        """Execute a query against a `networkx` graph."""
+        raise NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}")
+
+    def __rshift__(self, other: "Query") -> "Query":
+        """Implements Self >> Other == self.downstream_query = other
+
+        Parameters
+        ----------
+        other : Query
+            The other query to add.
+        """
+        self.downstream_query = other
+        other.upstream_query = self
+        return other
+
+    def __lshift__(self, other: "Query") -> "Query":
+        """Implements Self << Other == self.upstream_query = other
+
+        Parameters
+        ----------
+        other : Query
+            The other query to add.
+        """
+        other.downstream_query = self
+        self.upstream_query = other
+        return other
+
+    def __or__(self, other: "Query") -> "ChainedQuery":
+        """Allows queries to be combined through the `|` operator.
+        The result of execution is the union of both subqueries.
+
+        >>> query1 = Query(...)
+        >>> query2 = Query(...)
+        >>> chained = query1 | query2
 
-class FactoryMixin(object):
-    """Mixin to prevent Query Factories from calling execute methods.
-    """
 
-    def execute_networkx(self, G: nx.graph):
-        raise UserWarning("Query factories cannot be called directly")
+        Parameters
+        ----------
+        other: Query
+            The query to chain with.
+
+        Returns
+        -------
+        ChainedQuery
+            A chained query comprising both subqueries.
+        """
+        return ChainedQuery(self, other)
 
 
 class ChainedQuery(Query):
@@ -197,22 +199,3 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
             H = nx.compose(H, subgraph)
 
         return H
-
-
-class IntermediateQuery(Query):
-    """An IntermediateQuery is a query which depends on a previous initial Query to run.
-
-    For example, you may only want to find edges connected to one of the nodes identifed in `NodeByProps`.
-    """
-
-    def __init__(self, *args, **kwargs):
-        self.upstream_nodes: Set[int] = set()
-        self.upstream_edges: Set[Tuple[int, int, int]] = set()
-        super().__init__(*args, **kwargs)
-
-    def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]:
-        return self.upstream_query.result_nodes, self.upstream_query.result_edges
-
-    def set_upstream_nodes(self):  # pragma: no cover
-        self.upstream_nodes |= self.upstream_query.result_nodes
-        self.upstream_edges |= self.upstream_query.result_edges
diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py
index 5693d89b..e0ce9724 100644
--- a/beagle/analyzers/queries/edge.py
+++ b/beagle/analyzers/queries/edge.py
@@ -2,7 +2,7 @@
 
 import networkx as nx
 
-from .base_query import Query, _str_to_exact, IntermediateQuery, PropsDict
+from .base_query import Query, _str_to_exact, PropsDict
 from .lookups import FieldLookup
 
 
@@ -37,8 +37,14 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         """
         subgraph_edges = []
 
+        if self.upstream_query is not None:
+            upstream_nodes, _ = self.get_upstream_results()
+            edges = G.edges(upstream_nodes, data=True, keys=True)
+        else:
+            edges = G.edges(data=True, keys=True)
+
         # For each edge
-        for u, v, k, e_data in G.edges(data=True, keys=True):
+        for u, v, k, e_data in edges:
 
             # pull out the data field from NX
             data = e_data["data"]  # edge data
@@ -62,54 +68,44 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         return G.edge_subgraph(subgraph_edges)
 
 
-class IntermediateEdgeByProps(EdgeByProps, IntermediateQuery):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+class EdgeByPropsDescendants(EdgeByProps):
+    """Perform a `EdgeByProps` query, expanding the descendants of the found edges."""
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
-        """Searches a `nx.Graph` object for edges that match type `edge_type` and contains
-        props matching `props`. This is O(E).
+        next_graph = super().execute_networkx(G)
 
-        Returns a subgraph with all nodes contained in match edges
-        """
+        # get the nodes from the previous graph.
+        subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()}
 
-        # Grab upstream information
-        upstream_nodes, _ = self.get_upstream_results()
+        # For every edge that matched in `EdgeByProps`
+        for _, v, _ in self.result_edges:
+            subgraph_nodes |= nx.descendants(G, v) | {v}
 
-        subgraph_edges = []
+        self.result_nodes |= subgraph_nodes
 
-        for u, v, k, e_data in G.edges(
-            # Only get the edges associate with nodes from the previous step.
-            upstream_nodes,
-            data=True,
-            keys=True,
-        ):
+        return G.subgraph(subgraph_nodes)
 
-            # pull out the data field from NX
-            data = e_data["data"]  # edge data
-            e_type = e_data["edge_name"]  # edge type
 
-            # If edge matches the desired instance.
-            if e_type == self.edge_type:
+class EdgeByPropsAncestors(EdgeByProps):
+    """Perform a `EdgeByProps` query, expanding the ancestors of the found edges."""
 
-                # Test the edge
-                if not isinstance(data, list):
-                    data = [data]
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+        next_graph = super().execute_networkx(G)
 
-                for entry in data:
-                    if self._test_values_with_lookups(entry, self.props):
-                        subgraph_edges.append((u, v, k))
-                        self.result_edges |= {(u, v, k)}
-                        self.result_nodes |= {u, v}
+        # get the nodes from the previous graph.
+        subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()}
 
-                        # can stop on first match
-                        break
+        # For every edge that matched in `EdgeByProps`
+        for _, v, _ in self.result_edges:
+            subgraph_nodes |= nx.ancestors(G, v) | {v}
 
-        return G.edge_subgraph(subgraph_edges)
+        self.result_nodes |= subgraph_nodes
+
+        return G.subgraph(subgraph_nodes)
 
 
-class IntermediateEdgeByPropsDescendants(IntermediateEdgeByProps):
-    """Perform a `IntermediateEdgeByProps` query, expanding the descendants of the found edges."""
+class EdgeByPropsReachable(EdgeByProps):
+    """Perform a `EdgeByProps` query, including all reachable nodes."""
 
     def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         next_graph = super().execute_networkx(G)
@@ -117,9 +113,9 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
         # get the nodes from the previous graph.
         subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()}
 
-        # For every node that matched `in IntermediateEdgeByProps`
+        # For every edge that matched in `EdgeByProps`
         for _, v, _ in self.result_edges:
-            subgraph_nodes |= nx.descendants(G, v) | {v}
+            subgraph_nodes |= nx.ancestors(G, v) | nx.descendants(G, v) | {v}
 
         self.result_nodes |= subgraph_nodes
 
diff --git a/beagle/analyzers/queries/file.py b/beagle/analyzers/queries/file.py
index 02961b92..7004a755 100644
--- a/beagle/analyzers/queries/file.py
+++ b/beagle/analyzers/queries/file.py
@@ -2,8 +2,8 @@
 
 from beagle.nodes import File
 
-from .base_query import FactoryMixin, PropsDict
-from .edge import IntermediateEdgeByProps, IntermediateEdgeByPropsDescendants
+from . import FactoryMixin, make_edge_query
+from .base_query import PropsDict
 from .lookups import FieldLookup
 from .node import NodeByPropsReachable
 
@@ -44,15 +44,13 @@ def with_props(props: PropsDict) -> NodeByPropsReachable:  # pragma: no cover
         return NodeByPropsReachable(node_type=File, props=props)
 
     @staticmethod
-    def that_was_written(descendants: bool = False):
-        if descendants:
-            return IntermediateEdgeByPropsDescendants(edge_type="Wrote")
-        else:
-            return IntermediateEdgeByProps(edge_type="Wrote")
+    def that_was_written(descendants=True, ancestors=False, reachable=False):
+        return make_edge_query(
+            edge_type="Wrote", descendants=descendants, ancestors=ancestors, reachable=reachable
+        )
 
     @staticmethod
-    def that_was_copied(descendants: bool = False):
-        if descendants:
-            return IntermediateEdgeByPropsDescendants(edge_type="Copied To")
-        else:
-            return IntermediateEdgeByProps(edge_type="Copied To")
+    def that_was_copied(descendants=True, ancestors=False, reachable=False):
+        return make_edge_query(
+            edge_type="Copied To", descendants=descendants, ancestors=ancestors, reachable=reachable
+        )
diff --git a/beagle/analyzers/queries/process.py b/beagle/analyzers/queries/process.py
index c5a3a8a9..0019fdff 100644
--- a/beagle/analyzers/queries/process.py
+++ b/beagle/analyzers/queries/process.py
@@ -2,8 +2,7 @@
 
 from beagle.nodes import Process
 
-from .base_query import FactoryMixin
-from .edge import IntermediateEdgeByProps, IntermediateEdgeByPropsDescendants
+from . import FactoryMixin, make_edge_query
 from .lookups import FieldLookup
 from .node import NodeByPropsReachable
 
@@ -66,8 +65,7 @@ def with_sha1_hash(
         return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}})
 
     @staticmethod
-    def that_was_launched(descendants: bool = True):
-        if descendants:
-            return IntermediateEdgeByPropsDescendants(edge_type="Launched")
-        else:
-            return IntermediateEdgeByProps(edge_type="Launched")
+    def that_was_launched(descendants=True, ancestors=False, reachable=False):
+        return make_edge_query(
+            edge_type="Launched", descendants=descendants, ancestors=ancestors, reachable=reachable
+        )
diff --git a/tests/analyzers/conftest.py b/tests/analyzers/conftest.py
index 2d97a929..ee4b68c6 100644
--- a/tests/analyzers/conftest.py
+++ b/tests/analyzers/conftest.py
@@ -167,3 +167,20 @@ def G7():
     backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D, E, F, G])
 
     return backend.graph()
+
+
+@pytest.fixture
+def G8():
+    #  A launches B, B writes to F1
+
+    A = Process(process_id=10, process_image="A", command_line="A")
+
+    B = Process(process_id=12, process_image="B", command_line="B")
+    F1 = File(file_name="bar", file_path="bar")
+
+    A.launched[B]
+    B.wrote[F1].append(contents="bar")
+
+    backend = NetworkX(consolidate_edges=True, nodes=[A, B, F1])
+
+    return backend.graph()
diff --git a/tests/analyzers/statements/test_base_query.py b/tests/analyzers/statements/test_base_query.py
index 940a046a..e8bf28ef 100644
--- a/tests/analyzers/statements/test_base_query.py
+++ b/tests/analyzers/statements/test_base_query.py
@@ -1,5 +1,6 @@
 import pytest
-from beagle.analyzers.queries.base_query import FactoryMixin, _str_to_exact
+from beagle.analyzers.queries import FactoryMixin
+from beagle.analyzers.queries.base_query import _str_to_exact
 from beagle.analyzers.queries.node import NodeByPropsReachable, NodeByProps
 from beagle.analyzers.queries.lookups import Exact
 from beagle.nodes import Process
@@ -19,7 +20,10 @@ class MyFactory(FactoryMixin):
     [
         ({"process_image": "A"}, {"process_image": Exact("A")}),
         ({"hashes": {"md5": "A"}}, {"hashes": {"md5": Exact("A")}}),
-        ({"hashes": {"md5": "A", "baz": {"foo": "bar"}}}, {"hashes": {"md5": Exact("A"), "baz": {"foo": Exact("bar")}}}),
+        (
+            {"hashes": {"md5": "A", "baz": {"foo": "bar"}}},
+            {"hashes": {"md5": Exact("A"), "baz": {"foo": Exact("bar")}}},
+        ),
     ],
 )
 def test_str_to_exact(props, expected):
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py
index 74cb0dbf..5c2d1932 100644
--- a/tests/analyzers/statements/test_edge.py
+++ b/tests/analyzers/statements/test_edge.py
@@ -1,8 +1,7 @@
-from beagle.analyzers.queries.edge import EdgeByProps, IntermediateEdgeByProps
+from beagle.analyzers.queries.edge import EdgeByProps
 from beagle.analyzers.queries.lookups import Exact
 from beagle.analyzers.queries.process import FindProcess
 from beagle.nodes import File, Process
-from beagle.analyzers.base_analyzer import Analyzer
 
 
 def test_one_edge_prop_test(G2, G3, graph_nodes_match):
@@ -37,7 +36,7 @@ def test_intermediate_edge_by_props(G5, graph_nodes_match):
 
     # Run the first query.
     query1 = FindProcess.with_command_line("B")
-    query2 = IntermediateEdgeByProps(edge_type="Launched")
+    query2 = EdgeByProps(edge_type="Launched")
 
     query1 >> query2
 
diff --git a/tests/analyzers/statements/test_file.py b/tests/analyzers/statements/test_file.py
index 25250d6e..3f3d15dd 100644
--- a/tests/analyzers/statements/test_file.py
+++ b/tests/analyzers/statements/test_file.py
@@ -45,9 +45,17 @@ def test_file_with_full_path(G3, graph_nodes_match):
     )
 
 
-def test_file_that_was_written(G3, graph_nodes_match):
-    analyzer = Analyzer(name="test_file_that_was_written", query=FindFile.that_was_written())
+def test_file_that_was_written(G8, graph_nodes_match):
+    analyzer = Analyzer(
+        name="test_file_that_was_written", query=FindFile.that_was_written(descendants=False)
+    )
 
-    G = analyzer.run_networkx(G3)
+    G = analyzer.run_networkx(G8)
 
-    assert graph_nodes_match(G, [File(file_name="foo", file_path="bar")])
+    assert graph_nodes_match(
+        G,
+        [
+            Process(process_id=12, process_image="B", command_line="B"),
+            File(file_name="bar", file_path="bar"),
+        ],
+    )

From cfea7bd86b4e03e08a10ce6790ff9c48c77dbee8 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Sun, 17 Nov 2019 22:06:53 -0500
Subject: [PATCH 24/25] FindFile: finishes file queries

---
 beagle/analyzers/queries/base_query.py |  4 +++-
 beagle/analyzers/queries/file.py       | 32 +++++++++++++++++++++++---
 beagle/analyzers/queries/process.py    |  5 ++++
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py
index a10b2250..927b9571 100644
--- a/beagle/analyzers/queries/base_query.py
+++ b/beagle/analyzers/queries/base_query.py
@@ -7,7 +7,7 @@
 from .lookups import Exact, FieldLookup
 
 
-PropsDict = Dict[str, Union[str, FieldLookup, Dict]]
+PropsDict = Dict[str, Union[str, FieldLookup, Dict, None]]
 
 
 def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]:
@@ -17,6 +17,8 @@ def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]:
             props[k] = Exact(v)
         elif isinstance(v, dict):
             props[k] = _str_to_exact(v)
+        elif v is None:
+            del props[k]
 
     return props
 
diff --git a/beagle/analyzers/queries/file.py b/beagle/analyzers/queries/file.py
index 7004a755..d40f6c65 100644
--- a/beagle/analyzers/queries/file.py
+++ b/beagle/analyzers/queries/file.py
@@ -43,14 +43,40 @@ def with_hashes(hashes: Union[str, FieldLookup]) -> NodeByPropsReachable:  # pra
     def with_props(props: PropsDict) -> NodeByPropsReachable:  # pragma: no cover
         return NodeByPropsReachable(node_type=File, props=props)
 
+    # ---- Edge methods ----- #
+
     @staticmethod
-    def that_was_written(descendants=True, ancestors=False, reachable=False):
+    def that_was_written(
+        contents: str = None, descendants=True, ancestors=False, reachable=False
+    ):  # pragma: no cover
         return make_edge_query(
-            edge_type="Wrote", descendants=descendants, ancestors=ancestors, reachable=reachable
+            edge_type="Wrote",
+            edge_props={"contents": contents},
+            descendants=descendants,
+            ancestors=ancestors,
+            reachable=reachable,
         )
 
     @staticmethod
-    def that_was_copied(descendants=True, ancestors=False, reachable=False):
+    def that_was_copied(descendants=True, ancestors=False, reachable=False):  # pragma: no cover
         return make_edge_query(
             edge_type="Copied To", descendants=descendants, ancestors=ancestors, reachable=reachable
         )
+
+    @staticmethod
+    def that_was_loaded(descendants=True, ancestors=False, reachable=False):  # pragma: no cover
+        return make_edge_query(
+            edge_type="Loaded", descendants=descendants, ancestors=ancestors, reachable=reachable
+        )
+
+    @staticmethod
+    def that_was_accessed(descendants=True, ancestors=False, reachable=False):  # pragma: no cover
+        return make_edge_query(
+            edge_type="Accessed", descendants=descendants, ancestors=ancestors, reachable=reachable
+        )
+
+    @staticmethod
+    def that_was_deleted(descendants=True, ancestors=False, reachable=False):  # pragma: no cover
+        return make_edge_query(
+            edge_type="Deleted", descendants=descendants, ancestors=ancestors, reachable=reachable
+        )
diff --git a/beagle/analyzers/queries/process.py b/beagle/analyzers/queries/process.py
index 0019fdff..2f475519 100644
--- a/beagle/analyzers/queries/process.py
+++ b/beagle/analyzers/queries/process.py
@@ -3,6 +3,7 @@
 from beagle.nodes import Process
 
 from . import FactoryMixin, make_edge_query
+from .base_query import PropsDict
 from .lookups import FieldLookup
 from .node import NodeByPropsReachable
 
@@ -64,6 +65,10 @@ def with_sha1_hash(
 
         return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}})
 
+    @staticmethod
+    def with_props(props: PropsDict) -> NodeByPropsReachable:  # pragma: no cover
+        return NodeByPropsReachable(node_type=Process, props=props)
+
     @staticmethod
     def that_was_launched(descendants=True, ancestors=False, reachable=False):
         return make_edge_query(

From 55404f38a791fbc88defb6bbcb8354d21c931202 Mon Sep 17 00:00:00 2001
From: yampelo <omeryampel@gmail.com>
Date: Mon, 18 Nov 2019 00:36:49 -0500
Subject: [PATCH 25/25] SummaryQuery: adds ability to summarize information
 gathered

---
 beagle/analyzers/base_analyzer.py       | 18 ++++++++++---
 beagle/analyzers/queries/edge.py        |  2 +-
 beagle/analyzers/queries/file.py        |  8 +++---
 beagle/analyzers/queries/process.py     |  2 +-
 beagle/analyzers/queries/summary.py     | 35 +++++++++++++++++++++++++
 beagle/datasources/base_datasource.py   | 22 ++++++++++++++++
 beagle/transformers/base_transformer.py | 22 ++++++++++++++++
 7 files changed, 100 insertions(+), 9 deletions(-)
 create mode 100644 beagle/analyzers/queries/summary.py

diff --git a/beagle/analyzers/base_analyzer.py b/beagle/analyzers/base_analyzer.py
index 4388b651..9433808a 100644
--- a/beagle/analyzers/base_analyzer.py
+++ b/beagle/analyzers/base_analyzer.py
@@ -1,9 +1,12 @@
-from typing import Type, cast, Any
+from typing import Any, Type, cast
 
 import networkx as nx
 
-from beagle.analyzers.queries.base_query import Query
 from beagle.backends import Backend, NetworkX
+from beagle.common import logger
+
+from .queries.base_query import Query
+from .queries.summary import SummaryQuery
 
 
 class Analyzer(object):
@@ -19,11 +22,13 @@ def __init__(self, name: str, query: Query, description: str = None, score: int
         self.query: Query = query
 
     def run(self, backend: Type[Backend]) -> Any:
+
         if isinstance(backend, NetworkX):
             backend = cast(NetworkX, backend)
             return self.run_networkx(backend.G)
 
     def run_networkx(self, G: nx.Graph) -> nx.Graph:
+        logger.info(f"Running analyzer {self.name}")
 
         # H is a copy of our original graph.
         H = G.copy()
@@ -32,9 +37,16 @@ def run_networkx(self, G: nx.Graph) -> nx.Graph:
 
         while current_query is not None:
             # Run the query.
-            H = current_query.execute_networkx(H)
+            if isinstance(current_query, SummaryQuery):
+                # SummaryQueries get the original graph.
+                H = current_query.execute_networkx(G.copy())
+            else:
+                H = current_query.execute_networkx(H)
 
             # Get the next query, and execute
             current_query = current_query.downstream_query
 
+        if len(H.nodes()) > 0:
+            logger.info(f"Analyzer query returned a matching subgraph.")
+
         return H
diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py
index e0ce9724..098b5f71 100644
--- a/beagle/analyzers/queries/edge.py
+++ b/beagle/analyzers/queries/edge.py
@@ -62,7 +62,7 @@ def execute_networkx(self, G: nx.Graph) -> nx.Graph:
                         subgraph_edges.append((u, v, k))
                         # can stop on first match
                         self.result_edges |= {(u, v, k)}
-                        self.result_nodes |= {u, v}
+                        self.result_nodes |= {v}
                         break
 
         return G.edge_subgraph(subgraph_edges)
diff --git a/beagle/analyzers/queries/file.py b/beagle/analyzers/queries/file.py
index d40f6c65..4d716cfe 100644
--- a/beagle/analyzers/queries/file.py
+++ b/beagle/analyzers/queries/file.py
@@ -58,25 +58,25 @@ def that_was_written(
         )
 
     @staticmethod
-    def that_was_copied(descendants=True, ancestors=False, reachable=False):  # pragma: no cover
+    def that_was_copied(descendants=False, ancestors=False, reachable=False):  # pragma: no cover
         return make_edge_query(
             edge_type="Copied To", descendants=descendants, ancestors=ancestors, reachable=reachable
         )
 
     @staticmethod
-    def that_was_loaded(descendants=True, ancestors=False, reachable=False):  # pragma: no cover
+    def that_was_loaded(descendants=False, ancestors=False, reachable=False):  # pragma: no cover
         return make_edge_query(
             edge_type="Loaded", descendants=descendants, ancestors=ancestors, reachable=reachable
         )
 
     @staticmethod
-    def that_was_accessed(descendants=True, ancestors=False, reachable=False):  # pragma: no cover
+    def that_was_accessed(descendants=False, ancestors=False, reachable=False):  # pragma: no cover
         return make_edge_query(
             edge_type="Accessed", descendants=descendants, ancestors=ancestors, reachable=reachable
         )
 
     @staticmethod
-    def that_was_deleted(descendants=True, ancestors=False, reachable=False):  # pragma: no cover
+    def that_was_deleted(descendants=False, ancestors=False, reachable=False):  # pragma: no cover
         return make_edge_query(
             edge_type="Deleted", descendants=descendants, ancestors=ancestors, reachable=reachable
         )
diff --git a/beagle/analyzers/queries/process.py b/beagle/analyzers/queries/process.py
index 2f475519..adc5af6f 100644
--- a/beagle/analyzers/queries/process.py
+++ b/beagle/analyzers/queries/process.py
@@ -70,7 +70,7 @@ def with_props(props: PropsDict) -> NodeByPropsReachable:  # pragma: no cover
         return NodeByPropsReachable(node_type=Process, props=props)
 
     @staticmethod
-    def that_was_launched(descendants=True, ancestors=False, reachable=False):
+    def that_was_launched(descendants=False, ancestors=False, reachable=False):
         return make_edge_query(
             edge_type="Launched", descendants=descendants, ancestors=ancestors, reachable=reachable
         )
diff --git a/beagle/analyzers/queries/summary.py b/beagle/analyzers/queries/summary.py
new file mode 100644
index 00000000..ae89cb15
--- /dev/null
+++ b/beagle/analyzers/queries/summary.py
@@ -0,0 +1,35 @@
+from typing import List, Set, Type
+
+import networkx as nx
+
+from beagle.analyzers.queries import Query
+from beagle.nodes import Node
+
+
+class SummaryQuery(Query):
+    # Nothing special, just a type for detecting when we reach a summary operator.
+    pass
+
+
+class CollectDetectedNodes(SummaryQuery):
+    def __init__(self, node_types: List[Type[Node]] = []):
+        self.node_types = tuple(node_types)
+        super().__init__()
+
+    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
+
+        all_resulting_nodes: Set[int] = set()
+
+        # Get the upstream nodes.
+        upstream_query = self.upstream_query
+        while upstream_query is not None:
+            all_resulting_nodes |= upstream_query.result_nodes
+            upstream_query = upstream_query.upstream_query
+
+        if self.node_types:
+            node_attrs = nx.get_node_attributes(G, "data")
+            all_resulting_nodes = filter(
+                lambda node: isinstance(node_attrs[node], self.node_types), all_resulting_nodes
+            )
+
+        return G.subgraph(all_resulting_nodes)
diff --git a/beagle/datasources/base_datasource.py b/beagle/datasources/base_datasource.py
index 83ca6ef3..94bd092d 100644
--- a/beagle/datasources/base_datasource.py
+++ b/beagle/datasources/base_datasource.py
@@ -112,6 +112,28 @@ def to_graph(self, *args, **kwargs) -> Any:
 
         return self.to_transformer(self.transformers[0]).to_graph(*args, **kwargs)  # type: ignore
 
+    def to_backend(self, graph=False, *args, **kwargs) -> Any:
+        """Allows hopping immediately from a datasource to a backend.
+
+        Supports parameters for the to_backend() function of the transformer.
+
+        see :py:meth:`beagle.transformers.base_transformer.Transformer.to_backend`
+
+        Examples
+        --------
+        >>> SysmonEVTX('data/sysmon/autoruns-sysmon.evtx').to_backend(backend=Graphistry, render=True)
+        <Graphistry object>
+
+        Returns
+        -------
+        Any
+            Returns a backend, prior to being graphed.
+        """
+
+        return self.to_transformer(self.transformers[0]).to_backend(
+            graph=graph, *args, **kwargs
+        )  # type: ignore
+
     def _convert_to_parent_fields(self, process: dict) -> dict:
         """Converts a process to represent a child process.
 
diff --git a/beagle/transformers/base_transformer.py b/beagle/transformers/base_transformer.py
index f3fe7dce..efb92af1 100644
--- a/beagle/transformers/base_transformer.py
+++ b/beagle/transformers/base_transformer.py
@@ -60,6 +60,28 @@ def to_graph(self, backend: "Backend" = NetworkX, *args, **kwargs) -> Any:
         backend = backend(nodes=nodes, metadata=self.datasource.metadata(), *args, **kwargs)
         return backend.graph()
 
+    def to_backend(self, backend: "Backend" = NetworkX, graph=False, *args, **kwargs) -> Any:
+        """Builds a backend from the nodes created by :py:meth:`run`. If no backend is specified,
+        the default used is NetworkX. The backend is only graphed when ``graph`` is truthy.
+
+        Parameters
+        ----------
+        backend : Backend, optional
+            The backend class to instantiate with the generated nodes (the default is NetworkX).
+
+        Returns
+        -------
+        Backend
+            The instantiated backend; its ``graph()`` method is called first when ``graph`` is truthy.
+        """
+
+        nodes = self.run()
+
+        backend = backend(nodes=nodes, metadata=self.datasource.metadata(), *args, **kwargs)
+        if graph:
+            backend.graph()
+        return backend
+
     def run(self) -> List[Node]:
         """Generates the list of nodes from the datasource.