# beagle/analyzers/base_analyzer.py
from typing import Any, Optional, cast

import networkx as nx

from beagle.backends import Backend, NetworkX
from beagle.common import logger

from .queries.base_query import Query, PropsDict
from .queries.edge import (
    EdgeByProps,
    EdgeByPropsAncestors,
    EdgeByPropsDescendants,
    EdgeByPropsReachable,
)
from .queries.summary import SummaryQuery


class Analyzer(object):
    """A named (and optionally scored/described) wrapper around a query chain."""

    def __init__(
        self,
        name: str,
        query: Query,
        description: Optional[str] = None,
        score: Optional[int] = None,
    ):
        self.name = name
        self.description = description
        self.score = score

        # Rewind to the head of the chain so execution always starts from the
        # first query, whichever link of the chain the caller handed us.
        while query.upstream_query is not None:
            query = query.upstream_query

        self.query: Query = query

    def run(self, backend: Backend) -> Any:
        """Execute the query chain against a backend *instance*.

        Raises
        ------
        NotImplementedError
            For backend types with no analyzer support (the original
            silently returned ``None`` here).
        """
        if isinstance(backend, NetworkX):
            return self.run_networkx(cast(NetworkX, backend).G)

        raise NotImplementedError(
            f"Analyzer not supported for backend {type(backend).__name__}"
        )

    def run_networkx(self, G: nx.Graph) -> nx.Graph:
        """Run every query in the chain, returning the final subgraph."""
        logger.info(f"Running analyzer {self.name}")

        # H is a working copy of the original graph; queries narrow it down.
        H = G.copy()

        current_query: Optional[Query] = self.query

        while current_query is not None:
            if isinstance(current_query, SummaryQuery):
                # Summary queries operate on (a copy of) the *original*
                # graph, not the intermediate result.
                H = current_query.execute_networkx(G.copy())
            else:
                H = current_query.execute_networkx(H)

            # Advance down the chain.
            current_query = current_query.downstream_query

        if len(H.nodes()) > 0:
            logger.info("Analyzer query returned a matching subgraph.")

        return H


# beagle/analyzers/queries/__init__.py
# FIX: the original used `from networkx import nx`, which is not a valid
# spelling of the conventional alias import (`nx` is the alias, not an
# attribute of the package).


def make_edge_query(
    edge_type: str,
    descendants: bool = True,
    ancestors: bool = False,
    reachable: bool = False,
    edge_props: PropsDict = {},
) -> Query:
    """Build the EdgeByProps* query matching the requested expansion.

    Precedence: `reachable` wins over `descendants`/`ancestors`, and
    `descendants` (default True) wins over `ancestors`.
    """
    # The original condition `reachable or (descendants and reachable)`
    # reduces to just `reachable`.
    if reachable:
        return EdgeByPropsReachable(edge_type=edge_type, edge_props=edge_props)
    elif descendants:
        return EdgeByPropsDescendants(edge_type=edge_type, edge_props=edge_props)
    elif ancestors:
        return EdgeByPropsAncestors(edge_type=edge_type, edge_props=edge_props)
    else:
        return EdgeByProps(edge_type=edge_type, edge_props=edge_props)


class FactoryMixin(object):
    """Mixin to prevent query factories from being executed directly."""

    def execute_networkx(self, G: nx.Graph):  # was annotated `nx.graph` (lowercase)
        raise UserWarning("Query factories cannot be called directly")
+ """ + + def execute_networkx(self, G: nx.graph): + raise UserWarning("Query factories cannot be called directly") diff --git a/beagle/analyzers/queries/base_query.py b/beagle/analyzers/queries/base_query.py new file mode 100644 index 00000000..927b9571 --- /dev/null +++ b/beagle/analyzers/queries/base_query.py @@ -0,0 +1,203 @@ +from typing import Any, Dict, List, Set, Tuple, Union + +import networkx as nx + +from beagle.nodes import Node + +from .lookups import Exact, FieldLookup + + +PropsDict = Dict[str, Union[str, FieldLookup, Dict, None]] + + +def _str_to_exact(props: dict) -> Dict[str, Union[FieldLookup, Dict]]: + # Ensures strings become Exact, Works on nested dicts + for k, v in props.items(): + if isinstance(v, str): + props[k] = Exact(v) + elif isinstance(v, dict): + props[k] = _str_to_exact(v) + elif v is None: + del props[k] + + return props + + +class Query(object): + def __init__(self): + """A query takes as input a graph, executes, and returns the next graph. + + >>> G2 = query.execute_networkx(G) + + Attributes + ---------- + result_nodes: Set[int]: + The set of node IDs which create the subgraph returned by the query. + result_edges: Set[Tuple[int, int, int]]: + The set of (u, v, k) tuples representing the edges which created the subgraph. + """ + # The resulting node IDs + self.result_nodes: Set[int] = set() + + # The resulting edge IDs + self.result_edges: Set[Tuple[int, int, int]] = set() + + # Set of queries that came before or after it. 
+ self.downstream_query: Query = None + self.upstream_query: Query = None + + self.upstream_nodes: Set[int] = set() + self.upstream_edges: Set[Tuple[int, int, int]] = set() + + def get_upstream_results(self) -> Tuple[Set[int], Set[Tuple[int, int, int]]]: + return self.upstream_query.result_nodes, self.upstream_query.result_edges + + def set_upstream_nodes(self): # pragma: no cover + self.upstream_nodes |= self.upstream_query.result_nodes + self.upstream_edges |= self.upstream_query.result_edges + + def _test_values_with_lookups( + self, + value_to_test: Union[Node, Dict[str, Any]], + lookup_tests: Dict[str, Union[FieldLookup, Dict]], + ) -> bool: + """Tests a node or dictionary against a configuration of lookup_tests. + + Parameters + ---------- + value_to_test : Union[Node, Dict[str, Any]] + The node or dict to test. + lookup_tests : Dict[str, FieldLookup] + The set of lookup_tests to test. + + Returns + ------- + bool + Did all of the tests pass? + """ + + # Auto pass if no tests.s + if not lookup_tests: + return True + + # Auto fail on empty value (given we have tests) + if not value_to_test: + return False + + results: List[bool] = [] + + for attr_name, lookup in lookup_tests.items(): + if isinstance(lookup, dict): + # recursivly check props against nested entrys (e.g is hashes dict in Process) + if isinstance(value_to_test, Node): # pragma: no cover + results.append( + self._test_values_with_lookups( + value_to_test=getattr(value_to_test, attr_name), lookup_tests=lookup + ) + ) + else: + results.append( + self._test_values_with_lookups( + value_to_test=value_to_test.get(attr_name, {}), lookup_tests=lookup + ) + ) + else: + if isinstance(value_to_test, Node): + results.append(lookup.test(getattr(value_to_test, attr_name))) + else: + results.append(lookup.test(value_to_test.get(attr_name))) + + return any(results) + + def execute_networkx(self, G: nx.Graph): # pragma: no cover + """Execute a query against a `networkx` graph.""" + raise 
NotImplementedError(f"NetworkX not supported for {self.__class__.__name__}") + + def __rshift__(self, other: "Query") -> "Query": + """Implements Self >> Other == self.downstream_query = other + + Parameters + ---------- + other : Query + The other query to add. + """ + self.downstream_query = other + other.upstream_query = self + return other + + def __lshift__(self, other: "Query") -> "Query": + """Implements Self << Other == self.upstream_query = other + + Parameters + ---------- + other : Query + The other query to add. + """ + other.downstream_query = self + self.upstream_query = other + return other + + def __or__(self, other: "Query") -> "ChainedQuery": + """Allows queries to be combined through the `|` operator. + The result of execution is the union of both subqueries. + + >>> query1 = Query(...) + >>> query2 = Query(...) + >>> chained = query1 | query2 + + + Parameters + ---------- + other: Query + The query to chain with. + + Returns + ------- + ChainedQuery + A chained query compromised of all three. + """ + return ChainedQuery(self, other) + + +class ChainedQuery(Query): + def __init__(self, *args: Query): + """Executes multiple Querys, combining their outputs. + + Parameters + ---------- + args: Query + One ore more queries + """ + self.queries = args + super().__init__() + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + """Executes multiple queries against a `nx.Graph` object, combining their outputs into one subgraph. + + Parameters + ---------- + G : nx.Graph + Graph to execute queries against + + Returns + ------- + nx.Graph + Graph composed from the output graphs of the executed queries. + """ + # Get the subgraphs + + subgraphs = [] + for query in self.queries: + # Get the subgraphs + subgraphs.append(query.execute_networkx(G)) + + # add the reuslt_nodes, result_edges. 
+ self.result_edges |= query.result_edges + self.result_nodes |= query.result_nodes + + # Compose the subgraphs + H = subgraphs[0] + for subgraph in subgraphs[1:]: + H = nx.compose(H, subgraph) + + return H diff --git a/beagle/analyzers/queries/edge.py b/beagle/analyzers/queries/edge.py new file mode 100644 index 00000000..098b5f71 --- /dev/null +++ b/beagle/analyzers/queries/edge.py @@ -0,0 +1,122 @@ +from typing import Dict, Union, Set + +import networkx as nx + +from .base_query import Query, _str_to_exact, PropsDict +from .lookups import FieldLookup + + +class EdgeByProps(Query): + def __init__(self, edge_type: str, props: PropsDict = {}, *args, **kwargs): + """Searches the graph for an edge of type `edge_type` with properties matching `props` + + Parameters + ---------- + edge_type : str + The type of edge to look for. e.g. Wrote + props : Dict[str, Union[str, FieldLookup]] + The set of props to filter the resulting edges by. Any string is transformed to `Exact` lookups. + + Examples + ---------- + Filter for TCP edges, with contents that match ".pdf" + >>> EdgeByProps(edge_type="TCP", props={"payload": Contains(".pdf")}) + + """ + self.edge_type = edge_type + + self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props) + + super().__init__() + + def execute_networkx(self, G: nx.Graph) -> nx.Graph: + """Searches a `nx.Graph` object for edges that match type `edge_type` and contains + props matching `props`. This is O(E). + + Returns a subgraph with all nodes contained in match edges + """ + subgraph_edges = [] + + if self.upstream_query is not None: + upstream_nodes, _ = self.get_upstream_results() + edges = G.edges(upstream_nodes, data=True, keys=True) + else: + edges = G.edges(data=True, keys=True) + + # For each edge + for u, v, k, e_data in edges: + + # pull out the data field from NX + data = e_data["data"] # edge data + e_type = e_data["edge_name"] # edge type + + # If edge matches the desired instance. 
# beagle/analyzers/queries/edge.py
from typing import Dict, Optional, Set, Union

import networkx as nx

from .base_query import Query, _str_to_exact, PropsDict
from .lookups import FieldLookup


class EdgeByProps(Query):
    def __init__(
        self,
        edge_type: str,
        props: Optional[PropsDict] = None,
        *args,
        edge_props: Optional[PropsDict] = None,
        **kwargs,
    ):
        """Searches the graph for an edge of type `edge_type` with properties
        matching `props`.

        Parameters
        ----------
        edge_type : str
            The type of edge to look for, e.g. "Wrote".
        props : PropsDict
            The props to filter the resulting edges by. Strings are
            transformed into `Exact` lookups.
        edge_props : PropsDict
            Alias for `props`. FIX: `make_edge_query` passes the filters as
            `edge_props=`, which the original silently swallowed via
            **kwargs — leaving `props` empty so *every* edge of the type
            matched.

        Examples
        --------
        Filter for TCP edges, with contents that match ".pdf":
        >>> EdgeByProps(edge_type="TCP", props={"payload": Contains(".pdf")})
        """
        self.edge_type = edge_type

        if props is None:
            props = edge_props if edge_props is not None else {}

        self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)

        super().__init__()

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
        """Searches a `nx.Graph` object for edges that match type `edge_type`
        and contain props matching `props`. This is O(E).

        Returns a subgraph with all nodes contained in matched edges.
        """
        subgraph_edges = []

        # If chained, only consider edges leaving the upstream result nodes.
        if self.upstream_query is not None:
            upstream_nodes, _ = self.get_upstream_results()
            edges = G.edges(upstream_nodes, data=True, keys=True)
        else:
            edges = G.edges(data=True, keys=True)

        for u, v, k, e_data in edges:

            data = e_data["data"]  # edge payload(s) as stored by the backend
            e_type = e_data["edge_name"]  # edge type label

            if e_type == self.edge_type:

                # A consolidated edge stores a list of instances; normalize.
                if not isinstance(data, list):
                    data = [data]

                for entry in data:
                    if self._test_values_with_lookups(entry, self.props):
                        subgraph_edges.append((u, v, k))
                        self.result_edges |= {(u, v, k)}
                        # NOTE: only the target node is recorded in
                        # result_nodes (as in the original); the subgraph
                        # still contains `u` via the edge itself.
                        self.result_nodes |= {v}
                        # Stop on the first matching instance of this edge.
                        break

        return G.edge_subgraph(subgraph_edges)


class EdgeByPropsDescendants(EdgeByProps):
    """Perform a `EdgeByProps` query, expanding the descendants of the found edges."""

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
        next_graph = super().execute_networkx(G)

        # Start from the nodes matched by EdgeByProps.
        subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()}

        # Expand every matched edge target with its descendants.
        for _, v, _ in self.result_edges:
            subgraph_nodes |= nx.descendants(G, v) | {v}

        self.result_nodes |= subgraph_nodes

        return G.subgraph(subgraph_nodes)


class EdgeByPropsAncestors(EdgeByProps):
    """Perform a `EdgeByProps` query, expanding the ancestors of the found edges."""

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
        next_graph = super().execute_networkx(G)

        subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()}

        # Expand every matched edge target with its ancestors.
        for _, v, _ in self.result_edges:
            subgraph_nodes |= nx.ancestors(G, v) | {v}

        self.result_nodes |= subgraph_nodes

        return G.subgraph(subgraph_nodes)


class EdgeByPropsReachable(EdgeByProps):
    """Perform a `EdgeByProps` query, including all reachable nodes."""

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
        next_graph = super().execute_networkx(G)

        subgraph_nodes: Set[int] = {node_id for node_id in next_graph.nodes()}

        # Expand with both ancestors and descendants of every matched target.
        for _, v, _ in self.result_edges:
            subgraph_nodes |= nx.ancestors(G, v) | nx.descendants(G, v) | {v}

        self.result_nodes |= subgraph_nodes

        return G.subgraph(subgraph_nodes)
# beagle/analyzers/queries/lookups.py
import functools
import re
from abc import ABCMeta, abstractmethod
from typing import Pattern, Union


def not_null(f):
    """Decorator: short-circuit a lookup's `test` to False when the tested
    property is None (so lookups never have to None-check themselves)."""

    @functools.wraps(f)
    def wrapper(self, prop, *args, **kwargs):
        if prop is None:
            return False
        return f(self, prop, *args, **kwargs)

    return wrapper


class FieldLookup(object, metaclass=ABCMeta):  # pragma: no cover
    """Base class for property tests. Subclasses implement `test`; instances
    compose with `&` (And), `|` (Or) and `~` (Not)."""

    def __init__(self, value):
        # The reference value the lookup compares candidate props against.
        self.value = value

    @abstractmethod
    def test(self, prop) -> bool:
        pass

    def __and__(self, other) -> "And":
        """Combines two FieldLookups to work as a logical AND.

        >>> Contains("test.exe") & Contains("fest.exe")
        And(Contains("test.exe"), Contains("fest.exe"))

        Returns
        -------
        And
            And FieldLookup object.
        """
        return And(self, other)

    def __or__(self, other) -> "Or":
        """Combines two FieldLookup objects to work as a logical OR.

        >>> Contains("test.exe") | Contains("fest.exe")
        Or(Contains("test.exe"), Contains("fest.exe"))

        Returns
        -------
        Or
            Or FieldLookup object.
        """
        return Or(self, other)

    def __invert__(self) -> "Not":
        """Negates a field lookup.

        >>> ~Contains("test.exe")
        Not(Contains("test.exe"))

        Returns
        -------
        Not
            Not FieldLookup object.
        """
        return Not(self)

    def __eq__(self, other):
        # Equal iff same lookup class and same reference value.
        return (type(self) == type(other)) and (self.value == other.value)


class Or(FieldLookup):
    """Boolean OR, meant to be used with other lookups:
    >>> Or(Contains("foo"), StartsWith("bar"))
    """

    def __init__(self, *args: FieldLookup):
        self.lookups = args

    def test(self, prop: str):
        for lookup in self.lookups:
            if lookup.test(prop):
                return True

        return False


class And(FieldLookup):
    """Boolean AND, meant to be used with other lookups:
    >>> And(Contains("foo"), StartsWith("bar"), EndsWith("zar"))
    """

    def __init__(self, *args: FieldLookup):
        self.lookups = args

    def test(self, prop: str):
        for lookup in self.lookups:
            if not lookup.test(prop):
                return False

        return True


class Not(FieldLookup):
    """Boolean NOT, negates a single lookup:
    >>> Not(Contains("foo"))
    """

    def __init__(self, arg: FieldLookup):
        self.lookup = arg

    def test(self, prop: str):
        return not self.lookup.test(prop)


class Contains(FieldLookup):
    """Case sensitive contains."""

    @not_null
    def test(self, prop: str):
        return self.value in prop


class IContains(FieldLookup):
    """Case insensitive contains."""

    @not_null
    def test(self, prop: str):
        return str(self.value).lower() in str(prop).lower()


class Exact(FieldLookup):
    """Exact match."""

    @not_null
    def test(self, prop: str):
        return self.value == prop


class IExact(FieldLookup):
    """Case insensitive exact match."""

    @not_null
    def test(self, prop: str):
        return str(self.value).lower() == str(prop).lower()


class StartsWith(FieldLookup):
    """Property begins with the value."""

    @not_null
    def test(self, prop: str):
        return prop.startswith(self.value)


class EndsWith(FieldLookup):
    """Property ends with the value."""

    @not_null
    def test(self, prop: str):
        return prop.endswith(self.value)


class Regex(FieldLookup):
    """Regex match (searched anywhere in the property)."""

    def __init__(self, value: Union[str, Pattern]):
        # Compile once up front; accept pre-compiled patterns as-is.
        if isinstance(value, str):
            self.value: Pattern = re.compile(value)
        else:
            self.value = value

    @not_null
    def test(self, prop: str):
        return self.value.search(prop) is not None
# beagle/analyzers/queries/node.py
from typing import Dict, Set, Type, Union

import networkx as nx

from beagle.nodes import Node

from .base_query import Query, _str_to_exact, PropsDict
from .lookups import FieldLookup


class NodeByProps(Query):
    def __init__(self, node_type: Type[Node], props: PropsDict = {}):
        """Searches the graph for a node of type `node_type` with properties
        matching `props`.

        Parameters
        ----------
        node_type : Type[Node]
            The type of node to look for, e.g. Process.
        props : PropsDict
            The props to filter the resulting nodes by. Strings are
            transformed into `Exact` lookups.

        Examples
        --------
        Filter for Process nodes with command lines containing `test.exe`:
        >>> NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")})

        Props may also be a nested dict:
        >>> NodeByProps(node_type=Process, props={"hashes": {"md5": Contains("test.exe")}})
        """
        self.node_type = node_type

        self.props: Dict[str, Union[FieldLookup, Dict]] = _str_to_exact(props)

        super().__init__()

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:
        """Searches a `nx.Graph` object for nodes that match type `node_type`
        and contain props matching `props`. This is O(V)."""
        subgraph_nodes = []

        for node_id, data in G.nodes(data=True):
            node = data["data"]

            # Type check first, then run the prop lookups against the node.
            if isinstance(node, self.node_type):
                if self._test_values_with_lookups(node, self.props):
                    subgraph_nodes.append(node_id)
                    self.result_nodes |= {node_id}

        return G.subgraph(subgraph_nodes)


class NodeByPropsDescendents(NodeByProps):
    """Executes a `NodeByProps` query, and returns all descendants of the
    matching nodes. See :py:meth:`NodeByProps`.

    NOTE: the misspelled class name ("Descendents") is kept intentionally —
    it is part of the public API.
    """

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:

        next_graph = super().execute_networkx(G)

        subgraph_nodes: Set[int] = set()

        # For every node that matched `NodeByProps`, pull in its
        # descendants from the *original* graph.
        for node_id in next_graph.nodes():
            subgraph_nodes |= nx.descendants(G, node_id) | {node_id}

            self.result_nodes |= {node_id}

        return G.subgraph(subgraph_nodes)


class NodeByPropsAncestors(NodeByProps):
    """Executes a `NodeByProps` query, and returns all ancestors of the
    matching nodes. See :py:meth:`NodeByProps`."""

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:

        next_graph = super().execute_networkx(G)

        subgraph_nodes: Set[int] = set()

        # For every node that matched `NodeByProps`, pull in its ancestors
        # from the *original* graph.
        for node_id in next_graph.nodes():
            subgraph_nodes |= nx.ancestors(G, node_id) | {node_id}
            self.result_nodes |= {node_id}

        return G.subgraph(subgraph_nodes)


class NodeByPropsReachable(NodeByProps):
    """Executes a `NodeByProps` query, and returns all ancestors and
    descendants of the matching nodes. See :py:meth:`NodeByProps`."""

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:

        next_graph = super().execute_networkx(G)

        subgraph_nodes: Set[int] = set()

        # For every matched node, pull in everything reachable in either
        # direction.
        for node_id in next_graph.nodes():
            subgraph_nodes |= nx.ancestors(G, node_id) | nx.descendants(G, node_id) | {node_id}
            self.result_nodes |= {node_id}

        return G.subgraph(subgraph_nodes)
# beagle/analyzers/queries/process.py
from typing import Union

from beagle.nodes import Process

from . import FactoryMixin, make_edge_query
from .base_query import PropsDict
from .lookups import FieldLookup
from .node import NodeByPropsReachable


class FindProcess(FactoryMixin):
    """Factory of queries relevant to a Process node. Each `with_*` method
    builds a `NodeByPropsReachable` filtering on a single Process attribute;
    string arguments become `Exact` lookups."""

    @staticmethod
    def with_command_line(
        command_line: Union[str, FieldLookup]
    ) -> NodeByPropsReachable:  # pragma: no cover
        """Match processes by their `command_line` attribute."""
        return NodeByPropsReachable(node_type=Process, props={"command_line": command_line})

    @staticmethod
    def with_process_name(
        process_image: Union[str, FieldLookup]
    ) -> NodeByPropsReachable:  # pragma: no cover
        """Match processes by their `process_image` (binary name) attribute."""
        return NodeByPropsReachable(node_type=Process, props={"process_image": process_image})

    @staticmethod
    def with_process_path(
        process_path: Union[str, FieldLookup]
    ) -> NodeByPropsReachable:  # pragma: no cover
        """Match processes by their `process_path` attribute."""
        return NodeByPropsReachable(node_type=Process, props={"process_path": process_path})

    @staticmethod
    def with_process_image_path(
        process_image_path: Union[str, FieldLookup]
    ) -> NodeByPropsReachable:  # pragma: no cover
        """Match processes by their `process_image_path` (directory) attribute."""
        return NodeByPropsReachable(
            node_type=Process, props={"process_image_path": process_image_path}
        )

    @staticmethod
    def with_user(user: Union[str, FieldLookup]) -> NodeByPropsReachable:
        """Match processes by the `user` that ran them."""
        return NodeByPropsReachable(node_type=Process, props={"user": user})

    @staticmethod
    def with_md5_hash(md5hash: Union[str, FieldLookup]) -> NodeByPropsReachable:  # pragma: no cover
        """Match processes by the `md5` entry of their nested `hashes` dict."""
        return NodeByPropsReachable(node_type=Process, props={"hashes": {"md5": md5hash}})

    @staticmethod
    def with_sha256_hash(
        sha256hash: Union[str, FieldLookup]
    ) -> NodeByPropsReachable:  # pragma: no cover
        """Match processes by the `sha256` entry of their nested `hashes` dict."""
        return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha256": sha256hash}})

    @staticmethod
    def with_sha1_hash(
        sha1hash: Union[str, FieldLookup]
    ) -> NodeByPropsReachable:  # pragma: no cover
        """Match processes by the `sha1` entry of their nested `hashes` dict."""
        return NodeByPropsReachable(node_type=Process, props={"hashes": {"sha1": sha1hash}})

    @staticmethod
    def with_props(props: PropsDict) -> NodeByPropsReachable:  # pragma: no cover
        """Match processes by an arbitrary props dict."""
        return NodeByPropsReachable(node_type=Process, props=props)

    @staticmethod
    def that_was_launched(descendants=False, ancestors=False, reachable=False):
        """Build an edge query over "Launched" edges, with the requested
        expansion (see `make_edge_query` for precedence)."""
        return make_edge_query(
            edge_type="Launched", descendants=descendants, ancestors=ancestors, reachable=reachable
        )
# beagle/analyzers/queries/summary.py
from typing import List, Set, Type

import networkx as nx

from beagle.nodes import Node

# FIX: import from the sibling module rather than the package
# (`beagle.analyzers.queries`) — importing the package from inside one of
# its own modules risks a circular import.
from .base_query import Query


class SummaryQuery(Query):
    """Marker type: `Analyzer.run_networkx` hands summary queries a copy of
    the *original* graph instead of the intermediate result."""

    pass


class CollectDetectedNodes(SummaryQuery):
    """Collects every node recorded by all upstream queries, optionally
    restricted to the given node types."""

    def __init__(self, node_types: List[Type[Node]] = []):
        # Stored as a tuple so it can be passed directly to isinstance().
        self.node_types = tuple(node_types)
        super().__init__()

    def execute_networkx(self, G: nx.Graph) -> nx.Graph:

        all_resulting_nodes: Set[int] = set()

        # Walk the entire upstream chain, unioning every query's results.
        upstream_query = self.upstream_query
        while upstream_query is not None:
            all_resulting_nodes |= upstream_query.result_nodes
            upstream_query = upstream_query.upstream_query

        if self.node_types:
            node_attrs = nx.get_node_attributes(G, "data")
            # FIX: the original rebound the set to a lazy `filter` object,
            # contradicting the Set[int] annotation; build a real set.
            all_resulting_nodes = {
                node_id
                for node_id in all_resulting_nodes
                if isinstance(node_attrs[node_id], self.node_types)
            }

        return G.subgraph(all_resulting_nodes)
+ """ + + return self.to_transformer(self.transformers[0]).to_backend( + graph=graph, *args, **kwargs + ) # type: ignore + def _convert_to_parent_fields(self, process: dict) -> dict: """Converts a process to represent a child process. diff --git a/beagle/nodes/file.py b/beagle/nodes/file.py index a8fbcb76..4c795c75 100644 --- a/beagle/nodes/file.py +++ b/beagle/nodes/file.py @@ -5,7 +5,7 @@ from beagle.edges import FileOf, CopiedTo # mypy type hinting -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from beagle.nodes import Process # noqa: F401 diff --git a/beagle/transformers/base_transformer.py b/beagle/transformers/base_transformer.py index f3fe7dce..efb92af1 100644 --- a/beagle/transformers/base_transformer.py +++ b/beagle/transformers/base_transformer.py @@ -60,6 +60,28 @@ def to_graph(self, backend: "Backend" = NetworkX, *args, **kwargs) -> Any: backend = backend(nodes=nodes, metadata=self.datasource.metadata(), *args, **kwargs) return backend.graph() + def to_backend(self, backend: "Backend" = NetworkX, graph=False, *args, **kwargs) -> Any: + """Graphs the nodes created by :py:meth:`run`. If no backend is specific, + the default used is NetworkX. + + Parameters + ---------- + backend : [type], optional + [description] (the default is NetworkX, which [default_description]) + + Returns + ------- + [type] + [description] + """ + + nodes = self.run() + + backend = backend(nodes=nodes, metadata=self.datasource.metadata(), *args, **kwargs) + if graph: + backend.graph() + return backend + def run(self) -> List[Node]: """Generates the list of nodes from the datasource. 
# tests/analyzers/conftest.py
from typing import List

import networkx as nx
import pytest

from beagle.backends.networkx import NetworkX
from beagle.nodes import File, Node, Process


@pytest.fixture
def graph_nodes_match():
    """Helper fixture: assert a graph contains exactly the expected nodes."""

    def validate_nodes_match(graph: nx.Graph, nodes: List[Node]) -> bool:

        node_objs = [n["data"] for _, n in graph.nodes(data=True)]

        length_match = len(graph.nodes()) == len(nodes)

        node_match = all([n in node_objs for n in nodes])

        if length_match and node_match:
            return True

        else:
            print(f"Expected {nodes} got {node_objs}")
            return False

    return validate_nodes_match


@pytest.fixture
def G1():
    # A basic graph with two nodes and an edge.
    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
    other_proc = Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")

    proc.launched[other_proc].append(timestamp=1)

    backend = NetworkX(consolidate_edges=True, nodes=[proc, other_proc])

    return backend.graph()


@pytest.fixture
def G2():
    # A process writing a file.
    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
    f = File(file_name="foo", file_path="bar")

    proc.wrote[f].append(contents="foo")

    backend = NetworkX(consolidate_edges=True, nodes=[proc, f])

    return backend.graph()


@pytest.fixture
def G3():
    # Same as G2 but *without* edge consolidation (two Wrote instances).
    proc = Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")
    f = File(file_name="foo", file_path="bar")

    proc.wrote[f].append(contents="foo")
    proc.wrote[f].append(contents="bar")

    backend = NetworkX(consolidate_edges=False, nodes=[proc, f])

    return backend.graph()


@pytest.fixture
def G4():
    # A graph with a four process tree:
    # A -> B -> C -> D
    A = Process(process_id=10, process_image="A", command_line="A")
    B = Process(process_id=12, process_image="B", command_line="B")
    C = Process(process_id=12, process_image="C", command_line="C")
    D = Process(process_id=12, process_image="D", command_line="D")

    A.launched[B]
    B.launched[C]
    C.launched[D]

    # FIX: the original passed nodes=[A, B, B, C] — B duplicated and D
    # missing, contradicting the A -> B -> C -> D tree described above.
    backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D])

    return backend.graph()


@pytest.fixture
def G5():
    # A graph with two *disconnected* four process trees:
    # A -> B -> C -> D
    # E -> F -> G -> H
    A = Process(process_id=10, process_image="A", command_line="A")
    B = Process(process_id=12, process_image="B", command_line="B")
    C = Process(process_id=12, process_image="C", command_line="C")
    D = Process(process_id=12, process_image="D", command_line="D")

    E = Process(process_id=10, process_image="E", command_line="E")
    F = Process(process_id=12, process_image="F", command_line="F")
    G = Process(process_id=12, process_image="G", command_line="G")
    H = Process(process_id=12, process_image="H", command_line="H")

    A.launched[B]
    B.launched[C]
    C.launched[D]

    E.launched[F]
    F.launched[G]
    G.launched[H]

    backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D, E, F, G, H])

    return backend.graph()


@pytest.fixture
def G6():
    # Two parent/child pairs with matching image names but different users.
    parent = Process(
        process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer"
    )
    child = Process(
        process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer"
    )

    parent2 = Process(
        process_id=4, process_image_path="c:\\", process_image="parent.exe", user="admin"
    )
    child2 = Process(
        process_id=3, process_image_path="c:\\users", process_image="child.exe", user="admin"
    )

    parent.launched[child].append(timestamp=12456)

    parent2.launched[child2].append(timestamp=2)

    backend = NetworkX(consolidate_edges=True, nodes=[parent, parent2, child, child2])

    return backend.graph()


@pytest.fixture
def G7():
    # A graph that's a tree of process launches:
    #        A
    #       / \
    #      B   C
    #     / \ / \
    #    D  E F  G
    A = Process(process_id=10, process_image="A", command_line="A")
    B = Process(process_id=12, process_image="B", command_line="B")
    C = Process(process_id=12, process_image="C", command_line="C")
    D = Process(process_id=12, process_image="D", command_line="D")
    E = Process(process_id=10, process_image="E", command_line="E")
    F = Process(process_id=12, process_image="F", command_line="F")
    G = Process(process_id=12, process_image="G", command_line="G")

    A.launched[B]
    A.launched[C]

    B.launched[D]
    B.launched[E]

    C.launched[F]
    C.launched[G]

    backend = NetworkX(consolidate_edges=True, nodes=[A, B, C, D, E, F, G])

    return backend.graph()


@pytest.fixture
def G8():
    # A launches B, B writes to F1.
    A = Process(process_id=10, process_image="A", command_line="A")

    B = Process(process_id=12, process_image="B", command_line="B")
    F1 = File(file_name="bar", file_path="bar")

    A.launched[B]
    B.wrote[F1].append(contents="bar")

    backend = NetworkX(consolidate_edges=True, nodes=[A, B, F1])

    return backend.graph()
+@pytest.mark.parametrize( + "props,expected", + [ + ({"process_image": "A"}, {"process_image": Exact("A")}), + ({"hashes": {"md5": "A"}}, {"hashes": {"md5": Exact("A")}}), + ( + {"hashes": {"md5": "A", "baz": {"foo": "bar"}}}, + {"hashes": {"md5": Exact("A"), "baz": {"foo": Exact("bar")}}}, + ), + ], +) +def test_str_to_exact(props, expected): + assert _str_to_exact(props) == expected + + +def test_chained_query(G5, graph_nodes_match): + # Both paths should show up because we use a chained query that returns both. + + Bquery = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) + Gquery = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) + + chained = Bquery | Gquery + + assert graph_nodes_match( + chained.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + Process(process_id=12, process_image="H", command_line="H"), + ], + ) + + +def test_multiple_chained_query(G5, graph_nodes_match): + # Should properly execute all three. 
+
+    Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+    Aquery = NodeByProps(node_type=Process, props={"process_image": Exact("A")})
+
+    chained = Bquery | Gquery | Aquery
+
+    assert graph_nodes_match(
+        chained.execute_networkx(G5),
+        [
+            Process(process_id=12, process_image="B", command_line="B"),
+            Process(process_id=12, process_image="G", command_line="G"),
+            Process(process_id=10, process_image="A", command_line="A"),
+        ],
+    )
+
+
+def test_shift_operators():
+    Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+
+    Bquery >> Gquery
+
+    assert Bquery.downstream_query == Gquery
+
+    Bquery = NodeByProps(node_type=Process, props={"process_image": Exact("B")})
+    Gquery = NodeByProps(node_type=Process, props={"process_image": Exact("G")})
+
+    Bquery << Gquery
+
+    assert Gquery.downstream_query == Bquery
diff --git a/tests/analyzers/statements/test_edge.py b/tests/analyzers/statements/test_edge.py
new file mode 100644
index 00000000..5c2d1932
--- /dev/null
+++ b/tests/analyzers/statements/test_edge.py
@@ -0,0 +1,53 @@
+from beagle.analyzers.queries.edge import EdgeByProps
+from beagle.analyzers.queries.lookups import Exact
+from beagle.analyzers.queries.process import FindProcess
+from beagle.nodes import File, Process
+
+
+def test_one_edge_prop_test(G2, G3, graph_nodes_match):
+
+    # String should get mapped to Exact("foo")
+    query = EdgeByProps(edge_type="Wrote", props={"contents": "foo"})
+
+    assert graph_nodes_match(
+        query.execute_networkx(G2),
+        [
+            Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"),
+            File(file_name="foo", file_path="bar"),
+        ],
+    )
+
+    # Should work on the non-consolidating graph too.
+ assert graph_nodes_match( + query.execute_networkx(G3), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + query = EdgeByProps(edge_type="Launched", props={"contents": Exact("bar")}) + + # Should match on `proc` from G1 + assert graph_nodes_match(query.execute_networkx(G2), []) + + +def test_intermediate_edge_by_props(G5, graph_nodes_match): + + # Run the first query. + query1 = FindProcess.with_command_line("B") + query2 = EdgeByProps(edge_type="Launched") + + query1 >> query2 + + # get the subgraph. + G_s = query1.execute_networkx(G5) + + # running query two should only give us B->C + assert graph_nodes_match( + query2.execute_networkx(G_s), + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + ], + ) diff --git a/tests/analyzers/statements/test_file.py b/tests/analyzers/statements/test_file.py new file mode 100644 index 00000000..3f3d15dd --- /dev/null +++ b/tests/analyzers/statements/test_file.py @@ -0,0 +1,61 @@ +from beagle.analyzers.base_analyzer import Analyzer +from beagle.analyzers.queries.file import FindFile +from beagle.nodes import File, Process + + +def test_file_with_name(G3, graph_nodes_match): + analyzer = Analyzer(name="test_file_with_name", query=FindFile.with_file_name("foo")) + + G = analyzer.run_networkx(G3) + + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_file_with_path(G3, graph_nodes_match): + analyzer = Analyzer(name="test_file_with_path", query=FindFile.with_file_path("bar")) + + G = analyzer.run_networkx(G3) + + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_file_with_full_path(G3, 
graph_nodes_match): + analyzer = Analyzer(name="test_file_with_full_path", query=FindFile.with_full_path("bar\\foo")) + + G = analyzer.run_networkx(G3) + + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_file_that_was_written(G8, graph_nodes_match): + analyzer = Analyzer( + name="test_file_that_was_written", query=FindFile.that_was_written(descendants=False) + ) + + G = analyzer.run_networkx(G8) + + assert graph_nodes_match( + G, + [ + Process(process_id=12, process_image="B", command_line="B"), + File(file_name="bar", file_path="bar"), + ], + ) diff --git a/tests/analyzers/statements/test_lookups.py b/tests/analyzers/statements/test_lookups.py new file mode 100644 index 00000000..72b235b4 --- /dev/null +++ b/tests/analyzers/statements/test_lookups.py @@ -0,0 +1,81 @@ +import re +from typing import Type + +import pytest + +from beagle.analyzers.queries.lookups import ( + And, + Contains, + EndsWith, + Exact, + FieldLookup, + IContains, + IExact, + Not, + Or, + Regex, + StartsWith, +) + + +@pytest.mark.parametrize( + "cls,value,prop,result", + [ + (Contains, "test", "test", True), + (Contains, "test", "worst", False), + (Contains, "test", "he is the test", True), + (Contains, "test", "the test was bad", True), + (Contains, "test", "the TEST was bad", False), + (IContains, "test", "TEST", True), + (IContains, "test", "tEsT", True), + (IContains, "test", "worst", False), + # Test we reject null value. 
+ (IContains, "test", None, False), + (IContains, "test", "he is the test", True), + (IContains, "test", "the test was bad", True), + (Exact, "test", "test", True), + (Exact, "test", " test ", False), + (Exact, "test", "some test a", False), + (IExact, "test", "test", True), + (IExact, "test", "TEST", True), + (IExact, "test", "tEst", True), + (StartsWith, "test", "test", True), + (StartsWith, "test", "not a test", False), + (StartsWith, "test", "test is the best", True), + (EndsWith, "test", "test", True), + (EndsWith, "test", "not test but a nest", False), + (EndsWith, "test", "the best test", True), + (Regex, r"\d", "test 1 test", True), + (Regex, re.compile(r"\d"), "test 1 test", True), + (Regex, r"\d", "test test", False), + (Regex, re.compile(r"\d"), "test test", False), + ], +) +def test_lookups(cls: Type[FieldLookup], value: str, prop: str, result: str): + # prop -> value being tested again, value -> the thing we're looking up + assert cls(value).test(prop) == result + + +def test_and(): + assert And(StartsWith("foo"), EndsWith("bar")).test("foo bar") is True + assert And(StartsWith("foo"), EndsWith("bar")).test("foo nar bar") is True + assert And(StartsWith("foo"), EndsWith("bar")).test("bar foo") is False + + +def test_or(): + assert Or(StartsWith("foo"), EndsWith("bar")).test("foo bar") is True + assert Or(StartsWith("foo"), EndsWith("bar")).test("foo") is True + assert Or(StartsWith("foo"), EndsWith("bar")).test("bar") is True + assert Or(StartsWith("foo"), EndsWith("bar")).test("foo nar bar") is True + assert Or(StartsWith("foo"), EndsWith("bar")).test("bar foo") is False + + +def test_not(): + assert Not(Contains("test")).test("hello") is True + assert Not(Not(Contains("test"))).test("hello") is False + + +def test_operator_overloading(): + assert (~Contains("test")).test("hello") is True + assert (Contains("test") & EndsWith("hello")).test("test my hello") is True + assert (Contains("test") | EndsWith("hello")).test("hello") is True diff --git 
a/tests/analyzers/statements/test_node.py b/tests/analyzers/statements/test_node.py new file mode 100644 index 00000000..0020e269 --- /dev/null +++ b/tests/analyzers/statements/test_node.py @@ -0,0 +1,176 @@ +from beagle.analyzers.queries.base_query import Query +from beagle.analyzers.queries.lookups import Contains, EndsWith, Exact, StartsWith +from beagle.analyzers.queries.node import ( + NodeByProps, + NodeByPropsAncestors, + NodeByPropsDescendents, + NodeByPropsReachable, +) +from beagle.nodes import Process + + +def test_test_props_nested_dict(): + s = Query() + + assert ( + s._test_values_with_lookups( + value_to_test={"hashes": {"md5": "1234"}}, + lookup_tests={"hashes": {"md5": Exact("1234")}}, + ) + is True + ) + + assert ( + s._test_values_with_lookups( + value_to_test={"hashes": {}}, lookup_tests={"hashes": {"md5": Exact("1234")}} + ) + is False + ) + + assert ( + s._test_values_with_lookups( + value_to_test={"hashes": None}, lookup_tests={"hashes": {"md5": Exact("1234")}} + ) + is False + ) + + +def test_one_node_prop_test(G1, graph_nodes_match): + query = NodeByProps(node_type=Process, props={"command_line": Contains("test.exe")}) + + assert graph_nodes_match( + query.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) + + # should mathc on other proc + query = NodeByProps(node_type=Process, props={"command_line": EndsWith("123456")}) + + assert graph_nodes_match( + query.execute_networkx(G1), + [Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456")], + ) + + # should match on both + query = NodeByProps(node_type=Process, props={"process_image": EndsWith("exe")}) + + assert graph_nodes_match( + query.execute_networkx(G1), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + Process(process_id=12, process_image="best.exe", command_line="best.exe /c 123456"), + ], + ) + query = NodeByProps(node_type=Process, 
props={"process_image": StartsWith("exe")}) + + assert graph_nodes_match(query.execute_networkx(G1), []) + + +def test_multiple_node_prop_test(G1, graph_nodes_match): + query = NodeByProps( + node_type=Process, + props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, + ) + + # Should match on `proc` from G1 + assert graph_nodes_match( + query.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) + + +def test_node_conditional(G1, graph_nodes_match): + query = NodeByProps( + node_type=Process, + props={"command_line": Contains("foobar"), "process_image": StartsWith("test")}, + ) + + assert graph_nodes_match( + query.execute_networkx(G1), + [Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar")], + ) + + +def test_node_with_descendants(G4, graph_nodes_match): + + # A should return A->B->C->D + query = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("A")}) + assert graph_nodes_match( + query.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + # B should return B->C->D + query = NodeByPropsDescendents(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + query.execute_networkx(G4), + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + +def test_node_with_ancestors(G4, graph_nodes_match): + + # A should return A + query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("A")}) + assert graph_nodes_match( + query.execute_networkx(G4), [Process(process_id=10, process_image="A", command_line="A")] + 
) + + # B should return A->B + query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + query.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + ], + ) + + # D should return A->B->C->D + query = NodeByPropsAncestors(node_type=Process, props={"process_image": Exact("D")}) + assert graph_nodes_match( + query.execute_networkx(G4), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + +def test_nodes_reachable(G5, graph_nodes_match): + + # All queries will return the full path. + # They should only return the path this process touches, A should return A->B->C->D and not E->F->G->H + + query = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("B")}) + assert graph_nodes_match( + query.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + query = NodeByPropsReachable(node_type=Process, props={"process_image": Exact("G")}) + assert graph_nodes_match( + query.execute_networkx(G5), + [ + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + Process(process_id=12, process_image="H", command_line="H"), + ], + ) diff --git a/tests/analyzers/statements/test_process.py b/tests/analyzers/statements/test_process.py new file mode 100644 index 00000000..8d7d6dc1 --- /dev/null +++ b/tests/analyzers/statements/test_process.py @@ 
-0,0 +1,154 @@ +from beagle.analyzers.base_analyzer import Analyzer +from beagle.analyzers.queries.lookups import EndsWith +from beagle.analyzers.queries.process import FindProcess +from beagle.nodes import File, Process + + +def test_get_by_command_line_no_lookup(G5, graph_nodes_match): + + # Should return all nodes reachable from A + query = FindProcess.with_command_line("A") + + assert graph_nodes_match( + query.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + ], + ) + + +def test_get_by_command_line_with_lookup(G5, graph_nodes_match): + + # Should return all nodes reachable from A Or G, (so all nodes) + query = FindProcess.with_command_line(EndsWith("A") | EndsWith("G")) + + assert graph_nodes_match( + query.execute_networkx(G5), + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + Process(process_id=12, process_image="H", command_line="H"), + ], + ) + + +def test_get_process_name_no_lookup(G2, graph_nodes_match): + + # No match, since defaults to exact. 
+ query = FindProcess.with_process_name("exe") + assert graph_nodes_match(query.execute_networkx(G2), []) + + query = FindProcess.with_process_name("test.exe") + assert graph_nodes_match( + query.execute_networkx(G2), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_get_process_name_lookup(G2, graph_nodes_match): + + # Should return test.exe because it ends with exe + query = FindProcess.with_process_name(EndsWith("exe")) + + assert graph_nodes_match( + query.execute_networkx(G2), + [ + Process(process_id=10, process_image="test.exe", command_line="test.exe /c foobar"), + File(file_name="foo", file_path="bar"), + ], + ) + + +def test_get_process_user(G6, graph_nodes_match): + + # Should return test.exe because it ends with exe + query = FindProcess.with_user("omer") + + assert graph_nodes_match( + query.execute_networkx(G6), + [ + Process( + process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer" + ), + Process( + process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer" + ), + ], + ) + + +def test_get_process_image_path(G6, graph_nodes_match): + + # Should return test.exe because it ends with exe + query = FindProcess.with_process_image_path("d:\\") + + assert graph_nodes_match( + query.execute_networkx(G6), + [ + Process( + process_id=1, process_image_path="d:\\", process_image="parent.exe", user="omer" + ), + Process( + process_id=2, process_image_path="d:\\users", process_image="child.exe", user="omer" + ), + ], + ) + + +def test_process_launched_no_descendants(G7, graph_nodes_match): + analyzer = Analyzer( + name="test_process_launched_descendants", + query=FindProcess.with_command_line("C") + >> FindProcess.that_was_launched(descendants=False), + ) + + G = analyzer.run_networkx(G7) + + # should return + # C + # / \ + # F G + + assert graph_nodes_match( + G, + [ + Process(process_id=12, 
process_image="C", command_line="C"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + ], + ) + + +def test_process_launched_descendants(G7, graph_nodes_match): + analyzer = Analyzer( + name="test_process_launched_descendants", + query=FindProcess.with_command_line("A") >> FindProcess.that_was_launched(), + ) + + G = analyzer.run_networkx(G7) + + # Should return the full graph. + # since it should find B and C which are children of A, then expand their children. + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + Process(process_id=12, process_image="D", command_line="D"), + Process(process_id=10, process_image="E", command_line="E"), + Process(process_id=12, process_image="F", command_line="F"), + Process(process_id=12, process_image="G", command_line="G"), + ], + ) diff --git a/tests/analyzers/test_base_analyzer.py b/tests/analyzers/test_base_analyzer.py new file mode 100644 index 00000000..2ac411e0 --- /dev/null +++ b/tests/analyzers/test_base_analyzer.py @@ -0,0 +1,72 @@ +from beagle.analyzers.base_analyzer import Analyzer +from beagle.analyzers.queries.process import FindProcess +from beagle.nodes import Process + +from beagle.backends import NetworkX + + +def test_analyzer_from_networx_backed(G5, graph_nodes_match): + analyzer = Analyzer( + name="test_analyzer_two_queries", + description="test_analyzer_two_queries", + score=0, + query=FindProcess.with_command_line("B") + >> FindProcess.that_was_launched(descendants=False), + ) + + backend = NetworkX(nodes=[]) + backend.G = G5 + + assert graph_nodes_match( + analyzer.run(backend), + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + ], + ) + + +def test_analyzer_two_queries(G5, 
graph_nodes_match): + + analyzer = Analyzer( + name="test_analyzer_two_queries", + description="test_analyzer_two_queries", + score=0, + query=FindProcess.with_command_line("B") + >> FindProcess.that_was_launched(descendants=False), + ) + + G = analyzer.run_networkx(G5) + + assert graph_nodes_match( + G, + [ + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + ], + ) + + +def test_analyzer_or_query_queries(G5, graph_nodes_match): + + query = ( + FindProcess.with_command_line("B") | FindProcess.with_command_line("A") + ) >> FindProcess.that_was_launched(descendants=False) + + analyzer = Analyzer( + name="test_analyzer_two_queries", + description="test_analyzer_two_queries", + score=0, + query=query, + ) + + G = analyzer.run_networkx(G5) + + assert graph_nodes_match( + G, + [ + Process(process_id=10, process_image="A", command_line="A"), + Process(process_id=12, process_image="B", command_line="B"), + Process(process_id=12, process_image="C", command_line="C"), + ], + ) diff --git a/tests/edges/__init__.py b/tests/edges/__init__.py new file mode 100644 index 00000000..e69de29b