Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions config/pipeline_config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ readers:
method_type: log_file_reader
auto_config: False
params:
file: local/miranda.json
file: tests/test_folder/audit.log

parsers:
MatcherParser:
Expand Down Expand Up @@ -97,11 +97,8 @@ detectors:
variables:
- pos: 0
name: var1
params:
threshold: 0.
header_variables:
- pos: level
params: {}
NewValueComboDetector_All:
method_type: new_value_combo_detector
auto_config: False
Expand Down
533 changes: 533 additions & 0 deletions notebooks/persistency_demo.ipynb

Large diffs are not rendered by default.

154 changes: 154 additions & 0 deletions notebooks/reader_parser_detector.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "54215189",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.chdir(\"../\")\n",
"\n",
"from detectmatelibrary.detectors.new_value_combo_detector import NewValueComboDetector, schemas\n",
"from detectmatelibrary.parsers.template_matcher import MatcherParser\n",
"from detectmatelibrary.readers.log_file import LogFileReader\n",
"\n",
"from detectmatelibrary.common.persistency.event_data_structures.trackers import (\n",
" EventVariableTracker, StabilityTracker\n",
")\n",
"from detectmatelibrary.common.persistency.event_data_structures.dataframes import (\n",
" EventDataFrame, ChunkedEventDataFrame\n",
")\n",
"from detectmatelibrary.common.persistency.event_persistency import EventPersistency\n",
"\n",
"import logging\n",
"logging.getLogger().setLevel(logging.ERROR) # Only show errors"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4cd73416",
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"\n",
"\n",
"with open(\"config/pipeline_config_default.yaml\", 'r') as f:\n",
" config = yaml.safe_load(f)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "54ae5e78",
"metadata": {},
"outputs": [],
"source": [
"reader = LogFileReader(config=config)\n",
"parser = MatcherParser(config=config)\n",
"detector = NewValueComboDetector(config=config)\n",
"\n",
"output = schemas.DetectorSchema()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f23c07d8",
"metadata": {},
"outputs": [],
"source": [
"reader.reset()\n",
"for i in range(1000):\n",
" log = reader.process(as_bytes=False)\n",
" parsed_log = parser.process(log)\n",
" detector.configure(parsed_log)\n",
"\n",
"detector.set_configuration()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "ddc59204",
"metadata": {},
"outputs": [],
"source": [
"reader.reset()\n",
"for i in range(1000):\n",
" log = reader.process(as_bytes=False)\n",
" parsed_log = parser.process(log)\n",
" detector.train(parsed_log)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "651e272e",
"metadata": {},
"outputs": [],
"source": [
"predictions = {}\n",
"\n",
"for i in range(1000):\n",
" log = reader.process(as_bytes=False)\n",
" parsed_log = parser.process(log)\n",
" prediction = detector.detect(parsed_log, output_=output)\n",
" predictions[i] = prediction\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "51daa602",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(859, True),\n",
" (860, True),\n",
" (861, True),\n",
" (862, True),\n",
" (864, True),\n",
" (865, True),\n",
" (866, True),\n",
" (867, True)]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get true predictions\n",
"[(i, is_anomaly) for i, is_anomaly in predictions.items() if is_anomaly]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "detectmatelibrary (3.12.3)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"drain3>=0.9.11",
"numpy>=2.3.2",
"pandas>=2.3.2",
"polars>=1.36.1",
"protobuf>=6.32.1",
"pydantic>=2.11.7",
"pyyaml>=6.0.3",
"regex>=2025.11.3",
"kafka-python>=2.3.0",
"ujson>=5.11.0",
]

[project.optional-dependencies]
Expand Down
60 changes: 59 additions & 1 deletion src/detectmatelibrary/common/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pydantic import BaseModel, ConfigDict

from typing_extensions import Self
from typing import Any, Dict
from typing import Any, Dict, List, Optional
from copy import deepcopy


Expand Down Expand Up @@ -35,3 +35,61 @@ def from_dict(cls, data: Dict[str, Any], method_id: str) -> Self:
ConfigMethods.check_type(config_, method_type=aux.method_type)

return cls(**ConfigMethods.process(config_))


def generate_detector_config(
variable_selection: Dict[int, List[str]],
templates: Dict[Any, str | None],
detector_name: str,
method_type: str,
base_config: Optional[Dict[str, Any]] = None,
**additional_params: Any,
) -> Dict[str, Any]:
"""Generate the configuration for detectors. Output is a dictionary.

Args:
variable_selection (Dict[int, List[str]]): Mapping of event IDs to variable names.
templates (Dict[Any, str | None]): Mapping of event IDs to their templates.
detector_name (str): Name of the detector.
method_type (str): Type of the detection method.
base_config (Optional[Dict[str, Any]]): Base configuration to build upon.
**additional_params: Additional parameters for the detector.
"""

if base_config is None:
base_config = {
"detectors": {
detector_name: {
"method_type": method_type,
"auto_config": False,
"params": {
"log_variables": []
},
}
}
}
config = deepcopy(base_config)

detectors = config.setdefault("detectors", {})
detector = detectors.setdefault(detector_name, {})
detector.setdefault("method_type", method_type)
detector.setdefault("auto_config", False)
params = detector.setdefault("params", {})
params.update(additional_params)
log_variables = params.setdefault("log_variables", [])

for event_id, all_variables in variable_selection.items():
variables = [
{"pos": int(name.split("_")[1]), "name": name}
for name in all_variables if name.startswith("var_")
]
header_variables = [{"pos": name} for name in all_variables if not name.startswith("var_")]

log_variables.append({
"id": f"id_{event_id}",
"event": event_id,
"template": templates.get(event_id, ""),
"variables": variables,
"header_variables": header_variables,
})
return config
61 changes: 60 additions & 1 deletion src/detectmatelibrary/common/_config/_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from detectmatelibrary.common._config._formats import apply_format


from typing import Any, Dict
from typing import Any, Dict, List, Optional
from copy import deepcopy
import warnings


Expand Down Expand Up @@ -82,3 +83,61 @@ def process(config: Dict[str, Any]) -> Dict[str, Any]:
config.update(config["params"])
config.pop("params")
return config


def generate_detector_config(
variable_selection: Dict[int, List[str]],
templates: Dict[Any, str | None],
detector_name: str,
method_type: str,
base_config: Optional[Dict[str, Any]] = None,
**additional_params: Any,
) -> Dict[str, Any]:
"""Generate the configuration for detectors. Output is a dictionary.

Args:
variable_selection (Dict[int, List[str]]): Mapping of event IDs to variable names.
templates (Dict[Any, str | None]): Mapping of event IDs to their templates.
detector_name (str): Name of the detector.
method_type (str): Type of the detection method.
base_config (Optional[Dict[str, Any]]): Base configuration to build upon.
**additional_params: Additional parameters for the detector.
"""

if base_config is None:
base_config = {
"detectors": {
detector_name: {
"method_type": method_type,
"auto_config": False,
"params": {
"log_variables": []
},
}
}
}
config = deepcopy(base_config)

detectors = config.setdefault("detectors", {})
detector = detectors.setdefault(detector_name, {})
detector.setdefault("method_type", method_type)
detector.setdefault("auto_config", False)
params = detector.setdefault("params", {})
params.update(additional_params)
log_variables = params.setdefault("log_variables", [])

for event_id, all_variables in variable_selection.items():
variables = [
{"pos": int(name.split("_")[1]), "name": name}
for name in all_variables if name.startswith("var_")
]
header_variables = [{"pos": name} for name in all_variables if not name.startswith("var_")]

log_variables.append({
"id": f"id_{event_id}",
"event": event_id,
"template": templates.get(event_id, ""),
"variables": variables,
"header_variables": header_variables,
})
return config
5 changes: 5 additions & 0 deletions src/detectmatelibrary/common/persistency/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .event_persistency import EventPersistency

__all__ = [
"EventPersistency"
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from abc import ABC, abstractmethod
from typing import Any, List
from dataclasses import dataclass


@dataclass
class EventDataStructure(ABC):
    """Abstract storage interface for the data collected for one event.

    Concrete backends implement the abstract hooks below; the two fields and
    their accessors are shared by every implementation.
    """

    # Identifier of the event this structure belongs to (defaults to -1).
    event_id: int = -1
    # Template associated with the event (defaults to an empty string).
    template: str = ""

    # --- concrete accessors -------------------------------------------------

    def get_event_id(self) -> int:
        """Return the value of the ``event_id`` field."""
        return self.event_id

    def get_template(self) -> str:
        """Return the value of the ``template`` field."""
        return self.template

    # --- hooks every backend must implement ---------------------------------

    @abstractmethod
    def add_data(self, data_object: Any) -> None:
        """Add ``data_object`` to the structure (defined by subclasses)."""

    @abstractmethod
    def get_data(self) -> Any:
        """Return the held data (representation defined by subclasses)."""

    @abstractmethod
    def get_variables(self) -> List[str]:
        """Return a list of variable names (defined by subclasses)."""

    @classmethod
    @abstractmethod
    def to_data(cls, raw_data: Any) -> Any:
        """Convert raw data into the appropriate data format for storage."""
Loading
Loading