From 9ce5c21fe693734feabde9a292583807c6ea0730 Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Tue, 29 Apr 2025 16:52:45 +0200 Subject: [PATCH 01/10] Add new types to unlogged types in constants.py --- maldump/constants.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/maldump/constants.py b/maldump/constants.py index 90e0cf0..c979bce 100644 --- a/maldump/constants.py +++ b/maldump/constants.py @@ -19,6 +19,7 @@ def __contains__(item: Any) -> bool: from maldump.parsers.avast_parser import AvastParser from maldump.parsers.avg_parser import AVGParser from maldump.parsers.eset_parser import EsetParser + from maldump.parsers.kaitai.eset_virlog_parser import EsetVirlogParser from maldump.parsers.forticlient_parser import ForticlientParser from maldump.parsers.kaitai.forticlient_parser import ( ForticlientParser as ForticlientKaitaiParser, @@ -31,6 +32,8 @@ def __contains__(item: Any) -> bool: unlogged = { bytes, EsetParser, + EsetVirlogParser, + EsetVirlogParser.Widestr, AvastParser, AVGParser, ForticlientParser, From c01ceb46f93ce36af6769b6d6a050895ad1e0a0f Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Tue, 29 Apr 2025 16:55:06 +0200 Subject: [PATCH 02/10] Fix logging parsed arguments in logger function Now also processing iterables (list, tuple, set) and dictionary --- maldump/utils.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/maldump/utils.py b/maldump/utils.py index bff035c..66216dd 100755 --- a/maldump/utils.py +++ b/maldump/utils.py @@ -30,6 +30,30 @@ def xor(plaintext: bytes, key: bytes) -> bytes: class Logger: + @staticmethod + def logify(obj: Any): + return ( + {key: Logger.logify(value) for key, value in obj.items()} + if isinstance(obj, dict) + else ( + [Logger.logify(value) for value in obj] + if isinstance(obj, list) + else ( + {Logger.logify(value) for value in obj} + if isinstance(obj, set) + else 
( + (Logger.logify(value) for value in obj) + if isinstance(obj, tuple) + else ( + "<" + type(obj).__name__ + ">" + if type(obj) in UnloggedObjects() + else obj + ) + ) + ) + ) + ) + @staticmethod def log(_func: Callable | None = None, *, lgr: logging.Logger = logger) -> Any: def log_fn(func: Callable) -> Any: @@ -37,14 +61,7 @@ def wrapper(*args: tuple, **kwargs: dict) -> Any: lgr.debug( "Calling function: %s, arguments: %s, keyword arguments: %s", func.__name__, - tuple( - ( - arg - if type(arg) not in UnloggedObjects() - else "<" + type(arg).__name__ + ">" - ) - for arg in args - ), + tuple((Logger.logify(arg)) for arg in args), kwargs, ) return func(*args, **kwargs) From 53e9e6a5a5961011b6303c35ba90215294ec5593 Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Tue, 29 Apr 2025 16:56:31 +0200 Subject: [PATCH 03/10] ESET virlog parser now supporting new (unknown) hash type This prevents the program from crashing if that type of hash is present --- maldump/parsers/kaitai/eset_virlog_parser.ksy | 4 ++++ maldump/parsers/kaitai/eset_virlog_parser.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/maldump/parsers/kaitai/eset_virlog_parser.ksy b/maldump/parsers/kaitai/eset_virlog_parser.ksy index e956b1f..1dd2154 100644 --- a/maldump/parsers/kaitai/eset_virlog_parser.ksy +++ b/maldump/parsers/kaitai/eset_virlog_parser.ksy @@ -93,6 +93,7 @@ types: 'opcode::firstseen': unixdate 'opcode::unknown_hash': hash 'opcode::unknown_hash2': hash + 'opcode::unknown_hash3': hash 'opcode::program_hash': hash 'opcode::object_hash': hash 'opcode::unknown_u1int1': u1 @@ -200,6 +201,9 @@ enums: 0x4213a4: id: "unknown_hash2" -orig-id: UNKNOWN_HASH2 + 0x4213ab: + id: "unknown_hash3" + -orig-id: UNKNOWN_HASH3 0x450fa0: id: "unknown_u4int6" -orig-id: UNKNOWN_U4INT6 diff --git a/maldump/parsers/kaitai/eset_virlog_parser.py b/maldump/parsers/kaitai/eset_virlog_parser.py index 95aac74..2031597 100644 --- 
a/maldump/parsers/kaitai/eset_virlog_parser.py +++ b/maldump/parsers/kaitai/eset_virlog_parser.py @@ -23,6 +23,7 @@ class Opcode(Enum): object_hash = 4330398 unknown_hash = 4330400 unknown_hash2 = 4330404 + unknown_hash3 = 4330411 unknown_u1int2 = 4398415 unknown_u1int1 = 4398455 unknown_u4int10 = 4522986 @@ -262,6 +263,8 @@ def _read(self): self.arg = EsetVirlogParser.Widestr(self._io, self, self._root) elif _on == EsetVirlogParser.Opcode.object_name: self.arg = EsetVirlogParser.Widestr(self._io, self, self._root) + elif _on == EsetVirlogParser.Opcode.unknown_hash3: + self.arg = EsetVirlogParser.Hash(self._io, self, self._root) elif _on == EsetVirlogParser.Opcode.unknown_u8int1: self.arg = self._io.read_u8le() elif _on == EsetVirlogParser.Opcode.unknown_u4int13: From b331e4f534f333bddb87812573ebb4fe15ae7672 Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Tue, 29 Apr 2025 17:05:17 +0200 Subject: [PATCH 04/10] Fix crashing on entry parsing --- maldump/parsers/eset_parser.py | 40 ++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/maldump/parsers/eset_parser.py b/maldump/parsers/eset_parser.py index 7850b03..fe79794 100644 --- a/maldump/parsers/eset_parser.py +++ b/maldump/parsers/eset_parser.py @@ -47,13 +47,37 @@ def parseRecord(record: dict): return { "timestamp": record.get("timestamp"), - "virusdb": record.get("virus_db").str, - "obj": record.get("object_name").str, - "objhash": record.get("object_hash").hash.hex(), - "infiltration": record.get("infiltration_name").str, - "user": record.get("user_name").str.split("\\")[1], - "progname": record.get("program_name").str, - "proghash": record.get("program_hash").hash.hex(), + "virusdb": ( + record.get("virus_db").str if record.get("virus_db") is not None else None + ), + "obj": ( + record.get("object_name").str + if record.get("object_name") is not None + else None + ), + "objhash": ( + record.get("object_hash").hash.hex() + 
if record.get("object_hash") is not None + else None + ), + "infiltration": ( + record.get("infiltration_name").str + if record.get("infiltration_name") is not None + else None + ), + "user": ( + record.get("user_name").str if record.get("user_name") is not None else None + ), + "progname": ( + record.get("program_name").str + if record.get("program_name") is not None + else None + ), + "proghash": ( + record.get("program_hash").hash.hex() + if record.get("program_hash") is not None + else None + ), "firstseen": record.get("firstseen"), } @@ -143,6 +167,8 @@ def parse_from_log(self, _=None) -> dict[tuple[str, datetime], QuarEntry]: q.threat = metadata["infiltration"] q.path = metadata["obj"] q.malfile = self._get_malfile(metadata["user"], metadata["objhash"]) + if (q.sha1, metadata["user"]) in quarfiles: + logger.debug("Entry (idx %s) already found, skipping", idx) quarfiles[q.sha1, metadata["user"]] = q return quarfiles From 5b4dc1242b51dcdb908b8b3e9bacf248519de551 Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Mon, 5 May 2025 11:16:00 +0200 Subject: [PATCH 05/10] Fix imports order for ruff --- maldump/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maldump/constants.py b/maldump/constants.py index c979bce..5a98891 100644 --- a/maldump/constants.py +++ b/maldump/constants.py @@ -19,8 +19,8 @@ def __contains__(item: Any) -> bool: from maldump.parsers.avast_parser import AvastParser from maldump.parsers.avg_parser import AVGParser from maldump.parsers.eset_parser import EsetParser - from maldump.parsers.kaitai.eset_virlog_parser import EsetVirlogParser from maldump.parsers.forticlient_parser import ForticlientParser + from maldump.parsers.kaitai.eset_virlog_parser import EsetVirlogParser from maldump.parsers.kaitai.forticlient_parser import ( ForticlientParser as ForticlientKaitaiParser, ) From ff756c49f2575e96f173714da73869fbc7bebe59 Mon Sep 17 00:00:00 2001 From: Jozef Sabo 
<31158086+jozef-sabo@users.noreply.github.com> Date: Mon, 5 May 2025 11:21:32 +0200 Subject: [PATCH 06/10] Fix return types in utils.py --- maldump/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maldump/utils.py b/maldump/utils.py index 66216dd..80e589f 100755 --- a/maldump/utils.py +++ b/maldump/utils.py @@ -31,7 +31,7 @@ def xor(plaintext: bytes, key: bytes) -> bytes: class Logger: @staticmethod - def logify(obj: Any): + def logify(obj: Any) -> Any: return ( {key: Logger.logify(value) for key, value in obj.items()} if isinstance(obj, dict) @@ -42,7 +42,7 @@ def logify(obj: Any): {Logger.logify(value) for value in obj} if isinstance(obj, set) else ( - (Logger.logify(value) for value in obj) + tuple(Logger.logify(value) for value in obj) if isinstance(obj, tuple) else ( "<" + type(obj).__name__ + ">" From c8a1228cf03c3c64e48d9a3450ded0d27125efae Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Mon, 5 May 2025 11:22:05 +0200 Subject: [PATCH 07/10] Fix incorrect default value as an argument for detecting AVs --- maldump/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maldump/__main__.py b/maldump/__main__.py index 7a9e761..83ddba1 100755 --- a/maldump/__main__.py +++ b/maldump/__main__.py @@ -172,7 +172,7 @@ def parse_cli() -> argparse.Namespace: parser.add_argument( "-c", "--detect-avs", - action="store_false", + action="store_true", help="try only avs which were detected in the system", ) parser.add_argument( From ad8cf6430f66f2a12845e180f0dd4bf6381b3e38 Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Mon, 5 May 2025 12:14:40 +0200 Subject: [PATCH 08/10] Fix incorrect user parsing in ESET The splitting of the username was removed in b331e4f534f333bddb87812573ebb4fe15ae7672 to keep the username and the domain name together; however, this causes an error while reading from the malware file itself ---
maldump/parsers/eset_parser.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/maldump/parsers/eset_parser.py b/maldump/parsers/eset_parser.py index fe79794..9384bb6 100644 --- a/maldump/parsers/eset_parser.py +++ b/maldump/parsers/eset_parser.py @@ -66,7 +66,9 @@ def parseRecord(record: dict): else None ), "user": ( - record.get("user_name").str if record.get("user_name") is not None else None + record.get("user_name").str.split("\\")[1] + if record.get("user_name") is not None + else None ), "progname": ( record.get("program_name").str @@ -162,7 +164,7 @@ def parse_from_log(self, _=None) -> dict[tuple[str, datetime], QuarEntry]: if metadata["user"] == "SYSTEM": logger.debug("Entry's (idx %s) user is SYSTEM, skipping", idx) continue - q = QuarEntry() + q = QuarEntry(self) q.timestamp = metadata["timestamp"] q.threat = metadata["infiltration"] q.path = metadata["obj"] @@ -218,7 +220,7 @@ def parse_from_fs( size = kt.mal_size threat = kt.findings[0].threat_canonized.str - q = QuarEntry() + q = QuarEntry(self) q.timestamp = timestamp q.path = path q.sha1 = sha1 From 69e1a2e9564cf7c8c70ea7e929aed3112df3fee4 Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Mon, 5 May 2025 12:17:19 +0200 Subject: [PATCH 09/10] Introduce `av` parameter to QuarEntry object This allows the complete path by which the malware file was identified to be traced back later.
--- maldump/parsers/avast_parser.py | 4 ++-- maldump/parsers/avg_parser.py | 4 ++-- maldump/parsers/avira_parser.py | 3 +-- maldump/parsers/forticlient_parser.py | 2 +- maldump/parsers/gdata_parser.py | 2 +- maldump/parsers/kaspersky_parser.py | 4 ++-- maldump/parsers/malwarebytes_parser.py | 4 ++-- maldump/parsers/mcafee_parser.py | 2 +- maldump/parsers/windef_parser.py | 4 ++-- maldump/structures.py | 3 ++- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/maldump/parsers/avast_parser.py b/maldump/parsers/avast_parser.py index 1aed838..00c2a79 100644 --- a/maldump/parsers/avast_parser.py +++ b/maldump/parsers/avast_parser.py @@ -138,7 +138,7 @@ def get(e: ET, f) -> str: else: malfile = self._getRawFromFile(chest_id) - q = QuarEntry() + q = QuarEntry(self) q.timestamp = dt.fromtimestamp(int(get(e, "TransferTime"))) q.threat = get(e, "Virus") q.path = path @@ -180,7 +180,7 @@ def parse_from_fs( timestamp = DTC.get_dt_from_stat(entry_stat) size = entry_stat.st_size - q = QuarEntry() + q = QuarEntry(self) q.path = str(entry) q.timestamp = timestamp q.size = size diff --git a/maldump/parsers/avg_parser.py b/maldump/parsers/avg_parser.py index bb7ab53..beee3e9 100644 --- a/maldump/parsers/avg_parser.py +++ b/maldump/parsers/avg_parser.py @@ -138,7 +138,7 @@ def get(e: ET, f) -> str: else: malfile = self._getRawFromFile(chest_id) - q = QuarEntry() + q = QuarEntry(self) q.timestamp = dt.fromtimestamp(int(get(e, "TransferTime"))) q.threat = get(e, "Virus") q.path = path @@ -180,7 +180,7 @@ def parse_from_fs( timestamp = DTC.get_dt_from_stat(entry_stat) size = entry_stat.st_size - q = QuarEntry() + q = QuarEntry(self) q.path = str(entry) q.timestamp = timestamp q.size = size diff --git a/maldump/parsers/avira_parser.py b/maldump/parsers/avira_parser.py index 122945b..c808a8f 100755 --- a/maldump/parsers/avira_parser.py +++ b/maldump/parsers/avira_parser.py @@ -24,8 +24,7 @@ def parse_from_fs(self, _=None) -> dict[str, QuarEntry]: logger.debug('Skipping entry 
idx %s, path "%s"', idx, metafile) continue - q = QuarEntry() - + q = QuarEntry(self) q.timestamp = parse(self).timestamp(kt.qua_time) q.threat = kt.mal_type q.path = kt.filename[4:] diff --git a/maldump/parsers/forticlient_parser.py b/maldump/parsers/forticlient_parser.py index de6c8d3..b120a3f 100755 --- a/maldump/parsers/forticlient_parser.py +++ b/maldump/parsers/forticlient_parser.py @@ -37,7 +37,7 @@ def parse_from_fs(self, _=None) -> dict[str, QuarEntry]: logger.debug('Skipping entry idx %s, path "%s"', idx, metafile) continue - q = QuarEntry() + q = QuarEntry(self) q.timestamp = self._get_time(kt.timestamp) q.threat = kt.mal_type q.path = self._normalize_path(kt.mal_path) diff --git a/maldump/parsers/gdata_parser.py b/maldump/parsers/gdata_parser.py index 75f724a..41fb7bd 100755 --- a/maldump/parsers/gdata_parser.py +++ b/maldump/parsers/gdata_parser.py @@ -25,7 +25,7 @@ def parse_from_fs(self, _=None) -> dict[str, QuarEntry]: logger.debug('Skipping entry idx %s, path "%s"', idx, metafile) continue - q = QuarEntry() + q = QuarEntry(self) q.timestamp = parse(self).timestamp(kt.data1.quatime) q.threat = kt.data1.malwaretype.string_content q.path = kt.data2.path.string_content[4:] diff --git a/maldump/parsers/kaspersky_parser.py b/maldump/parsers/kaspersky_parser.py index 59baa2a..0aa4969 100644 --- a/maldump/parsers/kaspersky_parser.py +++ b/maldump/parsers/kaspersky_parser.py @@ -66,7 +66,7 @@ def parse_from_log(self, _=None) -> dict[str, QuarEntry]: for row in rows: filename = row[0] malfile = self._get_malfile(filename) - q = QuarEntry() + q = QuarEntry(self) q.timestamp = self._normalize_time(row[6]) q.threat = row[3] q.path = row[1] + row[2] @@ -105,7 +105,7 @@ def parse_from_fs( timestamp = DTC.get_dt_from_stat(entry_stat) size = entry_stat.st_size - q = QuarEntry() + q = QuarEntry(self) q.path = str(entry) q.timestamp = timestamp q.size = size diff --git a/maldump/parsers/malwarebytes_parser.py b/maldump/parsers/malwarebytes_parser.py index 
49d557b..0d1e939 100644 --- a/maldump/parsers/malwarebytes_parser.py +++ b/maldump/parsers/malwarebytes_parser.py @@ -64,7 +64,7 @@ def parse_from_log(self, _=None) -> dict[str, QuarEntry]: malfile = read.contents(self.location / (uid + ".quar")) malfile = b"" if malfile is None else self._decrypt(malfile) - q = QuarEntry() + q = QuarEntry(self) q.timestamp = self._normalize_time(metadata["trace"]["cleanTime"]) q.threat = metadata["threatName"] q.path = metadata["trace"]["objectPath"] @@ -101,7 +101,7 @@ def parse_from_fs( malfile = read.contents(entry) malfile = b"" if malfile is None else self._decrypt(malfile) - q = QuarEntry() + q = QuarEntry(self) q.path = str(entry) q.timestamp = timestamp q.size = size diff --git a/maldump/parsers/mcafee_parser.py b/maldump/parsers/mcafee_parser.py index 9ef4b97..63a623d 100644 --- a/maldump/parsers/mcafee_parser.py +++ b/maldump/parsers/mcafee_parser.py @@ -47,7 +47,7 @@ def parse_from_fs( if parser is None: logger.debug('Skipping entry idx %s, path "%s"', idx, metafile) - q = QuarEntry() + q = QuarEntry(self) q.timestamp = dt.strptime(parser["timestamp"], "%Y-%m-%d %H:%M:%S") q.threat = parser["threat"] q.path = parser["file_name"] diff --git a/maldump/parsers/windef_parser.py b/maldump/parsers/windef_parser.py index 2304f2e..cd5d339 100644 --- a/maldump/parsers/windef_parser.py +++ b/maldump/parsers/windef_parser.py @@ -63,7 +63,7 @@ def parse_from_log(self, _=None) -> dict[str, QuarEntry]: guid = e.entry.element[0].content.value.hex().upper() malfile = self._get_malfile(guid) - q = QuarEntry() + q = QuarEntry(self) q.timestamp = ts q.threat = kt.data1.mal_type q.path = self._normalize(e.entry.path.character) @@ -106,7 +106,7 @@ def parse_from_fs( logger.debug('Skipping entry idx %s, path "%s"', idx, entry) continue - q = QuarEntry() + q = QuarEntry(self) q.path = str(entry) q.timestamp = timestamp q.size = kt_data.encryptedfile.len_malfile diff --git a/maldump/structures.py b/maldump/structures.py index e9878b7..3b4a516 
100644 --- a/maldump/structures.py +++ b/maldump/structures.py @@ -21,7 +21,8 @@ class QuarEntry: sha256: str | None = None _malfile: bytes - def __init__(self) -> None: ... + def __init__(self, av: Parser) -> None: + self.av = av @property def malfile(self) -> bytes: From cee354c0f869671fe0b30c4a5726fbac6be63d87 Mon Sep 17 00:00:00 2001 From: Jozef Sabo <31158086+jozef-sabo@users.noreply.github.com> Date: Mon, 5 May 2025 12:17:35 +0200 Subject: [PATCH 10/10] Add subtests to test in test_maldump.py --- test/test_maldump.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/test/test_maldump.py b/test/test_maldump.py index 619f05c..7293afe 100755 --- a/test/test_maldump.py +++ b/test/test_maldump.py @@ -18,45 +18,54 @@ def test_list_not_empty(self) -> None: def test_timestamp(self) -> None: for av in self.avs: for entry in av: - self.assertIsInstance(entry.timestamp, datetime) + with self.subTest(i=(entry.av.name, entry.sha1)): + self.assertIsInstance(entry.timestamp, datetime) def test_path_contains_eicar(self) -> None: for av in self.avs: for entry in av: - self.assertIsNotNone(entry.path) - self.assertIn("eicar", entry.path) + with self.subTest(i=(entry.av.name, entry.sha1)): + self.assertIsNotNone(entry.path) + self.assertIn("eicar", entry.path) def test_file_size(self) -> None: for av in self.avs: for entry in av: - self.assertEqual(entry.size, 68) + with self.subTest(i=(entry.av.name, entry.sha1)): + self.assertEqual(entry.size, 68) def test_md5_hash(self) -> None: for av in self.avs: for entry in av: - self.assertEqual(entry.md5, "44d88612fea8a8f36de82e1278abb02f") + with self.subTest(i=(entry.av.name, entry.sha1)): + self.assertEqual(entry.md5, "44d88612fea8a8f36de82e1278abb02f") def test_sha1_hash(self) -> None: for av in self.avs: for entry in av: - self.assertEqual(entry.sha1, "3395856ce81f2b7382dee72602f798b642f14140") + with self.subTest(i=(entry.av.name, entry.sha1)): + self.assertEqual( + 
entry.sha1, "3395856ce81f2b7382dee72602f798b642f14140" + ) def test_sha256_hash(self) -> None: for av in self.avs: for entry in av: - self.assertEqual( - entry.sha256, - "275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f", - ) + with self.subTest(i=(entry.av.name, entry.sha1)): + self.assertEqual( + entry.sha256, + "275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f", + ) def test_file_is_eicar(self) -> None: for av in self.avs: for entry in av: - self.assertIsInstance(entry.malfile, bytes) - self.assertEqual( - entry.malfile, - rb"X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*", - ) + with self.subTest(i=(entry.av.name, entry.sha1)): + self.assertIsInstance(entry.malfile, bytes) + self.assertEqual( + entry.malfile, + rb"X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*", + ) if __name__ == "__main__":