Skip to content

Commit 22a4e08

Browse files
authored
feat(heuristics): improve differentiation between stub packages and dependency confusion attacks (#1174)
This PR provides some improvements to the dependency confusion PR. - The minimal content heuristic now checks for .pyi files. - A second heuristic checks whether the package name contains "stub". Signed-off-by: Amine <amine.raouane@enim.ac.ma>
1 parent ca1fc9c commit 22a4e08

File tree

6 files changed

+113
-20
lines changed

6 files changed

+113
-20
lines changed

src/macaron/malware_analyzer/pypi_heuristics/heuristics.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ class Heuristics(str, Enum):
5555
#: Indicates that the package's description is unsecure, such as not having descriptive keywords.
5656
UNSECURE_DESCRIPTION = "unsecure_description"
5757

58+
#: Indicates whether the package name contains "stub".
59+
STUB_NAME = "stub_name"
60+
5861

5962
class HeuristicResult(str, Enum):
6063
"""Result type indicating the outcome of a heuristic."""

src/macaron/malware_analyzer/pypi_heuristics/metadata/minimal_content.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
class MinimalContentAnalyzer(BaseHeuristicAnalyzer):
1919
"""Check whether the package has minimal content."""
2020

21-
FILES_THRESHOLD = 50
21+
FILES_THRESHOLD = 10
2222

2323
def __init__(self) -> None:
2424
super().__init__(
@@ -46,9 +46,12 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
4646
logger.debug(error_msg)
4747
raise SourceCodeError(error_msg)
4848

49-
file_count = sum(len(files) for _, _, files in os.walk(pypi_package_json.package_sourcecode_path))
49+
file_count = sum(
50+
sum(1 for f in files if f.endswith(".pyi"))
51+
for _, _, files in os.walk(pypi_package_json.package_sourcecode_path)
52+
)
5053

5154
if file_count >= self.FILES_THRESHOLD:
52-
return HeuristicResult.PASS, {"message": "Package has sufficient content"}
55+
return HeuristicResult.PASS, {"message": "Package has sufficient pyi files", "pyi_files": file_count}
5356

54-
return HeuristicResult.FAIL, {"message": "Not enough files found"}
57+
return HeuristicResult.FAIL, {"message": "Not enough pyi files found", "pyi_files": file_count}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This analyzer checks if a PyPI package has 'stub' in its name."""
5+
6+
import logging
7+
8+
from macaron.json_tools import JsonType
9+
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
10+
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
11+
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset
12+
13+
logger: logging.Logger = logging.getLogger(__name__)
14+
15+
16+
class StubNameAnalyzer(BaseHeuristicAnalyzer):
17+
"""Check whether the package name contains 'stub'."""
18+
19+
def __init__(self) -> None:
20+
super().__init__(
21+
name="stub_name_analyzer",
22+
heuristic=Heuristics.STUB_NAME,
23+
depends_on=None,
24+
)
25+
26+
def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
27+
"""Analyze the package.
28+
29+
Parameters
30+
----------
31+
pypi_package_json: PyPIPackageJsonAsset
32+
The PyPI package JSON asset object.
33+
34+
Returns
35+
-------
36+
tuple[HeuristicResult, dict[str, JsonType]]:
37+
The result and related information collected during the analysis.
38+
"""
39+
package_name = pypi_package_json.component_name
40+
if "stub" in package_name.lower():
41+
return HeuristicResult.PASS, {}
42+
return HeuristicResult.FAIL, {}

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,12 +421,16 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
421421
422422
% Package released with a name similar to a popular package.
423423
{Confidence.HIGH.value}::trigger(malware_high_confidence_4) :-
424-
quickUndetailed, forceSetup, failed({Heuristics.TYPOSQUATTING_PRESENCE.value}).
424+
quickUndetailed,
425+
forceSetup,
426+
failed({Heuristics.TYPOSQUATTING_PRESENCE.value}),
427+
failed({Heuristics.STUB_NAME.value}).
425428
426429
% Package released with dependency confusion .
427430
{Confidence.HIGH.value}::trigger(malware_high_confidence_5) :-
428431
forceSetup,
429-
passed({Heuristics.MINIMAL_CONTENT.value}),
432+
failed({Heuristics.MINIMAL_CONTENT.value}),
433+
failed({Heuristics.STUB_NAME.value}),
430434
failed({Heuristics.ANOMALOUS_VERSION.value}),
431435
failed({Heuristics.UNSECURE_DESCRIPTION.value}).
432436

tests/malware_analyzer/pypi/test_minimal_content.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,10 @@ def test_analyze_sufficient_files_pass(analyzer: MinimalContentAnalyzer, pypi_pa
2323
pypi_package_json.download_sourcecode.return_value = True
2424
pypi_package_json.package_sourcecode_path = "/fake/path"
2525
with patch("os.walk") as mock_walk:
26-
mock_walk.return_value = [("root", [], [f"file{i}.py" for i in range(60)])]
27-
result, info = analyzer.analyze(pypi_package_json)
26+
mock_walk.return_value = [("root", [], [f"file{i}.pyi" for i in range(60)])]
27+
result, _ = analyzer.analyze(pypi_package_json)
2828

2929
assert result == HeuristicResult.PASS
30-
assert info == {"message": "Package has sufficient content"}
3130
pypi_package_json.download_sourcecode.assert_called_once()
3231

3332

@@ -36,23 +35,21 @@ def test_analyze_exactly_threshold_files_pass(analyzer: MinimalContentAnalyzer,
3635
pypi_package_json.download_sourcecode.return_value = True
3736
pypi_package_json.package_sourcecode_path = "/fake/path"
3837
with patch("os.walk") as mock_walk:
39-
mock_walk.return_value = [("root", [], [f"file{i}.py" for i in range(50)])]
40-
result, info = analyzer.analyze(pypi_package_json)
38+
mock_walk.return_value = [("root", [], [f"file{i}.pyi" for i in range(10)])]
39+
result, _ = analyzer.analyze(pypi_package_json)
4140

4241
assert result == HeuristicResult.PASS
43-
assert info == {"message": "Package has sufficient content"}
4442

4543

4644
def test_analyze_insufficient_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None:
4745
"""Test the analyzer fails when the package has insufficient files."""
4846
pypi_package_json.download_sourcecode.return_value = True
4947
pypi_package_json.package_sourcecode_path = "/fake/path"
5048
with patch("os.walk") as mock_walk:
51-
mock_walk.return_value = [("root", [], ["file1.py"])]
52-
result, info = analyzer.analyze(pypi_package_json)
49+
mock_walk.return_value = [("root", [], ["file1.pyi"])]
50+
result, _ = analyzer.analyze(pypi_package_json)
5351

5452
assert result == HeuristicResult.FAIL
55-
assert info == {"message": "Not enough files found"}
5653

5754

5855
def test_analyze_no_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None:
@@ -61,10 +58,9 @@ def test_analyze_no_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_js
6158
pypi_package_json.package_sourcecode_path = "/fake/path"
6259
with patch("os.walk") as mock_walk:
6360
mock_walk.return_value = [("root", [], [])]
64-
result, info = analyzer.analyze(pypi_package_json)
61+
result, _ = analyzer.analyze(pypi_package_json)
6562

6663
assert result == HeuristicResult.FAIL
67-
assert info == {"message": "Not enough files found"}
6864

6965

7066
def test_analyze_download_failed_raises_error(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None:
@@ -84,8 +80,8 @@ def test_analyze_download_failed_raises_error(analyzer: MinimalContentAnalyzer,
8480
(0, HeuristicResult.FAIL),
8581
(1, HeuristicResult.FAIL),
8682
(2, HeuristicResult.FAIL),
87-
(55, HeuristicResult.PASS),
88-
(70, HeuristicResult.PASS),
83+
(12, HeuristicResult.PASS),
84+
(15, HeuristicResult.PASS),
8985
],
9086
)
9187
def test_analyze_various_file_counts(
@@ -98,7 +94,7 @@ def test_analyze_various_file_counts(
9894
"""Test the analyzer with various file counts."""
9995
pypi_package_json.download_sourcecode.return_value = True
10096
pypi_package_json.package_sourcecode_path = "/fake/path"
101-
files = [f"file{i}.py" for i in range(file_count)]
97+
files = [f"file{i}.pyi" for i in range(file_count)]
10298
mock_walk = MagicMock(return_value=[("root", [], files)])
10399
monkeypatch.setattr("os.walk", mock_walk)
104100

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""Tests for the StubNameAnalyzer heuristic."""
5+
6+
from unittest.mock import MagicMock
7+
8+
import pytest
9+
10+
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
11+
from macaron.malware_analyzer.pypi_heuristics.metadata.stub_name import StubNameAnalyzer
12+
13+
14+
@pytest.fixture(name="analyzer")
15+
def analyzer_() -> StubNameAnalyzer:
16+
"""Pytest fixture to create a StubNameAnalyzer instance."""
17+
return StubNameAnalyzer()
18+
19+
20+
@pytest.mark.parametrize(
21+
("package_name", "expected_result"),
22+
[
23+
("numpy", HeuristicResult.FAIL),
24+
("pandas", HeuristicResult.FAIL),
25+
("scikit-learn", HeuristicResult.FAIL),
26+
("tensorflow-stub", HeuristicResult.PASS),
27+
("torch-stubs", HeuristicResult.PASS),
28+
("requests", HeuristicResult.FAIL),
29+
("flask-stub", HeuristicResult.PASS),
30+
("my_package", HeuristicResult.FAIL),
31+
("requests-stub-client", HeuristicResult.PASS),
32+
("testpackage", HeuristicResult.FAIL),
33+
],
34+
)
35+
def test_analyze_various_package_names(
36+
analyzer: StubNameAnalyzer,
37+
pypi_package_json: MagicMock,
38+
package_name: str,
39+
expected_result: HeuristicResult,
40+
) -> None:
41+
"""Test the analyzer with various package names."""
42+
pypi_package_json.component_name = package_name
43+
result, _ = analyzer.analyze(pypi_package_json)
44+
45+
assert result == expected_result

0 commit comments

Comments
 (0)