From bb9f32a4445beb8eba0597d375373dc626c0bc3f Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Thu, 14 Aug 2025 12:19:04 +1000 Subject: [PATCH 1/9] chore: refactoring fake email analyzer to be more optimistic in passing emails Signed-off-by: Carl Flottmann --- .../pypi_heuristics/metadata/fake_email.py | 36 ++-- .../malware_analyzer/pypi/test_fake_email.py | 169 +++++++++--------- 2 files changed, 110 insertions(+), 95 deletions(-) diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py index ff0509f68..ea1649b0d 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py @@ -101,28 +101,40 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes """ package_json = pypi_package_json.package_json if not package_json.get("info", {}): + # Malformed JSON, the "info" field must be present raise HeuristicAnalyzerValueError("No package info available.") author_email = json_extract(package_json, ["info", "author_email"], str) maintainer_email = json_extract(package_json, ["info", "maintainer_email"], str) + # If there is no email information, this heuristic does not apply (note, this is common, so + # not a case of malformed JSON) if not author_email and not maintainer_email: return HeuristicResult.SKIP, {"message": "No author or maintainer email available."} - validated_emails: list[JsonType] = [] - details = ["normalized", "local_part", "domain"] + # non_emails are ones where an email format cannot be extracted + # invalid_emails are ones that are invalid as per validate_email() + # valid_emails are ones that are valid as per validate_email() + detail_info: dict = {"non_emails": [], "invalid_emails": [], "valid_emails": []} + result = HeuristicResult.FAIL for email_field in [author_email, maintainer_email]: - if email_field: - emails = 
self.get_emails(email_field) - if not emails: - return HeuristicResult.FAIL, {"message": "no emails found in the email field"} + if not email_field: + continue - for email in emails: - email_info = self.is_valid_email(email) - if not email_info: - return HeuristicResult.FAIL, {"invalid_email": email} + emails = self.get_emails(email_field) + if not emails: + detail_info["non_emails"].append(email_field) - validated_emails.append({key: getattr(email_info, key) for key in details}) + for email in emails: + email_info = self.is_valid_email(email) + if email_info is None: + detail_info["invalid_emails"].append(email) + continue - return HeuristicResult.PASS, {"validated_emails": validated_emails} + logger.debug("Email %s normalized to %s", email, email_info.normalized) + detail_info["valid_emails"].append(email) + # Optimistic, so if there exists a valid email, we will pass this heuristic + result = HeuristicResult.PASS + + return result, detail_info diff --git a/tests/malware_analyzer/pypi/test_fake_email.py b/tests/malware_analyzer/pypi/test_fake_email.py index f51fb4110..3c4d2a3b3 100644 --- a/tests/malware_analyzer/pypi/test_fake_email.py +++ b/tests/malware_analyzer/pypi/test_fake_email.py @@ -11,7 +11,6 @@ from macaron.errors import HeuristicAnalyzerValueError from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.fake_email import FakeEmailAnalyzer -from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset @pytest.fixture(name="analyzer") @@ -20,110 +19,114 @@ def analyzer_() -> FakeEmailAnalyzer: return FakeEmailAnalyzer() -@pytest.fixture(name="pypi_package_json_asset_mock") -def pypi_package_json_asset_mock_() -> MagicMock: - """Pytest fixture for a mock PyPIPackageJsonAsset.""" - mock_asset = MagicMock(spec=PyPIPackageJsonAsset) - mock_asset.package_json = {} - return mock_asset - - -def test_analyze_skip_no_emails_present(analyzer: FakeEmailAnalyzer, 
pypi_package_json_asset_mock: MagicMock) -> None: - """Test the analyzer skips if no author_email or maintainer_email is present.""" - pypi_package_json_asset_mock.package_json = {"info": {"author_email": None, "maintainer_email": None}} - result, info = analyzer.analyze(pypi_package_json_asset_mock) +def test_missing_info(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: + """Test when JSON 'info' key is missing in the PyPI data (should error). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + analyzer: FakeEmailAnalyzer + An initialized FakeEmailAnalyzer instance. + """ + pypi_package_json.package_json = {} # No 'info' key + with pytest.raises(HeuristicAnalyzerValueError): + analyzer.analyze(pypi_package_json) + + +def test_no_emails_present(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: + """Test when no author_email or maintainer_email is present (should skip). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + analyzer: FakeEmailAnalyzer + An initialized FakeEmailAnalyzer instance. + """ + pypi_package_json.package_json = {"info": {"author_email": None, "maintainer_email": None}} + result, _ = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.SKIP - assert info["message"] == "No author or maintainer email available." - -def test_analyze_raises_error_for_missing_info_key( - analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock -) -> None: - """Test the analyzer raises an error if the 'info' key is missing in the PyPI data.""" - pypi_package_json_asset_mock.package_json = {} # No 'info' key - with pytest.raises(HeuristicAnalyzerValueError) as exc_info: - analyzer.analyze(pypi_package_json_asset_mock) - assert "No package info available." 
in str(exc_info.value) +def test_non_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: + """Test with a non-parsable email address (should fail). -def test_analyze_fail_no_email_found_in_field( - analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock -) -> None: - """Test the analyzer fails if an email field does not contain a parsable email address.""" - pypi_package_json_asset_mock.package_json = {"info": {"author_email": "not an email", "maintainer_email": None}} - result, info = analyzer.analyze(pypi_package_json_asset_mock) + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + analyzer: FakeEmailAnalyzer + An initialized FakeEmailAnalyzer instance. + """ + pypi_package_json.package_json = {"info": {"author_email": "not an email", "maintainer_email": "also not an email"}} + result, info = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.FAIL - assert info == {"message": "no emails found in the email field"} + # assert types (for mypy) + assert isinstance(info["non_emails"], list) -def test_analyze_fail_invalid_email(analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock) -> None: - """Test analyzer fails if the email field contains an invalid email format.""" - invalid_email = "user@example" - pypi_package_json_asset_mock.package_json = {"info": {"author_email": invalid_email, "maintainer_email": None}} + assert "not an email" in info["non_emails"] + assert "also not an email" in info["non_emails"] - result, info = analyzer.analyze(pypi_package_json_asset_mock) - assert result == HeuristicResult.FAIL - assert info == {"message": "no emails found in the email field"} +def test_invalid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: + """Test with an invalid email address that doesn't accept mail (should fail). 
-def test_analyze_pass_only_maintainer_email_valid( - analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock -) -> None: - """Test the analyzer passes if only a valid maintainer_email is present and deliverability is not checked.""" - email = "maintainer@example.net" - pypi_package_json_asset_mock.package_json = {"info": {"author_email": None, "maintainer_email": email}} - result, info = analyzer.analyze(pypi_package_json_asset_mock) + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + analyzer: FakeEmailAnalyzer + An initialized FakeEmailAnalyzer instance. + """ + pypi_package_json.package_json = { + "info": {"author_email": "user@example.com", "maintainer_email": "other@example.com"} + } - if analyzer.check_deliverability: - assert result == HeuristicResult.FAIL - assert info == {"invalid_email": email} - return + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.FAIL - assert result == HeuristicResult.PASS - assert info["validated_emails"] == [ - {"normalized": "maintainer@example.net", "local_part": "maintainer", "domain": "example.net"} - ] + # assert types (for mypy) + assert isinstance(info["invalid_emails"], list) + assert "user@example.com" in info["invalid_emails"] + assert "other@example.com" in info["invalid_emails"] -def test_analyze_pass_both_emails_valid(analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock) -> None: - """Test the analyzer passes if both emails are valid and deliverability is not checked.""" - author_email = "example@gmail.com" - author_local_part, author_domain = author_email.split("@") - maintainer_email = "maintainer@example.net" - maintainer_local_part, maintainer_domain = maintainer_email.split("@") - pypi_package_json_asset_mock.package_json = { - "info": {"author_email": author_email, "maintainer_email": maintainer_email} - } - result, info = analyzer.analyze(pypi_package_json_asset_mock) - if 
analyzer.check_deliverability: - assert result == HeuristicResult.FAIL - assert info == {"invalid_email": maintainer_email} - return +def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: + """Test with valid email address that does accept mail (should pass). + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + analyzer: FakeEmailAnalyzer + An initialized FakeEmailAnalyzer instance. + """ + # TODO: change this to use a test domain instead of turning off deliverability + analyzer.check_deliverability = False + pypi_package_json.package_json = { + "info": {"author_email": "user@example.net", "maintainer_email": "other@example.net"} + } + result, info = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.PASS - validated_emails = info.get("validated_emails") - assert isinstance(validated_emails, list) - assert len(validated_emails) == 2 - assert {"normalized": author_email, "local_part": author_local_part, "domain": author_domain} in validated_emails - assert { - "normalized": maintainer_email, - "local_part": maintainer_local_part, - "domain": maintainer_domain, - } in validated_emails + # assert types (for mypy) + assert isinstance(info["valid_emails"], list) - -def test_is_valid_email_failure(analyzer: FakeEmailAnalyzer) -> None: - """Test is_valid_email returns None on failure.""" - result = analyzer.is_valid_email("invalid-email") - assert result is None + assert "user@example.net" in info["valid_emails"] + assert "other@example.net" in info["valid_emails"] def test_get_emails(analyzer: FakeEmailAnalyzer) -> None: - """Test the get_emails method.""" - email_field = "test@example.com, another test <another@example.org>" - expected = ["test@example.com", "another@example.org"] + """Test the get_emails method extracts emails from text correctly. + + analyzer: FakeEmailAnalyzer + An initialized FakeEmailAnalyzer instance. 
+ """ + email_field = "test@example.com, Another User <anotheruser@example.org>, please also email me@example.net thanks!" + expected = ["test@example.com", "anotheruser@example.org", "me@example.net"] assert analyzer.get_emails(email_field) == expected email_field_no_email = "this is not an email" From f5edd28e0be71d57335bdbf53e96a78b640c3f5d Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Thu, 14 Aug 2025 15:43:09 +1000 Subject: [PATCH 2/9] test: fake email analyzer tests now run offline in a test environment Signed-off-by: Carl Flottmann --- .../pypi_heuristics/metadata/fake_email.py | 2 +- .../malware_analyzer/pypi/test_fake_email.py | 38 +++++-------------- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py index ea1649b0d..300629ae1 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py @@ -133,7 +133,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes continue logger.debug("Email %s normalized to %s", email, email_info.normalized) - detail_info["valid_emails"].append(email) + detail_info["valid_emails"].append(email_info.normalized) # Optimistic, so if there exists a valid email, we will pass this heuristic result = HeuristicResult.PASS diff --git a/tests/malware_analyzer/pypi/test_fake_email.py b/tests/malware_analyzer/pypi/test_fake_email.py index 3c4d2a3b3..973b90fb4 100644 --- a/tests/malware_analyzer/pypi/test_fake_email.py +++ b/tests/malware_analyzer/pypi/test_fake_email.py @@ -4,7 +4,7 @@ """Tests for the FakeEmailAnalyzer heuristic.""" -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest @@ -19,6 +19,7 @@ def analyzer_() -> FakeEmailAnalyzer: return FakeEmailAnalyzer() +@patch("email_validator.TEST_ENVIRONMENT", True) def 
test_missing_info(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: """Test when JSON 'info' key is missing in the PyPI data (should error). @@ -34,6 +35,7 @@ def test_missing_info(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) analyzer.analyze(pypi_package_json) +@patch("email_validator.TEST_ENVIRONMENT", True) def test_no_emails_present(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: """Test when no author_email or maintainer_email is present (should skip). @@ -49,6 +51,7 @@ def test_no_emails_present(pypi_package_json: MagicMock, analyzer: FakeEmailAnal assert result == HeuristicResult.SKIP +@patch("email_validator.TEST_ENVIRONMENT", True) def test_non_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: """Test with a non-parsable email address (should fail). @@ -70,32 +73,9 @@ def test_non_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> assert "also not an email" in info["non_emails"] -def test_invalid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: - """Test with an invalid email address that doesn't accept mail (should fail). - - Parameters - ---------- - pypi_package_json: MagicMock - The PyPIPackageJsonAsset MagicMock fixture. - analyzer: FakeEmailAnalyzer - An initialized FakeEmailAnalyzer instance. - """ - pypi_package_json.package_json = { - "info": {"author_email": "user@example.com", "maintainer_email": "other@example.com"} - } - - result, info = analyzer.analyze(pypi_package_json) - assert result == HeuristicResult.FAIL - - # assert types (for mypy) - assert isinstance(info["invalid_emails"], list) - - assert "user@example.com" in info["invalid_emails"] - assert "other@example.com" in info["invalid_emails"] - - +@patch("email_validator.TEST_ENVIRONMENT", True) def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: - """Test with valid email address that does accept mail (should pass). 
+ """Test with valid email address format (should pass). Parameters ---------- @@ -107,7 +87,7 @@ def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) # TODO: change this to use a test domain instead of turning off deliverability analyzer.check_deliverability = False pypi_package_json.package_json = { - "info": {"author_email": "user@example.net", "maintainer_email": "other@example.net"} + "info": {"author_email": "user@example.test", "maintainer_email": "other@example.test"} } result, info = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.PASS @@ -115,8 +95,8 @@ def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) # assert types (for mypy) assert isinstance(info["valid_emails"], list) - assert "user@example.net" in info["valid_emails"] - assert "other@example.net" in info["valid_emails"] + assert "user@example.test" in info["valid_emails"] + assert "other@example.test" in info["valid_emails"] def test_get_emails(analyzer: FakeEmailAnalyzer) -> None: From 2caad35ca615eedd9e3391f7370f98d0046ad902 Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Thu, 14 Aug 2025 17:12:47 +1000 Subject: [PATCH 3/9] test: adding integration test for email analyzer, included her invalid email check on smooth-operator Signed-off-by: Carl Flottmann --- .../check_smooth_operator_email.sh | 13 ++++++++++ .../policy.dl | 11 ++++++++ .../test.yaml | 25 +++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100755 tests/integration/cases/email_checking_pypi_malware_analyzer/check_smooth_operator_email.sh create mode 100644 tests/integration/cases/email_checking_pypi_malware_analyzer/policy.dl create mode 100644 tests/integration/cases/email_checking_pypi_malware_analyzer/test.yaml diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/check_smooth_operator_email.sh b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_smooth_operator_email.sh new file mode 100755 index 
000000000..ae4edb439 --- /dev/null +++ b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_smooth_operator_email.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +if sqlite3 --json output/macaron.db "SELECT detect_malicious_metadata_check.detail_information + FROM detect_malicious_metadata_check JOIN check_facts on detect_malicious_metadata_check.id = check_facts.id + JOIN check_result on check_facts.check_result_id = check_result.id JOIN component + ON component.id = check_result.component_id WHERE check_result.check_id = 'mcn_detect_malicious_metadata_1' + AND component.name = 'clodd'" | jq -r ".[0].detail_information | fromjson | .invalid_emails | length > 0"; then + exit 0 +else + echo "ERROR: the invalid_emails report for smooth-operator is empty" >&2 + exit 1 +fi diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/policy.dl b/tests/integration/cases/email_checking_pypi_malware_analyzer/policy.dl new file mode 100644 index 000000000..9008f6201 --- /dev/null +++ b/tests/integration/cases/email_checking_pypi_malware_analyzer/policy.dl @@ -0,0 +1,11 @@ +/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("check-smooth-operator", component_id, "Check clodd artifacts") :- + check_passed(component_id, "mcn_detect_malicious_metadata_1"). + +apply_policy_to("check-smooth-operator", component_id) :- + is_component(component_id, purl), + match("pkg:pypi/smooth-operator", purl). 
diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/test.yaml b/tests/integration/cases/email_checking_pypi_malware_analyzer/test.yaml new file mode 100644 index 000000000..d3ceb779b --- /dev/null +++ b/tests/integration/cases/email_checking_pypi_malware_analyzer/test.yaml @@ -0,0 +1,25 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing two python packages to check if the email address is deliverable and valid. The package smooth-operator is known + to use an invalid email address with an example.com domain. + +tags: +- macaron-python-package + +steps: +- name: Run macaron analyze against smooth-operator + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/smooth-operator +- name: Query the output database to verify the smooth-operator email was invalid. + kind: shell + options: + cmd: ./check_smooth_operator_email.sh +- name: Run macaron verify-policy to check the results of the packages + kind: verify + options: + policy: policy.dl From 884766e5238ae607e51338f289ac5a70e328691c Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Fri, 15 Aug 2025 10:03:18 +1000 Subject: [PATCH 4/9] test: complete integration test for valid and invalid emails with deliverability Signed-off-by: Carl Flottmann --- .../check_emails.sh | 24 +++++++++++++++++++ .../check_smooth_operator_email.sh | 13 ---------- .../policy.dl | 9 ++++++- .../test.yaml | 14 +++++++---- 4 files changed, 42 insertions(+), 18 deletions(-) create mode 100755 tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh delete mode 100755 tests/integration/cases/email_checking_pypi_malware_analyzer/check_smooth_operator_email.sh diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh 
new file mode 100755 index 000000000..f914efbf5 --- /dev/null +++ b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +if sqlite3 --json output/macaron.db "SELECT detect_malicious_metadata_check.detail_information + FROM detect_malicious_metadata_check JOIN check_facts on detect_malicious_metadata_check.id = check_facts.id + JOIN check_result on check_facts.check_result_id = check_result.id JOIN component + ON component.id = check_result.component_id WHERE check_result.check_id = 'mcn_detect_malicious_metadata_1' + AND component.name = 'smooth-operator'" | jq -r ".[0].detail_information | fromjson | .invalid_emails | length > 0"; then + exit 0 +else + echo "ERROR: the invalid_emails report for smooth-operator is empty" >&2 + exit 1 +fi + +if sqlite3 --json output/macaron.db "SELECT detect_malicious_metadata_check.detail_information + FROM detect_malicious_metadata_check JOIN check_facts on detect_malicious_metadata_check.id = check_facts.id + JOIN check_result on check_facts.check_result_id = check_result.id JOIN component + ON component.id = check_result.component_id WHERE check_result.check_id = 'mcn_detect_malicious_metadata_1' + AND component.name = 'email-validator'" | jq -r ".[0].detail_information | fromjson | .valid_emails | length > 0"; then + exit 0 +else + echo "ERROR: the valid_emails report for email-validator is empty" >&2 + exit 1 +fi diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/check_smooth_operator_email.sh b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_smooth_operator_email.sh deleted file mode 100755 index ae4edb439..000000000 --- a/tests/integration/cases/email_checking_pypi_malware_analyzer/check_smooth_operator_email.sh +++ /dev/null @@ -1,13 +0,0 @@ 
-#!/bin/bash -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. -if sqlite3 --json output/macaron.db "SELECT detect_malicious_metadata_check.detail_information - FROM detect_malicious_metadata_check JOIN check_facts on detect_malicious_metadata_check.id = check_facts.id - JOIN check_result on check_facts.check_result_id = check_result.id JOIN component - ON component.id = check_result.component_id WHERE check_result.check_id = 'mcn_detect_malicious_metadata_1' - AND component.name = 'clodd'" | jq -r ".[0].detail_information | fromjson | .invalid_emails | length > 0"; then - exit 0 -else - echo "ERROR: the invalid_emails report for smooth-operator is empty" >&2 - exit 1 -fi diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/policy.dl b/tests/integration/cases/email_checking_pypi_malware_analyzer/policy.dl index 9008f6201..122be99ee 100644 --- a/tests/integration/cases/email_checking_pypi_malware_analyzer/policy.dl +++ b/tests/integration/cases/email_checking_pypi_malware_analyzer/policy.dl @@ -3,9 +3,16 @@ #include "prelude.dl" -Policy("check-smooth-operator", component_id, "Check clodd artifacts") :- +Policy("check-smooth-operator", component_id, "Check smooth-operator artifacts") :- check_passed(component_id, "mcn_detect_malicious_metadata_1"). apply_policy_to("check-smooth-operator", component_id) :- is_component(component_id, purl), match("pkg:pypi/smooth-operator", purl). + +Policy("check-email-validator", component_id, "Check email-validator artifacts") :- + check_passed(component_id, "mcn_detect_malicious_metadata_1"). + +apply_policy_to("check-email-validator", component_id) :- + is_component(component_id, purl), + match("pkg:pypi/email-validator", purl). 
diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/test.yaml b/tests/integration/cases/email_checking_pypi_malware_analyzer/test.yaml index d3ceb779b..f2363311f 100644 --- a/tests/integration/cases/email_checking_pypi_malware_analyzer/test.yaml +++ b/tests/integration/cases/email_checking_pypi_malware_analyzer/test.yaml @@ -3,7 +3,7 @@ description: | Analyzing two python packages to check if the email address is deliverable and valid. The package smooth-operator is known - to use an invalid email address with an example.com domain. + to use an invalid email address with an example.com domain. email-validator is known to have a valid, deliverable, email address. tags: - macaron-python-package @@ -15,11 +15,17 @@ steps: command_args: - -purl - pkg:pypi/smooth-operator -- name: Query the output database to verify the smooth-operator email was invalid. +- name: Run macaron analyze against email-validator + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/email-validator +- name: Query the output database to verify the emails were detected as invalid and valid respectively. kind: shell options: - cmd: ./check_smooth_operator_email.sh -- name: Run macaron verify-policy to check the results of the packages + cmd: ./check_emails.sh +- name: Run macaron verify-policy to check the results of the packages still passed. 
kind: verify options: policy: policy.dl From fac37149ce4e5191d86317d6e659846124d0bb55 Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Fri, 15 Aug 2025 10:37:31 +1000 Subject: [PATCH 5/9] test: fixed integration test to fail and pass email results as expected Signed-off-by: Carl Flottmann --- .../check_emails.sh | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh index f914efbf5..30e89ce3b 100755 --- a/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh +++ b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh @@ -1,24 +1,23 @@ #!/bin/bash # Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. -if sqlite3 --json output/macaron.db "SELECT detect_malicious_metadata_check.detail_information +result=$(sqlite3 --json output/macaron.db "SELECT detect_malicious_metadata_check.detail_information FROM detect_malicious_metadata_check JOIN check_facts on detect_malicious_metadata_check.id = check_facts.id JOIN check_result on check_facts.check_result_id = check_result.id JOIN component ON component.id = check_result.component_id WHERE check_result.check_id = 'mcn_detect_malicious_metadata_1' - AND component.name = 'smooth-operator'" | jq -r ".[0].detail_information | fromjson | .invalid_emails | length > 0"; then - exit 0 -else + AND component.name = 'smooth-operator'" | jq -r ".[0].detail_information | fromjson | .invalid_emails | length > 0") +if [[ "$result" == "false" ]]; then echo "ERROR: the invalid_emails report for smooth-operator is empty" >&2 exit 1 fi -if sqlite3 --json output/macaron.db "SELECT detect_malicious_metadata_check.detail_information +result=$(sqlite3 --json output/macaron.db "SELECT 
detect_malicious_metadata_check.detail_information FROM detect_malicious_metadata_check JOIN check_facts on detect_malicious_metadata_check.id = check_facts.id JOIN check_result on check_facts.check_result_id = check_result.id JOIN component ON component.id = check_result.component_id WHERE check_result.check_id = 'mcn_detect_malicious_metadata_1' - AND component.name = 'email-validator'" | jq -r ".[0].detail_information | fromjson | .valid_emails | length > 0"; then - exit 0 -else + AND component.name = 'email-validator'" | jq -r ".[0].detail_information | fromjson | .valid_emails | length > 0") +if [[ "$result" == "false" ]]; then echo "ERROR: the valid_emails report for email-validator is empty" >&2 - exit 1 + exit 2 fi +exit 0 From 309f38322321344936c851d70d5af2cf162a097a Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Fri, 15 Aug 2025 11:40:06 +1000 Subject: [PATCH 6/9] chore: fix copyright and remove comment Signed-off-by: Carl Flottmann --- .../cases/email_checking_pypi_malware_analyzer/check_emails.sh | 2 +- tests/malware_analyzer/pypi/test_fake_email.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh index 30e89ce3b..709ed94f0 100755 --- a/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh +++ b/tests/integration/cases/email_checking_pypi_malware_analyzer/check_emails.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
result=$(sqlite3 --json output/macaron.db "SELECT detect_malicious_metadata_check.detail_information FROM detect_malicious_metadata_check JOIN check_facts on detect_malicious_metadata_check.id = check_facts.id diff --git a/tests/malware_analyzer/pypi/test_fake_email.py b/tests/malware_analyzer/pypi/test_fake_email.py index 973b90fb4..067bbc749 100644 --- a/tests/malware_analyzer/pypi/test_fake_email.py +++ b/tests/malware_analyzer/pypi/test_fake_email.py @@ -84,8 +84,6 @@ def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) analyzer: FakeEmailAnalyzer An initialized FakeEmailAnalyzer instance. """ - # TODO: change this to use a test domain instead of turning off deliverability - analyzer.check_deliverability = False pypi_package_json.package_json = { "info": {"author_email": "user@example.test", "maintainer_email": "other@example.test"} } From b93367fdf1480178f1bb4220f4dcd08cbb345d94 Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Fri, 15 Aug 2025 14:35:46 +1000 Subject: [PATCH 7/9] test: comment description about test environment variable Signed-off-by: Carl Flottmann --- tests/malware_analyzer/pypi/test_fake_email.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/malware_analyzer/pypi/test_fake_email.py b/tests/malware_analyzer/pypi/test_fake_email.py index 067bbc749..827d454e1 100644 --- a/tests/malware_analyzer/pypi/test_fake_email.py +++ b/tests/malware_analyzer/pypi/test_fake_email.py @@ -12,6 +12,11 @@ from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.fake_email import FakeEmailAnalyzer +# Note: throughout these unit tests we set email_validator.TEST_ENVIRONMENT to True. This sets a global +# environment variable in email_validator that ensures that all calls to validate_email have +# test_environment=True set. This allows test and **.test domains and disables DNS deliverability checks +# (so disables check_deliverability=true). 
See https://github.com/JoshData/python-email-validator for more. + @pytest.fixture(name="analyzer") def analyzer_() -> FakeEmailAnalyzer: From b5cd0ea5c209d581b8e4e44a4422fb223fbdb432 Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Fri, 15 Aug 2025 15:41:38 +1000 Subject: [PATCH 8/9] test: use defaults.ini instead of test environment Signed-off-by: Carl Flottmann --- .../malware_analyzer/pypi/test_fake_email.py | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/tests/malware_analyzer/pypi/test_fake_email.py b/tests/malware_analyzer/pypi/test_fake_email.py index 827d454e1..c2682e8b0 100644 --- a/tests/malware_analyzer/pypi/test_fake_email.py +++ b/tests/malware_analyzer/pypi/test_fake_email.py @@ -4,19 +4,17 @@ """Tests for the FakeEmailAnalyzer heuristic.""" -from unittest.mock import MagicMock, patch +import os +from pathlib import Path +from unittest.mock import MagicMock import pytest +from macaron.config.defaults import load_defaults from macaron.errors import HeuristicAnalyzerValueError from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.fake_email import FakeEmailAnalyzer -# Note: throughout these unit tests we set email_validator.TEST_ENVIRONMENT to True. This sets a global -# environment variable in email_validator that ensures that all calls to validate_email have -# test_environment=True set. This allows test and **.test domains and disables DNS deliverability checks -# (so disables check_deliverability=true). See https://github.com/JoshData/python-email-validator for more. 
- @pytest.fixture(name="analyzer") def analyzer_() -> FakeEmailAnalyzer: @@ -24,7 +22,24 @@ def analyzer_() -> FakeEmailAnalyzer: return FakeEmailAnalyzer() -@patch("email_validator.TEST_ENVIRONMENT", True) +@pytest.fixture(name="fake_email_defaults_override") +def set_defaults_(tmp_path: Path) -> None: + """Disable check_deliverability in defaults.ini so we do not make network connections. + + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + """ + defaults_file = Path(os.path.join(tmp_path, "config.ini")) + content = """ + [heuristic.pypi] + check_deliverability = False + """ + defaults_file.write_text(content, encoding="utf-8") + assert load_defaults(str(defaults_file)) is True + + def test_missing_info(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: """Test when JSON 'info' key is missing in the PyPI data (should error). @@ -40,7 +55,6 @@ def test_missing_info(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) analyzer.analyze(pypi_package_json) -@patch("email_validator.TEST_ENVIRONMENT", True) def test_no_emails_present(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: """Test when no author_email or maintainer_email is present (should skip). @@ -56,7 +70,6 @@ def test_no_emails_present(pypi_package_json: MagicMock, analyzer: FakeEmailAnal assert result == HeuristicResult.SKIP -@patch("email_validator.TEST_ENVIRONMENT", True) def test_non_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: """Test with a non-parsable email address (should fail). @@ -78,7 +91,6 @@ def test_non_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> assert "also not an email" in info["non_emails"] -@patch("email_validator.TEST_ENVIRONMENT", True) def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: """Test with valid email address format (should pass). 
@@ -90,7 +102,10 @@ def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) An initialized FakeEmailAnalyzer instance. """ pypi_package_json.package_json = { - "info": {"author_email": "user@example.test", "maintainer_email": "other@example.test"} + "info": { + "author_email": "test.email.zwiusiubwq@gmail.com", + "maintainer_email": "test.email.fqnmwbsxca@gmail.com", + } } result, info = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.PASS @@ -98,8 +113,8 @@ def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) # assert types (for mypy) assert isinstance(info["valid_emails"], list) - assert "user@example.test" in info["valid_emails"] - assert "other@example.test" in info["valid_emails"] + assert "test.email.zwiusiubwq@gmail.com" in info["valid_emails"] + assert "test.email.fqnmwbsxca@gmail.com" in info["valid_emails"] def test_get_emails(analyzer: FakeEmailAnalyzer) -> None: From d6802282a8b2c3afd9a6fa9d814c2c492bf16dca Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 18 Aug 2025 11:17:20 +1000 Subject: [PATCH 9/9] chore: use fake_email_defaults_override fixture in test_fake_email.py Signed-off-by: Trong Nhan Mai --- .../malware_analyzer/pypi/test_fake_email.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/malware_analyzer/pypi/test_fake_email.py b/tests/malware_analyzer/pypi/test_fake_email.py index c2682e8b0..56e81e035 100644 --- a/tests/malware_analyzer/pypi/test_fake_email.py +++ b/tests/malware_analyzer/pypi/test_fake_email.py @@ -15,11 +15,15 @@ from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.fake_email import FakeEmailAnalyzer - -@pytest.fixture(name="analyzer") -def analyzer_() -> FakeEmailAnalyzer: - """Pytest fixture to create a FakeEmailAnalyzer instance.""" - return FakeEmailAnalyzer() +# If check_deliverability is True, DNS-based check is 
enabled for email_validator. +# If check_deliverability is True and no network is available, email_validator will perform DNS-based check +# but the DNS queries will time out. +# email_validator doesn't treat timeout as a validation failure: +# https://github.com/JoshData/python-email-validator/blob/98800bac023b8713351393a5043034065f1ea6cb/email_validator/deliverability.py#L144 +# Therefore, FakeEmailAnalyzer.is_valid_email doesn't return None, and we will treat this as a "valid email". +# This has a risk of treating an invalid email as valid when an unexpected timeout occurs. +# We have ensured that check_deliverability is always False for all unit test cases in this module, so that behavior +# is not expected to happen. @pytest.fixture(name="fake_email_defaults_override") @@ -40,6 +44,12 @@ def set_defaults_(tmp_path: Path) -> None: assert load_defaults(str(defaults_file)) is True +@pytest.fixture(name="analyzer") +def analyzer_(fake_email_defaults_override: None) -> FakeEmailAnalyzer: # pylint: disable=unused-argument + """Pytest fixture to create a FakeEmailAnalyzer instance.""" + return FakeEmailAnalyzer() + + def test_missing_info(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: """Test when JSON 'info' key is missing in the PyPI data (should error).