diff --git a/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py b/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py index 0c55b03fd..08c468336 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Define and initialize the base analyzer.""" @@ -18,13 +18,9 @@ def __init__( self, name: str, heuristic: Heuristics, - depends_on: list[tuple[Heuristics, HeuristicResult]] | None, ) -> None: self.name: str = name self.heuristic: Heuristics = heuristic - self.depends_on: list[tuple[Heuristics, HeuristicResult]] | None = ( - depends_on # Contains the dependent heuristics and the expected result of each heuristic - ) @abstractmethod def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: diff --git a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py index c37f763a5..b93ed9c6b 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py @@ -56,8 +56,6 @@ class HeuristicResult(str, Enum): #: Indicates that suspicious activity was detected. FAIL = "FAIL" - #: Indicates that the heuristic check could not be performed due to missing metadata. - #: The `SKIP` result occurs when the necessary metadata is not available. This often happens - #: when fetching data through the PyPI API and the relevant data, such as the maintainer's - #: join date or release information, is missing or unavailable. + #: Indicates that this heuristic is not applicable to this package. 
+ #: Please use HeuristicAnalyzerValueError for malformed package data. SKIP = "SKIP" diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py index 40843297d..01dcabae7 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py @@ -61,11 +61,7 @@ class AnomalousVersionAnalyzer(BaseHeuristicAnalyzer): DIGIT_DATE_FORMATS: list[str] = ["%Y%m%d", "%Y%d%m", "%d%m%Y", "%m%d%Y", "%y%m%d", "%y%d%m", "%d%m%y", "%m%d%y"] def __init__(self) -> None: - super().__init__( - name="anomalous_version_analyzer", - heuristic=Heuristics.ANOMALOUS_VERSION, - depends_on=[(Heuristics.ONE_RELEASE, HeuristicResult.FAIL)], - ) + super().__init__(name="anomalous_version_analyzer", heuristic=Heuristics.ANOMALOUS_VERSION) self.major_threshold, self.epoch_threshold, self.day_publish_error = self._load_defaults() def _load_defaults(self) -> tuple[int, int, int]: @@ -110,13 +106,8 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes logger.debug(error_msg) raise HeuristicAnalyzerValueError(error_msg) - if len(releases) != 1: - error_msg = ( - "This heuristic depends on a single release, but somehow there are multiple when the one release" - + " heuristic failed." - ) - logger.debug(error_msg) - raise HeuristicAnalyzerValueError(error_msg) + if len(releases) != 1: # We only analyze packages with a single release, this heuristic does not apply. 
+ return HeuristicResult.SKIP, {} # Since there is only one release, the latest version should be that release release = pypi_package_json.get_latest_version() diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/closer_release_join_date.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/closer_release_join_date.py index bfa9a0704..b761602fd 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/closer_release_join_date.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/closer_release_join_date.py @@ -3,15 +3,19 @@ """Analyzer checks whether the maintainers' join date closer to latest package's release date.""" +import logging from datetime import datetime, timedelta from macaron.config.defaults import defaults +from macaron.errors import HeuristicAnalyzerValueError from macaron.json_tools import JsonType from macaron.malware_analyzer.datetime_parser import parse_datetime from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry +logger: logging.Logger = logging.getLogger(__name__) + class CloserReleaseJoinDateAnalyzer(BaseHeuristicAnalyzer): """Check whether the maintainers' join date closer to package's latest release date. 
@@ -20,9 +24,7 @@ class CloserReleaseJoinDateAnalyzer(BaseHeuristicAnalyzer): """ def __init__(self) -> None: - super().__init__( - name="closer_release_join_date_analyzer", heuristic=Heuristics.CLOSER_RELEASE_JOIN_DATE, depends_on=None - ) + super().__init__(name="closer_release_join_date_analyzer", heuristic=Heuristics.CLOSER_RELEASE_JOIN_DATE) self.gap_threshold: int = self._load_defaults() def _load_defaults(self) -> int: @@ -97,7 +99,20 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes maintainers_join_date: list[datetime] | None = self._get_maintainers_join_date( pypi_package_json.pypi_registry, pypi_package_json.component_name ) + # If there is no maintainer join date information, then it is malformed package metadata + if not maintainers_join_date: + error_msg = "Metadata has no maintainers or join dates for them" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + latest_release_date: datetime | None = self._get_latest_release_date(pypi_package_json) + # Upload time is standardized by PyPI, so if it is not in the expected format then it is + # malformed package metadata + if not latest_release_date: + error_msg = "Unable to parse latest upload time" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + detail_info: dict[str, JsonType] = { "maintainers_join_date": ( [date.strftime("%Y-%m-%d %H:%M:%S") for date in maintainers_join_date] if maintainers_join_date else [] @@ -105,9 +120,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes "latest_release_date": latest_release_date.strftime("%Y-%m-%d %H:%M:%S") if latest_release_date else "", } - if maintainers_join_date is None or latest_release_date is None: - return HeuristicResult.SKIP, detail_info - for date in maintainers_join_date: difference = abs(latest_release_date - date) threshold_delta = timedelta(days=self.gap_threshold) diff --git 
a/src/macaron/malware_analyzer/pypi_heuristics/metadata/empty_project_link.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/empty_project_link.py index 6ef2cc132..9ae163aa9 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/empty_project_link.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/empty_project_link.py @@ -3,17 +3,21 @@ """Analyzer checks there is no project link of the package.""" +import logging + from macaron.json_tools import JsonType from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset +logger: logging.Logger = logging.getLogger(__name__) + class EmptyProjectLinkAnalyzer(BaseHeuristicAnalyzer): """Check whether the PyPI package has no project links.""" def __init__(self) -> None: - super().__init__(name="empty_project_link_analyzer", heuristic=Heuristics.EMPTY_PROJECT_LINK, depends_on=None) + super().__init__(name="empty_project_link_analyzer", heuristic=Heuristics.EMPTY_PROJECT_LINK) def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: """Analyze the package. @@ -30,10 +34,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes """ project_links = pypi_package_json.get_project_links() - if project_links is None: - return HeuristicResult.FAIL, {} - - if len(project_links) == 0: # Total. 
+ if project_links is None or len(project_links) == 0: return HeuristicResult.FAIL, {} return HeuristicResult.PASS, {"project_links": project_links} diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py index ff0509f68..8f3987929 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/fake_email.py @@ -34,11 +34,7 @@ class FakeEmailAnalyzer(BaseHeuristicAnalyzer): ) def __init__(self) -> None: - super().__init__( - name="fake_email_analyzer", - heuristic=Heuristics.FAKE_EMAIL, - depends_on=None, - ) + super().__init__(name="fake_email_analyzer", heuristic=Heuristics.FAKE_EMAIL) self.check_deliverability: bool = self._load_defaults() def _load_defaults(self) -> bool: diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/high_release_frequency.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/high_release_frequency.py index d5e1b7635..792a743c6 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/high_release_frequency.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/high_release_frequency.py @@ -7,6 +7,7 @@ from datetime import datetime from macaron.config.defaults import defaults +from macaron.errors import HeuristicAnalyzerValueError from macaron.json_tools import JsonType, json_extract from macaron.malware_analyzer.datetime_parser import parse_datetime from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer @@ -20,11 +21,7 @@ class HighReleaseFrequencyAnalyzer(BaseHeuristicAnalyzer): """Check whether the release frequency is high.""" def __init__(self) -> None: - super().__init__( - name="high_release_frequency_analyzer", - heuristic=Heuristics.HIGH_RELEASE_FREQUENCY, - depends_on=[(Heuristics.ONE_RELEASE, HeuristicResult.PASS)], # Analyzing when this heuristic pass - ) + 
super().__init__(name="high_release_frequency_analyzer", heuristic=Heuristics.HIGH_RELEASE_FREQUENCY) self.average_gap_threshold: int = self._load_defaults() # Days def _load_defaults(self) -> int: @@ -49,7 +46,13 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes The result and related information collected during the analysis. """ version_to_releases: dict | None = pypi_package_json.get_releases() - if version_to_releases is None or len(version_to_releases) == 1: + if version_to_releases is None: + error_msg = "Metadata has no release information" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + + if len(version_to_releases) == 1: + # We only analyze packages with multiple releases, this heuristic does not apply. return HeuristicResult.SKIP, {} extract_data: dict[str, datetime] = {} diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/one_release.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/one_release.py index 7d81ec010..a06ce0469 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/one_release.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/one_release.py @@ -4,17 +4,22 @@ """Analyzer checks the packages contain one release.""" +import logging + +from macaron.errors import HeuristicAnalyzerValueError from macaron.json_tools import JsonType from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset +logger: logging.Logger = logging.getLogger(__name__) + class OneReleaseAnalyzer(BaseHeuristicAnalyzer): """Determine if there is only one release of the package.""" def __init__(self) -> None: - super().__init__(name="one_release_analyzer", heuristic=Heuristics.ONE_RELEASE, depends_on=None) + super().__init__(name="one_release_analyzer", 
heuristic=Heuristics.ONE_RELEASE) def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: """Analyze the package. @@ -31,7 +36,9 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes """ releases: dict | None = pypi_package_json.get_releases() if releases is None: - return HeuristicResult.SKIP, {"releases": {}} + error_msg = "Metadata has no release information" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) if len(releases) == 1: return HeuristicResult.FAIL, {"releases": releases} # Higher false positive, so we keep it MEDIUM diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/source_code_repo.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/source_code_repo.py index 708301807..989e5475f 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/source_code_repo.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/source_code_repo.py @@ -24,7 +24,6 @@ def __init__(self) -> None: super().__init__( name="source_code_repo_analyzer", heuristic=Heuristics.SOURCE_CODE_REPO, - depends_on=[(Heuristics.EMPTY_PROJECT_LINK, HeuristicResult.PASS)], ) def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py index 87658f714..b3ccbbe01 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py @@ -60,9 +60,7 @@ class TyposquattingPresenceAnalyzer(BaseHeuristicAnalyzer): } def __init__(self, popular_packages_path: str | None = None) -> None: - super().__init__( - name="typosquatting_presence_analyzer", heuristic=Heuristics.TYPOSQUATTING_PRESENCE, depends_on=None - ) + 
super().__init__(name="typosquatting_presence_analyzer", heuristic=Heuristics.TYPOSQUATTING_PRESENCE) self.default_path = os.path.join(MACARON_PATH, "resources/popular_packages.txt") if popular_packages_path: self.default_path = popular_packages_path diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py index 278f3eeb5..16188a5a9 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py @@ -1,10 +1,11 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Heuristics analyzer to check unchanged content in multiple releases.""" import logging from collections import Counter +from macaron.errors import HeuristicAnalyzerValueError from macaron.json_tools import JsonType, json_extract from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics @@ -17,11 +18,7 @@ class UnchangedReleaseAnalyzer(BaseHeuristicAnalyzer): """Analyze whether the content of the package is updated by the maintainer.""" def __init__(self) -> None: - super().__init__( - name="unchanged_release_analyzer", - heuristic=Heuristics.UNCHANGED_RELEASE, - depends_on=[(Heuristics.HIGH_RELEASE_FREQUENCY, HeuristicResult.FAIL)], - ) + super().__init__(name="unchanged_release_analyzer", heuristic=Heuristics.UNCHANGED_RELEASE) self.hash_algo: str = "sha256" def _get_digests(self, pypi_package_json: PyPIPackageJsonAsset) -> list[str] | None: @@ -68,6 +65,12 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes """ digests: 
list[str] | None = self._get_digests(pypi_package_json) if digests is None: + error_msg = "Metadata has no digest information" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + + if len(digests) == 1: + # We only analyze packages with multiple releases, this heuristic does not apply. return HeuristicResult.SKIP, {} frequency = Counter(digests) diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py index 0198a932d..2bc3fd8ed 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py @@ -34,7 +34,6 @@ def __init__(self) -> None: super().__init__( name="wheel_absence_analyzer", heuristic=Heuristics.WHEEL_ABSENCE, - depends_on=None, ) def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py index 4fd96e23a..23b4819aa 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py @@ -56,13 +56,6 @@ def __init__(self, resources_path: str | None = None) -> None: super().__init__( name="suspicious_patterns_analyzer", heuristic=Heuristics.SUSPICIOUS_PATTERNS, - # We include the SKIP condition here as we want to consider the case where EMPTY_PROJECT_LINK fails, - # meaning SOURCE_CODE_REPO is skipped, as this is still a scenario where the source code repository - # is not available, so we want to run source code analysis. 
- depends_on=[ - (Heuristics.SOURCE_CODE_REPO, HeuristicResult.FAIL), - (Heuristics.SOURCE_CODE_REPO, HeuristicResult.SKIP), - ], ) if resources_path is None: resources_path = global_config.resources_path diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py index ebde2a21f..1e7b3c462 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py @@ -14,6 +14,7 @@ import requests from requests import RequestException +from macaron.errors import HeuristicAnalyzerValueError from macaron.json_tools import JsonType from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics @@ -26,11 +27,7 @@ class SuspiciousSetupAnalyzer(BaseHeuristicAnalyzer): """Check whether suspicious packages are imported in setup.py.""" def __init__(self) -> None: - super().__init__( - name="suspicious_setup_analyzer", - heuristic=Heuristics.SUSPICIOUS_SETUP, - depends_on=[(Heuristics.CLOSER_RELEASE_JOIN_DATE, HeuristicResult.FAIL)], - ) + super().__init__(name="suspicious_setup_analyzer", heuristic=Heuristics.SUSPICIOUS_SETUP) self.blacklist: list = ["base64", "request"] def _get_setup_source_code(self, pypi_package_json: PyPIPackageJsonAsset) -> str | None: @@ -46,8 +43,11 @@ def _get_setup_source_code(self, pypi_package_json: PyPIPackageJsonAsset) -> str str | None The source code. """ - sourcecode_url: str | None = pypi_package_json.get_sourcecode_url() + sourcecode_url: str | None = pypi_package_json.get_sourcecode_url(package_type="sdist") if sourcecode_url is None: + # This isn't an error as some packages may be distributed just as wheels, which typically don't + # include setup.py files, or at least don't run them automatically. 
+ logger.info("Package metadata does not supply a tarball") return None # Get name of file. @@ -63,7 +63,9 @@ def _get_setup_source_code(self, pypi_package_json: PyPIPackageJsonAsset) -> str return None if response.status_code != 200: - return None + error_msg = f"HTTP Error occured: reponse code {response.status_code}" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) source_file = os.path.join(temp_dir, file_name) with open(source_file, "wb") as file: @@ -72,9 +74,10 @@ def _get_setup_source_code(self, pypi_package_json: PyPIPackageJsonAsset) -> str file.write(chunk) except RequestException as error: # Something went wrong with the request, abort. - logger.debug("Error while streaming source file: %s", error) response.close() - return None + error_msg = f"Error while streaming source file: {error}" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) from error target_file = "setup.py" file_dir = file_name.removesuffix(".tar.gz").removesuffix(".zip") @@ -83,40 +86,65 @@ def _get_setup_source_code(self, pypi_package_json: PyPIPackageJsonAsset) -> str with tarfile.open(source_file, "r:gz") as tar: member = tar.getmember(archive_target) if not member.isfile(): - logger.debug("Target tar member is not a file: %s", member) + logger.debug( + "Target %s is not considered a file in the archive %s", archive_target, source_file + ) + # setup.py is not considered a file, so heuristic does not apply return None + tar.extract(member=member, path=temp_dir) + except KeyError as exception: - logger.debug("Error finding target '%s' in tar file '%s': %s.", archive_target, source_file, exception) + logger.debug( + "Target %s is not present within the archive %s: %s", archive_target, source_file, exception + ) + # setup.py is not in the archive, so heuristic does not apply return None + except tarfile.ReadError as exception: - logger.debug("Error reading tar file: %s", exception) + logger.debug("Unable to read tarfile %s: %s. 
Attempting to parse as a zip file", source_file, exception) try: with zipfile.ZipFile(source_file, "r") as zip_ref: info = zip_ref.getinfo(archive_target) if info.is_dir(): - logger.debug("Target zip member is a directory: %s", info) + logger.debug("Target %s is a directory in the archive %s", archive_target, source_file) + # setup.py is a directory, so heuristic does not apply return None + zip_ref.extract(member=info, path=temp_dir) + except zipfile.BadZipFile as bad_zip_exception: - logger.debug("Error reading zip file: %s", bad_zip_exception) + error_msg = f"Error reading zip file: {bad_zip_exception}" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) from bad_zip_exception + except zipfile.LargeZipFile as large_zip_exception: - logger.debug("Zip file too large to read: %s", large_zip_exception) + error_msg = f"Zip file too large to read: {large_zip_exception}" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) from large_zip_exception + except KeyError as zip_key_exception: logger.debug( - "Error finding target '%s' in zip file '%s': %s", archive_target, source_file, zip_key_exception + "Target '%s' is not in zip file '%s': %s", archive_target, source_file, zip_key_exception ) + # setup.py is not in the archive, so heuristic does not apply return None final_path = os.path.join(temp_dir, archive_target) - # Check if file exists and is readable. 
- if not os.access(final_path, os.R_OK): - logger.debug("Source file could not be extracted and read from download archive: %s.", final_path) - return None - with open(final_path, encoding="utf-8") as file: - return file.read() + try: + with open(final_path, encoding="utf-8") as file: + return file.read() + except FileNotFoundError as file_not_found_error: + error_msg = f"Final extracted path for {archive_target} does not exist: {file_not_found_error}" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) from file_not_found_error + + except PermissionError as permission_error: + error_msg = f"Unable to access extracted path for {archive_target}: {permission_error}" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) from permission_error def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: """Analyze the package. @@ -133,6 +161,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes """ content: str | None = self._get_setup_source_code(pypi_package_json) if content is None: + # setup.py didn't exist as a file in the archive, so this heuristic does not apply return HeuristicResult.SKIP, {} # Catch the imported module. diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 3f86dfc9f..15575797f 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -140,7 +140,9 @@ def analyze_source( logger.debug("Instantiating %s", PyPISourcecodeAnalyzer.__name__) analyzer = PyPISourcecodeAnalyzer() - if not force and analyzer.depends_on and self._should_skip(results, analyzer.depends_on): + # If SOURCE_CODE_REPO failed, there is no source code repository available for this package. This is when we would want + # to run source code analysis. 
+ if not force and results[Heuristics.SOURCE_CODE_REPO] == HeuristicResult.PASS: return {analyzer.heuristic: HeuristicResult.SKIP}, {} try: @@ -221,14 +223,6 @@ def run_heuristics( analyzer: BaseHeuristicAnalyzer = _analyzer() logger.debug("Instantiating %s", _analyzer.__name__) - depends_on: list[tuple[Heuristics, HeuristicResult]] | None = analyzer.depends_on - - if depends_on: - should_skip: bool = self._should_skip(results, depends_on) - if should_skip: - results[analyzer.heuristic] = HeuristicResult.SKIP - continue - result, result_info = analyzer.analyze(pypi_package_json) if analyzer.heuristic: results[analyzer.heuristic] = result diff --git a/tests/malware_analyzer/pypi/test_anomalous_version.py b/tests/malware_analyzer/pypi/test_anomalous_version.py index 3edd73d0c..28e5be51f 100644 --- a/tests/malware_analyzer/pypi/test_anomalous_version.py +++ b/tests/malware_analyzer/pypi/test_anomalous_version.py @@ -12,7 +12,13 @@ def test_analyze_no_information(pypi_package_json: MagicMock) -> None: - """Test for when there is no release information, so error""" + """Test for when there is no release information (should error) + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ analyzer = AnomalousVersionAnalyzer() pypi_package_json.get_releases.return_value = None @@ -22,7 +28,13 @@ def test_analyze_no_information(pypi_package_json: MagicMock) -> None: def test_analyze_invalid_time(pypi_package_json: MagicMock) -> None: - """Test for when the supplied upload time does not conform with PEP 440, so error.""" + """Test for when the supplied upload time does not conform with PEP 440 (should error). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ analyzer = AnomalousVersionAnalyzer() version = "1.1" release = { @@ -60,7 +72,13 @@ def test_analyze_invalid_time(pypi_package_json: MagicMock) -> None: def test_analyze_no_time(pypi_package_json: MagicMock) -> None: - """Test for when there is no supplied upload time, so error.""" + """Test for when there is no supplied upload time (should error). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ analyzer = AnomalousVersionAnalyzer() version = "1.1" release = { @@ -253,6 +271,8 @@ def test_analyze( Parameters ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. version : str the version number for the test package. upload_date : str @@ -297,3 +317,47 @@ def test_analyze( actual_result = analyzer.analyze(pypi_package_json) assert actual_result == expected_result + + +def test_multiple_releases(pypi_package_json: MagicMock) -> None: + """Test when there are multiple releases of the package (should skip). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ + analyzer = AnomalousVersionAnalyzer() + release_content = [ + { + "comment_text": "", + "digests": { + "blake2b_256": "defa2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3", + "md5": "9203bbb130f8ddb38269f4861c170d04", + "sha256": "168bcccbf5106132e90b85659297700194369b8f6b3e5a03769614f0d200e370", + }, + "downloads": -1, + "filename": "ttttttttest_nester.py-0.1.0.tar.gz", + "has_sig": False, + "md5_digest": "9203bbb130f8ddb38269f4861c170d04", + "packagetype": "sdist", + "python_version": "source", + "requires_python": None, + "size": 546, + "upload_time": "2016-10-13T05:42:27", + "upload_time_iso_8601": "2016-10-13T05:42:27.073842Z", + "url": "https://files.pythonhosted.org/packages/de/fa/" + + "2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3/ttttttttest_nester.py-0.1.0.tar.gz", + "yanked": False, + "yanked_reason": None, + } + ] + releases = { # this can just be the same content, as it'll be skipped anyway + "0.1": release_content, + "0.2": release_content, + } + pypi_package_json.get_releases.return_value = releases + expected_result: tuple[HeuristicResult, dict] = (HeuristicResult.SKIP, {}) + + actual_result = analyzer.analyze(pypi_package_json) + assert actual_result == expected_result diff --git a/tests/malware_analyzer/pypi/test_closer_release_join_date.py b/tests/malware_analyzer/pypi/test_closer_release_join_date.py index 309574a21..408a616b7 100644 --- a/tests/malware_analyzer/pypi/test_closer_release_join_date.py +++ b/tests/malware_analyzer/pypi/test_closer_release_join_date.py @@ -5,12 +5,21 @@ from datetime import datetime from unittest.mock import MagicMock +import pytest + +from macaron.errors import HeuristicAnalyzerValueError from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.closer_release_join_date import CloserReleaseJoinDateAnalyzer -def test_analyze_pass(pypi_package_json: MagicMock) -> None: - """Test analyze method when the 
heuristic should pass.""" +def test_far_away_release_join_date(pypi_package_json: MagicMock) -> None: + """Test when the maintainer join date is far away from the upload date (should pass). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ analyzer = CloserReleaseJoinDateAnalyzer() # Set up mock return values. @@ -28,8 +37,14 @@ def test_analyze_pass(pypi_package_json: MagicMock) -> None: assert "latest_release_date" in detail_info -def test_analyze_process(pypi_package_json: MagicMock) -> None: - """Test analyze method when the heuristic should fail.""" +def test_closer_release_join_date(pypi_package_json: MagicMock) -> None: + """Test when the maintainer join date is close to the upload date (should fail). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ analyzer = CloserReleaseJoinDateAnalyzer() # Set up mock return values. @@ -47,19 +62,47 @@ def test_analyze_process(pypi_package_json: MagicMock) -> None: assert "latest_release_date" in detail_info -def test_analyze_skip(pypi_package_json: MagicMock) -> None: - """Test analyze method when the heuristic should be skipped.""" +def test_no_maintainers(pypi_package_json: MagicMock) -> None: + """Test when there are no maintainers (should error). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ analyzer = CloserReleaseJoinDateAnalyzer() # Set up mock return values. pypi_package_json.pypi_registry.get_maintainers_of_package.return_value = None - pypi_package_json.get_latest_release_upload_time.return_value = "2022-06-20T12:00:00" pypi_package_json.component_name = "mock1" # Call the method. 
- result, detail_info = analyzer.analyze(pypi_package_json) + with pytest.raises(HeuristicAnalyzerValueError): + _ = analyzer.analyze(pypi_package_json) + + +@pytest.mark.parametrize( + ("upload_time"), + [pytest.param("20 June 2022 at 12pm", id="test_incorrect_format"), pytest.param(None, id="test_no_upload_time")], +) +def test_malformed_upload_time(pypi_package_json: MagicMock, upload_time: str | None) -> None: + """Test when the upload time is not in the expected format (should error). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + upload_time: str | None + The upload time for the package in any format that isn't the expected one. + """ + analyzer = CloserReleaseJoinDateAnalyzer() - # Assert. - assert result == HeuristicResult.SKIP - assert "maintainers_join_date" in detail_info - assert "latest_release_date" in detail_info + # Set up mock return values. + pypi_package_json.pypi_registry.get_maintainers_of_package.return_value = ["maintainer1"] + pypi_package_json.pypi_registry.get_maintainer_join_date.side_effect = [datetime(2022, 6, 18)] + pypi_package_json.get_latest_release_upload_time.return_value = upload_time + pypi_package_json.component_name = "mock1" + + # Call the method. + with pytest.raises(HeuristicAnalyzerValueError): + _ = analyzer.analyze(pypi_package_json) diff --git a/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py b/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py index 5dad60add..25d679e5d 100644 --- a/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py +++ b/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for heuristic detecting malicious metadata from PyPI""" @@ -36,8 +36,14 @@ def setup_empty_project_link_analyzer() -> dict: } -def test_analyze_no_links(empty_project_link_analyzer: dict) -> None: - """Test for result failed.""" +def test_no_links(empty_project_link_analyzer: dict) -> None: + """Test with no links (should fail). + + Parameters + ---------- + empty_project_link_analyzer: dict + A configured EmptyProjectLinkAnalyzer from the fixture. + """ mock_pypi_package_fail = empty_project_link_analyzer["mock_pypi_package_fail"] mock_pypi_package_fail.get_project_links.return_value = {} expected_result: tuple[HeuristicResult, dict] = (HeuristicResult.FAIL, {}) @@ -47,8 +53,14 @@ def test_analyze_no_links(empty_project_link_analyzer: dict) -> None: assert result == expected_result -def test_analyze_with_links(empty_project_link_analyzer: dict) -> None: - """Test for result passed.""" +def test_with_links(empty_project_link_analyzer: dict) -> None: + """Test with links present (should pass). + + Parameters + ---------- + empty_project_link_analyzer: dict + A configured EmptyProjectLinkAnalyzer from the fixture. 
+ """ package_links = empty_project_link_analyzer["package_links"] mock_pypi_package_pass = empty_project_link_analyzer["mock_pypi_package_pass"] mock_pypi_package_pass.get_project_links.return_value = package_links @@ -57,14 +69,3 @@ def test_analyze_with_links(empty_project_link_analyzer: dict) -> None: result = empty_project_link_analyzer["analyzer"].analyze(mock_pypi_package_pass) assert result == expected_result - - -def test_analyze_none(empty_project_link_analyzer: dict) -> None: - """Test for result skip.""" - mock_pypi_package_pass = empty_project_link_analyzer["mock_pypi_package_pass"] - mock_pypi_package_pass.get_project_links.return_value = None - expected_result: tuple[HeuristicResult, dict] = (HeuristicResult.FAIL, {}) - - result = empty_project_link_analyzer["analyzer"].analyze(mock_pypi_package_pass) - - assert result == expected_result diff --git a/tests/malware_analyzer/pypi/test_high_release_frequency.py b/tests/malware_analyzer/pypi/test_high_release_frequency.py index 9cd82b570..7e5e713ba 100644 --- a/tests/malware_analyzer/pypi/test_high_release_frequency.py +++ b/tests/malware_analyzer/pypi/test_high_release_frequency.py @@ -1,16 +1,19 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for high release frequency heuristic.""" from unittest.mock import MagicMock +import pytest + +from macaron.errors import HeuristicAnalyzerValueError from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.high_release_frequency import HighReleaseFrequencyAnalyzer -def test_analyze_high_frequency_pass(pypi_package_json: MagicMock) -> None: - """Test HighReleaseFrequencyAnalyzer with low release frequency (should pass). 
+def test_low_release_frequency(pypi_package_json: MagicMock) -> None: + """Test with low release frequency (should pass). Parameters ---------- @@ -34,8 +37,8 @@ def test_analyze_high_frequency_pass(pypi_package_json: MagicMock) -> None: assert detail_info == {"frequency": 9} -def test_analyze_low_frequency_fail(pypi_package_json: MagicMock) -> None: - """Test HighReleaseFrequencyAnalyzer with high release frequency (should fail). +def test_high_release_frequency(pypi_package_json: MagicMock) -> None: + """Test with high release frequency (should fail). Parameters ---------- @@ -59,8 +62,8 @@ def test_analyze_low_frequency_fail(pypi_package_json: MagicMock) -> None: assert detail_info == {"frequency": 1} -def test_analyze_no_releases_skip(pypi_package_json: MagicMock) -> None: - """Test HighReleaseFrequencyAnalyzer when no releases are available (should skip). +def test_no_releases(pypi_package_json: MagicMock) -> None: + """Test when no releases are available (should error). Parameters ---------- @@ -73,15 +76,12 @@ def test_analyze_no_releases_skip(pypi_package_json: MagicMock) -> None: pypi_package_json.get_releases.return_value = None # Call the method. - result, detail_info = analyzer.analyze(pypi_package_json) - - # Assert. - assert result == HeuristicResult.SKIP - assert not detail_info + with pytest.raises(HeuristicAnalyzerValueError): + _ = analyzer.analyze(pypi_package_json) -def test_analyze_single_release_skip(pypi_package_json: MagicMock) -> None: - """Test HighReleaseFrequencyAnalyzer with a single release (should skip). +def test_single_release(pypi_package_json: MagicMock) -> None: + """Test with a single release (should skip). 
Parameters ---------- diff --git a/tests/malware_analyzer/pypi/test_one_release_analyzer.py b/tests/malware_analyzer/pypi/test_one_release_analyzer.py index 60ad244ab..f14a4865d 100644 --- a/tests/malware_analyzer/pypi/test_one_release_analyzer.py +++ b/tests/malware_analyzer/pypi/test_one_release_analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting malicious metadata from PyPI""" @@ -6,6 +6,7 @@ import pytest +from macaron.errors import HeuristicAnalyzerValueError from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.one_release import OneReleaseAnalyzer from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset @@ -31,19 +32,29 @@ def setup_one_release_analyzer() -> dict: } -def test_analyze_no_releases(one_release_analyzer: dict) -> None: - """Test for result skipped.""" +def test_no_releases(one_release_analyzer: dict) -> None: + """No release information available (should error). + + Parameters + ---------- + one_release_analyzer: dict + A configured OneReleaseAnalyzer from the fixture. + """ mock_pypi_package_pass = one_release_analyzer["mock_pypi_package_pass"] mock_pypi_package_pass.get_releases.return_value = None - expected_result: tuple[HeuristicResult, dict] = (HeuristicResult.SKIP, {"releases": {}}) - result = one_release_analyzer["analyzer"].analyze(mock_pypi_package_pass) + with pytest.raises(HeuristicAnalyzerValueError): + _ = one_release_analyzer["analyzer"].analyze(mock_pypi_package_pass) - assert result == expected_result +def test_one_release(one_release_analyzer: dict) -> None: + """Test for a single release (should fail). 
-def test_analyze_one_release(one_release_analyzer: dict) -> None: - """Test for result failed.""" + Parameters + ---------- + one_release_analyzer: dict + A configured OneReleaseAnalyzer from the fixture. + """ release = { "0.1.0": [ { @@ -79,8 +90,14 @@ def test_analyze_one_release(one_release_analyzer: dict) -> None: assert result == expected_result -def test_analyze_multiple_releases(one_release_analyzer: dict) -> None: - """Test for result passed.""" +def test_multiple_releases(one_release_analyzer: dict) -> None: + """Test with multiple releases (should pass). + + Parameters + ---------- + one_release_analyzer: dict + A configured OneReleaseAnalyzer from the fixture. + """ releases = { "0.0.1": [], "0.10.0": [ diff --git a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py index 295083e08..82029b03a 100644 --- a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py +++ b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py @@ -4,12 +4,14 @@ """Tests detecting malicious patterns in PyPI package sourcecode.""" import json import os -from unittest.mock import MagicMock, patch +from pathlib import Path +from unittest.mock import MagicMock import pytest import yaml from macaron import MACARON_PATH +from macaron.config.defaults import defaults, load_defaults from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer import PyPISourcecodeAnalyzer @@ -17,6 +19,21 @@ RESOURCES_PATH = os.path.join(MACARON_PATH, "resources") +def config_defaults(tmp_path: Path, content: str) -> None: + """Set up the global defaults variable with the given defaults.ini content. + + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + content: str + The content of the defaults.ini file.
+ """ + defaults_file = Path(os.path.join(tmp_path, "config.ini")) + defaults_file.write_text(content, encoding="utf-8") + assert load_defaults(str(defaults_file)) is True + + def get_rule_ids_list(path: str) -> set[str]: """ Extract a set of Semgrep rule IDs from a .yaml file. @@ -24,7 +41,12 @@ def get_rule_ids_list(path: str) -> set[str]: Parameters ---------- path: str - the path to the .yaml file to read. + The path to the .yaml file to read. + + Returns + ------- + set[str] + Extracted set of rule IDs from the .yaml file provided. """ with open(path, encoding="utf8") as semgrep_yaml: ruleset: dict[str, list] = yaml.safe_load(semgrep_yaml.read()) @@ -32,21 +54,30 @@ def get_rule_ids_list(path: str) -> set[str]: def test_no_resources() -> None: - """Test for when the semgrep rules can't be found, so error.""" + """Test for when the semgrep rules can't be found (should error).""" with pytest.raises(ConfigurationError): _ = PyPISourcecodeAnalyzer(resources_path="") -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -def test_no_defaults_section(mock_defaults: MagicMock) -> None: - """Test for when the heuristics.pypi in defaults isn't defined at all, so error.""" - mock_defaults.has_section.side_effect = lambda _: False +def test_no_defaults_section() -> None: + """Test for when the heuristic.pypi section in defaults isn't defined at all (should error). + + This is mostly an internal coverage test; it can't really happen when a user provides their own config.ini. + """ + # have to manually remove the section as providing a file won't remove the section + defaults.remove_section("heuristic.pypi") with pytest.raises(ConfigurationError): _ = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) def test_no_sourcecode(pypi_package_json: MagicMock) -> None: - """Test for when there is no source code available, so error.""" + """Test for when there is no source code available (should error).
+ + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) pypi_package_json.package_sourcecode_path = "" @@ -55,59 +86,58 @@ def test_no_sourcecode(pypi_package_json: MagicMock) -> None: analyzer.analyze(pypi_package_json) -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -def test_no_custom_path(mock_defaults: MagicMock) -> None: - """Test for when a default path isn't provided, so the custom rule path should be None.""" - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: ( - MagicMock(get=lambda _: None) if section == "heuristic.pypi" else None - ) - analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) - assert analyzer.custom_rule_path is None +def test_no_custom_path(tmp_path: Path) -> None: + """Test for when a default path isn't provided (the custom rule path should be None). - # Make sure the empty string is not considered as a path - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: ( - MagicMock(get=lambda _: "") if section == "heuristic.pypi" else None - ) + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. 
+ """ + defaults_content = """ + [heuristic.pypi] + custom_semgrep_rules_path = + """ + config_defaults(tmp_path, defaults_content) analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) assert analyzer.custom_rule_path is None -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -def test_nonexistent_rule_path(mock_defaults: MagicMock) -> None: - """Test for when the custom path provided does not exist, so error.""" - defaults = { - "custom_semgrep_rules_path": "some_random_path", - } - sub_section = MagicMock() - sub_section.get.side_effect = defaults.get - - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: sub_section if section == "heuristic.pypi" else None +def test_nonexistent_rule_path(tmp_path: Path) -> None: + """Test for when the custom path provided does not exist (should error). + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + """ + defaults_content = """ + [heuristic.pypi] + custom_semgrep_rules_path = some_random_path + """ + config_defaults(tmp_path, defaults_content) with pytest.raises(ConfigurationError): _ = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -def test_invalid_custom_rules(mock_defaults: MagicMock) -> None: - """Test for when the provided file is not a valid semgrep rule, so error,""" - # Use this file as an invalid semgrep rule as it is most definitely not a semgrep rule, and does exist. 
- defaults = { - "custom_semgrep_rules_path": os.path.abspath(__file__), - } - sub_section = MagicMock() - sub_section.get.side_effect = defaults.get - - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: sub_section if section == "heuristic.pypi" else None +def test_invalid_custom_rules(tmp_path: Path) -> None: + """Test for when the provided file is not a valid semgrep rule (should error). + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + """ + # Use this file as an invalid semgrep rule as it is most definitely not a semgrep rule, and does exist. + defaults_content = f""" + [heuristic.pypi] + custom_semgrep_rules_path = {os.path.abspath(__file__)} + """ + config_defaults(tmp_path, defaults_content) with pytest.raises(ConfigurationError): _ = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") @pytest.mark.parametrize( # the sourcecode sample directory under resources/sourcecode_samples and the semgrep rule under resources/pypi_malware_rules ("sourcecode_sample_dir", "rule_file"), @@ -116,10 +146,18 @@ def test_invalid_custom_rules(mock_defaults: MagicMock) -> None: pytest.param("exfiltration", "exfiltration.yaml", id="exfiltration"), ], ) -def test_rules( - mock_defaults: MagicMock, pypi_package_json: MagicMock, sourcecode_sample_dir: str, rule_file: str -) -> None: - """Test the default Semgrep rulesets on code samples.""" +def test_rules(pypi_package_json: MagicMock, sourcecode_sample_dir: str, rule_file: str) -> None: + """Test the default Semgrep rulesets on suspicious code samples (should fail). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ sourcecode_sample_dir: str + The directory name in the test resources directory containing sample suspicious code and an expected_results.json file. + rule_file: str + The file name (with extension) of the ruleset from the main resources directory to test. + """ sample_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), "resources", "sourcecode_samples", sourcecode_sample_dir ) @@ -128,10 +166,7 @@ def test_rules( expected_results = json.loads(file.read()) # Test with none of the defaults.ini settings used, to ensure this ruleset is run - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: ( - MagicMock(get=lambda _: None) if section == "heuristic.pypi" else None - ) + defaults["heuristic.pypi"].clear() analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) @@ -144,24 +179,23 @@ def test_rules( assert expected_results == analysis -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -def test_custom_rules(mock_defaults: MagicMock, pypi_package_json: MagicMock) -> None: - """Test that custom rulesets are properly run and appear in output detections""" - sample_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "sourcecode_samples") +def test_custom_rules(pypi_package_json: MagicMock) -> None: + """Test that custom rulesets are properly run and appear in output detections (heuristic should fail). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ custom_rule_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "custom_sample.yaml") expected_ids = get_rule_ids_list(custom_rule_path) - defaults = { - "custom_semgrep_rules_path": custom_rule_path, - } - sub_section = MagicMock() - sub_section.get.side_effect = defaults.get - - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: sub_section if section == "heuristic.pypi" else None + defaults["heuristic.pypi"]["custom_semgrep_rules_path"] = custom_rule_path analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) - pypi_package_json.package_sourcecode_path = sample_path + pypi_package_json.package_sourcecode_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "resources", "sourcecode_samples" + ) result, analysis = analyzer.analyze(pypi_package_json) @@ -175,52 +209,37 @@ def test_custom_rules(mock_defaults: MagicMock, pypi_package_json: MagicMock) -> assert expected_ids - actual_ids == set() -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -@pytest.mark.parametrize( - # the sourcecode sample directory under resources/sourcecode_samples and the semgrep rule under resources/pypi_malware_rules - ("defaults", "list_keys", "rulefile_path"), - [ - pytest.param( - {"disabled_default_rulesets": "obfuscation"}, - {"disabled_default_rulesets"}, - os.path.join(RESOURCES_PATH, "pypi_malware_rules", "obfuscation.yaml"), - id="test_disable_default_ruleset", - ), - pytest.param( - { - "disabled_custom_rulesets": "custom_sample", - "custom_semgrep_rules_path": os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources"), - }, - {"disabled_custom_rulesets"}, - os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "custom_sample.yaml"), - id="test_disable_custom_ruleset", - ), - ], -) def test_disabling_rulesets( - mock_defaults: MagicMock, + tmp_path: Path, 
pypi_package_json: MagicMock, - defaults: dict[str, str], - list_keys: set[str], - rulefile_path: str, ) -> None: - """Test that rulesets can be disabled""" - sample_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "sourcecode_samples") - - expected_ids = get_rule_ids_list(rulefile_path) - sub_section = MagicMock() - sub_section.get.side_effect = defaults.get - - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: sub_section if section == "heuristic.pypi" else None - mock_defaults.get_list.side_effect = lambda section, option: ( - [x.strip() for x in defaults[option].split("\n") if x.strip()] - if section == "heuristic.pypi" and option in list_keys - else None + """Test that rulesets can be disabled (heuristic should fail). + + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ + # ensure both custom and default rulesets can be disabled + expected_ids = get_rule_ids_list(os.path.join(RESOURCES_PATH, "pypi_malware_rules", "obfuscation.yaml")) + expected_ids.union( + get_rule_ids_list(os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "custom_sample.yaml")) ) + defaults_content = f""" + [heuristic.pypi] + disabled_default_rulesets = obfuscation + disabled_custom_rulesets = custom_sample + custom_semgrep_rules_path = {os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources")} + """ + config_defaults(tmp_path, defaults_content) + analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) - pypi_package_json.package_sourcecode_path = sample_path + pypi_package_json.package_sourcecode_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "resources", "sourcecode_samples" + ) result, analysis = analyzer.analyze(pypi_package_json) @@ -234,49 +253,51 @@ def test_disabling_rulesets( assert expected_ids - actual_ids == set() -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -def test_unknown_ruleset_exclusions(mock_defaults: MagicMock) -> None: - """Test when there are ruleset names supplied to be disabled that don't exist""" - defaults = { - "disabled_custom_rulesets": "custom_sample\ndoes_not_exist", - "custom_semgrep_rules_path": os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources"), - } - sub_section = MagicMock() - sub_section.get.side_effect = defaults.get - - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: sub_section if section == "heuristic.pypi" else None - mock_defaults.get_list.side_effect = lambda section, option: ( - [x.strip() for x in defaults[option].split("\n") if x.strip()] - if section == "heuristic.pypi" and option == "disabled_custom_rulesets" - else None - ) +def test_unknown_ruleset_exclusions(tmp_path: Path) -> 
None: + """Test when there are ruleset names supplied to be disabled that don't exist (should error). + + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + """ + defaults_content = f""" + [heuristic.pypi] + disabled_custom_rulesets = + custom_sample + does_not_exist + custom_semgrep_rules_path = {os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources")} + """ + config_defaults(tmp_path, defaults_content) with pytest.raises(ConfigurationError): _ = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) -@patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -def test_disabling_rules(mock_defaults: MagicMock, pypi_package_json: MagicMock) -> None: - """Test individual rules can be disabled""" - sample_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "sourcecode_samples") - custom_rule_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "custom_sample.yaml") +def test_disabling_rules(tmp_path: Path, pypi_package_json: MagicMock) -> None: + """Test individual rules can be disabled (heuristic should fail). + + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ expected_ids = {"custom_sample_1", "exfiltration_remote-exfiltration"} + defaults_content = f""" + [heuristic.pypi] + custom_semgrep_rules_path = {os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "custom_sample.yaml")} + disabled_rules = + custom_sample_1 + exfiltration_remote-exfiltration + """ + config_defaults(tmp_path, defaults_content) - defaults: dict[str, str] = { - "custom_semgrep_rules_path": custom_rule_path, - "disabled_rules": "\n".join(expected_ids), - } - sub_section = MagicMock() - sub_section.get.side_effect = defaults.get - - mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" - mock_defaults.__getitem__.side_effect = lambda section: sub_section if section == "heuristic.pypi" else None - mock_defaults.get_list.side_effect = lambda section, option: ( - list(expected_ids) if section == "heuristic.pypi" and option == "disabled_rules" else None - ) analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) - pypi_package_json.package_sourcecode_path = sample_path + pypi_package_json.package_sourcecode_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "resources", "sourcecode_samples" + ) result, analysis = analyzer.analyze(pypi_package_json) diff --git a/tests/malware_analyzer/pypi/test_source_code_repo.py b/tests/malware_analyzer/pypi/test_source_code_repo.py index 3cc9db15d..4d4fe07d2 100644 --- a/tests/malware_analyzer/pypi/test_source_code_repo.py +++ b/tests/malware_analyzer/pypi/test_source_code_repo.py @@ -19,7 +19,17 @@ ], ) def test_repo_existence(pypi_package_json: MagicMock, repository: bool, expected_result: HeuristicResult) -> None: - """Test if the source code repo exists.""" + """Test if the source code repo exists. + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + repository: bool + Boolean stating whether this package has a related repository. 
+ expected_result: HeuristicResult + The expected result of this run of the SourceCodeRepoAnalyzer. + """ pypi_package_json.has_repository = repository analyzer = SourceCodeRepoAnalyzer() result, _ = analyzer.analyze(pypi_package_json) diff --git a/tests/malware_analyzer/pypi/test_suspicious_setup.py b/tests/malware_analyzer/pypi/test_suspicious_setup.py index ec9af0f0f..2aba95880 100644 --- a/tests/malware_analyzer/pypi/test_suspicious_setup.py +++ b/tests/malware_analyzer/pypi/test_suspicious_setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for suspicious setup.py heuristic.""" @@ -11,11 +11,8 @@ from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset -def test_analyze_skip() -> None: - """Test to ensure the URL of the source distribution is missing. - - The heuristic analyzer should return SKIP if the URL is not present. - """ +def test_missing_sourcecode_url() -> None: + """Test with a missing sourcecode distribution URL (should skip).""" mock_pypi_package = MagicMock(spec=PyPIPackageJsonAsset) mock_pypi_package.get_sourcecode_url.return_value = None @@ -26,11 +23,8 @@ def test_analyze_skip() -> None: assert not data -def test_analyze_fail() -> None: - """Test to ensure that setup.py includes a suspicious import (base64). - - The heuristic analyzer should return FAIL if the suspicious import is found.
- """ +def test_suspicious_import() -> None: + """Test when setup.py contains a suspicious import (should fail).""" mock_pypi_package = MagicMock(spec=PyPIPackageJsonAsset) mock_pypi_package.get_sourcecode_url.return_value = "http://example.com/sourcecode.tar.gz" @@ -45,11 +39,8 @@ def test_analyze_fail() -> None: assert "base64" in data["import_module"] -def test_analyze_no_suspicious_import() -> None: - """Test to ensure that setup.py does not include suspicious imports. - - The heuristic analyzer should return PASS if no suspicious imports are found. - """ +def test_benign_import() -> None: + """Test when setup.py contains a benign import (should pass).""" mock_pypi_package = MagicMock(spec=PyPIPackageJsonAsset) mock_pypi_package.get_sourcecode_url.return_value = "http://example.com/sourcecode.tar.gz" @@ -66,7 +57,13 @@ def test_analyze_no_suspicious_import() -> None: @pytest.fixture(name="sample_code") def sample_code_() -> str: - """Return a block of sample source code.""" + """Return a block of sample source code. + + Returns + ------- + str: + Sample source code with a series of imports for analysis. + """ return """ import re import os, sys @@ -85,7 +82,13 @@ def test(): @pytest.fixture(name="sample_code_imports") def sample_code_imports_() -> list[str]: - """Return the list of imports that match the sample source code.""" + """Return the list of imports that match the sample source code. + + Returns + ------- + list[str]: + A list of sample package names used as imports, matching the sample source code fixture. + """ return [ "re", "os", @@ -110,14 +113,30 @@ def sample_code_imports_() -> list[str]: def test_ast_import_extraction(sample_code: str, sample_code_imports: list[str]) -> None: - """Test the extraction of imports using the AST method.""" + """Test the extraction of imports using the AST method. + + Parameters + ---------- + sample_code: str + Sample source code from the fixture with imports matching sample_code_imports. 
+ sample_code_imports: list[str] + Package imports from the fixture matching those imported from the sample_code. + """ parsed_imports = SuspiciousSetupAnalyzer().extract_from_ast(sample_code) assert len(parsed_imports) == len(sample_code_imports) assert bool(parsed_imports.intersection(sample_code_imports)) def test_re_import_extraction(sample_code: str, sample_code_imports: list[str]) -> None: - """Test the extraction of imports using the regular expression method.""" + """Test the extraction of imports using the regular expression method. + + Parameters + ---------- + sample_code: str + Sample source code from the fixture with imports matching sample_code_imports. + sample_code_imports: list[str] + Package imports from the fixture matching those imported from the sample_code. + """ parsed_imports = SuspiciousSetupAnalyzer().extract_from_lines(sample_code) assert len(parsed_imports) == len(sample_code_imports) assert bool(parsed_imports.intersection(sample_code_imports)) diff --git a/tests/malware_analyzer/pypi/test_typosquatting_presence.py b/tests/malware_analyzer/pypi/test_typosquatting_presence.py index 2d23233a2..8aad4fcc7 100644 --- a/tests/malware_analyzer/pypi/test_typosquatting_presence.py +++ b/tests/malware_analyzer/pypi/test_typosquatting_presence.py @@ -17,7 +17,18 @@ @pytest.fixture(name="analyzer") def analyzer_(tmp_path: Path) -> TyposquattingPresenceAnalyzer: - """Pytest fixture to create a TyposquattingPresenceAnalyzer instance with a dummy popular packages file.""" + """Pytest fixture to create a TyposquattingPresenceAnalyzer instance with a dummy popular packages file. + + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + + Returns + ------- + TyposquattingPresenceAnalyzer: + A configured analyzer with popular packages. + """ # Create a dummy popular packages file. 
pkg_file = Path(os.path.join(tmp_path, "popular.txt")) popular_packages = ["requests", "flask", "pytest"] @@ -26,16 +37,32 @@ def analyzer_(tmp_path: Path) -> TyposquattingPresenceAnalyzer: return analyzer_instance -def test_analyze_exact_match_pass(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None: - """Test the analyzer passes when the package name is an exact match to a popular package.""" +def test_exact_match(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer passes when the package name is an exact match to a popular package (should pass). + + Parameters + ---------- + analyzer: TyposquattingPresenceAnalyzer + A configured TyposquattingPresenceAnalyzer instance. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ pypi_package_json.component_name = "requests" result, info = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.PASS assert info == {"package_name": "requests"} -def test_analyze_similar_name_fail(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None: - """Test the analyzer fails when the package name is suspiciously similar to a popular package.""" +def test_similar_name(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer fails when the package name is suspiciously similar to a popular package (should fail). + + Parameters + ---------- + analyzer: TyposquattingPresenceAnalyzer + A configured TyposquattingPresenceAnalyzer instance. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ pypi_package_json.component_name = "reqursts" result, info = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.FAIL @@ -46,21 +73,49 @@ def test_analyze_similar_name_fail(analyzer: TyposquattingPresenceAnalyzer, pypi assert info["similarity_ratio"] >= analyzer.distance_ratio_threshold -def test_analyze_unrelated_name_pass(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None: - """Test the analyzer passes when the package name is not similar to any popular package.""" +def test_unrelated_name(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer passes when the package name is not similar to any popular package (should pass). + + Parameters + ---------- + analyzer: TyposquattingPresenceAnalyzer + A configured TyposquattingPresenceAnalyzer instance. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ pypi_package_json.component_name = "launchable" result, info = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.PASS assert info == {"package_name": "launchable"} -def test_analyze_nonexistent_file_skip() -> None: - """Test the analyzer raises an error if the popular packages file does not exist.""" +def test_nonexistent_popular_packages_file() -> None: + """Test when the popular packages file does not exist (should error).""" with pytest.raises(HeuristicAnalyzerValueError) as exc_info: TyposquattingPresenceAnalyzer("nonexistent_file.txt") assert "Popular packages file not found or path not configured" in str(exc_info.value) +def test_empty_popular_packages_file(tmp_path: Path, pypi_package_json: MagicMock) -> None: + """Test when the popular packages file is empty (should skip). + + Parameters + ---------- + tmp_path: Path + Pytest temporary path fixture. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ + pkg_file = Path(os.path.join(tmp_path, "empty_popular.txt")) + pkg_file.write_text("", encoding="utf-8") + analyzer_instance = TyposquattingPresenceAnalyzer(str(pkg_file)) + result, info = analyzer_instance.analyze(pypi_package_json) + assert result == HeuristicResult.SKIP + error_msg = info.get("warning") + assert isinstance(error_msg, str) + assert "Popular packages file is empty" in error_msg + + @pytest.mark.parametrize( ("package1", "package2", "expected_ratio"), [ @@ -72,17 +127,17 @@ def test_analyze_nonexistent_file_skip() -> None: def test_jaro_distance( analyzer: TyposquattingPresenceAnalyzer, package1: str, package2: str, expected_ratio: float ) -> None: - """Test the Jaro distance calculation.""" + """Test the Jaro distance calculation. + + Parameters + ---------- + analyzer: TyposquattingPresenceAnalyzer + A configured TyposquattingPresenceAnalyzer instance. + package1: str + Name of the first package in the distance calculation. + package2: str + Name of the second package in the distance calculation. + expected_ratio: float + The expected Jaro distance result for these two package names. 
+ """ assert analyzer.jaro_distance(package1, package2) == expected_ratio - - -def test_empty_popular_packages_file(tmp_path: Path, pypi_package_json: MagicMock) -> None: - """Test the analyzer skips when the popular packages file is empty.""" - pkg_file = Path(os.path.join(tmp_path, "empty_popular.txt")) - pkg_file.write_text("", encoding="utf-8") - analyzer_instance = TyposquattingPresenceAnalyzer(str(pkg_file)) - result, info = analyzer_instance.analyze(pypi_package_json) - assert result == HeuristicResult.SKIP - error_msg = info.get("warning") - assert isinstance(error_msg, str) - assert "Popular packages file is empty" in error_msg diff --git a/tests/malware_analyzer/pypi/test_unchanged_release.py b/tests/malware_analyzer/pypi/test_unchanged_release.py index f1162aaea..62ad66f63 100644 --- a/tests/malware_analyzer/pypi/test_unchanged_release.py +++ b/tests/malware_analyzer/pypi/test_unchanged_release.py @@ -1,15 +1,18 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting malicious metadata from PyPI""" from unittest.mock import MagicMock +import pytest + +from macaron.errors import HeuristicAnalyzerValueError from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release import UnchangedReleaseAnalyzer -def test_analyze_pass(pypi_package_json: MagicMock) -> None: - """Test the analyze method returning PASS. +def test_changed_releases(pypi_package_json: MagicMock) -> None: + """Test multiple releases with different digests and clear changes (should pass). 
Parameters ---------- @@ -33,14 +36,8 @@ def test_analyze_pass(pypi_package_json: MagicMock) -> None: assert not detail_info -def test_analyze_fail(pypi_package_json: MagicMock) -> None: - """Test the analyze method returning FAIL. - - Parameters - ---------- - pypi_package_json: MagicMock - The PyPIPackageJsonAsset MagicMock fixture. - """ +def test_unchanged_release(pypi_package_json: MagicMock) -> None: + """Test multiple releases with a duplicate digest and unchanged release (should fail).""" analyzer = UnchangedReleaseAnalyzer() # Set up mock return values. @@ -58,8 +55,8 @@ def test_analyze_fail(pypi_package_json: MagicMock) -> None: assert not detail_info -def test_analyze_skip(pypi_package_json: MagicMock) -> None: - """Test the analyze method returning SKIP. +def test_single_release(pypi_package_json: MagicMock) -> None: + """Test a single release (should skip). Parameters ---------- @@ -69,7 +66,7 @@ def test_analyze_skip(pypi_package_json: MagicMock) -> None: analyzer = UnchangedReleaseAnalyzer() # Set up mock return values. - pypi_package_json.get_releases.return_value = None + pypi_package_json.get_releases.return_value = {"v1.0": [{"digests": {"sha256": "digest1"}}]} # Call the method. result, detail_info = analyzer.analyze(pypi_package_json) @@ -77,3 +74,21 @@ def test_analyze_skip(pypi_package_json: MagicMock) -> None: # Assert. assert result == HeuristicResult.SKIP assert not detail_info + + +def test_no_digests(pypi_package_json: MagicMock) -> None: + """Test the digest information being unavailable (should error). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ + analyzer = UnchangedReleaseAnalyzer() + + # Set up mock return values. + pypi_package_json.get_releases.return_value = None + + # Call the method. 
+ with pytest.raises(HeuristicAnalyzerValueError): + _ = analyzer.analyze(pypi_package_json) diff --git a/tests/malware_analyzer/pypi/test_wheel_absence.py b/tests/malware_analyzer/pypi/test_wheel_absence.py index 2c233428f..1168ce4d5 100644 --- a/tests/malware_analyzer/pypi/test_wheel_absence.py +++ b/tests/malware_analyzer/pypi/test_wheel_absence.py @@ -11,8 +11,14 @@ from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence import WheelAbsenceAnalyzer -def test_analyze_no_information(pypi_package_json: MagicMock) -> None: - """Test for when there is no release information, so error""" +def test_no_information(pypi_package_json: MagicMock) -> None: + """Test for when there is no release information (should error). + + Parameters + ---------- + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ analyzer = WheelAbsenceAnalyzer() pypi_package_json.get_releases.return_value = None @@ -25,8 +31,16 @@ def test_analyze_no_information(pypi_package_json: MagicMock) -> None: # E.g. if it is imported like this: import os; os.listdir() then you patch os.listdir. # If it is imported like this: from os import listdir; listdir() then you patch .listdir. @patch("macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence.send_head_http_raw") -def test_analyze_tar_present(mock_send_head_http_raw: MagicMock, pypi_package_json: MagicMock) -> None: - """Test for when only .tar.gz is present, so failed""" +def test_tar_only_present(mock_send_head_http_raw: MagicMock, pypi_package_json: MagicMock) -> None: + """Test for when only .tar.gz is present (should fail). + + Parameters + ---------- + mock_send_head_http_raw: MagicMock + The mocked function macaron.util.send_head_http_raw. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ analyzer = WheelAbsenceAnalyzer() version = "0.1.0" filename = "ttttttttest_nester.py-0.1.0.tar.gz" @@ -86,8 +100,16 @@ def test_analyze_tar_present(mock_send_head_http_raw: MagicMock, pypi_package_js @patch("macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence.send_head_http_raw") -def test_analyze_whl_present(mock_send_head_http_raw: MagicMock, pypi_package_json: MagicMock) -> None: - """Test for when only .whl is present, so pass""" +def test_whl_only_present(mock_send_head_http_raw: MagicMock, pypi_package_json: MagicMock) -> None: + """Test for when only .whl is present (should pass). + + Parameters + ---------- + mock_send_head_http_raw: MagicMock + The mocked function macaron.util.send_head_http_raw. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. + """ analyzer = WheelAbsenceAnalyzer() version = "0.1.0" filename = "ttttttttest_nester.py-0.1.0.whl" @@ -146,7 +168,15 @@ def test_analyze_whl_present(mock_send_head_http_raw: MagicMock, pypi_package_js @patch("macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence.send_head_http_raw") def test_analyze_both_present(mock_send_head_http_raw: MagicMock, pypi_package_json: MagicMock) -> None: - """Test for when both .tar.gz and .whl are present, so passed""" + """Test for when both .tar.gz and .whl are present (should pass). + + Parameters + ---------- + mock_send_head_http_raw: MagicMock + The mocked function macaron.util.send_head_http_raw. + pypi_package_json: MagicMock + The PyPIPackageJsonAsset MagicMock fixture. 
+ """ analyzer = WheelAbsenceAnalyzer() version = "0.1.0" file_prefix = "ttttttttest_nester.py-0.1.0" diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py index 07c4684de..6ff59b449 100644 --- a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py +++ b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py @@ -110,8 +110,8 @@ def test_detect_malicious_metadata( load_defaults(user_config_path) pypi_registry.load_defaults() - httpserver.expect_request("/project/zlibxjson").respond_with_data(p_page_content) - httpserver.expect_request("/user/tser111111").respond_with_data(u_page_content) + httpserver.expect_request("/project/zlibxjson/").respond_with_data(p_page_content) + httpserver.expect_request("/user/tser111111/").respond_with_data(u_page_content) httpserver.expect_request("/pypi/zlibxjson/json").respond_with_json(package_json) httpserver.expect_request( "/packages/3e/1e/b1ecb05e7ca1eb74ca6257a7f43d052b90d2ac01feb28eb28ce677a871ab/zlibxjson-8.2.tar.gz"