From 2b87ba75ee4a6ae9c58c39a81678bbb6e9c02854 Mon Sep 17 00:00:00 2001 From: Matthias Goerens Date: Fri, 24 Oct 2025 21:24:13 +0200 Subject: [PATCH] Add check of tarball content Checks that the tarball contains a unique directory named after the chart. This directory must contain a Chart.yaml file. The directory may contain additional files or folders. No other files or folder should be placed at the root of the archive. Fix #342 Signed-off-by: Matthias Goerens --- scripts/src/submission/submission.py | 76 +++++++++- scripts/src/submission/submission_test.py | 165 +++++++++++++++++++++- scripts/src/submission/validate.py | 2 +- 3 files changed, 235 insertions(+), 8 deletions(-) diff --git a/scripts/src/submission/submission.py b/scripts/src/submission/submission.py index 90cfa731..165dc315 100644 --- a/scripts/src/submission/submission.py +++ b/scripts/src/submission/submission.py @@ -1,5 +1,6 @@ import os import re +import tarfile import requests import semver import yaml @@ -54,6 +55,10 @@ class ChartError(SubmissionError): pass +class TarballContentError(SubmissionError): + pass + + @dataclass class Chart: """Represents a Helm Chart @@ -288,7 +293,7 @@ def _get_modified_files(self): if "filename" in file: self.modified_files.append(file["filename"]) - def parse_modified_files(self): + def parse_modified_files(self, repo_path: str = ""): """Classify the list of modified files. Modified files are categorized into 5 groups, mapping to 5 class attributes: @@ -301,10 +306,15 @@ def parse_modified_files(self): - A list of added / modified OWNERS files is recorded in the `modified_owners` attribute. - The rest of the files are classified in the `modified_unknown` attribute. + Args: + repo_path (str): Local directory where the repo is checked out + Raises a SubmissionError if: + * The PR doesn't contain any files * The Submission concerns more than one chart * The version of the chart is not SemVer compatible * The tarball file is named incorrectly + * The tarball content is incorrect """ if not self.modified_files: @@ -322,7 +332,7 @@ def parse_modified_files(self): elif file_category == "tarball": category, organization, name, version, _ = match.groups() self.chart.register_chart_info(category, organization, name, version) - self.set_tarball(file_path, match) + self.set_tarball(file_path, match, repo_path) elif file_category == "owners": category, organization, name = match.groups() self.chart.register_chart_info(category, organization, name) @@ -356,11 +366,22 @@ def set_source(self, file_path: str): self.source.found = True self.source.path = os.path.dirname(file_path) - def set_tarball(self, file_path: str, tarball_match: re.Match[str]): + def set_tarball( + self, file_path: str, tarball_match: re.Match[str], repo_path: str = "" + ): """Action to take when a file related to the tarball is found. This can either be the .tgz tarball itself, or the .prov provenance key. + Args: + file_path (str): File that has been potentially detected as the tarball or the provenance file. + tarball_match (re.Match[str]): Matching regex, used to get the chart name and version and the tarball name. + repo_path (str): Local directory where the repo is checked out + + Raises: + SubmissionError: If the tarball is incorrectly named + TarballContentError: If an error is found when checking the tarball content + """ _, file_extension = os.path.splitext(file_path) if file_extension == ".tgz": @@ -373,6 +394,10 @@ def set_tarball(self, file_path: str, tarball_match: re.Match[str]): if tar_name != expected_tar_name: msg = f"[ERROR] the tgz file is named incorrectly. Expected: {expected_tar_name}. Got: {tar_name}" raise SubmissionError(msg) + + # Raise a TarballContentError if content check fails + check_tarball_content(os.path.join(repo_path, file_path), chart_name) + elif file_extension == ".prov": self.tarball.provenance = file_path else: @@ -631,3 +656,48 @@ def download_index_data( raise HelmIndexError(f"Error parsing index file at {index_url}") from e return data + + +def check_tarball_content(tarball_path: str, chart_name: str): + """Check the tarball content for errors. + + Checks that the tarball contains a unique directory named after the chart. This directory must contain a Chart.yaml + file. The directory may contain additional files or folders. No other files or folder should be placed at the root + of the archive. + + Args: + tarball_path (str): Location of the tarball to check on the local filesystem. + chart_name (str): Name of the corresponding chart. + + Raise: + TarballContentError: If an error is found when checking the tarball content + + """ + found_chart_directory = False + found_chart_yaml = False + found_file_out_of_dir = False + expected_chart_file_path = os.path.join(chart_name, "Chart.yaml") + with tarfile.open(tarball_path, "r:gz") as tar: + tarinfo = tar.next() + while tarinfo: + if ( + tarinfo.isdir() and tarinfo.name == chart_name + ) or tarinfo.name.startswith(chart_name + "/"): + found_chart_directory = True + if tarinfo.isfile() and tarinfo.name == expected_chart_file_path: + found_chart_yaml = True + else: + found_file_out_of_dir = True + tarinfo = tar.next() + + if not found_chart_directory: + msg = f"[ERROR] Incorrect tarball content: expected a {chart_name} directory" + raise TarballContentError(msg) + + if found_file_out_of_dir: + msg = f"[ERROR] Incorrect tarball content: found a file outside the {chart_name} directory" + raise TarballContentError(msg) + + if not found_chart_yaml: + msg = f"[ERROR] Incorrect tarball content: expected a {expected_chart_file_path} file" + raise TarballContentError(msg) diff --git a/scripts/src/submission/submission_test.py b/scripts/src/submission/submission_test.py index 3fcf176c..ffa98901 100644 --- a/scripts/src/submission/submission_test.py +++ b/scripts/src/submission/submission_test.py @@ -10,11 +10,15 @@ import contextlib import pytest import os +import re import responses +import tarfile import tempfile from dataclasses import dataclass, field +from reporegex import matchers +from pathlib import Path from submission import submission # Define assets that are being reused accross tests @@ -69,10 +73,66 @@ def make_new_tarball_only_submission(): return s +def get_tarball_from_modified_files(modified_files: list[str]) -> str: + """Browse the modified files and return information on the first tarball found (.tgz) + + Args: + modified_files (list[str]): List of files modified by the PR + + Returns: + (str, str): basedir and filename of the tarball + + """ + _, _, tarballpattern = matchers.get_file_match_compiled_patterns() + + for file in modified_files: + if tarballpattern.match(file): + return os.path.split(file) + return "", "" + + +def create_tarball(tarball_basedir: str, tarball_name: str, tarball_content: list[str]): + """Create a tarball. + + Args: + tarball_basedir (str): Directory in which to place the tarball. Will be created. + tarball_name (str): Name of the tarball including .tgz extension. + tarball_content (list[str]): List files to be included in the tarball. Files are empty as we don't perform any + check based on the content of the files, only on theire presence or absence in the tarball. + + """ + with tempfile.TemporaryDirectory() as content_temp_dir: + # Create tarball content in a temporary directory. + for file in tarball_content: + # Ensure base directory exists + os.makedirs( + os.path.join(content_temp_dir, os.path.dirname(file)), exist_ok=True + ) + # Create an empty file - we don't check actual content of any files + Path(os.path.join(content_temp_dir, file)).touch() + + # Create tarball base directory + os.makedirs(tarball_basedir) + + # Create tarball + with tarfile.open( + os.path.join(tarball_basedir, tarball_name), mode="w:gz" + ) as tarball: + # os.listdir lists both files and directory + for file in os.listdir(content_temp_dir): + # Add the files and directories present in the root folder into the archive. Directory content is + # automatically added recursively. + # Use arcname option to rename the file/directory within the archive and make them relative paths. + tarball.add( + os.path.join(content_temp_dir, file), arcname=os.path.basename(file) + ) + + @dataclass class SubmissionInitScenario: api_url: str modified_files: list[str] + tarball_content: list[str] = field(default_factory=lambda: list()) expected_submission: submission.Submission = None excepted_exception: contextlib.ContextDecorator = field( default_factory=lambda: contextlib.nullcontext() @@ -142,20 +202,24 @@ class SubmissionInitScenario: ), ), # PR contains an unsigned tarball + # Tarball contains a Chart.yaml placed under the directory named after the chart SubmissionInitScenario( api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4", modified_files=[ f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz" ], + tarball_content=[os.path.join(expected_name, "Chart.yaml")], expected_submission=make_new_tarball_only_submission(), ), # PR contains a signed tarball + # Tarball contains a Chart.yaml placed under the directory named after the chart SubmissionInitScenario( api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/5", modified_files=[ f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz", f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz.prov", ], + tarball_content=[os.path.join(expected_name, "Chart.yaml")], expected_submission=submission.Submission( api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/5", modified_files=[ @@ -170,6 +234,85 @@ class SubmissionInitScenario: ), ), ), + # PR contains an unsigned tarball + # Tarball contains a Chart.yaml and additional files, placed under the directory named after the chart + SubmissionInitScenario( + api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4", + modified_files=[ + f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz" + ], + tarball_content=[ + os.path.join(expected_name, "Chart.yaml"), + os.path.join(expected_name, "bar"), + os.path.join(expected_name, "foo", "baz"), + ], + expected_submission=make_new_tarball_only_submission(), + ), + # PR contains an unsigned tarball + # Tarball contains a Chart.yaml, placed under the directory that is not named after the chart + SubmissionInitScenario( + api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4", + modified_files=[ + f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz" + ], + tarball_content=[os.path.join("not-awesome", "Chart.yaml")], + excepted_exception=pytest.raises( + submission.SubmissionError, + match=re.escape( + f"[ERROR] Incorrect tarball content: expected a {expected_name} directory" + ), + ), + ), + # PR contains an unsigned tarball + # Tarball contains additional files placed at the root directory + SubmissionInitScenario( + api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4", + modified_files=[ + f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz" + ], + tarball_content=[ + os.path.join(expected_name, "Chart.yaml"), + os.path.join("bar"), + ], + excepted_exception=pytest.raises( + submission.SubmissionError, + match=re.escape( + f"[ERROR] Incorrect tarball content: found a file outside the {expected_name} directory" + ), + ), + ), + # PR contains an unsigned tarball + # Tarball does not contain a Chart.yaml under the directory named after the chart + SubmissionInitScenario( + api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4", + modified_files=[ + f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz" + ], + tarball_content=[ + os.path.join(expected_name, "bar"), + ], + excepted_exception=pytest.raises( + submission.SubmissionError, + match=re.escape( + f"[ERROR] Incorrect tarball content: expected a {expected_name}/Chart.yaml file" + ), + ), + ), + # PR contains an unsigned tarball + # Tarball is empty + SubmissionInitScenario( + api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4", + modified_files=[ + f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz" + ], + tarball_content=[], + excepted_exception=pytest.raises( + submission.SubmissionError, + match=re.escape( + f"[ERROR] Incorrect tarball content: expected a {expected_name} directory" + ), + ), + ), # PR contains an OWNERS file SubmissionInitScenario( api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/6", @@ -263,10 +406,24 @@ def test_submission_init(test_scenario): json=[{"filename": file} for file in test_scenario.modified_files], ) - with test_scenario.excepted_exception: - s = submission.Submission(api_url=test_scenario.api_url) - s.parse_modified_files() - assert s == test_scenario.expected_submission + # Mock step checking out the PR locally - create tempdir with PR content + with tempfile.TemporaryDirectory() as prbanch_temp_dir: + # This step is only necessary if the PR contains a tarball, otherwise we don't perform any check of the content + # of the PR files. + tarball_path, tarball_name = get_tarball_from_modified_files( + test_scenario.modified_files + ) + if tarball_path: + create_tarball( + os.path.join(prbanch_temp_dir, tarball_path), + tarball_name, + test_scenario.tarball_content, + ) + + with test_scenario.excepted_exception: + s = submission.Submission(api_url=test_scenario.api_url) + s.parse_modified_files(repo_path=prbanch_temp_dir) + assert s == test_scenario.expected_submission @responses.activate diff --git a/scripts/src/submission/validate.py b/scripts/src/submission/validate.py index ab92ef0a..602cad68 100644 --- a/scripts/src/submission/validate.py +++ b/scripts/src/submission/validate.py @@ -42,7 +42,7 @@ def craft_pr_content_error_msg( """ try: - s.parse_modified_files() + s.parse_modified_files(repo_path="pr-branch") except submission.SubmissionError as e: raise ParseFilesError(str(e))