Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 73 additions & 3 deletions scripts/src/submission/submission.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import re
import tarfile
import requests
import semver
import yaml
Expand Down Expand Up @@ -54,6 +55,10 @@ class ChartError(SubmissionError):
pass


class TarballContentError(SubmissionError):
pass


@dataclass
class Chart:
"""Represents a Helm Chart
Expand Down Expand Up @@ -288,7 +293,7 @@ def _get_modified_files(self):
if "filename" in file:
self.modified_files.append(file["filename"])

def parse_modified_files(self):
def parse_modified_files(self, repo_path: str = ""):
"""Classify the list of modified files.

Modified files are categorized into 5 groups, mapping to 5 class attributes:
Expand All @@ -301,10 +306,15 @@ def parse_modified_files(self):
- A list of added / modified OWNERS files is recorded in the `modified_owners` attribute.
- The rest of the files are classified in the `modified_unknown` attribute.

Args:
repo_path (str): Local directory where the repo is checked out

Raises a SubmissionError if:
* The PR doesn't contain any files
* The Submission concerns more than one chart
* The version of the chart is not SemVer compatible
* The tarball file is named incorrectly
* The tarball content is incorrect

"""
if not self.modified_files:
Expand All @@ -322,7 +332,7 @@ def parse_modified_files(self):
elif file_category == "tarball":
category, organization, name, version, _ = match.groups()
self.chart.register_chart_info(category, organization, name, version)
self.set_tarball(file_path, match)
self.set_tarball(file_path, match, repo_path)
elif file_category == "owners":
category, organization, name = match.groups()
self.chart.register_chart_info(category, organization, name)
Expand Down Expand Up @@ -356,11 +366,22 @@ def set_source(self, file_path: str):
self.source.found = True
self.source.path = os.path.dirname(file_path)

def set_tarball(self, file_path: str, tarball_match: re.Match[str]):
def set_tarball(
self, file_path: str, tarball_match: re.Match[str], repo_path: str = ""
):
"""Action to take when a file related to the tarball is found.

This can either be the .tgz tarball itself, or the .prov provenance key.

Args:
file_path (str): File that has been potentially detected as the tarball or the provenance file.
tarball_match (re.Match[str]): Matching regex, used to get the chart name and version and the tarball name.
repo_path (str): Local directory where the repo is checked out

Raises:
SubmissionError: If the tarball is incorrectly named
TarballContentError: If an error is found when checking the tarball content

"""
_, file_extension = os.path.splitext(file_path)
if file_extension == ".tgz":
Expand All @@ -373,6 +394,10 @@ def set_tarball(self, file_path: str, tarball_match: re.Match[str]):
if tar_name != expected_tar_name:
msg = f"[ERROR] the tgz file is named incorrectly. Expected: {expected_tar_name}. Got: {tar_name}"
raise SubmissionError(msg)

# Raise a TarballContentError if content check fails
check_tarball_content(os.path.join(repo_path, file_path), chart_name)

elif file_extension == ".prov":
self.tarball.provenance = file_path
else:
Expand Down Expand Up @@ -631,3 +656,48 @@ def download_index_data(
raise HelmIndexError(f"Error parsing index file at {index_url}") from e

return data


def check_tarball_content(tarball_path: str, chart_name: str):
"""Check the tarball content for errors.

Checks that the tarball contains a unique directory named after the chart. This directory must contain a Chart.yaml
file. The directory may contain additional files or folders. No other files or folder should be placed at the root
of the archive.

Args:
tarball_path (str): Location of the tarball to check on the local filesystem.
chart_name (str): Name of the corresponding chart.

Raise:
TarballContentError: If an error is found when checking the tarball content

"""
found_chart_directory = False
found_chart_yaml = False
found_file_out_of_dir = False
expected_chart_file_path = os.path.join(chart_name, "Chart.yaml")
with tarfile.open(tarball_path, "r:gz") as tar:
tarinfo = tar.next()
while tarinfo:
if (
tarinfo.isdir() and tarinfo.name == chart_name
) or tarinfo.name.startswith(chart_name + "/"):
found_chart_directory = True
if tarinfo.isfile() and tarinfo.name == expected_chart_file_path:
found_chart_yaml = True
else:
found_file_out_of_dir = True
tarinfo = tar.next()

if not found_chart_directory:
msg = f"[ERROR] Incorrect tarball content: expected a {chart_name} directory"
raise TarballContentError(msg)

if found_file_out_of_dir:
msg = f"[ERROR] Incorrect tarball content: found a file outside the {chart_name} directory"
raise TarballContentError(msg)

if not found_chart_yaml:
msg = f"[ERROR] Incorrect tarball content: expected a {expected_chart_file_path} file"
raise TarballContentError(msg)
165 changes: 161 additions & 4 deletions scripts/src/submission/submission_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@
import contextlib
import pytest
import os
import re
import responses
import tarfile
import tempfile

from dataclasses import dataclass, field
from reporegex import matchers

from pathlib import Path
from submission import submission

# Define assets that are being reused accross tests
Expand Down Expand Up @@ -69,10 +73,66 @@ def make_new_tarball_only_submission():
return s


def get_tarball_from_modified_files(modified_files: list[str]) -> str:
"""Browse the modified files and return information on the first tarball found (.tgz)

Args:
modified_files (list[str]): List of files modified by the PR

Returns:
(str, str): basedir and filename of the tarball

"""
_, _, tarballpattern = matchers.get_file_match_compiled_patterns()

for file in modified_files:
if tarballpattern.match(file):
return os.path.split(file)
return "", ""


def create_tarball(tarball_basedir: str, tarball_name: str, tarball_content: list[str]):
"""Create a tarball.

Args:
tarball_basedir (str): Directory in which to place the tarball. Will be created.
tarball_name (str): Name of the tarball including .tgz extension.
tarball_content (list[str]): List files to be included in the tarball. Files are empty as we don't perform any
check based on the content of the files, only on theire presence or absence in the tarball.

"""
with tempfile.TemporaryDirectory() as content_temp_dir:
# Create tarball content in a temporary directory.
for file in tarball_content:
# Ensure base directory exists
os.makedirs(
os.path.join(content_temp_dir, os.path.dirname(file)), exist_ok=True
)
# Create an empty file - we don't check actual content of any files
Path(os.path.join(content_temp_dir, file)).touch()

# Create tarball base directory
os.makedirs(tarball_basedir)

# Create tarball
with tarfile.open(
os.path.join(tarball_basedir, tarball_name), mode="w:gz"
) as tarball:
# os.listdir lists both files and directory
for file in os.listdir(content_temp_dir):
# Add the files and directories present in the root folder into the archive. Directory content is
# automatically added recursively.
# Use arcname option to rename the file/directory within the archive and make them relative paths.
tarball.add(
os.path.join(content_temp_dir, file), arcname=os.path.basename(file)
)


@dataclass
class SubmissionInitScenario:
api_url: str
modified_files: list[str]
tarball_content: list[str] = field(default_factory=lambda: list())
expected_submission: submission.Submission = None
excepted_exception: contextlib.ContextDecorator = field(
default_factory=lambda: contextlib.nullcontext()
Expand Down Expand Up @@ -142,20 +202,24 @@ class SubmissionInitScenario:
),
),
# PR contains an unsigned tarball
# Tarball contains a Chart.yaml placed under the directory named after the chart
SubmissionInitScenario(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4",
modified_files=[
f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz"
],
tarball_content=[os.path.join(expected_name, "Chart.yaml")],
expected_submission=make_new_tarball_only_submission(),
),
# PR contains a signed tarball
# Tarball contains a Chart.yaml placed under the directory named after the chart
SubmissionInitScenario(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/5",
modified_files=[
f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz",
f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz.prov",
],
tarball_content=[os.path.join(expected_name, "Chart.yaml")],
expected_submission=submission.Submission(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/5",
modified_files=[
Expand All @@ -170,6 +234,85 @@ class SubmissionInitScenario:
),
),
),
# PR contains an unsigned tarball
# Tarball contains a Chart.yaml and additional files, placed under the directory named after the chart
SubmissionInitScenario(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4",
modified_files=[
f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz"
],
tarball_content=[
os.path.join(expected_name, "Chart.yaml"),
os.path.join(expected_name, "bar"),
os.path.join(expected_name, "foo", "baz"),
],
expected_submission=make_new_tarball_only_submission(),
),
# PR contains an unsigned tarball
# Tarball contains a Chart.yaml, placed under the directory that is not named after the chart
SubmissionInitScenario(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4",
modified_files=[
f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz"
],
tarball_content=[os.path.join("not-awesome", "Chart.yaml")],
excepted_exception=pytest.raises(
submission.SubmissionError,
match=re.escape(
f"[ERROR] Incorrect tarball content: expected a {expected_name} directory"
),
),
),
# PR contains an unsigned tarball
# Tarball contains additional files placed at the root directory
SubmissionInitScenario(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4",
modified_files=[
f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz"
],
tarball_content=[
os.path.join(expected_name, "Chart.yaml"),
os.path.join("bar"),
],
excepted_exception=pytest.raises(
submission.SubmissionError,
match=re.escape(
f"[ERROR] Incorrect tarball content: found a file outside the {expected_name} directory"
),
),
),
# PR contains an unsigned tarball
# Tarball does not contain a Chart.yaml under the directory named after the chart
SubmissionInitScenario(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4",
modified_files=[
f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz"
],
tarball_content=[
os.path.join(expected_name, "bar"),
],
excepted_exception=pytest.raises(
submission.SubmissionError,
match=re.escape(
f"[ERROR] Incorrect tarball content: expected a {expected_name}/Chart.yaml file"
),
),
),
# PR contains an unsigned tarball
# Tarball is empty
SubmissionInitScenario(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4",
modified_files=[
f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz"
],
tarball_content=[],
excepted_exception=pytest.raises(
submission.SubmissionError,
match=re.escape(
f"[ERROR] Incorrect tarball content: expected a {expected_name} directory"
),
),
),
# PR contains an OWNERS file
SubmissionInitScenario(
api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/6",
Expand Down Expand Up @@ -263,10 +406,24 @@ def test_submission_init(test_scenario):
json=[{"filename": file} for file in test_scenario.modified_files],
)

with test_scenario.excepted_exception:
s = submission.Submission(api_url=test_scenario.api_url)
s.parse_modified_files()
assert s == test_scenario.expected_submission
# Mock step checking out the PR locally - create tempdir with PR content
with tempfile.TemporaryDirectory() as prbanch_temp_dir:
# This step is only necessary if the PR contains a tarball, otherwise we don't perform any check of the content
# of the PR files.
tarball_path, tarball_name = get_tarball_from_modified_files(
test_scenario.modified_files
)
if tarball_path:
create_tarball(
os.path.join(prbanch_temp_dir, tarball_path),
tarball_name,
test_scenario.tarball_content,
)

with test_scenario.excepted_exception:
s = submission.Submission(api_url=test_scenario.api_url)
s.parse_modified_files(repo_path=prbanch_temp_dir)
assert s == test_scenario.expected_submission


@responses.activate
Expand Down
2 changes: 1 addition & 1 deletion scripts/src/submission/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def craft_pr_content_error_msg(

"""
try:
s.parse_modified_files()
s.parse_modified_files(repo_path="pr-branch")
except submission.SubmissionError as e:
raise ParseFilesError(str(e))

Expand Down