From 2a05e4fafea681bf450c46072982d1b8ee73c826 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Wed, 3 Sep 2025 11:05:47 +0300 Subject: [PATCH 01/12] Create minecode_pipeline module for mine cargo Signed-off-by: ziad hany --- minecode_pipeline/README.rst | 43 ++++++++++++++ minecode_pipeline/__init__.py | 8 +++ minecode_pipeline/pipelines/__init__.py | 8 +++ minecode_pipeline/pipelines/mine_cargo.py | 71 +++++++++++++++++++++++ minecode_pipeline/pipes/__init__.py | 52 +++++++++++++++++ minecode_pipeline/pipes/cargo.py | 26 +++++++++ minecode_pipeline/tests/__init__.py | 8 +++ setup.cfg | 1 + 8 files changed, 217 insertions(+) create mode 100644 minecode_pipeline/README.rst create mode 100644 minecode_pipeline/__init__.py create mode 100644 minecode_pipeline/pipelines/__init__.py create mode 100644 minecode_pipeline/pipelines/mine_cargo.py create mode 100644 minecode_pipeline/pipes/__init__.py create mode 100644 minecode_pipeline/pipes/cargo.py create mode 100644 minecode_pipeline/tests/__init__.py diff --git a/minecode_pipeline/README.rst b/minecode_pipeline/README.rst new file mode 100644 index 00000000..6f42f0cb --- /dev/null +++ b/minecode_pipeline/README.rst @@ -0,0 +1,43 @@ +minecode-pipeline +=================== + +minecode-pipeline is an add-on library working with scancode.io to define pipelines to mine +packageURLs and package metadata from ecosystem repositories and APIs. + +Installation +------------ + +Requirements +############ + +* install minecode-pipeline dependencies +* `pip install minecode-pipeline` + + +Funding +------- + +This project was funded through the NGI Assure Fund https://nlnet.nl/assure, a +fund established by NLnet https://nlnet.nl/ with financial support from the +European Commission's Next Generation Internet programme, under the aegis of DG +Communications Networks, Content and Technology under grant agreement No 957073. + +This project is also funded through grants from the Google Summer of Code +program, continuing support and sponsoring from nexB Inc. and generous +donations from multiple sponsors. + + +License +------- + +Copyright (c) nexB Inc. and others. All rights reserved. + +purldb is a trademark of nexB Inc. + +SPDX-License-Identifier: Apache-2.0 + +minecode-pipeline is licensed under the Apache License version 2.0. + +See https://www.apache.org/licenses/LICENSE-2.0 for the license text. +See https://github.com/aboutcode-org/purldb for support or download. +See https://aboutcode.org for more information about nexB OSS projects. diff --git a/minecode_pipeline/__init__.py b/minecode_pipeline/__init__.py new file mode 100644 index 00000000..e1521118 --- /dev/null +++ b/minecode_pipeline/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# diff --git a/minecode_pipeline/pipelines/__init__.py b/minecode_pipeline/pipelines/__init__.py new file mode 100644 index 00000000..e1521118 --- /dev/null +++ b/minecode_pipeline/pipelines/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# diff --git a/minecode_pipeline/pipelines/mine_cargo.py b/minecode_pipeline/pipelines/mine_cargo.py new file mode 100644 index 00000000..0cecef6d --- /dev/null +++ b/minecode_pipeline/pipelines/mine_cargo.py @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/aboutcode-org/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/scancode.io for support and download. +import json +from pathlib import Path + +from minecode_pipeline.pipes import cargo +from scanpipe.pipelines.publish_to_federatedcode import PublishToFederatedCode +from fetchcode.vcs import fetch_via_vcs + + +class MineCargo(PublishToFederatedCode): + """Pipeline to mine Cargo (crates.io) packages and publish them to FederatedCode.""" + + repo_url = "git+https://github.com/rust-lang/crates.io-index" + + @classmethod + def steps(cls): + return ( + cls.check_federatedcode_eligibility, + cls.clone_cargo_index, + cls.clone_repository, + cls.collect_packages_from_cargo, + cls.delete_local_clone, + ) + + def clone_cargo_index(self, repo_url): + """ + Clone the repo at repo_url and return the VCSResponse object + """ + self.vcs_response = fetch_via_vcs(repo_url) + + def collect_packages_from_cargo(self): + base_path = Path(self.vcs_response.dest_dir) + + json_files = [] + for file_path in base_path.glob("**/*"): + if not file_path.is_file(): + continue + if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}: + continue + json_files.append(file_path) + + for idx, file_path in enumerate(json_files, start=1): + try: + with open(file_path, encoding="utf-8") as f: + packages = json.load(f) + except (json.JSONDecodeError, UnicodeDecodeError): + continue + + if packages: + push_commit = idx == len(json_files) # only True on last + cargo.collect_packages_from_cargo(packages, self.vcs_response, push_commit) diff --git a/minecode_pipeline/pipes/__init__.py b/minecode_pipeline/pipes/__init__.py new file mode 100644 index 00000000..76f194f2 --- /dev/null +++ b/minecode_pipeline/pipes/__init__.py @@ -0,0 +1,52 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import textwrap +from pathlib import Path +import saneyaml +from aboutcode import hashid + +ALLOWED_HOST = "ALLOWED_HOST" +VERSION = "ALLOWED_HOST" +author_name = "FEDERATEDCODE_GIT_SERVICE_NAME" +author_email = "FEDERATEDCODE_GIT_SERVICE_EMAIL" +remote_name = "origin" + + +def write_purls_to_repo(repo, package, packages_yaml, push_commit=False): + """Write or update package purls in the repo and optionally commit/push changes.""" + + ppath = hashid.get_package_purls_yml_file_path(package) + add_purl_result(packages_yaml, repo, ppath) + + if push_commit: + change_type = "Add" if ppath in repo.untracked_files else "Update" + commit_message = f"""\ + {change_type} list of available {package} versions + Tool: pkg:github/aboutcode-org/purldb@v{VERSION} + Reference: https://{ALLOWED_HOST}/ + Signed-off-by: {author_name} <{author_email}> + """ + + default_branch = repo.active_branch.name + repo.index.commit(textwrap.dedent(commit_message)) + repo.git.push(remote_name, default_branch, "--no-verify") + + +def add_purl_result(purls, repo, purls_file): + """Add package urls result to the local Git repository.""" + relative_purl_file_path = Path(*purls_file.parts[1:]) + + write_to = Path(repo.working_dir) / relative_purl_file_path + write_to.parent.mkdir(parents=True, exist_ok=True) + + with open(purls_file, encoding="utf-8", mode="w") as f: + f.write(saneyaml.dump(purls)) + + repo.index.add([relative_purl_file_path]) + return relative_purl_file_path diff --git a/minecode_pipeline/pipes/cargo.py b/minecode_pipeline/pipes/cargo.py new file mode 100644 index 00000000..3211a407 --- /dev/null +++ b/minecode_pipeline/pipes/cargo.py @@ -0,0 +1,26 @@ +from packageurl import PackageURL +from aboutcode.hashid import get_core_purl +from minecode_pipeline.pipes import write_purls_to_repo + + +def collect_packages_from_cargo(packages, repo, push_commit=False): + """Collect Cargo package versions into purls and write them to the repo.""" + + if not packages and len(packages) > 0: + raise ValueError("No packages found") + + updated_purls = [] + first_pkg = packages[0] + version = first_pkg.get("vers") + name = first_pkg.get("name") + purl = PackageURL(type="cargo", name=name, version=version) + base_purl = get_core_purl(purl) + + for package in packages: + version = package.get("vers") + name = package.get("name") + + purl = PackageURL(type="cargo", name=name, version=version).to_string() + updated_purls.append(purl) + + write_purls_to_repo(repo, base_purl, packages, push_commit) diff --git a/minecode_pipeline/tests/__init__.py b/minecode_pipeline/tests/__init__.py new file mode 100644 index 00000000..e1521118 --- /dev/null +++ b/minecode_pipeline/tests/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# diff --git a/setup.cfg b/setup.cfg index f3fda32b..1b3be9ce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -100,3 +100,4 @@ console_scripts = scancodeio_pipelines = matching = matchcode_pipeline.pipelines.matching:Matching d2d = scanpipe.pipelines.deploy_to_develop:DeployToDevelop + mine_cargo = minecode_pipeline.pipelines.mine_cargo:MineCargo \ No newline at end of file From 99e507ec17d64682a5a3df3060dd26326dc0e26f Mon Sep 17 00:00:00 2001 From: ziad hany Date: Wed, 3 Sep 2025 22:40:35 +0300 Subject: [PATCH 02/12] Add a test for mine cargo Signed-off-by: ziad hany --- minecode_pipeline/pipelines/mine_cargo.py | 32 ++++++-- minecode_pipeline/pipes/__init__.py | 6 +- minecode_pipeline/pipes/cargo.py | 11 ++- minecode_pipeline/tests/test_cargo_pipes.py | 77 +++++++++++++++++++ minecode_pipeline/tests/test_data/c5store | 14 ++++ .../tests/test_data/c5store-expected.yaml | 14 ++++ setup.cfg | 3 +- 7 files changed, 139 insertions(+), 18 deletions(-) create mode 100644 minecode_pipeline/tests/test_cargo_pipes.py create mode 100644 minecode_pipeline/tests/test_data/c5store create mode 100644 minecode_pipeline/tests/test_data/c5store-expected.yaml diff --git a/minecode_pipeline/pipelines/mine_cargo.py b/minecode_pipeline/pipelines/mine_cargo.py index 0cecef6d..76e0e5fa 100644 --- a/minecode_pipeline/pipelines/mine_cargo.py +++ b/minecode_pipeline/pipelines/mine_cargo.py @@ -23,11 +23,13 @@ from pathlib import Path from minecode_pipeline.pipes import cargo -from scanpipe.pipelines.publish_to_federatedcode import PublishToFederatedCode + +from scanpipe.pipelines import Pipeline from fetchcode.vcs import fetch_via_vcs +from scanpipe.pipes import federatedcode -class MineCargo(PublishToFederatedCode): +class MineCargo(Pipeline): """Pipeline to mine Cargo (crates.io) packages and publish them to FederatedCode.""" repo_url = "git+https://github.com/rust-lang/crates.io-index" @@ -36,13 +38,18 @@ class MineCargo(PublishToFederatedCode): def steps(cls): return ( cls.check_federatedcode_eligibility, - cls.clone_cargo_index, - cls.clone_repository, + cls.clone_cargo_repo, cls.collect_packages_from_cargo, - cls.delete_local_clone, ) - def clone_cargo_index(self, repo_url): + def check_federatedcode_eligibility(self): + """ + Check if the project fulfills the following criteria for + pushing the project result to FederatedCode. + """ + federatedcode.check_federatedcode_eligibility(project=self.project) + + def clone_cargo_repo(self, repo_url): """ Clone the repo at repo_url and return the VCSResponse object """ @@ -62,10 +69,21 @@ def collect_packages_from_cargo(self): for idx, file_path in enumerate(json_files, start=1): try: with open(file_path, encoding="utf-8") as f: - packages = json.load(f) + packages = [] + for line in f: + if line.strip(): + packages.append(json.loads(line)) + except (json.JSONDecodeError, UnicodeDecodeError): continue if packages: push_commit = idx == len(json_files) # only True on last cargo.collect_packages_from_cargo(packages, self.vcs_response, push_commit) + + def clean_cargo_repo(self): + """ + Delete the VCS response repository if it exists. + """ + if self.vcs_response: + self.vcs_response.delete() diff --git a/minecode_pipeline/pipes/__init__.py b/minecode_pipeline/pipes/__init__.py index 76f194f2..d5ddd7ea 100644 --- a/minecode_pipeline/pipes/__init__.py +++ b/minecode_pipeline/pipes/__init__.py @@ -18,11 +18,11 @@ remote_name = "origin" -def write_purls_to_repo(repo, package, packages_yaml, push_commit=False): +def write_purls_to_repo(repo, package, updated_purls, push_commit=False): """Write or update package purls in the repo and optionally commit/push changes.""" ppath = hashid.get_package_purls_yml_file_path(package) - add_purl_result(packages_yaml, repo, ppath) + add_purl_result(updated_purls, repo, ppath) if push_commit: change_type = "Add" if ppath in repo.untracked_files else "Update" @@ -40,7 +40,7 @@ def write_purls_to_repo(repo, package, packages_yaml, push_commit=False): def add_purl_result(purls, repo, purls_file): """Add package urls result to the local Git repository.""" - relative_purl_file_path = Path(*purls_file.parts[1:]) + relative_purl_file_path = Path(purls_file) write_to = Path(repo.working_dir) / relative_purl_file_path write_to.parent.mkdir(parents=True, exist_ok=True) diff --git a/minecode_pipeline/pipes/cargo.py b/minecode_pipeline/pipes/cargo.py index 3211a407..426fc81b 100644 --- a/minecode_pipeline/pipes/cargo.py +++ b/minecode_pipeline/pipes/cargo.py @@ -6,21 +6,20 @@ def collect_packages_from_cargo(packages, repo, push_commit=False): """Collect Cargo package versions into purls and write them to the repo.""" - if not packages and len(packages) > 0: + if not packages: raise ValueError("No packages found") - updated_purls = [] first_pkg = packages[0] - version = first_pkg.get("vers") name = first_pkg.get("name") + version = first_pkg.get("vers") purl = PackageURL(type="cargo", name=name, version=version) base_purl = get_core_purl(purl) + updated_purls = [] for package in packages: - version = package.get("vers") name = package.get("name") - + version = package.get("vers") purl = PackageURL(type="cargo", name=name, version=version).to_string() updated_purls.append(purl) - write_purls_to_repo(repo, base_purl, packages, push_commit) + write_purls_to_repo(repo, base_purl, updated_purls, push_commit) diff --git a/minecode_pipeline/tests/test_cargo_pipes.py b/minecode_pipeline/tests/test_cargo_pipes.py new file mode 100644 index 00000000..15d8f3f5 --- /dev/null +++ b/minecode_pipeline/tests/test_cargo_pipes.py @@ -0,0 +1,77 @@ +import json +import tempfile +from pathlib import Path +from unittest import mock +from unittest.mock import Mock, patch +import saneyaml + +from django.test import TestCase +from packageurl import PackageURL + +from minecode_pipeline.pipes import add_purl_result +from minecode_pipeline.pipes.cargo import collect_packages_from_cargo + +DATA_DIR = Path(__file__).parent / "test_data" + + +class CargoPipelineTests(TestCase): + def _get_temp_dir(self): + import tempfile + + return tempfile.mkdtemp() + + @patch("minecode_pipeline.pipes.cargo.write_purls_to_repo") + def test_collect_packages_from_cargo_calls_write(self, mock_write): + packages_file = DATA_DIR / "c5store" + expected_file = DATA_DIR / "c5store-expected.yaml" + + packages = [] + with open(packages_file, encoding="utf-8") as f: + for line in f: + if line.strip(): + packages.append(json.loads(line)) + + with open(expected_file, encoding="utf-8") as f: + expected = saneyaml.load(f) + + repo = Mock() + result = collect_packages_from_cargo(packages, repo) + self.assertIsNone(result) + + mock_write.assert_called_once() + args, kwargs = mock_write.call_args + called_repo, base_purl, written_packages, push_commit = args + + self.assertEqual(called_repo, repo) + + first_pkg = packages[0] + expected_base_purl = PackageURL( + type="cargo", + name=first_pkg["name"], + ) + self.assertEqual(str(base_purl), str(expected_base_purl)) + + self.assertEqual(written_packages, expected) + + def test_add_purl_result_with_mock_repo(self): + purls = [{"purl": "pkg:pypi/django@4.2.0"}, {"purl": "pkg:pypi/django@4.3.0"}] + + with tempfile.TemporaryDirectory() as tmpdir: + repo_dir = Path(tmpdir) + + mock_repo = mock.MagicMock() + mock_repo.working_dir = str(repo_dir) + mock_repo.index.add = mock.MagicMock() + + purls_file = repo_dir / "purls.yaml" + + relative_path = add_purl_result(purls, mock_repo, purls_file) + + written_file = repo_dir / relative_path + self.assertTrue(written_file.exists()) + + with open(written_file, encoding="utf-8") as f: + content = saneyaml.load(f) + self.assertEqual(content, purls) + + mock_repo.index.add.assert_called_once_with([relative_path]) diff --git a/minecode_pipeline/tests/test_data/c5store b/minecode_pipeline/tests/test_data/c5store new file mode 100644 index 00000000..46b93b08 --- /dev/null +++ b/minecode_pipeline/tests/test_data/c5store @@ -0,0 +1,14 @@ +{"name":"c5store","vers":"0.1.0","deps":[{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.3","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.10","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1.0","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.3","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"yaml-rust","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"8d0338954ccebef3aafc0f338219861bb04e4b7f9af67cf0d5cb38418980385b","features":{},"yanked":false} +{"name":"c5store","vers":"0.2.0","deps":[{"name":"base64","req":"^0.13","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0.1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1.0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.11","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1.0","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.9","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"yaml-rust","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"680a74300cd6393972d36b2ec3b3922573e438b49e3dda2612b5b5f410192c45","features":{},"yanked":false} +{"name":"c5store","vers":"0.2.1","deps":[{"name":"base64","req":"^0.13","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0.1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1.0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.11","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1.0","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.9","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"yaml-rust","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"19d948501daaf3350548e8a476e8e2928377f2cb9664fe3d800c29c081cbdbaf","features":{},"yanked":false} +{"name":"c5store","vers":"0.2.2","deps":[{"name":"base64","req":"^0.13","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0.1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1.0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.11","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1.0","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.9","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"yaml-rust","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"93fb874be3d0f54ce67ef3eeda2a2d455a0cde4ffd4da49dd057a4bc0735ef6c","features":{},"yanked":false} +{"name":"c5store","vers":"0.2.3","deps":[{"name":"base64","req":"^0.13","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0.1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1.0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.11","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1.0","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.9","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"yaml-rust","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"5cdbd1960746beb170e2054ccdde793283e202e33c0a35c3b39cb09b40bb77ed","features":{},"yanked":false} +{"name":"c5store","vers":"0.2.4","deps":[{"name":"base64","req":"^0.13","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0.1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1.0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.11","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1.0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1.0","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.9","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"yaml-rust","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"e559cc84dd9f14038b831b9227b5394b26e343758b112ba7d7adaab34d38d2ea","features":{},"yanked":false} +{"name":"c5store","vers":"0.2.5","deps":[{"name":"base64","req":"^0.13","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1.0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.11","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1.0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1.0","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.9","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"yaml-rust","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"1eacfb944c4b337cb9e6373878ba75ad27573dc05a122ff116ded02faf6c5abb","features":{},"yanked":false} +{"name":"c5store","vers":"0.2.6","deps":[{"name":"base64","req":"^0.13","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1.0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.11","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1.0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1.0","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0.8","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.9","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"yaml-rust","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"535d1ded0d73dd95f0909d40ff72ed3f4ef330201bbf7f7ac2abde5f2b177da0","features":{},"yanked":false} +{"name":"c5store","vers":"0.2.7","deps":[{"name":"base64","req":"^0.22","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.10","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.10","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^3.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.5","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"57eaafc115042c8c69fcddd5dbf5c5c62be93fc3493d21ea67bd77f55b000237","features":{},"yanked":false} +{"name":"c5store","vers":"0.3.0","deps":[{"name":"base64","req":"^0.22","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"dotenvy","req":"^0.15","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.10","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natord","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.10","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^3.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.5","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"thiserror","req":"^2.0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"toml","req":"^0.8","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"}],"cksum":"5b4dd483681f3aaeec256e302f3226b8267444cee93df19e56afc499ab425e62","features":{},"features2":{"default":["secrets"],"dotenv":["dep:dotenvy"],"full":["dotenv","toml","secrets"],"secrets":["dep:ecies_25519","dep:curve25519-parser","dep:sha2"],"toml":["dep:toml"]},"yanked":false,"v":2} +{"name":"c5store","vers":"0.3.1","deps":[{"name":"base64","req":"^0.22","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"dotenvy","req":"^0.15","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.10","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natlex_sort","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.10","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^3.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.5","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"thiserror","req":"^2.0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"toml","req":"^0.8","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"}],"cksum":"055ea0dbc26388254d2cef557d0f134bce351691695fe4ef014adfc818d91ae6","features":{},"features2":{"default":["secrets"],"dotenv":["dep:dotenvy"],"full":["dotenv","toml","secrets"],"secrets":["dep:ecies_25519","dep:curve25519-parser","dep:sha2"],"toml":["dep:toml"]},"yanked":false,"v":2} +{"name":"c5store","vers":"0.4.0","deps":[{"name":"base64","req":"^0.22","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"dotenvy","req":"^0.15","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.10","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natlex_sort","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"reqwest","req":"^0.12","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"sha2","req":"^0.10","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^3.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.5","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"thiserror","req":"^2.0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"tokio","req":"^1","features":["fs","macros","rt-multi-thread"],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"toml","req":"^0.8","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"url","req":"^2.3","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"}],"cksum":"bc5e0f7cd5d907f970384e6c9f352367bfbd59a9a7e90a704e1e8fbbb2f9523f","features":{},"features2":{"bootstrapper":["dep:reqwest","dep:tokio","dep:url"],"default":["secrets"],"dotenv":["dep:dotenvy"],"full":["dotenv","toml","secrets","bootstrapper"],"secrets":["dep:ecies_25519","dep:curve25519-parser","dep:sha2"],"toml":["dep:toml"]},"yanked":false,"v":2} +{"name":"c5store","vers":"0.4.1","deps":[{"name":"base64","req":"^0.22","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"dotenvy","req":"^0.15","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.10","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natlex_sort","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"reqwest","req":"^0.12","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serial_test","req":"^3.2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"sha2","req":"^0.10","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^3.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.5","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"thiserror","req":"^2.0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"tokio","req":"^1","features":["fs","macros","rt-multi-thread"],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"toml","req":"^0.8","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"url","req":"^2.3","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"}],"cksum":"74039262902fbe52f8f0cf43f8015a5d99109e2e3988448ed387105f3c3a360d","features":{},"features2":{"bootstrapper":["dep:reqwest","dep:tokio","dep:url"],"default":["secrets"],"dotenv":["dep:dotenvy"],"full":["dotenv","toml","secrets","bootstrapper"],"secrets":["dep:ecies_25519","dep:curve25519-parser","dep:sha2"],"toml":["dep:toml"]},"yanked":false,"v":2} +{"name":"c5store","vers":"0.4.2","deps":[{"name":"base64","req":"^0.22","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"curve25519-parser","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"dotenvy","req":"^0.15","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"ecies_25519","req":"^0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"maplit","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"multimap","req":"^0.10","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"natlex_sort","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-rational","req":"^0.4","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"num-traits","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"parking_lot","req":"^0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"paste","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"reqwest","req":"^0.12","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"scheduled-thread-pool","req":"^0.2","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde","req":"^1","features":["derive"],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_json","req":"^1","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serde_test","req":"^1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"serde_yaml","req":"^0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"serial_test","req":"^3.2.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"sha2","req":"^0.10","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"shellexpand","req":"^3.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"skiplist","req":"^0.5","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"thiserror","req":"^2.0.12","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"tokio","req":"^1","features":["fs","macros","rt-multi-thread"],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"toml","req":"^0.8","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"url","req":"^2.3","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"}],"cksum":"11d63d9e525154071e657b5ae4f39d704ee912cf5d383aab756652a5205f4d08","features":{},"features2":{"bootstrapper":["dep:reqwest","dep:tokio","dep:url"],"default":["secrets"],"dotenv":["dep:dotenvy"],"full":["dotenv","toml","secrets","bootstrapper"],"secrets":["dep:ecies_25519","dep:curve25519-parser","dep:sha2"],"secrets_systemd":["secrets"],"toml":["dep:toml"]},"yanked":false,"v":2} \ No newline at end of file diff --git a/minecode_pipeline/tests/test_data/c5store-expected.yaml b/minecode_pipeline/tests/test_data/c5store-expected.yaml new file mode 100644 index 00000000..c74d51b9 --- /dev/null +++ b/minecode_pipeline/tests/test_data/c5store-expected.yaml @@ -0,0 +1,14 @@ +- pkg:cargo/c5store@0.1.0 +- pkg:cargo/c5store@0.2.0 +- pkg:cargo/c5store@0.2.1 +- pkg:cargo/c5store@0.2.2 +- pkg:cargo/c5store@0.2.3 +- pkg:cargo/c5store@0.2.4 +- pkg:cargo/c5store@0.2.5 +- pkg:cargo/c5store@0.2.6 +- pkg:cargo/c5store@0.2.7 +- pkg:cargo/c5store@0.3.0 +- pkg:cargo/c5store@0.3.1 +- pkg:cargo/c5store@0.4.0 +- pkg:cargo/c5store@0.4.1 +- pkg:cargo/c5store@0.4.2 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 1b3be9ce..aa3f44c8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -99,5 +99,4 @@ console_scripts = scancodeio_pipelines = matching = matchcode_pipeline.pipelines.matching:Matching - d2d = scanpipe.pipelines.deploy_to_develop:DeployToDevelop - mine_cargo = minecode_pipeline.pipelines.mine_cargo:MineCargo \ No newline at end of file + d2d = scanpipe.pipelines.deploy_to_develop:DeployToDevelop \ No newline at end of file From cb1632a76a98eeea52f21a235e747103b23ca780 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Thu, 4 Sep 2025 14:27:43 +0300 Subject: [PATCH 03/12] Add commit tracking capabilities. Refactor code to be compatible with the new pipeline. Signed-off-by: ziad hany --- minecode_pipeline/README.rst | 43 -------------- minecode_pipeline/__init__.py | 8 --- minecode_pipeline/pipelines/__init__.py | 8 --- minecode_pipeline/pipes/__init__.py | 52 ----------------- minecode_pipeline/tests/__init__.py | 8 --- minecode_pipelines/miners/cargo.py | 42 ++++++++++++++ .../pipelines/mine_cargo.py | 56 +++++++------------ .../pipes/cargo.py | 6 +- minecode_pipelines/pipes/pypi.py | 1 + .../tests/pipes}/test_cargo_pipes.py | 13 ++--- .../tests/test_data/cargo}/c5store | 0 .../test_data/cargo}/c5store-expected.yaml | 0 minecode_pipelines/utils.py | 27 ++++++++- setup.cfg | 2 +- 14 files changed, 97 insertions(+), 169 deletions(-) delete mode 100644 minecode_pipeline/README.rst delete mode 100644 minecode_pipeline/__init__.py delete mode 100644 minecode_pipeline/pipelines/__init__.py delete mode 100644 minecode_pipeline/pipes/__init__.py delete mode 100644 minecode_pipeline/tests/__init__.py create mode 100644 minecode_pipelines/miners/cargo.py rename {minecode_pipeline => minecode_pipelines}/pipelines/mine_cargo.py (59%) rename {minecode_pipeline => minecode_pipelines}/pipes/cargo.py (77%) rename {minecode_pipeline/tests => minecode_pipelines/tests/pipes}/test_cargo_pipes.py (85%) rename {minecode_pipeline/tests/test_data => minecode_pipelines/tests/test_data/cargo}/c5store (100%) rename {minecode_pipeline/tests/test_data => minecode_pipelines/tests/test_data/cargo}/c5store-expected.yaml (100%) diff --git a/minecode_pipeline/README.rst b/minecode_pipeline/README.rst deleted file mode 100644 index 6f42f0cb..00000000 --- a/minecode_pipeline/README.rst +++ /dev/null @@ -1,43 +0,0 @@ -minecode-pipeline -=================== - -minecode-pipeline is an add-on library working with scancode.io to define pipelines to mine -packageURLs and package metadata from ecosystem repositories and APIs. - -Installation ------------- - -Requirements -############ - -* install minecode-pipeline dependencies -* `pip install minecode-pipeline` - - -Funding -------- - -This project was funded through the NGI Assure Fund https://nlnet.nl/assure, a -fund established by NLnet https://nlnet.nl/ with financial support from the -European Commission's Next Generation Internet programme, under the aegis of DG -Communications Networks, Content and Technology under grant agreement No 957073. - -This project is also funded through grants from the Google Summer of Code -program, continuing support and sponsoring from nexB Inc. and generous -donations from multiple sponsors. - - -License -------- - -Copyright (c) nexB Inc. and others. All rights reserved. - -purldb is a trademark of nexB Inc. - -SPDX-License-Identifier: Apache-2.0 - -minecode-pipeline is licensed under the Apache License version 2.0. - -See https://www.apache.org/licenses/LICENSE-2.0 for the license text. -See https://github.com/aboutcode-org/purldb for support or download. -See https://aboutcode.org for more information about nexB OSS projects. diff --git a/minecode_pipeline/__init__.py b/minecode_pipeline/__init__.py deleted file mode 100644 index e1521118..00000000 --- a/minecode_pipeline/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# diff --git a/minecode_pipeline/pipelines/__init__.py b/minecode_pipeline/pipelines/__init__.py deleted file mode 100644 index e1521118..00000000 --- a/minecode_pipeline/pipelines/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# diff --git a/minecode_pipeline/pipes/__init__.py b/minecode_pipeline/pipes/__init__.py deleted file mode 100644 index d5ddd7ea..00000000 --- a/minecode_pipeline/pipes/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# -import textwrap -from pathlib import Path -import saneyaml -from aboutcode import hashid - -ALLOWED_HOST = "ALLOWED_HOST" -VERSION = "ALLOWED_HOST" -author_name = "FEDERATEDCODE_GIT_SERVICE_NAME" -author_email = "FEDERATEDCODE_GIT_SERVICE_EMAIL" -remote_name = "origin" - - -def write_purls_to_repo(repo, package, updated_purls, push_commit=False): - """Write or update package purls in the repo and optionally commit/push changes.""" - - ppath = hashid.get_package_purls_yml_file_path(package) - add_purl_result(updated_purls, repo, ppath) - - if push_commit: - change_type = "Add" if ppath in repo.untracked_files else "Update" - commit_message = f"""\ - {change_type} list of available {package} versions - Tool: pkg:github/aboutcode-org/purldb@v{VERSION} - Reference: https://{ALLOWED_HOST}/ - Signed-off-by: {author_name} <{author_email}> - """ - - default_branch = repo.active_branch.name - repo.index.commit(textwrap.dedent(commit_message)) - repo.git.push(remote_name, default_branch, "--no-verify") - - -def add_purl_result(purls, repo, purls_file): - """Add package urls result to the local Git repository.""" - relative_purl_file_path = Path(purls_file) - - write_to = Path(repo.working_dir) / relative_purl_file_path - write_to.parent.mkdir(parents=True, exist_ok=True) - - with open(purls_file, encoding="utf-8", mode="w") as f: - f.write(saneyaml.dump(purls)) - - repo.index.add([relative_purl_file_path]) - return relative_purl_file_path diff --git a/minecode_pipeline/tests/__init__.py b/minecode_pipeline/tests/__init__.py deleted file mode 100644 index e1521118..00000000 --- a/minecode_pipeline/tests/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py new file mode 100644 index 00000000..edfa8854 --- /dev/null +++ b/minecode_pipelines/miners/cargo.py @@ -0,0 +1,42 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import json +from pathlib import Path + +from minecode_pipelines.pipes.cargo import store_cargo_packages +from minecode_pipelines.utils import get_changed_files + + +def process_cargo_packages(cargo_repo, fed_repo): + base_path = Path(cargo_repo.working_tree_dir) + valid_files = get_changed_files(cargo_repo) # start from empty tree hash + + json_files = [] + for file_path in base_path.glob("**/*"): + if not file_path.is_file() or file_path not in valid_files: + continue + + if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}: + continue + json_files.append(file_path) + + for idx, file_path in enumerate(json_files, start=1): + try: + with open(file_path, encoding="utf-8") as f: + packages = [] + for line in f: + if line.strip(): + packages.append(json.loads(line)) + + except (json.JSONDecodeError, UnicodeDecodeError): + continue + + if packages: + push_commit = idx == len(json_files) # only True on last + store_cargo_packages(packages, fed_repo, push_commit) diff --git a/minecode_pipeline/pipelines/mine_cargo.py b/minecode_pipelines/pipelines/mine_cargo.py similarity index 59% rename from minecode_pipeline/pipelines/mine_cargo.py rename to minecode_pipelines/pipelines/mine_cargo.py index 76e0e5fa..c6ce58fc 100644 --- a/minecode_pipeline/pipelines/mine_cargo.py +++ b/minecode_pipelines/pipelines/mine_cargo.py @@ -19,27 +19,25 @@ # # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/aboutcode-org/scancode.io for support and download. -import json -from pathlib import Path - -from minecode_pipeline.pipes import cargo +from git.repo.base import Repo +from scanpipe.pipes.federatedcode import delete_local_clone +from minecode.utils import get_temp_file from scanpipe.pipelines import Pipeline -from fetchcode.vcs import fetch_via_vcs from scanpipe.pipes import federatedcode +from minecode_pipelines.miners import cargo -class MineCargo(Pipeline): +class MineandPublishCargoPURLs(Pipeline): """Pipeline to mine Cargo (crates.io) packages and publish them to FederatedCode.""" - repo_url = "git+https://github.com/rust-lang/crates.io-index" - @classmethod def steps(cls): return ( cls.check_federatedcode_eligibility, cls.clone_cargo_repo, cls.collect_packages_from_cargo, + cls.clean_cargo_repo, ) def check_federatedcode_eligibility(self): @@ -49,41 +47,25 @@ def check_federatedcode_eligibility(self): """ federatedcode.check_federatedcode_eligibility(project=self.project) - def clone_cargo_repo(self, repo_url): + def clone_cargo_repo(self): """ Clone the repo at repo_url and return the VCSResponse object """ - self.vcs_response = fetch_via_vcs(repo_url) - - def collect_packages_from_cargo(self): - base_path = Path(self.vcs_response.dest_dir) + conan_repo_url = "git+https://github.com/rust-lang/crates.io-index" + fed_repo_url = "git+https://github.com/ziadhany/cargo-test" - json_files = [] - for file_path in base_path.glob("**/*"): - if not file_path.is_file(): - continue - if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}: - continue - json_files.append(file_path) + self.fed_repo = federatedcode.clone_repository(fed_repo_url) + self.cargo_repo = Repo.clone_from(conan_repo_url, get_temp_file()) - for idx, file_path in enumerate(json_files, start=1): - try: - with open(file_path, encoding="utf-8") as f: - packages = [] - for line in f: - if line.strip(): - packages.append(json.loads(line)) - - except (json.JSONDecodeError, UnicodeDecodeError): - continue - - if packages: - push_commit = idx == len(json_files) # only True on last - cargo.collect_packages_from_cargo(packages, self.vcs_response, push_commit) + def collect_packages_from_cargo(self): + cargo.process_cargo_packages(self.cargo_repo, self.fed_repo) def clean_cargo_repo(self): """ - Delete the VCS response repository if it exists. + Delete the federatedcode repository if it exists, and also delete the Cargo repository if it exists. """ - if self.vcs_response: - self.vcs_response.delete() + if self.cargo_repo: + delete_local_clone(self.cargo_repo) + + if self.fed_repo: + delete_local_clone(self.fed_repo) diff --git a/minecode_pipeline/pipes/cargo.py b/minecode_pipelines/pipes/cargo.py similarity index 77% rename from minecode_pipeline/pipes/cargo.py rename to minecode_pipelines/pipes/cargo.py index 426fc81b..8eeb278b 100644 --- a/minecode_pipeline/pipes/cargo.py +++ b/minecode_pipelines/pipes/cargo.py @@ -1,9 +1,9 @@ from packageurl import PackageURL from aboutcode.hashid import get_core_purl -from minecode_pipeline.pipes import write_purls_to_repo +from minecode_pipelines.pipes import write_purls_to_repo -def collect_packages_from_cargo(packages, repo, push_commit=False): +def store_cargo_packages(packages, fed_repo, push_commit=False): """Collect Cargo package versions into purls and write them to the repo.""" if not packages: @@ -22,4 +22,4 @@ def collect_packages_from_cargo(packages, repo, push_commit=False): purl = PackageURL(type="cargo", name=name, version=version).to_string() updated_purls.append(purl) - write_purls_to_repo(repo, base_purl, updated_purls, push_commit) + write_purls_to_repo(fed_repo, base_purl, updated_purls, push_commit) diff --git a/minecode_pipelines/pipes/pypi.py b/minecode_pipelines/pipes/pypi.py index 75717eac..b8fdc7c9 100644 --- a/minecode_pipelines/pipes/pypi.py +++ b/minecode_pipelines/pipes/pypi.py @@ -45,6 +45,7 @@ from aboutcode.hashid import get_package_base_dir from packageurl import PackageURL from scanpipe.pipes.federatedcode import clone_repository + from scanpipe.pipes.federatedcode import commit_changes from scanpipe.pipes.federatedcode import push_changes diff --git a/minecode_pipeline/tests/test_cargo_pipes.py b/minecode_pipelines/tests/pipes/test_cargo_pipes.py similarity index 85% rename from minecode_pipeline/tests/test_cargo_pipes.py rename to minecode_pipelines/tests/pipes/test_cargo_pipes.py index 15d8f3f5..92518871 100644 --- a/minecode_pipeline/tests/test_cargo_pipes.py +++ b/minecode_pipelines/tests/pipes/test_cargo_pipes.py @@ -4,14 +4,13 @@ from unittest import mock from unittest.mock import Mock, patch import saneyaml - from django.test import TestCase from packageurl import PackageURL -from minecode_pipeline.pipes import add_purl_result -from minecode_pipeline.pipes.cargo import collect_packages_from_cargo +from minecode_pipelines.pipes import git_stage_purls +from minecode_pipelines.pipes.cargo import store_cargo_packages -DATA_DIR = Path(__file__).parent / "test_data" +DATA_DIR = Path(__file__).parent.parent / "test_data" / "cargo" class CargoPipelineTests(TestCase): @@ -20,7 +19,7 @@ def _get_temp_dir(self): return tempfile.mkdtemp() - @patch("minecode_pipeline.pipes.cargo.write_purls_to_repo") + @patch("minecode_pipelines.pipes.cargo.write_purls_to_repo") def test_collect_packages_from_cargo_calls_write(self, mock_write): packages_file = DATA_DIR / "c5store" expected_file = DATA_DIR / "c5store-expected.yaml" @@ -35,7 +34,7 @@ def test_collect_packages_from_cargo_calls_write(self, mock_write): expected = saneyaml.load(f) repo = Mock() - result = collect_packages_from_cargo(packages, repo) + result = store_cargo_packages(packages, repo) self.assertIsNone(result) mock_write.assert_called_once() @@ -65,7 +64,7 @@ def test_add_purl_result_with_mock_repo(self): purls_file = repo_dir / "purls.yaml" - relative_path = add_purl_result(purls, mock_repo, purls_file) + relative_path = git_stage_purls(purls, mock_repo, purls_file) written_file = repo_dir / relative_path self.assertTrue(written_file.exists()) diff --git a/minecode_pipeline/tests/test_data/c5store b/minecode_pipelines/tests/test_data/cargo/c5store similarity index 100% rename from minecode_pipeline/tests/test_data/c5store rename to minecode_pipelines/tests/test_data/cargo/c5store diff --git a/minecode_pipeline/tests/test_data/c5store-expected.yaml b/minecode_pipelines/tests/test_data/cargo/c5store-expected.yaml similarity index 100% rename from minecode_pipeline/tests/test_data/c5store-expected.yaml rename to minecode_pipelines/tests/test_data/cargo/c5store-expected.yaml diff --git a/minecode_pipelines/utils.py b/minecode_pipelines/utils.py index a583536f..6e274041 100644 --- a/minecode_pipelines/utils.py +++ b/minecode_pipelines/utils.py @@ -7,10 +7,10 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -import os import tempfile - +import os from commoncode.fileutils import create_dir +from git import Repo from itertools import zip_longest @@ -61,3 +61,26 @@ def get_temp_file(file_name="data", extension=".file", dir_name=""): temp_dir = get_temp_dir(dir_name) location = os.path.join(temp_dir, file_name) return location + + +def get_changed_files( + repo: Repo, commit_x: str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904", commit_y: str = None +): + """ + Return a list of files changed between two commits using GitPython. + Includes added, modified, deleted, and renamed files. + + - commit_x is the empty tree hash (repo root). + - commit_y is the latest commit (HEAD). + """ + + if commit_y is None: + commit_y = repo.head.commit.hexsha + + commit_x_obj = repo.commit(commit_x) + commit_y_obj = repo.commit(commit_y) + + diff_index = commit_x_obj.diff(commit_y_obj) + changed_files = {item.a_path or item.b_path for item in diff_index} + + return list(changed_files) diff --git a/setup.cfg b/setup.cfg index aa3f44c8..f3fda32b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -99,4 +99,4 @@ console_scripts = scancodeio_pipelines = matching = matchcode_pipeline.pipelines.matching:Matching - d2d = scanpipe.pipelines.deploy_to_develop:DeployToDevelop \ No newline at end of file + d2d = scanpipe.pipelines.deploy_to_develop:DeployToDevelop From d617c82edd21a240282af1d56cecd020cee76e9a Mon Sep 17 00:00:00 2001 From: ziad hany Date: Sat, 6 Sep 2025 01:12:29 +0300 Subject: [PATCH 04/12] Mine only the unprocessed data. Add logging and fix bug in process_cargo_packages Signed-off-by: ziad hany --- minecode_pipelines/miners/cargo.py | 48 +++++++++++++--------- minecode_pipelines/pipelines/mine_cargo.py | 16 +++++--- minecode_pipelines/utils.py | 24 ----------- 3 files changed, 39 insertions(+), 49 deletions(-) diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py index edfa8854..f137e9b2 100644 --- a/minecode_pipelines/miners/cargo.py +++ b/minecode_pipelines/miners/cargo.py @@ -6,37 +6,47 @@ # See https://github.com/aboutcode-org/purldb for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # +from minecode_pipelines.pipes import get_last_commit, get_changed_files, update_last_commit +from minecode_pipelines.pipes.cargo import store_cargo_packages import json from pathlib import Path -from minecode_pipelines.pipes.cargo import store_cargo_packages -from minecode_pipelines.utils import get_changed_files - -def process_cargo_packages(cargo_repo, fed_repo): +def process_cargo_packages(cargo_repo, fed_repo, logger): base_path = Path(cargo_repo.working_tree_dir) - valid_files = get_changed_files(cargo_repo) # start from empty tree hash + setting_last_commit = get_last_commit(fed_repo, "cargo") + valid_files = get_changed_files(cargo_repo, setting_last_commit) # start from empty tree hash - json_files = [] + logger(f"Found {len(valid_files)} changed files in Cargo index.") + targets_files = [] for file_path in base_path.glob("**/*"): - if not file_path.is_file() or file_path not in valid_files: + if not file_path.is_file(): + continue + + rel_path = str(file_path.relative_to(base_path)) + if rel_path not in valid_files: continue if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}: continue - json_files.append(file_path) - for idx, file_path in enumerate(json_files, start=1): - try: - with open(file_path, encoding="utf-8") as f: - packages = [] - for line in f: - if line.strip(): - packages.append(json.loads(line)) + targets_files.append(file_path) + + logger(f"Collected {len(targets_files)} target package files to process.") - except (json.JSONDecodeError, UnicodeDecodeError): + for idx, file_path in enumerate(targets_files, start=1): + packages = [] + with open(file_path, encoding="utf-8") as f: + for line in f: + if line.strip(): + packages.append(json.loads(line)) + + if not packages: continue - if packages: - push_commit = idx == len(json_files) # only True on last - store_cargo_packages(packages, fed_repo, push_commit) + push_commit = idx == len(targets_files) # only True on last + store_cargo_packages(packages, fed_repo, push_commit) + logger(f"Processed {len(packages)} packages from {file_path} ({idx}/{len(targets_files)}).") + + update_last_commit(setting_last_commit, fed_repo, "cargo") + logger("Updated last commit checkpoint for Cargo.") diff --git a/minecode_pipelines/pipelines/mine_cargo.py b/minecode_pipelines/pipelines/mine_cargo.py index c6ce58fc..4d7fe5f5 100644 --- a/minecode_pipelines/pipelines/mine_cargo.py +++ b/minecode_pipelines/pipelines/mine_cargo.py @@ -19,14 +19,19 @@ # # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/aboutcode-org/scancode.io for support and download. +import os from git.repo.base import Repo from scanpipe.pipes.federatedcode import delete_local_clone -from minecode.utils import get_temp_file +from minecode_pipelines.utils import get_temp_file from scanpipe.pipelines import Pipeline from scanpipe.pipes import federatedcode from minecode_pipelines.miners import cargo +FEDERATEDCODE_CARGO_GIT_URL = os.environ.get( + "FEDERATEDCODE_CARGO_GIT_URL", "https://github.com/ziadhany/cargo-test" +) + class MineandPublishCargoPURLs(Pipeline): """Pipeline to mine Cargo (crates.io) packages and publish them to FederatedCode.""" @@ -49,16 +54,15 @@ def check_federatedcode_eligibility(self): def clone_cargo_repo(self): """ - Clone the repo at repo_url and return the VCSResponse object + Clone the repo at repo_url and return the Repo object """ - conan_repo_url = "git+https://github.com/rust-lang/crates.io-index" - fed_repo_url = "git+https://github.com/ziadhany/cargo-test" + conan_repo_url = "https://github.com/rust-lang/crates.io-index" - self.fed_repo = federatedcode.clone_repository(fed_repo_url) + self.fed_repo = federatedcode.clone_repository(FEDERATEDCODE_CARGO_GIT_URL) self.cargo_repo = Repo.clone_from(conan_repo_url, get_temp_file()) def collect_packages_from_cargo(self): - cargo.process_cargo_packages(self.cargo_repo, self.fed_repo) + cargo.process_cargo_packages(self.cargo_repo, self.fed_repo, self.log) def clean_cargo_repo(self): """ diff --git a/minecode_pipelines/utils.py b/minecode_pipelines/utils.py index 6e274041..89b27982 100644 --- a/minecode_pipelines/utils.py +++ b/minecode_pipelines/utils.py @@ -10,7 +10,6 @@ import tempfile import os from commoncode.fileutils import create_dir -from git import Repo from itertools import zip_longest @@ -61,26 +60,3 @@ def get_temp_file(file_name="data", extension=".file", dir_name=""): temp_dir = get_temp_dir(dir_name) location = os.path.join(temp_dir, file_name) return location - - -def get_changed_files( - repo: Repo, commit_x: str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904", commit_y: str = None -): - """ - Return a list of files changed between two commits using GitPython. - Includes added, modified, deleted, and renamed files. - - - commit_x is the empty tree hash (repo root). - - commit_y is the latest commit (HEAD). - """ - - if commit_y is None: - commit_y = repo.head.commit.hexsha - - commit_x_obj = repo.commit(commit_x) - commit_y_obj = repo.commit(commit_y) - - diff_index = commit_x_obj.diff(commit_y_obj) - changed_files = {item.a_path or item.b_path for item in diff_index} - - return list(changed_files) From 2fb96aae657ea602af797fabae611daa1bd7ed71 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Thu, 11 Sep 2025 13:36:49 +0300 Subject: [PATCH 05/12] Implement functionality to process bulk of commits Signed-off-by: ziad hany --- minecode_pipelines/miners/cargo.py | 72 +++++++++++++--------- minecode_pipelines/pipelines/mine_cargo.py | 10 ++- minecode_pipelines/utils.py | 29 ++++++++- 3 files changed, 80 insertions(+), 31 deletions(-) diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py index f137e9b2..a4b3f63f 100644 --- a/minecode_pipelines/miners/cargo.py +++ b/minecode_pipelines/miners/cargo.py @@ -6,47 +6,61 @@ # See https://github.com/aboutcode-org/purldb for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # -from minecode_pipelines.pipes import get_last_commit, get_changed_files, update_last_commit +from minecode_pipelines.pipes import get_last_commit +from minecode_pipelines.pipes import get_changed_files +from minecode_pipelines.pipes import update_last_commit from minecode_pipelines.pipes.cargo import store_cargo_packages import json from pathlib import Path +from minecode_pipelines.utils import get_next_x_commit + + +def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger): + """ + Process Cargo index files commit by commit. + Push changes to fed_repo after: + - every `commit_batch_size` commits, OR + - every `file_batch_size` files, OR + - when reaching HEAD. + """ -def process_cargo_packages(cargo_repo, fed_repo, logger): base_path = Path(cargo_repo.working_tree_dir) - setting_last_commit = get_last_commit(fed_repo, "cargo") - valid_files = get_changed_files(cargo_repo, setting_last_commit) # start from empty tree hash - logger(f"Found {len(valid_files)} changed files in Cargo index.") - targets_files = [] - for file_path in base_path.glob("**/*"): - if not file_path.is_file(): - continue + while True: + setting_last_commit = get_last_commit(fed_conf_repo, "cargo") + + if setting_last_commit is None: + setting_last_commit = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" - rel_path = str(file_path.relative_to(base_path)) - if rel_path not in valid_files: - continue + next_commit = get_next_x_commit(cargo_repo, setting_last_commit, x=1000, branch="master") - if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}: - continue + if next_commit == setting_last_commit: + logger("No new commits to mine") + break - targets_files.append(file_path) + changed_files = get_changed_files( + cargo_repo, commit_x=setting_last_commit, commit_y=next_commit + ) + logger(f"Found {len(changed_files)} changed files in Cargo index.") - logger(f"Collected {len(targets_files)} target package files to process.") + for idx, rel_path in enumerate(changed_files): + file_path = base_path / rel_path + logger(f"Found {file_path}.") - for idx, file_path in enumerate(targets_files, start=1): - packages = [] - with open(file_path, encoding="utf-8") as f: - for line in f: - if line.strip(): - packages.append(json.loads(line)) + if not file_path.is_file(): + continue - if not packages: - continue + if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}: + continue + packages = [] + with open(file_path, encoding="utf-8") as f: + for line in f: + if line.strip(): + packages.append(json.loads(line)) - push_commit = idx == len(targets_files) # only True on last - store_cargo_packages(packages, fed_repo, push_commit) - logger(f"Processed {len(packages)} packages from {file_path} ({idx}/{len(targets_files)}).") + push_commit = idx == len(changed_files) + store_cargo_packages(packages, fed_repo, push_commit) - update_last_commit(setting_last_commit, fed_repo, "cargo") - logger("Updated last commit checkpoint for Cargo.") + update_last_commit(next_commit, fed_conf_repo, "cargo") + logger(f"Pushed batch for commit range {setting_last_commit}:{next_commit}.") diff --git a/minecode_pipelines/pipelines/mine_cargo.py b/minecode_pipelines/pipelines/mine_cargo.py index 4d7fe5f5..6087bb1f 100644 --- a/minecode_pipelines/pipelines/mine_cargo.py +++ b/minecode_pipelines/pipelines/mine_cargo.py @@ -32,6 +32,10 @@ "FEDERATEDCODE_CARGO_GIT_URL", "https://github.com/ziadhany/cargo-test" ) +FEDERATEDCODE_CONFIG_GIT_URL = os.environ.get( + "FEDERATEDCODE_CONFIG_GIT_URL", "https://github.com/ziadhany/federatedcode-config" +) + class MineandPublishCargoPURLs(Pipeline): """Pipeline to mine Cargo (crates.io) packages and publish them to FederatedCode.""" @@ -50,7 +54,7 @@ def check_federatedcode_eligibility(self): Check if the project fulfills the following criteria for pushing the project result to FederatedCode. """ - federatedcode.check_federatedcode_eligibility(project=self.project) + federatedcode.check_federatedcode_configured_and_available(project=self.project) def clone_cargo_repo(self): """ @@ -59,6 +63,7 @@ def clone_cargo_repo(self): conan_repo_url = "https://github.com/rust-lang/crates.io-index" self.fed_repo = federatedcode.clone_repository(FEDERATEDCODE_CARGO_GIT_URL) + self.fed_conf_repo = federatedcode.clone_repository(FEDERATEDCODE_CONFIG_GIT_URL) self.cargo_repo = Repo.clone_from(conan_repo_url, get_temp_file()) def collect_packages_from_cargo(self): @@ -73,3 +78,6 @@ def clean_cargo_repo(self): if self.fed_repo: delete_local_clone(self.fed_repo) + + if self.fed_conf_repo: + delete_local_clone(self.fed_repo) diff --git a/minecode_pipelines/utils.py b/minecode_pipelines/utils.py index 89b27982..23059bf4 100644 --- a/minecode_pipelines/utils.py +++ b/minecode_pipelines/utils.py @@ -6,10 +6,10 @@ # See https://github.com/aboutcode-org/purldb for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # - import tempfile import os from commoncode.fileutils import create_dir +from git.repo.base import Repo from itertools import zip_longest @@ -60,3 +60,30 @@ def get_temp_file(file_name="data", extension=".file", dir_name=""): temp_dir = get_temp_dir(dir_name) location = os.path.join(temp_dir, file_name) return location + + +EMPTY_TREE_HASH = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + + +def get_next_x_commit(repo: Repo, current_commit: str, x: int = 1, branch: str = "master") -> str: + if x == 0: + return current_commit + + history = list(repo.iter_commits(branch)) + if not history: + return current_commit # no commits, return current_commit + + if not current_commit or current_commit == EMPTY_TREE_HASH: + if x == 1: + return history[-1].hexsha + else: + return history[0].hexsha + + for i, commit in enumerate(history): + if commit.hexsha == current_commit: + if i + x < len(history): + return history[i + x].hexsha + else: + return history[0].hexsha + + return history[0].hexsha From 22ae800e4e8beebb2b63de6b3ed70bb9060c5ec4 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Fri, 12 Sep 2025 02:08:03 +0300 Subject: [PATCH 06/12] Update for get_next_x_commit function Signed-off-by: ziad hany --- minecode_pipelines/miners/cargo.py | 6 +---- minecode_pipelines/utils.py | 36 +++++++++--------------------- 2 files changed, 11 insertions(+), 31 deletions(-) diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py index a4b3f63f..65f99886 100644 --- a/minecode_pipelines/miners/cargo.py +++ b/minecode_pipelines/miners/cargo.py @@ -29,11 +29,7 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger): while True: setting_last_commit = get_last_commit(fed_conf_repo, "cargo") - - if setting_last_commit is None: - setting_last_commit = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" - - next_commit = get_next_x_commit(cargo_repo, setting_last_commit, x=1000, branch="master") + next_commit = get_next_x_commit(cargo_repo, setting_last_commit, x=10, branch="master") if next_commit == setting_last_commit: logger("No new commits to mine") diff --git a/minecode_pipelines/utils.py b/minecode_pipelines/utils.py index 23059bf4..ad97a51c 100644 --- a/minecode_pipelines/utils.py +++ b/minecode_pipelines/utils.py @@ -61,29 +61,13 @@ def get_temp_file(file_name="data", extension=".file", dir_name=""): location = os.path.join(temp_dir, file_name) return location - -EMPTY_TREE_HASH = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" - - -def get_next_x_commit(repo: Repo, current_commit: str, x: int = 1, branch: str = "master") -> str: - if x == 0: - return current_commit - - history = list(repo.iter_commits(branch)) - if not history: - return current_commit # no commits, return current_commit - - if not current_commit or current_commit == EMPTY_TREE_HASH: - if x == 1: - return history[-1].hexsha - else: - return history[0].hexsha - - for i, commit in enumerate(history): - if commit.hexsha == current_commit: - if i + x < len(history): - return history[i + x].hexsha - else: - return history[0].hexsha - - return history[0].hexsha +def get_next_x_commit(repo: Repo, current_commit: str, x: int = 10, branch: str = "master") -> str: + """ + Get the x-th next commit after the current commit in the specified branch. + """ + if not current_commit: + current_commit = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + revs = repo.git.rev_list(f"^{current_commit}", branch).splitlines() + if len(revs) < x: + raise ValueError(f"Not enough commits ahead; only {len(revs)} available.") + return revs[-x] \ No newline at end of file From c94e16cd9b0a7d771a31c3978c1389d582752dec Mon Sep 17 00:00:00 2001 From: ziad hany Date: Sat, 13 Sep 2025 01:07:18 +0300 Subject: [PATCH 07/12] Update Cargo miners to push after every 1k file changes. Fix bug in process_cargo_packages. Signed-off-by: ziad hany --- minecode_pipelines/miners/cargo.py | 8 +++++--- minecode_pipelines/pipelines/mine_cargo.py | 2 +- minecode_pipelines/utils.py | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py index 65f99886..0b679266 100644 --- a/minecode_pipelines/miners/cargo.py +++ b/minecode_pipelines/miners/cargo.py @@ -20,8 +20,7 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger): """ Process Cargo index files commit by commit. Push changes to fed_repo after: - - every `commit_batch_size` commits, OR - - every `file_batch_size` files, OR + - every `commit_batch` commits, OR - when reaching HEAD. """ @@ -40,6 +39,7 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger): ) logger(f"Found {len(changed_files)} changed files in Cargo index.") + file_counter = 0 for idx, rel_path in enumerate(changed_files): file_path = base_path / rel_path logger(f"Found {file_path}.") @@ -49,13 +49,15 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger): if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}: continue + packages = [] with open(file_path, encoding="utf-8") as f: for line in f: if line.strip(): packages.append(json.loads(line)) - push_commit = idx == len(changed_files) + file_counter += 1 + push_commit = (file_counter % 1000 == 0) or (idx == len(changed_files)) store_cargo_packages(packages, fed_repo, push_commit) update_last_commit(next_commit, fed_conf_repo, "cargo") diff --git a/minecode_pipelines/pipelines/mine_cargo.py b/minecode_pipelines/pipelines/mine_cargo.py index 6087bb1f..1535fc54 100644 --- a/minecode_pipelines/pipelines/mine_cargo.py +++ b/minecode_pipelines/pipelines/mine_cargo.py @@ -67,7 +67,7 @@ def clone_cargo_repo(self): self.cargo_repo = Repo.clone_from(conan_repo_url, get_temp_file()) def collect_packages_from_cargo(self): - cargo.process_cargo_packages(self.cargo_repo, self.fed_repo, self.log) + cargo.process_cargo_packages(self.cargo_repo, self.fed_repo, self.fed_conf_repo, self.log) def clean_cargo_repo(self): """ diff --git a/minecode_pipelines/utils.py b/minecode_pipelines/utils.py index ad97a51c..b4aede0a 100644 --- a/minecode_pipelines/utils.py +++ b/minecode_pipelines/utils.py @@ -61,6 +61,7 @@ def get_temp_file(file_name="data", extension=".file", dir_name=""): location = os.path.join(temp_dir, file_name) return location + def get_next_x_commit(repo: Repo, current_commit: str, x: int = 10, branch: str = "master") -> str: """ Get the x-th next commit after the current commit in the specified branch. @@ -70,4 +71,4 @@ def get_next_x_commit(repo: Repo, current_commit: str, x: int = 10, branch: str revs = repo.git.rev_list(f"^{current_commit}", branch).splitlines() if len(revs) < x: raise ValueError(f"Not enough commits ahead; only {len(revs)} available.") - return revs[-x] \ No newline at end of file + return revs[-x] From 3a3197ce48511acf0b2272acc23a2a46acb4dd19 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Mon, 22 Sep 2025 11:44:16 +0300 Subject: [PATCH 08/12] Refactored mine Cargo to work with the newly updated common functions Signed-off-by: ziad hany --- minecode_pipelines/miners/cargo.py | 83 +++++++++++++++---- minecode_pipelines/pipelines/mine_cargo.py | 62 +++++++------- minecode_pipelines/pipes/__init__.py | 59 +++++++++++++ minecode_pipelines/pipes/cargo.py | 11 ++- .../tests/pipes/test_cargo_pipes.py | 28 ++----- 5 files changed, 173 insertions(+), 70 deletions(-) diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py index 0b679266..872f908d 100644 --- a/minecode_pipelines/miners/cargo.py +++ b/minecode_pipelines/miners/cargo.py @@ -6,40 +6,58 @@ # See https://github.com/aboutcode-org/purldb for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # -from minecode_pipelines.pipes import get_last_commit +from datetime import datetime + +from minecode_pipelines.pipes import fetch_checkpoint_from_github +from minecode_pipelines.pipes import update_checkpoints_in_github +from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO from minecode_pipelines.pipes import get_changed_files -from minecode_pipelines.pipes import update_last_commit from minecode_pipelines.pipes.cargo import store_cargo_packages +from scanpipe.pipes.federatedcode import commit_changes +from scanpipe.pipes.federatedcode import push_changes +from minecode_pipelines import VERSION + import json from pathlib import Path from minecode_pipelines.utils import get_next_x_commit +PACKAGE_BATCH_SIZE = 500 +CARGO_CHECKPOINT_PATH = "cargo/checkpoints.json" -def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger): + +def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logger): """ Process Cargo index files commit by commit. Push changes to fed_repo after: - - every `commit_batch` commits, OR - - when reaching HEAD. + - every `commit_batch` commits, OR when reaching HEAD. """ - base_path = Path(cargo_repo.working_tree_dir) + base_path = Path(cargo_index_repo.working_tree_dir) while True: - setting_last_commit = get_last_commit(fed_conf_repo, "cargo") - next_commit = get_next_x_commit(cargo_repo, setting_last_commit, x=10, branch="master") + cargo_checkpoints = ( + fetch_checkpoint_from_github(MINECODE_PIPELINES_CONFIG_REPO, CARGO_CHECKPOINT_PATH) + or {} + ) + checkpoints_last_commit = cargo_checkpoints.get("last_commit") - if next_commit == setting_last_commit: + next_commit = get_next_x_commit( + cargo_index_repo, checkpoints_last_commit, x=10, branch="master" + ) + + if next_commit == checkpoints_last_commit: logger("No new commits to mine") break changed_files = get_changed_files( - cargo_repo, commit_x=setting_last_commit, commit_y=next_commit + cargo_index_repo, commit_x=checkpoints_last_commit, commit_y=next_commit ) logger(f"Found {len(changed_files)} changed files in Cargo index.") file_counter = 0 + purl_files = [] + purls = [] for idx, rel_path in enumerate(changed_files): file_path = base_path / rel_path logger(f"Found {file_path}.") @@ -57,8 +75,45 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger): packages.append(json.loads(line)) file_counter += 1 - push_commit = (file_counter % 1000 == 0) or (idx == len(changed_files)) - store_cargo_packages(packages, fed_repo, push_commit) + commit_and_push = (file_counter % PACKAGE_BATCH_SIZE == 0) or ( + idx == len(changed_files) + ) + purl_file, base_purl = store_cargo_packages(packages, cloned_data_repo) + logger(f"writing packageURLs for package: {base_purl} at: {purl_file}") + + purl_files.append(purl_file) + purls.append(str(base_purl)) + if not commit_and_push: + continue + + commit_changes( + repo=cloned_data_repo, + files_to_commit=purl_files, + purls=purls, + mine_type="packageURL", + tool_name="pkg:cargo/minecode-pipelines", + tool_version=VERSION, + ) + + # Push changes to remote repository + push_changes(repo=cloned_data_repo) + purl_files = [] + purls = [] + + if logger: + logger( + f"Updating checkpoint at: {CARGO_CHECKPOINT_PATH} with last commit: {checkpoints_last_commit}" + ) + + settings_data = { + "date": str(datetime.now()), + "last_commit": next_commit, + } + + update_checkpoints_in_github( + checkpoint=settings_data, + cloned_repo=config_repo, + path=CARGO_CHECKPOINT_PATH, + ) - update_last_commit(next_commit, fed_conf_repo, "cargo") - logger(f"Pushed batch for commit range {setting_last_commit}:{next_commit}.") + logger(f"Pushed batch for commit range {checkpoints_last_commit}:{next_commit}.") diff --git a/minecode_pipelines/pipelines/mine_cargo.py b/minecode_pipelines/pipelines/mine_cargo.py index 1535fc54..b2f33912 100644 --- a/minecode_pipelines/pipelines/mine_cargo.py +++ b/minecode_pipelines/pipelines/mine_cargo.py @@ -19,22 +19,18 @@ # # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/aboutcode-org/scancode.io for support and download. -import os -from git.repo.base import Repo -from scanpipe.pipes.federatedcode import delete_local_clone -from minecode_pipelines.utils import get_temp_file +import os from scanpipe.pipelines import Pipeline from scanpipe.pipes import federatedcode from minecode_pipelines.miners import cargo +from minecode_pipelines import pipes +from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO -FEDERATEDCODE_CARGO_GIT_URL = os.environ.get( - "FEDERATEDCODE_CARGO_GIT_URL", "https://github.com/ziadhany/cargo-test" -) - -FEDERATEDCODE_CONFIG_GIT_URL = os.environ.get( - "FEDERATEDCODE_CONFIG_GIT_URL", "https://github.com/ziadhany/federatedcode-config" +MINECODE_DATA_CARGO_REPO = os.environ.get( + "MINECODE_DATA_CARGO_REPO", "https://github.com/aboutcode-data/minecode-data-cargo-test" ) +MINECODE_CARGO_INDEX_REPO = "https://github.com/rust-lang/crates.io-index" class MineandPublishCargoPURLs(Pipeline): @@ -45,8 +41,8 @@ def steps(cls): return ( cls.check_federatedcode_eligibility, cls.clone_cargo_repo, - cls.collect_packages_from_cargo, - cls.clean_cargo_repo, + cls.mine_and_publish_cargo_packageurls, + cls.delete_cloned_repos, ) def check_federatedcode_eligibility(self): @@ -54,30 +50,34 @@ def check_federatedcode_eligibility(self): Check if the project fulfills the following criteria for pushing the project result to FederatedCode. """ - federatedcode.check_federatedcode_configured_and_available(project=self.project) + federatedcode.check_federatedcode_configured_and_available(logger=self.log) def clone_cargo_repo(self): """ Clone the repo at repo_url and return the Repo object """ - conan_repo_url = "https://github.com/rust-lang/crates.io-index" - - self.fed_repo = federatedcode.clone_repository(FEDERATEDCODE_CARGO_GIT_URL) - self.fed_conf_repo = federatedcode.clone_repository(FEDERATEDCODE_CONFIG_GIT_URL) - self.cargo_repo = Repo.clone_from(conan_repo_url, get_temp_file()) + self.cargo_index_repo = federatedcode.clone_repository(MINECODE_CARGO_INDEX_REPO) + self.cloned_data_repo = federatedcode.clone_repository(MINECODE_DATA_CARGO_REPO) + self.cloned_config_repo = federatedcode.clone_repository(MINECODE_PIPELINES_CONFIG_REPO) - def collect_packages_from_cargo(self): - cargo.process_cargo_packages(self.cargo_repo, self.fed_repo, self.fed_conf_repo, self.log) + if self.log: + self.log( + f"{MINECODE_CARGO_INDEX_REPO} repo cloned at: {self.cargo_index_repo.working_dir}" + ) + self.log( + f"{MINECODE_DATA_CARGO_REPO} repo cloned at: {self.cloned_data_repo.working_dir}" + ) + self.log( + f"{MINECODE_PIPELINES_CONFIG_REPO} repo cloned at: {self.cloned_config_repo.working_dir}" + ) - def clean_cargo_repo(self): - """ - Delete the federatedcode repository if it exists, and also delete the Cargo repository if it exists. - """ - if self.cargo_repo: - delete_local_clone(self.cargo_repo) - - if self.fed_repo: - delete_local_clone(self.fed_repo) + def mine_and_publish_cargo_packageurls(self): + cargo.process_cargo_packages( + self.cargo_index_repo, self.cloned_data_repo, self.cloned_config_repo, self.log + ) - if self.fed_conf_repo: - delete_local_clone(self.fed_repo) + def delete_cloned_repos(self): + pipes.delete_cloned_repos( + repos=[self.cargo_index_repo, self.cloned_data_repo, self.cloned_config_repo], + logger=self.log, + ) diff --git a/minecode_pipelines/pipes/__init__.py b/minecode_pipelines/pipes/__init__.py index 5d030617..577f6019 100644 --- a/minecode_pipelines/pipes/__init__.py +++ b/minecode_pipelines/pipes/__init__.py @@ -15,6 +15,8 @@ import saneyaml from aboutcode.hashid import PURLS_FILENAME +from git import Repo + from scanpipe.pipes.federatedcode import delete_local_clone from scanpipe.pipes.federatedcode import commit_and_push_changes @@ -112,3 +114,60 @@ def delete_cloned_repos(repos, logger=None): if logger: logger(f"Deleting local clone at: {repo.working_dir}") delete_local_clone(repo) + + +def get_changed_files(repo: Repo, commit_x: str = None, commit_y: str = None): + """ + Return a list of files changed between two commits using GitPython. + Includes added, modified, deleted, and renamed files. + - commit_x: base commit (or the empty tree hash for the first commit) + - commit_y: target commit (defaults to HEAD if not provided) + """ + EMPTY_TREE_HASH = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + + if commit_y is None: + commit_y = repo.head.commit.hexsha + commit_y_obj = repo.commit(commit_y) + + if commit_x is None or commit_x == EMPTY_TREE_HASH: + # First commit case: diff against empty tree + diff_index = commit_y_obj.diff(EMPTY_TREE_HASH, R=True) + else: + commit_x_obj = repo.commit(commit_x) + diff_index = commit_x_obj.diff(commit_y_obj, R=True) + + changed_files = {item.a_path or item.b_path for item in diff_index} + return list(changed_files) + + +def get_last_commit(repo, ecosystem): + """ + Retrieve the last mined commit for a given ecosystem. + This function reads a JSON checkpoint file from the repository, which stores + mining progress. Each checkpoint contains the "last_commit" from the package + index (e.g., PyPI) that was previously mined. + https://github.com/AyanSinhaMahapatra/minecode-test/blob/main/minecode_checkpoints/pypi.json + https://github.com/ziadhany/cargo-test/blob/main/minecode_checkpoints/cargo.json + """ + + last_commit_file_path = ( + Path(repo.working_tree_dir) / "minecode_checkpoints" / f"{ecosystem}.json" + ) + try: + with open(last_commit_file_path) as f: + settings_data = json.load(f) + except FileNotFoundError: + return + return settings_data.get("last_commit") + + +def get_next_x_commit(repo: Repo, current_commit: str, x: int = 10, branch: str = "master") -> str: + """ + Get the x-th next commit after the current commit in the specified branch. + """ + if not current_commit: + current_commit = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + revs = repo.git.rev_list(f"^{current_commit}", branch).splitlines() + if len(revs) < x: + raise ValueError(f"Not enough commits ahead; only {len(revs)} available.") + return revs[-x] diff --git a/minecode_pipelines/pipes/cargo.py b/minecode_pipelines/pipes/cargo.py index 8eeb278b..1bd9da01 100644 --- a/minecode_pipelines/pipes/cargo.py +++ b/minecode_pipelines/pipes/cargo.py @@ -1,13 +1,15 @@ +from aboutcode import hashid from packageurl import PackageURL from aboutcode.hashid import get_core_purl -from minecode_pipelines.pipes import write_purls_to_repo +from minecode_pipelines.miners import write_packageurls_to_file -def store_cargo_packages(packages, fed_repo, push_commit=False): + +def store_cargo_packages(packages, repo): """Collect Cargo package versions into purls and write them to the repo.""" if not packages: - raise ValueError("No packages found") + return first_pkg = packages[0] name = first_pkg.get("name") @@ -22,4 +24,5 @@ def store_cargo_packages(packages, fed_repo, push_commit=False): purl = PackageURL(type="cargo", name=name, version=version).to_string() updated_purls.append(purl) - write_purls_to_repo(fed_repo, base_purl, updated_purls, push_commit) + ppath = hashid.get_package_purls_yml_file_path(base_purl) + return write_packageurls_to_file(repo, ppath, updated_purls), base_purl diff --git a/minecode_pipelines/tests/pipes/test_cargo_pipes.py b/minecode_pipelines/tests/pipes/test_cargo_pipes.py index 92518871..d864f489 100644 --- a/minecode_pipelines/tests/pipes/test_cargo_pipes.py +++ b/minecode_pipelines/tests/pipes/test_cargo_pipes.py @@ -5,21 +5,15 @@ from unittest.mock import Mock, patch import saneyaml from django.test import TestCase -from packageurl import PackageURL -from minecode_pipelines.pipes import git_stage_purls +from minecode_pipelines.miners import write_packageurls_to_file from minecode_pipelines.pipes.cargo import store_cargo_packages DATA_DIR = Path(__file__).parent.parent / "test_data" / "cargo" class CargoPipelineTests(TestCase): - def _get_temp_dir(self): - import tempfile - - return tempfile.mkdtemp() - - @patch("minecode_pipelines.pipes.cargo.write_purls_to_repo") + @patch("minecode_pipelines.pipes.cargo.write_packageurls_to_file") def test_collect_packages_from_cargo_calls_write(self, mock_write): packages_file = DATA_DIR / "c5store" expected_file = DATA_DIR / "c5store-expected.yaml" @@ -34,22 +28,16 @@ def test_collect_packages_from_cargo_calls_write(self, mock_write): expected = saneyaml.load(f) repo = Mock() - result = store_cargo_packages(packages, repo) - self.assertIsNone(result) + store_cargo_packages(packages, repo) mock_write.assert_called_once() args, kwargs = mock_write.call_args - called_repo, base_purl, written_packages, push_commit = args + called_repo, base_purl, written_packages = args self.assertEqual(called_repo, repo) - first_pkg = packages[0] - expected_base_purl = PackageURL( - type="cargo", - name=first_pkg["name"], - ) + expected_base_purl = 'aboutcode-packages-cargo-0/cargo/c5store/purls.yml' self.assertEqual(str(base_purl), str(expected_base_purl)) - self.assertEqual(written_packages, expected) def test_add_purl_result_with_mock_repo(self): @@ -64,13 +52,11 @@ def test_add_purl_result_with_mock_repo(self): purls_file = repo_dir / "purls.yaml" - relative_path = git_stage_purls(purls, mock_repo, purls_file) + relative_path = write_packageurls_to_file(mock_repo, purls_file, purls) written_file = repo_dir / relative_path self.assertTrue(written_file.exists()) with open(written_file, encoding="utf-8") as f: content = saneyaml.load(f) - self.assertEqual(content, purls) - - mock_repo.index.add.assert_called_once_with([relative_path]) + self.assertEqual(content, purls) \ No newline at end of file From 078371784f1bfed4c5bf62c6e73ffc9bf39cc8da Mon Sep 17 00:00:00 2001 From: ziad hany Date: Wed, 24 Sep 2025 09:28:57 +0300 Subject: [PATCH 09/12] Fix requested changes Signed-off-by: ziad hany --- minecode_pipelines/miners/cargo.py | 48 +++++++++++-------- minecode_pipelines/pipelines/mine_cargo.py | 7 +-- minecode_pipelines/pipes/__init__.py | 18 ++++--- .../tests/pipes/test_cargo_pipes.py | 4 +- minecode_pipelines/utils.py | 13 ----- 5 files changed, 45 insertions(+), 45 deletions(-) diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py index 872f908d..97aac13e 100644 --- a/minecode_pipelines/miners/cargo.py +++ b/minecode_pipelines/miners/cargo.py @@ -9,6 +9,7 @@ from datetime import datetime from minecode_pipelines.pipes import fetch_checkpoint_from_github +from minecode_pipelines.pipes import get_commit_at_distance_ahead from minecode_pipelines.pipes import update_checkpoints_in_github from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO from minecode_pipelines.pipes import get_changed_files @@ -20,9 +21,10 @@ import json from pathlib import Path -from minecode_pipelines.utils import get_next_x_commit PACKAGE_BATCH_SIZE = 500 +COMMIT_BATCH_SIZE = 10 + CARGO_CHECKPOINT_PATH = "cargo/checkpoints.json" @@ -36,14 +38,14 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg base_path = Path(cargo_index_repo.working_tree_dir) while True: - cargo_checkpoints = ( - fetch_checkpoint_from_github(MINECODE_PIPELINES_CONFIG_REPO, CARGO_CHECKPOINT_PATH) - or {} + cargo_checkpoints = fetch_checkpoint_from_github( + config_repo=MINECODE_PIPELINES_CONFIG_REPO, checkpoint_path=CARGO_CHECKPOINT_PATH ) + checkpoints_last_commit = cargo_checkpoints.get("last_commit") - next_commit = get_next_x_commit( - cargo_index_repo, checkpoints_last_commit, x=10, branch="master" + next_commit = get_commit_at_distance_ahead( + cargo_index_repo, checkpoints_last_commit, num_commits_ahead=10, branch_name="master" ) if next_commit == checkpoints_last_commit: @@ -62,10 +64,11 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg file_path = base_path / rel_path logger(f"Found {file_path}.") - if not file_path.is_file(): - continue - - if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}: + if not file_path.is_file() or file_path.name in { + "config.json", + "README.md", + "update-dl-url.yml", + }: continue packages = [] @@ -75,6 +78,8 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg packages.append(json.loads(line)) file_counter += 1 + + # Commit and push after each full batch or when processing the last file commit_and_push = (file_counter % PACKAGE_BATCH_SIZE == 0) or ( idx == len(changed_files) ) @@ -83,6 +88,7 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg purl_files.append(purl_file) purls.append(str(base_purl)) + if not commit_and_push: continue @@ -91,11 +97,10 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg files_to_commit=purl_files, purls=purls, mine_type="packageURL", - tool_name="pkg:cargo/minecode-pipelines", + tool_name="pkg:pypi/minecode-pipelines", tool_version=VERSION, ) - # Push changes to remote repository push_changes(repo=cloned_data_repo) purl_files = [] purls = [] @@ -105,15 +110,16 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg f"Updating checkpoint at: {CARGO_CHECKPOINT_PATH} with last commit: {checkpoints_last_commit}" ) - settings_data = { - "date": str(datetime.now()), - "last_commit": next_commit, - } + if next_commit != checkpoints_last_commit: + settings_data = { + "date": str(datetime.now()), + "last_commit": next_commit, + } - update_checkpoints_in_github( - checkpoint=settings_data, - cloned_repo=config_repo, - path=CARGO_CHECKPOINT_PATH, - ) + update_checkpoints_in_github( + checkpoint=settings_data, + cloned_repo=config_repo, + path=CARGO_CHECKPOINT_PATH, + ) logger(f"Pushed batch for commit range {checkpoints_last_commit}:{next_commit}.") diff --git a/minecode_pipelines/pipelines/mine_cargo.py b/minecode_pipelines/pipelines/mine_cargo.py index b2f33912..f14bc4af 100644 --- a/minecode_pipelines/pipelines/mine_cargo.py +++ b/minecode_pipelines/pipelines/mine_cargo.py @@ -40,7 +40,7 @@ class MineandPublishCargoPURLs(Pipeline): def steps(cls): return ( cls.check_federatedcode_eligibility, - cls.clone_cargo_repo, + cls.clone_cargo_repos, cls.mine_and_publish_cargo_packageurls, cls.delete_cloned_repos, ) @@ -52,9 +52,10 @@ def check_federatedcode_eligibility(self): """ federatedcode.check_federatedcode_configured_and_available(logger=self.log) - def clone_cargo_repo(self): + def clone_cargo_repos(self): """ - Clone the repo at repo_url and return the Repo object + Clone the Cargo-related repositories (index, data, and pipelines config) + and store their Repo objects in the corresponding instance variables. """ self.cargo_index_repo = federatedcode.clone_repository(MINECODE_CARGO_INDEX_REPO) self.cloned_data_repo = federatedcode.clone_repository(MINECODE_DATA_CARGO_REPO) diff --git a/minecode_pipelines/pipes/__init__.py b/minecode_pipelines/pipes/__init__.py index 577f6019..595712b8 100644 --- a/minecode_pipelines/pipes/__init__.py +++ b/minecode_pipelines/pipes/__init__.py @@ -36,7 +36,7 @@ def fetch_checkpoint_from_github(config_repo, checkpoint_path): ) response = requests.get(checkpoints_file) if not response.ok: - return + return {} checkpoint_data = json.loads(response.text) return checkpoint_data @@ -161,13 +161,19 @@ def get_last_commit(repo, ecosystem): return settings_data.get("last_commit") -def get_next_x_commit(repo: Repo, current_commit: str, x: int = 10, branch: str = "master") -> str: +def get_commit_at_distance_ahead( + repo: Repo, + current_commit: str, + num_commits_ahead: int = 10, + branch_name: str = "master", +) -> str: """ - Get the x-th next commit after the current commit in the specified branch. + Return the commit hash that is `num_commits_ahead` commits ahead of `current_commit` + on the given branch. """ if not current_commit: current_commit = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" - revs = repo.git.rev_list(f"^{current_commit}", branch).splitlines() - if len(revs) < x: + revs = repo.git.rev_list(f"^{current_commit}", branch_name).splitlines() + if len(revs) < num_commits_ahead: raise ValueError(f"Not enough commits ahead; only {len(revs)} available.") - return revs[-x] + return revs[-num_commits_ahead] diff --git a/minecode_pipelines/tests/pipes/test_cargo_pipes.py b/minecode_pipelines/tests/pipes/test_cargo_pipes.py index d864f489..e5eafc9a 100644 --- a/minecode_pipelines/tests/pipes/test_cargo_pipes.py +++ b/minecode_pipelines/tests/pipes/test_cargo_pipes.py @@ -36,7 +36,7 @@ def test_collect_packages_from_cargo_calls_write(self, mock_write): self.assertEqual(called_repo, repo) - expected_base_purl = 'aboutcode-packages-cargo-0/cargo/c5store/purls.yml' + expected_base_purl = "aboutcode-packages-cargo-0/cargo/c5store/purls.yml" self.assertEqual(str(base_purl), str(expected_base_purl)) self.assertEqual(written_packages, expected) @@ -59,4 +59,4 @@ def test_add_purl_result_with_mock_repo(self): with open(written_file, encoding="utf-8") as f: content = saneyaml.load(f) - self.assertEqual(content, purls) \ No newline at end of file + self.assertEqual(content, purls) diff --git a/minecode_pipelines/utils.py b/minecode_pipelines/utils.py index b4aede0a..004cc043 100644 --- a/minecode_pipelines/utils.py +++ b/minecode_pipelines/utils.py @@ -9,7 +9,6 @@ import tempfile import os from commoncode.fileutils import create_dir -from git.repo.base import Repo from itertools import zip_longest @@ -60,15 +59,3 @@ def get_temp_file(file_name="data", extension=".file", dir_name=""): temp_dir = get_temp_dir(dir_name) location = os.path.join(temp_dir, file_name) return location - - -def get_next_x_commit(repo: Repo, current_commit: str, x: int = 10, branch: str = "master") -> str: - """ - Get the x-th next commit after the current commit in the specified branch. - """ - if not current_commit: - current_commit = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" - revs = repo.git.rev_list(f"^{current_commit}", branch).splitlines() - if len(revs) < x: - raise ValueError(f"Not enough commits ahead; only {len(revs)} available.") - return revs[-x] From 7e1ae32470b39976022d462094957c48d509814f Mon Sep 17 00:00:00 2001 From: ziad hany Date: Wed, 24 Sep 2025 09:47:39 +0300 Subject: [PATCH 10/12] Resolve migration conflict Signed-off-by: ziad hany --- minecode_pipelines/pipelines/mine_cargo.py | 2 +- minecode_pipelines/pipes/cargo.py | 2 +- .../tests/pipes/{test_cargo_pipes.py => test_cargo.py} | 2 +- pyproject-minecode_pipeline.toml | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) rename minecode_pipelines/tests/pipes/{test_cargo_pipes.py => test_cargo.py} (97%) diff --git a/minecode_pipelines/pipelines/mine_cargo.py b/minecode_pipelines/pipelines/mine_cargo.py index f14bc4af..1bd1defe 100644 --- a/minecode_pipelines/pipelines/mine_cargo.py +++ b/minecode_pipelines/pipelines/mine_cargo.py @@ -33,7 +33,7 @@ MINECODE_CARGO_INDEX_REPO = "https://github.com/rust-lang/crates.io-index" -class MineandPublishCargoPURLs(Pipeline): +class MineCargo(Pipeline): """Pipeline to mine Cargo (crates.io) packages and publish them to FederatedCode.""" @classmethod diff --git a/minecode_pipelines/pipes/cargo.py b/minecode_pipelines/pipes/cargo.py index 1bd9da01..3203f51f 100644 --- a/minecode_pipelines/pipes/cargo.py +++ b/minecode_pipelines/pipes/cargo.py @@ -2,7 +2,7 @@ from packageurl import PackageURL from aboutcode.hashid import get_core_purl -from minecode_pipelines.miners import write_packageurls_to_file +from minecode_pipelines.pipes import write_packageurls_to_file def store_cargo_packages(packages, repo): diff --git a/minecode_pipelines/tests/pipes/test_cargo_pipes.py b/minecode_pipelines/tests/pipes/test_cargo.py similarity index 97% rename from minecode_pipelines/tests/pipes/test_cargo_pipes.py rename to minecode_pipelines/tests/pipes/test_cargo.py index e5eafc9a..939215ba 100644 --- a/minecode_pipelines/tests/pipes/test_cargo_pipes.py +++ b/minecode_pipelines/tests/pipes/test_cargo.py @@ -6,7 +6,7 @@ import saneyaml from django.test import TestCase -from minecode_pipelines.miners import write_packageurls_to_file +from minecode_pipelines.pipes import write_packageurls_to_file from minecode_pipelines.pipes.cargo import store_cargo_packages DATA_DIR = Path(__file__).parent.parent / "test_data" / "cargo" diff --git a/pyproject-minecode_pipeline.toml b/pyproject-minecode_pipeline.toml index abcd37ee..a9dcce8f 100644 --- a/pyproject-minecode_pipeline.toml +++ b/pyproject-minecode_pipeline.toml @@ -47,6 +47,7 @@ urls = { Homepage = "https://github.com/aboutcode-org/purldb" } [project.entry-points."scancodeio_pipelines"] mine_pypi = "minecode_pipelines.pipelines.mine_pypi:MinePypi" mine_maven = "minecode_pipeline.pipelines.mine_maven:MineMaven" +mine_cargo = "minecode_pipelines.pipelines.mine_cargo:MineCargo" [tool.bumpversion] current_version = "0.0.1b1" From f33a0ce7961c47e12eec7d9e9af52af5f1b2eccc Mon Sep 17 00:00:00 2001 From: ziad hany Date: Wed, 24 Sep 2025 17:56:29 +0300 Subject: [PATCH 11/12] Refactor code and fix bugs Signed-off-by: ziad hany --- minecode_pipelines/miners/cargo.py | 35 ++++++++++++-------- minecode_pipelines/pipes/cargo.py | 8 +++-- minecode_pipelines/tests/pipes/test_cargo.py | 2 +- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py index 97aac13e..2a345bb0 100644 --- a/minecode_pipelines/miners/cargo.py +++ b/minecode_pipelines/miners/cargo.py @@ -8,10 +8,9 @@ # from datetime import datetime -from minecode_pipelines.pipes import fetch_checkpoint_from_github +from minecode_pipelines.pipes import get_checkpoint_from_file from minecode_pipelines.pipes import get_commit_at_distance_ahead from minecode_pipelines.pipes import update_checkpoints_in_github -from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO from minecode_pipelines.pipes import get_changed_files from minecode_pipelines.pipes.cargo import store_cargo_packages from scanpipe.pipes.federatedcode import commit_changes @@ -38,15 +37,19 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg base_path = Path(cargo_index_repo.working_tree_dir) while True: - cargo_checkpoints = fetch_checkpoint_from_github( - config_repo=MINECODE_PIPELINES_CONFIG_REPO, checkpoint_path=CARGO_CHECKPOINT_PATH + cargo_checkpoints = get_checkpoint_from_file( + cloned_repo=config_repo, path=CARGO_CHECKPOINT_PATH ) checkpoints_last_commit = cargo_checkpoints.get("last_commit") - next_commit = get_commit_at_distance_ahead( - cargo_index_repo, checkpoints_last_commit, num_commits_ahead=10, branch_name="master" - ) + try: + next_commit = get_commit_at_distance_ahead( + cargo_index_repo, checkpoints_last_commit, num_commits_ahead=COMMIT_BATCH_SIZE, branch_name="master" + ) + except ValueError as e: + logger(str(e)) + break if next_commit == checkpoints_last_commit: logger("No new commits to mine") @@ -75,19 +78,25 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg with open(file_path, encoding="utf-8") as f: for line in f: if line.strip(): - packages.append(json.loads(line)) + try: + packages.append(json.loads(line)) + except json.JSONDecodeError as e: + logger(f"Skipping invalid JSON in {file_path}: {e}") file_counter += 1 # Commit and push after each full batch or when processing the last file commit_and_push = (file_counter % PACKAGE_BATCH_SIZE == 0) or ( - idx == len(changed_files) + idx == len(changed_files) - 1 ) - purl_file, base_purl = store_cargo_packages(packages, cloned_data_repo) - logger(f"writing packageURLs for package: {base_purl} at: {purl_file}") - purl_files.append(purl_file) - purls.append(str(base_purl)) + result_store = store_cargo_packages(packages, cloned_data_repo) + if result_store: + purl_file, base_purl = result_store + logger(f"writing packageURLs for package: {base_purl} at: {purl_file}") + + purl_files.append(purl_file) + purls.append(str(base_purl)) if not commit_and_push: continue diff --git a/minecode_pipelines/pipes/cargo.py b/minecode_pipelines/pipes/cargo.py index 3203f51f..20f0de85 100644 --- a/minecode_pipelines/pipes/cargo.py +++ b/minecode_pipelines/pipes/cargo.py @@ -1,8 +1,10 @@ +from pathlib import Path + from aboutcode import hashid from packageurl import PackageURL from aboutcode.hashid import get_core_purl -from minecode_pipelines.pipes import write_packageurls_to_file +from minecode_pipelines.pipes import write_data_to_yaml_file def store_cargo_packages(packages, repo): @@ -25,4 +27,6 @@ def store_cargo_packages(packages, repo): updated_purls.append(purl) ppath = hashid.get_package_purls_yml_file_path(base_purl) - return write_packageurls_to_file(repo, ppath, updated_purls), base_purl + purl_file_full_path = Path(repo.working_dir) / ppath + write_data_to_yaml_file(path=purl_file_full_path, data=updated_purls) + return purl_file_full_path, base_purl \ No newline at end of file diff --git a/minecode_pipelines/tests/pipes/test_cargo.py b/minecode_pipelines/tests/pipes/test_cargo.py index 939215ba..64d5e633 100644 --- a/minecode_pipelines/tests/pipes/test_cargo.py +++ b/minecode_pipelines/tests/pipes/test_cargo.py @@ -13,7 +13,7 @@ class CargoPipelineTests(TestCase): - @patch("minecode_pipelines.pipes.cargo.write_packageurls_to_file") + @patch("minecode_pipelines.pipes.cargo.write_data_to_yaml_file") def test_collect_packages_from_cargo_calls_write(self, mock_write): packages_file = DATA_DIR / "c5store" expected_file = DATA_DIR / "c5store-expected.yaml" From 91e22bae4dd3884aebc4d61a76095d3e8a097f8a Mon Sep 17 00:00:00 2001 From: ziad hany Date: Thu, 25 Sep 2025 01:06:07 +0300 Subject: [PATCH 12/12] Improve Cargo testing with more cases Signed-off-by: ziad hany --- minecode_pipelines/miners/cargo.py | 5 +- minecode_pipelines/pipes/cargo.py | 2 +- minecode_pipelines/tests/pipes/test_cargo.py | 55 ++++++++++++----- minecode_pipelines/tests/pipes/test_pipes.py | 65 ++++++++++++++++++++ 4 files changed, 109 insertions(+), 18 deletions(-) create mode 100644 minecode_pipelines/tests/pipes/test_pipes.py diff --git a/minecode_pipelines/miners/cargo.py b/minecode_pipelines/miners/cargo.py index 2a345bb0..f9d8e755 100644 --- a/minecode_pipelines/miners/cargo.py +++ b/minecode_pipelines/miners/cargo.py @@ -45,7 +45,10 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg try: next_commit = get_commit_at_distance_ahead( - cargo_index_repo, checkpoints_last_commit, num_commits_ahead=COMMIT_BATCH_SIZE, branch_name="master" + cargo_index_repo, + checkpoints_last_commit, + num_commits_ahead=COMMIT_BATCH_SIZE, + branch_name="master", ) except ValueError as e: logger(str(e)) diff --git a/minecode_pipelines/pipes/cargo.py b/minecode_pipelines/pipes/cargo.py index 20f0de85..4ea0ab12 100644 --- a/minecode_pipelines/pipes/cargo.py +++ b/minecode_pipelines/pipes/cargo.py @@ -29,4 +29,4 @@ def store_cargo_packages(packages, repo): ppath = hashid.get_package_purls_yml_file_path(base_purl) purl_file_full_path = Path(repo.working_dir) / ppath write_data_to_yaml_file(path=purl_file_full_path, data=updated_purls) - return purl_file_full_path, base_purl \ No newline at end of file + return purl_file_full_path, base_purl diff --git a/minecode_pipelines/tests/pipes/test_cargo.py b/minecode_pipelines/tests/pipes/test_cargo.py index 64d5e633..742f428a 100644 --- a/minecode_pipelines/tests/pipes/test_cargo.py +++ b/minecode_pipelines/tests/pipes/test_cargo.py @@ -1,3 +1,12 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + import json import tempfile from pathlib import Path @@ -6,7 +15,7 @@ import saneyaml from django.test import TestCase -from minecode_pipelines.pipes import write_packageurls_to_file +from minecode_pipelines.pipes import write_data_to_yaml_file from minecode_pipelines.pipes.cargo import store_cargo_packages DATA_DIR = Path(__file__).parent.parent / "test_data" / "cargo" @@ -27,22 +36,24 @@ def test_collect_packages_from_cargo_calls_write(self, mock_write): with open(expected_file, encoding="utf-8") as f: expected = saneyaml.load(f) - repo = Mock() - store_cargo_packages(packages, repo) + with tempfile.TemporaryDirectory() as tmpdir: + repo = Mock() + repo.working_dir = tmpdir - mock_write.assert_called_once() - args, kwargs = mock_write.call_args - called_repo, base_purl, written_packages = args + store_cargo_packages(packages, repo) - self.assertEqual(called_repo, repo) + mock_write.assert_called_once() + args, kwargs = mock_write.call_args + base_purl, written_packages = kwargs["path"], kwargs["data"] - expected_base_purl = "aboutcode-packages-cargo-0/cargo/c5store/purls.yml" - self.assertEqual(str(base_purl), str(expected_base_purl)) - self.assertEqual(written_packages, expected) + expected_base_purl = ( + Path(tmpdir) / "aboutcode-packages-cargo-0" / "cargo" / "c5store" / "purls.yml" + ) - def test_add_purl_result_with_mock_repo(self): - purls = [{"purl": "pkg:pypi/django@4.2.0"}, {"purl": "pkg:pypi/django@4.3.0"}] + self.assertEqual(str(base_purl), str(expected_base_purl)) + self.assertEqual(written_packages, expected) + def _assert_purls_written(self, purls): with tempfile.TemporaryDirectory() as tmpdir: repo_dir = Path(tmpdir) @@ -52,11 +63,23 @@ def test_add_purl_result_with_mock_repo(self): purls_file = repo_dir / "purls.yaml" - relative_path = write_packageurls_to_file(mock_repo, purls_file, purls) + write_data_to_yaml_file(purls_file, purls) - written_file = repo_dir / relative_path - self.assertTrue(written_file.exists()) + self.assertTrue(purls_file.exists()) - with open(written_file, encoding="utf-8") as f: + with open(purls_file, encoding="utf-8") as f: content = saneyaml.load(f) + self.assertEqual(content, purls) + + def test_add_purl_result_with_mock_repo(self): + self._assert_purls_written( + [{"purl": "pkg:pypi/django@4.2.0"}, {"purl": "pkg:pypi/django@4.3.0"}] + ) + + def test_add_empty_purl_result_with_mock_repo(self): + self._assert_purls_written([]) + + def test_add_invalid_purl_with_mock_repo(self): + # invalid but still written as empty file + self._assert_purls_written([{"purl": "pkg:pypi/django"}]) diff --git a/minecode_pipelines/tests/pipes/test_pipes.py b/minecode_pipelines/tests/pipes/test_pipes.py new file mode 100644 index 00000000..1c8161bc --- /dev/null +++ b/minecode_pipelines/tests/pipes/test_pipes.py @@ -0,0 +1,65 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import tempfile +from pathlib import Path +from unittest import TestCase +from git import Repo + +from minecode_pipelines.pipes import get_commit_at_distance_ahead + + +class GetCommitAtDistanceAheadIntegrationTests(TestCase): + def setUp(self): + # Create a temporary directory and init a repo + self.tmpdir = tempfile.TemporaryDirectory() + self.repo_path = Path(self.tmpdir.name) + self.repo = Repo.init(self.repo_path) + + # Configure identity (needed for commits) + with self.repo.config_writer() as cw: + cw.set_value("user", "name", "Test User") + cw.set_value("user", "email", "test@example.com") + + # Create 5 commits + self.commits = [] + for i in range(5): + file_path = self.repo_path / f"file{i}.txt" + file_path.write_text(f"content {i}") + self.repo.index.add([str(file_path)]) + commit = self.repo.index.commit(f"commit {i}") + self.commits.append(commit.hexsha) + + # By construction, self.commits[0] = first commit, self.commits[-1] = latest commit + + def tearDown(self): + self.tmpdir.cleanup() + + def test_get_commit_at_distance_none_current_commit(self): + # If current_commit is None, it should start from the empty tree hash + result = get_commit_at_distance_ahead( + self.repo, None, num_commits_ahead=3, branch_name="master" + ) + # Should return the 3rd commit in history + self.assertEqual(result, self.commits[2]) + + def test_get_commit_at_distance(self): + # current_commit = first commit, ask for 3 commits ahead + result = get_commit_at_distance_ahead( + self.repo, self.commits[0], num_commits_ahead=3, branch_name="master" + ) + # Should return the 3rd commit from start (self.commits[3]) + self.assertEqual(result, self.commits[3]) + + def test_raises_if_not_enough_commits(self): + # From latest commit, ask for 10 ahead (only 0 available) + with self.assertRaises(ValueError) as cm: + get_commit_at_distance_ahead( + self.repo, self.commits[-1], num_commits_ahead=10, branch_name="master" + ) + self.assertIn("Not enough commits ahead; only 0 available.", str(cm.exception))