Skip to content

Commit ae60ff2

Browse files
committed
Add commit tracking capabilities.
Refactor code to be compatible with the new pipeline. Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 6dc86d5 commit ae60ff2

File tree

16 files changed

+145
-172
lines changed

16 files changed

+145
-172
lines changed

minecode_pipeline/README.rst

Lines changed: 0 additions & 43 deletions
This file was deleted.

minecode_pipeline/__init__.py

Lines changed: 0 additions & 8 deletions
This file was deleted.

minecode_pipeline/pipelines/__init__.py

Lines changed: 0 additions & 8 deletions
This file was deleted.

minecode_pipeline/pipes/__init__.py

Lines changed: 0 additions & 52 deletions
This file was deleted.

minecode_pipeline/tests/__init__.py

Lines changed: 0 additions & 8 deletions
This file was deleted.

minecode_pipelines/miners/cargo.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import json
10+
from pathlib import Path
11+
12+
from minecode_pipelines.pipes.cargo import store_cargo_packages
13+
from minecode_pipelines.utils import get_changed_files
14+
15+
16+
# File names in the crates.io index checkout that are not package records.
_NON_PACKAGE_FILE_NAMES = frozenset({"config.json", "README.md", "update-dl-url.yml"})


def _resolve_changed_files(cargo_repo, base_path):
    """Return the changed files of ``cargo_repo`` as a set of paths anchored at ``base_path``."""
    resolved = set()
    for entry in get_changed_files(cargo_repo):  # start from empty tree hash
        entry_path = Path(entry)
        # NOTE(review): get_changed_files presumably reports paths relative to
        # the repository root, as git does -- confirm. Anchoring relative
        # entries makes them comparable with the absolute paths yielded by
        # ``base_path.glob``; absolute entries are kept unchanged.
        if not entry_path.is_absolute():
            entry_path = base_path / entry_path
        resolved.add(entry_path)
    return resolved


def _load_packages(file_path):
    """
    Return the list of JSON records found in ``file_path``, one per non-blank line.
    Return an empty list if the file cannot be decoded or holds invalid JSON.
    """
    try:
        with open(file_path, encoding="utf-8") as f:
            return [json.loads(line) for line in f if line.strip()]
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Best effort: an unreadable index file is skipped as a whole.
        return []


def process_cargo_packages(cargo_repo, fed_repo):
    """
    Read the changed package files of the ``cargo_repo`` checkout and store
    their records in ``fed_repo`` with ``store_cargo_packages``.

    Each package file is expected to hold one JSON document per line. Files
    that are empty or cannot be parsed are skipped. Only the final call to
    ``store_cargo_packages`` is made with ``push_commit`` set to True.
    """
    base_path = Path(cargo_repo.working_tree_dir)
    changed_files = _resolve_changed_files(cargo_repo, base_path)

    # Keep one batch in hand so that the push flag can be raised on the last
    # batch that really has packages, even when the files that follow it are
    # empty or unparsable.
    pending = None
    for file_path in base_path.glob("**/*"):
        if not file_path.is_file() or file_path not in changed_files:
            continue

        if file_path.name in _NON_PACKAGE_FILE_NAMES:
            continue

        packages = _load_packages(file_path)
        if not packages:
            continue

        if pending is not None:
            store_cargo_packages(pending, fed_repo, False)
        pending = packages

    if pending is not None:
        store_cargo_packages(pending, fed_repo, True)

minecode_pipeline/pipelines/mine_cargo.py renamed to minecode_pipelines/pipelines/mine_cargo.py

Lines changed: 19 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,25 @@
1919
#
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22-
import json
23-
from pathlib import Path
24-
25-
from minecode_pipeline.pipes import cargo
2622

23+
from git.repo.base import Repo
24+
from scanpipe.pipes.federatedcode import delete_local_clone
25+
from minecode.utils import get_temp_file
2726
from scanpipe.pipelines import Pipeline
28-
from fetchcode.vcs import fetch_via_vcs
2927
from scanpipe.pipes import federatedcode
28+
from minecode_pipelines.miners import cargo
3029

3130

32-
class MineCargo(Pipeline):
31+
class MineandPublishCargoPURLs(Pipeline):
3332
"""Pipeline to mine Cargo (crates.io) packages and publish them to FederatedCode."""
3433

35-
repo_url = "git+https://github.com/rust-lang/crates.io-index"
36-
3734
@classmethod
def steps(cls):
    """Return the steps of this pipeline as a tuple, in execution order."""
    ordered_steps = [
        cls.check_federatedcode_eligibility,
        cls.clone_cargo_repo,
        cls.collect_packages_from_cargo,
        cls.clean_cargo_repo,
    ]
    return tuple(ordered_steps)
4442

4543
def check_federatedcode_eligibility(self):
@@ -49,41 +47,25 @@ def check_federatedcode_eligibility(self):
4947
"""
5048
federatedcode.check_federatedcode_eligibility(project=self.project)
5149

52-
def clone_cargo_repo(self, repo_url):
50+
def clone_cargo_repo(self):
    """
    Clone the crates.io index repository and the FederatedCode repository.

    The clones are kept on the pipeline as ``self.cargo_repo`` and
    ``self.fed_repo`` for use by the following steps.
    """
    # Plain "https://" URLs: ``Repo.clone_from`` runs ``git clone``, and git
    # has no remote helper for the pip-style "git+https://" scheme.
    cargo_repo_url = "https://github.com/rust-lang/crates.io-index"
    # NOTE(review): this looks like a personal test repository used as a
    # placeholder -- confirm the intended FederatedCode repository.
    # Presumably clone_repository also ends up running ``git clone`` -- confirm.
    fed_repo_url = "https://github.com/ziadhany/cargo-test"

    self.fed_repo = federatedcode.clone_repository(fed_repo_url)
    # NOTE(review): assumes get_temp_file() returns a path that does not exist
    # yet, so it can serve as the clone destination -- confirm.
    self.cargo_repo = Repo.clone_from(cargo_repo_url, get_temp_file())
6859

69-
for idx, file_path in enumerate(json_files, start=1):
70-
try:
71-
with open(file_path, encoding="utf-8") as f:
72-
packages = []
73-
for line in f:
74-
if line.strip():
75-
packages.append(json.loads(line))
76-
77-
except (json.JSONDecodeError, UnicodeDecodeError):
78-
continue
79-
80-
if packages:
81-
push_commit = idx == len(json_files) # only True on last
82-
cargo.collect_packages_from_cargo(packages, self.vcs_response, push_commit)
60+
def collect_packages_from_cargo(self):
    """
    Process the packages of the cloned Cargo repository and store them in the
    cloned FederatedCode repository.
    """
    index_repo = self.cargo_repo
    target_repo = self.fed_repo
    cargo.process_cargo_packages(index_repo, target_repo)
8362

8463
def clean_cargo_repo(self):
    """
    Delete the local clone of the Cargo repository, then the local clone of
    the federatedcode repository, when they are set.
    """
    # Attributes are read one at a time, in this order, so that the Cargo
    # clone is handled before the federatedcode attribute is looked up.
    for attribute_name in ("cargo_repo", "fed_repo"):
        local_clone = getattr(self, attribute_name)
        if local_clone:
            delete_local_clone(local_clone)

minecode_pipelines/pipes/__init__.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,18 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
from aboutcode.hashid import PURLS_FILENAME
1011
import os
11-
import saneyaml
12-
12+
import textwrap
1313
from pathlib import Path
14+
import saneyaml
15+
from aboutcode import hashid
1416

15-
from aboutcode.hashid import PURLS_FILENAME
17+
VERSION = os.environ.get("VERSION", "")
18+
PURLDB_ALLOWED_HOST = os.environ.get("FEDERATEDCODE_GIT_ALLOWED_HOST", "")
19+
author_name = os.environ.get("FEDERATEDCODE_GIT_SERVICE_NAME", "")
20+
author_email = os.environ.get("FEDERATEDCODE_GIT_SERVICE_EMAIL", "")
21+
remote_name = os.environ.get("FEDERATEDCODE_GIT_REMOTE_NAME", "origin")
1622

1723

1824
def write_packageurls_to_file(repo, base_dir, packageurls):
@@ -26,3 +32,41 @@ def write_data_to_file(path, data):
2632
path.parent.mkdir(parents=True, exist_ok=True)
2733
with open(path, encoding="utf-8", mode="w") as f:
2834
f.write(saneyaml.dump(data))
35+
36+
37+
def write_purls_to_repo(repo, package, updated_purls, push_commits=False):
    """
    Write ``updated_purls`` to the purls file of ``package`` in ``repo`` and
    stage that file. Commit and push the staged changes when ``push_commits``
    is true.
    """
    purls_file = hashid.get_package_purls_yml_file_path(package)
    git_stage_purls(purls=updated_purls, repo=repo, purls_file=purls_file)

    if not push_commits:
        return

    commit_and_push_changes(repo=repo)
43+
44+
45+
def git_stage_purls(purls, repo, purls_file):
    """
    Write ``purls`` to ``purls_file`` under the working directory of ``repo``,
    add the file to the index of ``repo`` and return its relative path.
    """
    relative_path = Path(purls_file)
    destination = Path(repo.working_dir).joinpath(relative_path)

    write_data_to_file(path=destination, data=purls)

    # The index entry is added with the path relative to the repository.
    repo.index.add([relative_path])
    return relative_path
55+
56+
57+
def commit_and_push_changes(repo):
    """
    Commit what is staged in ``repo`` and push the currently active branch
    to the configured remote.
    """
    # Tool version, reference host and sign-off identity come from the
    # module-level settings read from the environment.
    message = textwrap.dedent(
        f"""\
        Add/Update list of available package versions
        Tool: pkg:github/aboutcode-org/purldb@v{VERSION}
        Reference: https://{PURLDB_ALLOWED_HOST}/
        Signed-off-by: {author_name} <{author_email}>
        """
    )

    branch_name = repo.active_branch.name
    repo.index.commit(message)
    repo.git.push(remote_name, branch_name, "--no-verify")

minecode_pipeline/pipes/cargo.py renamed to minecode_pipelines/pipes/cargo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from packageurl import PackageURL
22
from aboutcode.hashid import get_core_purl
3-
from minecode_pipeline.pipes import write_purls_to_repo
3+
from minecode_pipelines.pipes import write_purls_to_repo
44

55

6-
def collect_packages_from_cargo(packages, repo, push_commit=False):
6+
def store_cargo_packages(packages, fed_repo, push_commit=False):
77
"""Collect Cargo package versions into purls and write them to the repo."""
88

99
if not packages:
@@ -22,4 +22,4 @@ def collect_packages_from_cargo(packages, repo, push_commit=False):
2222
purl = PackageURL(type="cargo", name=name, version=version).to_string()
2323
updated_purls.append(purl)
2424

25-
write_purls_to_repo(repo, base_purl, updated_purls, push_commit)
25+
write_purls_to_repo(fed_repo, base_purl, updated_purls, push_commit)

minecode_pipelines/pipes/pypi.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141

4242
from scanpipe.pipes.federatedcode import clone_repository
43+
4344
from scanpipe.pipes.federatedcode import commit_changes
4445
from scanpipe.pipes.federatedcode import push_changes
4546
from scanpipe.pipes.federatedcode import commit_and_push_changes

0 commit comments

Comments
 (0)