generated from aboutcode-org/skeleton
-
-
Notifications
You must be signed in to change notification settings - Fork 36
Add support to mine Composer Package-URLs #691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+534
−0
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# | ||
# Copyright (c) nexB Inc. and others. All rights reserved. | ||
# purldb is a trademark of nexB Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
# See https://github.com/aboutcode-org/purldb for support or download. | ||
# See https://aboutcode.org for more information about nexB OSS projects. | ||
# | ||
|
||
import json | ||
from minecode_pipelines.utils import get_temp_file | ||
import requests | ||
from packageurl import PackageURL | ||
|
||
|
||
def get_composer_packages():
    """
    Fetch all Composer package names from Packagist and save them to a
    temporary JSON file.

    Return the path of the temporary file, or None when Packagist does not
    answer with a successful HTTP status.

    Response example:
    {
        "packageNames": ["0.0.0/composer-include-files", "0.0.0/laravel-env-shim"]
    }
    """
    # Without a timeout, requests waits forever on a stalled connection and
    # the whole pipeline would hang.
    response = requests.get("https://packagist.org/packages/list.json", timeout=30)
    if not response.ok:
        return

    packages = response.json()
    temp_file = get_temp_file("ComposerPackages", "json")
    with open(temp_file, "w", encoding="utf-8") as f:
        json.dump(packages, f, indent=4)

    return temp_file
|
||
|
||
def get_composer_purl(vendor, package):
    """
    Return a list of Package URL (purl) strings, one for each release of the
    Composer package ``vendor/package`` listed by Packagist.

    Return an empty list when the request fails, when the body is not valid
    JSON, or when the response does not have the expected shape.

    Response example:
    {
        "minified": "composer/2.0",
        "packages": {
            "monolog/monolog": [
                {
                    "name": "monolog/monolog",
                    "version": "3.9.0"
                },
                {
                    "version": "3.8.0"
                }
            ]
        },
        "security-advisories": [
            {
                "advisoryId": "PKSA-dmw8-jd8k-q3c6",
                "affectedVersions": ">=1.8.0,<1.12.0"
            }
        ]
    }
    get_composer_purl("monolog", "monolog")
    -> ["pkg:composer/monolog/monolog@3.9.0", "pkg:composer/monolog/monolog@3.8.0", ...]
    """
    purls = []
    url = f"https://repo.packagist.org/p2/{vendor}/{package}.json"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Decoding is part of the "fetch" step: a non-JSON body is treated
        # like any other failed request instead of crashing the caller.
        data = response.json()
    except (requests.RequestException, ValueError):
        return purls

    # Guard against unexpected payload shapes before calling dict methods.
    packages = data.get("packages") if isinstance(data, dict) else None
    if not isinstance(packages, dict):
        return purls

    releases = packages.get(f"{vendor}/{package}") or []

    for release in releases:
        if not isinstance(release, dict):
            continue
        version = release.get("version")
        if version:
            purl = PackageURL(
                type="composer",
                namespace=vendor,
                name=package,
                version=version,
            )
            purls.append(purl.to_string())

    return purls
|
||
|
||
def load_composer_packages(packages_file):
    """
    Read the JSON file at ``packages_file`` and return a list of
    (vendor, package) tuples built from its "packageNames" entries.

    Names without a "/" separator are skipped; a name is split on its first
    "/" only.
    """
    with open(packages_file, encoding="utf-8") as source:
        names = json.load(source).get("packageNames", [])

    return [tuple(name.split("/", 1)) for name in names if "/" in name]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# http://nexb.com and https://github.com/aboutcode-org/scancode.io | ||
# The ScanCode.io software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode.io is provided as-is without warranties. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# | ||
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
||
import os | ||
from scanpipe.pipelines import Pipeline | ||
from scanpipe.pipes import federatedcode | ||
|
||
from matchcode_pipeline import pipes | ||
from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO | ||
from minecode_pipelines.pipes.composer import mine_composer_packages | ||
from minecode_pipelines.pipes.composer import mine_and_publish_composer_purls | ||
|
||
# Git URL of the repository that receives the mined Composer package URLs.
# It can be overridden with the MINECODE_COMPOSER_GIT_URL environment variable.
MINECODE_COMPOSER_GIT_URL = os.getenv(
    "MINECODE_COMPOSER_GIT_URL",
    "https://github.com/aboutcode-data/minecode-data-composer-test",
)
|
||
|
||
class MineComposer(Pipeline):
    """
    Mine the package URLs of the packages listed in a Composer index and
    publish them to a FederatedCode repository.
    """

    @classmethod
    def steps(cls):
        return (
            cls.check_federatedcode_eligibility,
            cls.clone_composer_repo,
            cls.mine_and_publish_composer_purls,
        )

    def check_federatedcode_eligibility(self):
        """
        Verify that FederatedCode is configured and available before any
        mining work starts.
        """
        federatedcode.check_federatedcode_configured_and_available(logger=self.log)

    def clone_composer_repo(self):
        """
        Clone the Composer data repository and the pipelines configuration
        repository, keeping both on the pipeline instance for later steps.
        """
        self.cloned_config_repo = federatedcode.clone_repository(MINECODE_PIPELINES_CONFIG_REPO)
        self.cloned_data_repo = federatedcode.clone_repository(MINECODE_COMPOSER_GIT_URL)

    def mine_and_publish_composer_purls(self):
        """
        Collect the Composer package names, then generate and publish the
        package URLs (pURLs) of every mined package.
        """
        # Inside this method the bare name resolves to the module-level
        # function imported from minecode_pipelines.pipes.composer, not to
        # this method.
        mine_and_publish_composer_purls(
            packages=mine_composer_packages(),
            cloned_data_repo=self.cloned_data_repo,
            cloned_config_repo=self.cloned_config_repo,
            logger=self.log,
        )

    def delete_cloned_repos(self):
        """Delete the local clones of the data and configuration repositories."""
        # NOTE(review): this method is not listed in steps(), so nothing in
        # this class runs it as a pipeline step -- confirm that is intended.
        cloned_repos = [self.cloned_data_repo, self.cloned_config_repo]
        pipes.delete_cloned_repos(repos=cloned_repos, logger=self.log)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# http://nexb.com and https://github.com/aboutcode-org/scancode.io | ||
# The ScanCode.io software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode.io is provided as-is without warranties. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# | ||
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
||
from datetime import datetime | ||
from pathlib import Path | ||
from aboutcode import hashid | ||
from aboutcode.hashid import get_package_base_dir | ||
from minecode_pipelines.miners.composer import get_composer_packages | ||
from minecode_pipelines.miners.composer import load_composer_packages | ||
from minecode_pipelines.miners.composer import get_composer_purl | ||
from minecode_pipelines.pipes import ( | ||
write_data_to_yaml_file, | ||
get_checkpoint_from_file, | ||
update_checkpoints_in_github, | ||
) | ||
from scanpipe.pipes.federatedcode import commit_changes | ||
from scanpipe.pipes.federatedcode import push_changes | ||
from minecode_pipelines import VERSION | ||
from minecode_pipelines.utils import cycle_from_index, grouper | ||
|
||
# Number of packages grouped into one batch; a commit, push and checkpoint
# update are attempted once per batch.
PACKAGE_BATCH_SIZE = 100
# Path of the Composer checkpoint file inside the cloned configuration repository.
COMPOSER_CHECKPOINT_PATH = "composer/checkpoints.json"
|
||
|
||
def mine_composer_packages():
    """
    Mine Composer package names from Packagist and return a list of
    (vendor, package) tuples.

    Return an empty list when the Packagist package list could not be fetched.
    """
    packages_file = get_composer_packages()
    # get_composer_packages() returns None when Packagist answers with an
    # error status; passing None on would fail inside open() with a TypeError.
    if not packages_file:
        return []
    return load_composer_packages(packages_file)
|
||
|
||
def mine_and_publish_composer_purls(packages, cloned_data_repo, cloned_config_repo, logger):
    """
    Mine Composer packages and publish their PURLs to a FederatedCode repository.

    ``packages`` is an iterable of (vendor, package) tuples.
    ``cloned_data_repo`` is the cloned repository that receives the PURL files.
    ``cloned_config_repo`` is the cloned repository holding the checkpoint file.
    ``logger`` is a callable accepting a single message string.

    Packages are processed in batches of PACKAGE_BATCH_SIZE, starting at the
    index stored in the checkpoint. After each batch the PURL files are
    committed and pushed, and the checkpoint is updated with the index of the
    next package to process.
    """
    packages = list(packages or [])
    total_packages = len(packages)
    if not total_packages:
        # Nothing to iterate over, and no valid index to store in the checkpoint.
        logger("no Composer packages to mine")
        return

    composer_checkpoint = get_checkpoint_from_file(
        cloned_repo=cloned_config_repo, path=COMPOSER_CHECKPOINT_PATH
    )

    # Wrap the stored index so that it always points inside the current
    # package list, even if the list shrank or a previous run stored a value
    # past its end.
    start_index = (composer_checkpoint.get("start_index") or 0) % total_packages

    packages_iter = cycle_from_index(packages, start_index)

    # Count real packages only: empty entries in a batch are skipped below and
    # must not move the checkpoint forward.
    processed_count = 0

    for package_batch in grouper(n=PACKAGE_BATCH_SIZE, iterable=packages_iter):
        purl_files = []
        purls = []

        for item in package_batch:
            if not item:
                continue
            processed_count += 1

            vendor, package = item
            logger(f"getting packageURLs for package: {vendor}/{package}")

            updated_purls = get_composer_purl(vendor=vendor, package=package)
            if not updated_purls:
                continue

            # NOTE(review): this is the first versioned purl of the package,
            # used to locate the package directory -- confirm that a
            # versionless purl is not expected here.
            base_purl = updated_purls[0]
            package_base_dir = get_package_base_dir(purl=base_purl)

            logger(f"writing packageURLs for package: {base_purl} at: {package_base_dir}")
            logger(f"packageURLs: {' '.join(updated_purls)}")

            purl_file_full_path = Path(
                cloned_data_repo.working_dir
            ) / hashid.get_package_purls_yml_file_path(base_purl)

            write_data_to_yaml_file(path=purl_file_full_path, data=updated_purls)

            purl_files.append(purl_file_full_path)
            purls.append(str(base_purl))

        if purl_files:
            commit_changes(
                repo=cloned_data_repo,
                files_to_commit=purl_files,
                purls=purls,
                mine_type="packageURL",
                tool_name="pkg:composer/minecode-pipelines",
                tool_version=VERSION,
            )
            push_changes(repo=cloned_data_repo)

        settings_data = {
            "date": str(datetime.now()),
            # Index of the next package to process, wrapped to the list size.
            "start_index": (start_index + processed_count) % total_packages,
        }
        update_checkpoints_in_github(
            checkpoint=settings_data,
            cloned_repo=cloned_config_repo,
            path=COMPOSER_CHECKPOINT_PATH,
        )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# | ||
# Copyright (c) nexB Inc. and others. All rights reserved. | ||
# purldb is a trademark of nexB Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
# See https://github.com/aboutcode-org/purldb for support or download. | ||
# See https://aboutcode.org for more information about nexB OSS projects. | ||
# | ||
|
||
import json | ||
from pathlib import Path | ||
from unittest.mock import patch, MagicMock | ||
from django.test import SimpleTestCase | ||
|
||
from minecode_pipelines.miners.composer import get_composer_packages | ||
from minecode_pipelines.miners.composer import load_composer_packages | ||
from minecode_pipelines.miners.composer import get_composer_purl | ||
|
||
# Directory holding the JSON fixtures read by the Composer tests.
DATA_DIR = Path(__file__).parent.parent / "test_data" / "composer"
|
||
|
||
class ComposerPipelineTests(SimpleTestCase):
    @patch("requests.get")
    def test_generate_purls_from_composer(self, mock_get):
        """
        Mine Composer packages and build their PURLs against mocked Packagist
        responses loaded from the JSON fixtures in test_data/composer.
        """
        fixtures = {}
        for filename in ("packages_list.json", "package_details.json", "expected_output.json"):
            with open(DATA_DIR / filename, encoding="utf-8") as fixture_file:
                fixtures[filename] = json.load(fixture_file)

        def fake_response(payload):
            # Successful response whose .json() returns the given payload.
            response = MagicMock()
            response.ok = True
            response.json.return_value = payload
            return response

        # First call: the package list; second call: the package details.
        mock_get.side_effect = [
            fake_response(fixtures["packages_list.json"]),
            fake_response(fixtures["package_details.json"]),
        ]

        package_pairs = load_composer_packages(get_composer_packages())

        collected_purls = [
            purl
            for vendor, package in package_pairs
            for purl in get_composer_purl(vendor, package)
        ]

        assert len(collected_purls) == 85
        assert collected_purls == fixtures["expected_output.json"]
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.