Skip to content

Commit cc938d7

Browse files
committed
Update Cargo miners to push after every 1k file changes.
Fix bug in process_cargo_packages.

Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent: 4f9b659 · commit: cc938d7

File tree

3 files changed: +8 / -5 lines changed

minecode_pipelines/miners/cargo.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,7 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger):
2020
"""
2121
Process Cargo index files commit by commit.
2222
Push changes to fed_repo after:
23-
- every `commit_batch_size` commits, OR
24-
- every `file_batch_size` files, OR
23+
- every `commit_batch` commits, OR
2524
- when reaching HEAD.
2625
"""
2726

@@ -40,6 +39,7 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger):
4039
)
4140
logger(f"Found {len(changed_files)} changed files in Cargo index.")
4241

42+
file_counter = 0
4343
for idx, rel_path in enumerate(changed_files):
4444
file_path = base_path / rel_path
4545
logger(f"Found {file_path}.")
@@ -49,13 +49,15 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger):
4949

5050
if file_path.name in {"config.json", "README.md", "update-dl-url.yml"}:
5151
continue
52+
5253
packages = []
5354
with open(file_path, encoding="utf-8") as f:
5455
for line in f:
5556
if line.strip():
5657
packages.append(json.loads(line))
5758

58-
push_commit = idx == len(changed_files)
59+
file_counter += 1
60+
push_commit = (file_counter % 1000 == 0) or (idx == len(changed_files))
5961
store_cargo_packages(packages, fed_repo, push_commit)
6062

6163
update_last_commit(next_commit, fed_conf_repo, "cargo")

minecode_pipelines/pipelines/mine_cargo.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ def clone_cargo_repo(self):
6767
self.cargo_repo = Repo.clone_from(conan_repo_url, get_temp_file())
6868

6969
def collect_packages_from_cargo(self):
70-
cargo.process_cargo_packages(self.cargo_repo, self.fed_repo, self.log)
70+
cargo.process_cargo_packages(self.cargo_repo, self.fed_repo, self.fed_conf_repo, self.log)
7171

7272
def clean_cargo_repo(self):
7373
"""

minecode_pipelines/utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ def get_temp_file(file_name="data", extension=".file", dir_name=""):
4949
location = os.path.join(temp_dir, file_name)
5050
return location
5151

52+
5253
def get_next_x_commit(repo: Repo, current_commit: str, x: int = 10, branch: str = "master") -> str:
5354
"""
5455
Get the x-th next commit after the current commit in the specified branch.
@@ -58,4 +59,4 @@ def get_next_x_commit(repo: Repo, current_commit: str, x: int = 10, branch: str
5859
revs = repo.git.rev_list(f"^{current_commit}", branch).splitlines()
5960
if len(revs) < x:
6061
raise ValueError(f"Not enough commits ahead; only {len(revs)} available.")
61-
return revs[-x]
62+
return revs[-x]

0 commit comments

Comments (0)