Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion augur/application/db/models/augur_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ def is_valid_github_repo(gh_session, url: str) -> bool:

return False, {"status": f"Github Error: {data['message']}"}

return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]}
return True, {"status": "Valid repo", "repo_type": data["owner"]["type"], "repo_src_id": data["id"]}

return False, {"status": "Failed to validate repo after multiple attempts"}

Expand Down Expand Up @@ -1145,6 +1145,15 @@ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_
if not owner or not repo:
return None

# If we know the numeric source ID, check whether this repo is already
# tracked under a different URL (e.g. after a GitHub rename/transfer).
# This prevents duplicate rows when the same repo is added twice with
# different URLs (issue #3056).
if repo_src_id is not None:
existing = session.query(Repo).filter(Repo.repo_src_id == repo_src_id).first()
if existing:
return existing.repo_id

repo_data = {
"repo_group_id": repo_group_id,
"repo_git": url,
Expand Down
13 changes: 10 additions & 3 deletions augur/util/repo_load_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def add_cli_repo(self, repo_data: Dict[str, Any], from_org_list=False, repo_type
url = repo_data["url"]
repo_group_id = repo_data["repo_group_id"]

repo_src_id = None

# if it is not from an org list then we need to check its validity and get the repo type
if not from_org_list:
if "gitlab" in url:
Expand All @@ -57,19 +59,24 @@ def add_cli_repo(self, repo_data: Dict[str, Any], from_org_list=False, repo_type
result = Repo.is_valid_github_repo(self.session, url)
if not result[0]:
return False, {"status": result[1]["status"], "repo_url": url}

try:
repo_type = result[1]["repo_type"]
except KeyError:
print("Skipping repo type...")

# Capture the source ID so we can deduplicate by it (issue #3056).
# Repos that were renamed keep the same numeric src ID even though
# their URL changes, so using src_id prevents double-ingestion.
repo_src_id = result[1].get("repo_src_id")


# if the repo doesn't exist it adds it
if "gitlab" in url:
repo_id = Repo.insert_gitlab_repo(self.session, url, repo_group_id, "CLI")
repo_id = Repo.insert_gitlab_repo(self.session, url, repo_group_id, "CLI", repo_src_id=repo_src_id)
CollectionStatus.insert(self.session, logger, repo_id)
else:
repo_id = Repo.insert_github_repo(self.session, url, repo_group_id, "CLI", repo_type)
repo_id = Repo.insert_github_repo(self.session, url, repo_group_id, "CLI", repo_type, repo_src_id=repo_src_id)
CollectionStatus.insert(self.session, logger, repo_id)

if not repo_id:
Expand Down
Loading