Skip to content
This repository was archived by the owner on Sep 13, 2023. It is now read-only.

Commit 0a19b96

Browse files
authored
Adds check that github path has valid sha after tree/ (#401)
* Adds check that github path has valid sha after `tree/` closes #396 * update regexp
1 parent 5cef637 commit 0a19b96

File tree

2 files changed

+29
-12
lines changed

2 files changed

+29
-12
lines changed

mlem/contrib/github.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pathlib
22
import posixpath
3+
import re
34
from typing import ClassVar, Dict, Optional
45
from urllib.parse import quote_plus, urlparse
56

@@ -14,7 +15,6 @@ def ls_branches(repo_url: str) -> Dict[str, str]:
1415
"""List branches in remote git repo"""
1516
import git
1617

17-
git.cmd.Git().ls_remote(repo_url)
1818
g = git.cmd.Git()
1919
remote_refs: Dict[str, str] = dict(
2020
tuple(reversed(ref.split("\t")[:2]))
@@ -54,6 +54,10 @@ def _ls_github_refs(org: str, repo: str, endpoint: str):
5454
return None
5555

5656

57+
def is_long_sha(sha: str) -> bool:
    """Tell whether ``sha`` is written as a full 40-character hex digest.

    Only lowercase ASCII hexadecimal characters (``0-9``, ``a-f``) are
    accepted, and the whole string must consist of exactly 40 of them.

    Args:
        sha: Candidate revision string, e.g. the URL path segment that
            follows ``tree/``.

    Returns:
        ``True`` if ``sha`` is exactly 40 lowercase hex characters,
        ``False`` otherwise.
    """
    # ``fullmatch`` instead of ``^...$``: ``$`` also matches just before a
    # trailing newline, which would let "<40 hex chars>\n" through.
    # ``[a-f0-9]`` instead of ``[a-f\d]``: on ``str`` patterns ``\d`` matches
    # any Unicode decimal digit, not only ASCII 0-9.
    return re.fullmatch(r"[a-f0-9]{40}", sha) is not None
59+
60+
5761
class GithubResolver(CloudGitResolver):
5862
"""Resolve https://github.com URLs"""
5963

@@ -87,18 +91,23 @@ def get_kwargs(cls, uri):
8791
return {"org": org, "repo": repo, "path": ""}
8892
if path[0] == "tree":
8993
sha = path[1]
90-
refs = ls_github_branches(org, repo)
91-
refs.update(ls_github_tags(org, repo))
92-
branches = {quote_plus(k) for k in refs}
93-
# match beginning of path with one of existing branches
94-
# "" is hack for cases with empty path (like 'github.com/org/rep/tree/branch/')
95-
for i, part in enumerate(path[2:] + [""], start=2):
96-
if sha in branches:
97-
path = path[i:]
98-
break
99-
sha = f"{sha}%2F{part}"
94+
if is_long_sha(sha):
95+
path = path[2:]
10096
else:
101-
raise ValueError(f'Could not resolve branch from uri "{uri}"')
97+
refs = ls_github_branches(org, repo)
98+
refs.update(ls_github_tags(org, repo))
99+
branches = {quote_plus(k) for k in refs}
100+
# match beginning of path with one of existing branches
101+
# "" is hack for cases with empty path (like 'github.com/org/rep/tree/branch/')
102+
for i, part in enumerate(path[2:] + [""], start=2):
103+
if sha in branches:
104+
path = path[i:]
105+
break
106+
sha = f"{sha}%2F{part}"
107+
else:
108+
raise ValueError(
109+
f'Could not resolve branch from uri "{uri}"'
110+
)
102111
else:
103112
sha = None
104113
return {

tests/contrib/test_github.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from mlem.contrib.github import (
66
GithubResolver,
77
github_check_rev,
8+
is_long_sha,
89
ls_branches,
910
ls_github_branches,
1011
)
@@ -91,3 +92,10 @@ def test_github_check_rev():
9192
assert github_check_rev(
9293
MLEM_TEST_REPO_ORG, MLEM_TEST_REPO_NAME, "first_rev_link"
9394
) # tag
95+
96+
97+
def test_is_long_sha():
    """Check is_long_sha against one accepted and three rejected strings."""
    accepted = "cd7c2a08911b697c3f80c73d0394fb105d3044d5"
    rejected = (
        "cd7c2a08911b697c3f80c73d0394fb105d3044d51",  # 41 characters
        "cd7c2a08911b697c3f80c73d0394fb105d3044dA",  # ends in uppercase "A"
        "cd7c2a08911b697c3f80c73d0394fb105d3044d",  # 39 characters
    )

    assert is_long_sha(accepted)
    for candidate in rejected:
        assert not is_long_sha(candidate)

0 commit comments

Comments
 (0)