From 730ee91c4eb404bc7b7413feedb5921978a1f4f7 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 13 Sep 2025 14:17:40 +0000 Subject: [PATCH 1/3] Fix issue #133: Repository upload requires GitHub token for public repositories --- prometheus/app/api/routes/repository.py | 39 +++++++++++-------- prometheus/app/models/requests/repository.py | 2 +- prometheus/app/services/repository_service.py | 4 +- prometheus/git/git_repository.py | 9 +++-- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/prometheus/app/api/routes/repository.py b/prometheus/app/api/routes/repository.py index e2945da6..9c53be6b 100644 --- a/prometheus/app/api/routes/repository.py +++ b/prometheus/app/api/routes/repository.py @@ -19,28 +19,28 @@ router = APIRouter() -async def get_github_token(request: Request, github_token: str) -> str: - """Retrieve GitHub token from the request or user profile.""" +async def get_github_token(request: Request, github_token: str | None = None) -> str | None: + """Retrieve GitHub token from the request or user profile. + + Returns: + str | None: GitHub token if available, None for public repositories + """ # If the token is provided in the request, use it directly if github_token: return github_token - # If the token is not provided, fetch it from the user profile if logged in - # Check if the user is authenticated + + # If no token is provided and authentication is disabled, return None + # This allows public repositories to be cloned without authentication if not settings.ENABLE_AUTHENTICATION: - # If the user is not authenticated, raise an exception - raise ServerException( - code=400, message="GitHub token is required, please provide it or log in" - ) + return None + # If the user is authenticated, get the user service and fetch the token user_service: UserService = request.app.state.service["user_service"] user = await user_service.get_user_by_id(request.state.user_id) github_token = user.github_token if user else None - # If the token is still not available, raise an exception - if not github_token: - raise ServerException( - code=400, message="Either provide a GitHub token or set it in your user profile" - ) + # Return the token if available, otherwise None + # This allows the caller to handle public vs private repository logic return github_token @@ -86,7 +86,7 @@ async def upload_github_repository( message=f"You have reached the maximum number of repositories ({settings.DEFAULT_USER_REPOSITORY_LIMIT}). Please delete some repositories before uploading new ones.", ) - # Get the GitHub token + # Get the GitHub token (may be None for public repositories) github_token = await get_github_token(request, upload_repository_request.github_token) # Clone the repository @@ -94,9 +94,16 @@ async def upload_github_repository( saved_path = await repository_service.clone_github_repo( github_token, upload_repository_request.https_url, upload_repository_request.commit_id ) - except git.exc.GitCommandError: + except git.exc.GitCommandError as e: + # If cloning failed and no token was provided, it might be a private repository + if github_token is None and "Authentication failed" in str(e): + raise ServerException( + code=400, + message=f"Unable to clone {upload_repository_request.https_url}. " + f"This appears to be a private repository. Please provide a GitHub token." + ) raise ServerException( - code=400, message=f"Unable to clone {upload_repository_request.https_url}." + code=400, message=f"Unable to clone {upload_repository_request.https_url}: {str(e)}" ) # Build and save the knowledge graph from the cloned repository diff --git a/prometheus/app/models/requests/repository.py b/prometheus/app/models/requests/repository.py index 42bd51b5..8556b0f0 100644 --- a/prometheus/app/models/requests/repository.py +++ b/prometheus/app/models/requests/repository.py @@ -14,7 +14,7 @@ class UploadRepositoryRequest(BaseModel): ) github_token: str | None = Field( default=None, - description="Optional GitHub token for repository clone", + description="GitHub token for private repository clone. Optional for public repositories.", max_length=100, ) diff --git a/prometheus/app/services/repository_service.py b/prometheus/app/services/repository_service.py index 91910646..b64f1c85 100644 --- a/prometheus/app/services/repository_service.py +++ b/prometheus/app/services/repository_service.py @@ -58,7 +58,7 @@ def get_new_playground_path(self) -> Path: return new_path async def clone_github_repo( - self, github_token: str, https_url: str, commit_id: Optional[str] = None + self, github_token: str | None, https_url: str, commit_id: Optional[str] = None ) -> Path: """Clones a GitHub repository to the local workspace. @@ -67,7 +67,7 @@ async def clone_github_repo( the operation may be skipped. Args: - github_token: GitHub access token for authentication. + github_token: GitHub access token for authentication. None for public repositories. https_url: HTTPS URL of the GitHub repository. commit_id: Optional specific commit to check out. diff --git a/prometheus/git/git_repository.py b/prometheus/git/git_repository.py index 5a96413e..aabef811 100644 --- a/prometheus/git/git_repository.py +++ b/prometheus/git/git_repository.py @@ -51,19 +51,22 @@ def _set_default_branch(self): self.default_branch = self.repo.active_branch.name async def from_clone_repository( - self, https_url: str, github_access_token: str, target_directory: Path + self, https_url: str, github_access_token: str | None, target_directory: Path ): """Clone a remote repository using HTTPS authentication. Args: https_url: HTTPS URL of the remote repository. - github_access_token: GitHub access token for authentication. + github_access_token: GitHub access token for authentication. None for public repositories. target_directory: Directory where the repository will be cloned. Returns: Repo: GitPython Repo object representing the cloned repository. """ - https_url = https_url.replace("https://", f"https://x-access-token:{github_access_token}@") + # Only modify the URL with token authentication if a token is provided + if github_access_token: + https_url = https_url.replace("https://", f"https://x-access-token:{github_access_token}@") + repo_name = https_url.split("/")[-1].split(".")[0] local_path = target_directory / repo_name if local_path.exists(): From b8c2d51db6cd2a6e499627f15291c5470222a80a Mon Sep 17 00:00:00 2001 From: Yue Pan <79363355+dcloud347@users.noreply.github.com> Date: Sat, 13 Sep 2025 23:46:46 +0800 Subject: [PATCH 2/3] feat: Add public repository check and improve GitHub token handling --- prometheus/app/api/routes/repository.py | 35 +++++++++++-------------- prometheus/git/git_repository.py | 6 +++-- prometheus/utils/github_utils.py | 32 ++++++++++++++++++++++ 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/prometheus/app/api/routes/repository.py b/prometheus/app/api/routes/repository.py index 9c53be6b..b068800a 100644 --- a/prometheus/app/api/routes/repository.py +++ b/prometheus/app/api/routes/repository.py @@ -15,32 +15,26 @@ from prometheus.app.services.user_service import UserService from prometheus.configuration.config import settings from prometheus.exceptions.server_exception import ServerException +from prometheus.utils.github_utils import is_repository_public router = APIRouter() async def get_github_token(request: Request, github_token: str | None = None) -> str | None: """Retrieve GitHub token from the request or user profile. - + Returns: str | None: GitHub token if available, None for public repositories """ # If the token is provided in the request, use it directly if github_token: return github_token - - # If no token is provided and authentication is disabled, return None - # This allows public repositories to be cloned without authentication - if not settings.ENABLE_AUTHENTICATION: - return None - - # If the user is authenticated, get the user service and fetch the token - user_service: UserService = request.app.state.service["user_service"] - user = await user_service.get_user_by_id(request.state.user_id) - github_token = user.github_token if user else None - # Return the token if available, otherwise None - # This allows the caller to handle public vs private repository logic + # If the user is authenticated, get the user service and fetch the token + if settings.ENABLE_AUTHENTICATION: + user_service: UserService = request.app.state.service["user_service"] + user = await user_service.get_user_by_id(request.state.user_id) + github_token = user.github_token if user else None return github_token @@ -89,19 +83,20 @@ async def upload_github_repository( # Get the GitHub token (may be None for public repositories) github_token = await get_github_token(request, upload_repository_request.github_token) + # Check if the repository is public or private + is_repository_public_ = await is_repository_public(upload_repository_request.https_url) + if not is_repository_public_ and not github_token: + raise ServerException( + code=400, + message="This appears to be a private repository. Please provide a GitHub token.", + ) + # Clone the repository try: saved_path = await repository_service.clone_github_repo( github_token, upload_repository_request.https_url, upload_repository_request.commit_id ) except git.exc.GitCommandError as e: - # If cloning failed and no token was provided, it might be a private repository - if github_token is None and "Authentication failed" in str(e): - raise ServerException( - code=400, - message=f"Unable to clone {upload_repository_request.https_url}. " - f"This appears to be a private repository. Please provide a GitHub token." - ) raise ServerException( code=400, message=f"Unable to clone {upload_repository_request.https_url}: {str(e)}" ) diff --git a/prometheus/git/git_repository.py b/prometheus/git/git_repository.py index aabef811..f62e8880 100644 --- a/prometheus/git/git_repository.py +++ b/prometheus/git/git_repository.py @@ -65,8 +65,10 @@ async def from_clone_repository( """ # Only modify the URL with token authentication if a token is provided if github_access_token: - https_url = https_url.replace("https://", f"https://x-access-token:{github_access_token}@") - + https_url = https_url.replace( + "https://", f"https://x-access-token:{github_access_token}@" + ) + repo_name = https_url.split("/")[-1].split(".")[0] local_path = target_directory / repo_name if local_path.exists(): diff --git a/prometheus/utils/github_utils.py b/prometheus/utils/github_utils.py index 479c7b1e..874d2f81 100644 --- a/prometheus/utils/github_utils.py +++ b/prometheus/utils/github_utils.py @@ -55,3 +55,35 @@ async def get_github_issue(repo: str, issue_number: int, github_token: str) -> D "state": issue_data["state"], "html_url": issue_data["html_url"], } + + +async def is_repository_public(https_url: str) -> bool: + """ + Check if a GitHub repository is public by making an unauthenticated request. + + Args: + https_url: HTTPS URL of the GitHub repository + + Returns: + bool: True if the repository is public, False if private or not found + """ + # Extract owner and repo from HTTPS URL + # Example: https://github.com/owner/repo.git -> owner/repo + url_parts = https_url.replace("https://github.com/", "").replace(".git", "") + owner, repo = url_parts.split("/") + # Make unauthenticated request to check repository visibility + async with httpx.AsyncClient() as client: + response = await client.get( + f"https://api.github.com/repos/{owner}/{repo}", + headers={"Accept": "application/vnd.github.v3+json"}, + ) + + if response.status_code == 200: + # Repository exists and is accessible without authentication (public) + return True + elif response.status_code == 404: + # Repository not found or private (requires authentication) + return False + else: + # Other error, assume private for safety + return False From 487e691a1c5c41253fa3b429d8132841045218bc Mon Sep 17 00:00:00 2001 From: Yue Pan <79363355+dcloud347@users.noreply.github.com> Date: Sat, 13 Sep 2025 23:51:53 +0800 Subject: [PATCH 3/3] fix: Simplify error handling by removing exception message from GitCommandError --- prometheus/app/api/routes/repository.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prometheus/app/api/routes/repository.py b/prometheus/app/api/routes/repository.py index b068800a..eb8541a2 100644 --- a/prometheus/app/api/routes/repository.py +++ b/prometheus/app/api/routes/repository.py @@ -96,9 +96,9 @@ async def upload_github_repository( saved_path = await repository_service.clone_github_repo( github_token, upload_repository_request.https_url, upload_repository_request.commit_id ) - except git.exc.GitCommandError as e: + except git.exc.GitCommandError: raise ServerException( - code=400, message=f"Unable to clone {upload_repository_request.https_url}: {str(e)}" + code=400, message=f"Unable to clone {upload_repository_request.https_url}" ) # Build and save the knowledge graph from the cloned repository