diff --git a/prometheus/app/api/routes/repository.py b/prometheus/app/api/routes/repository.py
index e2945da..eb8541a 100644
--- a/prometheus/app/api/routes/repository.py
+++ b/prometheus/app/api/routes/repository.py
@@ -15,32 +15,26 @@
 from prometheus.app.services.user_service import UserService
 from prometheus.configuration.config import settings
 from prometheus.exceptions.server_exception import ServerException
+from prometheus.utils.github_utils import is_repository_public
 
 router = APIRouter()
 
 
-async def get_github_token(request: Request, github_token: str) -> str:
-    """Retrieve GitHub token from the request or user profile."""
+async def get_github_token(request: Request, github_token: str | None = None) -> str | None:
+    """Retrieve GitHub token from the request or user profile.
+
+    Returns:
+        str | None: GitHub token if available, None for public repositories
+    """
     # If the token is provided in the request, use it directly
     if github_token:
         return github_token
 
-    # If the token is not provided, fetch it from the user profile if logged in
-    # Check if the user is authenticated
-    if not settings.ENABLE_AUTHENTICATION:
-        # If the user is not authenticated, raise an exception
-        raise ServerException(
-            code=400, message="GitHub token is required, please provide it or log in"
-        )
-    # If the user is authenticated, get the user service and fetch the token
-    user_service: UserService = request.app.state.service["user_service"]
-    user = await user_service.get_user_by_id(request.state.user_id)
-    github_token = user.github_token if user else None
-    # If the token is still not available, raise an exception
-    if not github_token:
-        raise ServerException(
-            code=400, message="Either provide a GitHub token or set it in your user profile"
-        )
+    # If the user is authenticated, get the user service and fetch the token
+    if settings.ENABLE_AUTHENTICATION:
+        user_service: UserService = request.app.state.service["user_service"]
+        user = await user_service.get_user_by_id(request.state.user_id)
+        github_token = user.github_token if user else None
     return github_token
 
 
@@ -86,9 +80,17 @@ async def upload_github_repository(
                 message=f"You have reached the maximum number of repositories ({settings.DEFAULT_USER_REPOSITORY_LIMIT}). Please delete some repositories before uploading new ones.",
             )
 
-    # Get the GitHub token
+    # Get the GitHub token (may be None for public repositories)
     github_token = await get_github_token(request, upload_repository_request.github_token)
 
+    # Check if the repository is public or private
+    is_repository_public_ = await is_repository_public(upload_repository_request.https_url)
+    if not is_repository_public_ and not github_token:
+        raise ServerException(
+            code=400,
+            message="This appears to be a private repository. Please provide a GitHub token.",
+        )
+
     # Clone the repository
     try:
         saved_path = await repository_service.clone_github_repo(
@@ -96,7 +98,7 @@ async def upload_github_repository(
         )
     except git.exc.GitCommandError:
         raise ServerException(
-            code=400, message=f"Unable to clone {upload_repository_request.https_url}."
+            code=400, message=f"Unable to clone {upload_repository_request.https_url}"
         )
 
     # Build and save the knowledge graph from the cloned repository
diff --git a/prometheus/app/models/requests/repository.py b/prometheus/app/models/requests/repository.py
index 42bd51b..8556b0f 100644
--- a/prometheus/app/models/requests/repository.py
+++ b/prometheus/app/models/requests/repository.py
@@ -14,7 +14,7 @@ class UploadRepositoryRequest(BaseModel):
     )
     github_token: str | None = Field(
         default=None,
-        description="Optional GitHub token for repository clone",
+        description="GitHub token for private repository clone. Optional for public repositories.",
         max_length=100,
     )
 
diff --git a/prometheus/app/services/repository_service.py b/prometheus/app/services/repository_service.py
index 9191064..b64f1c8 100644
--- a/prometheus/app/services/repository_service.py
+++ b/prometheus/app/services/repository_service.py
@@ -58,7 +58,7 @@ def get_new_playground_path(self) -> Path:
         return new_path
 
     async def clone_github_repo(
-        self, github_token: str, https_url: str, commit_id: Optional[str] = None
+        self, github_token: str | None, https_url: str, commit_id: Optional[str] = None
     ) -> Path:
         """Clones a GitHub repository to the local workspace.
 
@@ -67,7 +67,7 @@ async def clone_github_repo(
         the operation may be skipped.
 
         Args:
-            github_token: GitHub access token for authentication.
+            github_token: GitHub access token for authentication. None for public repositories.
             https_url: HTTPS URL of the GitHub repository.
             commit_id: Optional specific commit to check out.
 
diff --git a/prometheus/git/git_repository.py b/prometheus/git/git_repository.py
index 5a96413..f62e888 100644
--- a/prometheus/git/git_repository.py
+++ b/prometheus/git/git_repository.py
@@ -51,19 +51,24 @@ def _set_default_branch(self):
         self.default_branch = self.repo.active_branch.name
 
     async def from_clone_repository(
-        self, https_url: str, github_access_token: str, target_directory: Path
+        self, https_url: str, github_access_token: str | None, target_directory: Path
     ):
         """Clone a remote repository using HTTPS authentication.
 
         Args:
             https_url: HTTPS URL of the remote repository.
-            github_access_token: GitHub access token for authentication.
+            github_access_token: GitHub access token for authentication. None for public repositories.
             target_directory: Directory where the repository will be cloned.
 
         Returns:
             Repo: GitPython Repo object representing the cloned repository.
         """
-        https_url = https_url.replace("https://", f"https://x-access-token:{github_access_token}@")
+        # Only modify the URL with token authentication if a token is provided
+        if github_access_token:
+            https_url = https_url.replace(
+                "https://", f"https://x-access-token:{github_access_token}@"
+            )
+
         repo_name = https_url.split("/")[-1].split(".")[0]
         local_path = target_directory / repo_name
         if local_path.exists():
diff --git a/prometheus/utils/github_utils.py b/prometheus/utils/github_utils.py
index 479c7b1..874d2f8 100644
--- a/prometheus/utils/github_utils.py
+++ b/prometheus/utils/github_utils.py
@@ -55,3 +55,40 @@ async def get_github_issue(repo: str, issue_number: int, github_token: str) -> D
         "state": issue_data["state"],
         "html_url": issue_data["html_url"],
     }
+
+
+async def is_repository_public(https_url: str) -> bool:
+    """
+    Check if a GitHub repository is public by making an unauthenticated request.
+
+    Args:
+        https_url: HTTPS URL of the GitHub repository
+
+    Returns:
+        bool: True if the repository is public; False if it is private, not found,
+            malformed as an ``owner/repo`` URL, or the visibility check fails
+    """
+    # Extract owner and repo from HTTPS URL
+    # Example: https://github.com/owner/repo.git -> owner/repo
+    url_path = https_url.strip().removeprefix("https://github.com/").rstrip("/")
+    # Strip only a trailing ".git" so names such as "user.github.io" stay intact
+    url_path = url_path.removesuffix(".git")
+    owner, _, repo = url_path.partition("/")
+    if not owner or not repo or "/" in repo:
+        # Not an owner/repo URL: report non-public instead of raising ValueError
+        return False
+
+    # Make unauthenticated request to check repository visibility
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"https://api.github.com/repos/{owner}/{repo}",
+                headers={"Accept": "application/vnd.github.v3+json"},
+            )
+    except httpx.HTTPError:
+        # Network failure or timeout, assume private for safety
+        return False
+
+    # Only 200 proves the repository is readable without authentication (public);
+    # 404 (private or missing) and any other status are treated as non-public
+    return response.status_code == 200