|
5 | 5 | from collections.abc import Iterable |
6 | 6 | from contextlib import contextmanager |
7 | 7 | from pathlib import Path |
| 8 | +from typing import Literal |
8 | 9 | from xml.dom.minidom import parseString |
9 | 10 |
|
10 | 11 | import dicttoxml |
@@ -242,25 +243,59 @@ def get_language_file_extensions(language: ProgrammingLanguage): |
242 | 243 | return set(TSFile.get_extensions()) |
243 | 244 |
|
244 | 245 |
|
245 | | -def determine_project_language(folder_path: str): |
246 | | - from codegen.sdk.python import PyFile |
247 | | - from codegen.sdk.typescript.file import TSFile |
| 246 | +def determine_project_language(folder_path: str, strategy: Literal["most_common", "package_json"] = "package_json") -> ProgrammingLanguage: |
| 247 | + """Determines the primary programming language of a project. |
248 | 248 |
|
249 | | - EXTENSIONS = { |
250 | | - ProgrammingLanguage.PYTHON: PyFile.get_extensions(), |
251 | | - ProgrammingLanguage.TYPESCRIPT: TSFile.get_extensions(), |
252 | | - } |
| 249 | + Args: |
| 250 | + folder_path (str): Path to the folder to analyze |
| 251 | + strategy (Literal["most_common", "package_json"]): Strategy to use for determining language. |
| 252 | + "most_common" analyzes file extensions, "package_json" checks for package.json presence. |
| 253 | +
|
| 254 | + Returns: |
| 255 | + ProgrammingLanguage: The determined programming language |
| 256 | + """ |
| 257 | + # TODO: Create a new strategy that follows gitignore |
| 258 | + if strategy == "most_common": |
| 259 | + return _determine_language_by_file_count(folder_path) |
| 260 | + elif strategy == "package_json": |
| 261 | + return _determine_language_by_package_json(folder_path) |
| 262 | + |
| 263 | + |
| 264 | +def _determine_language_by_package_json(folder_path: str) -> ProgrammingLanguage: |
| 265 | + """Determines project language by checking for presence of package.json. |
| 266 | + Faster but less accurate than file count strategy. |
| 267 | +
|
| 268 | + Args: |
| 269 | + folder_path (str): Path to the folder to analyze |
253 | 270 |
|
| 271 | + Returns: |
| 272 | + ProgrammingLanguage: TYPESCRIPT if package.json exists, otherwise PYTHON |
254 | 273 | """ |
255 | | - Analyzes a folder to determine the primary programming language based on file extensions. |
| 274 | + package_json_path = Path(folder_path) / "package.json" |
| 275 | + if package_json_path.exists(): |
| 276 | + return ProgrammingLanguage.TYPESCRIPT |
| 277 | + else: |
| 278 | + return ProgrammingLanguage.PYTHON |
| 279 | + |
| 280 | + |
| 281 | +def _determine_language_by_file_count(folder_path: str) -> ProgrammingLanguage: |
| 282 | + """Analyzes a folder to determine the primary programming language based on file extensions. |
256 | 283 | Returns the language with the most matching files. |
257 | 284 |
|
258 | 285 | Args: |
259 | 286 | folder_path (str): Path to the folder to analyze |
260 | 287 |
|
261 | 288 | Returns: |
262 | | - Optional[ProgrammingLanguage]: The dominant programming language, or None if no matching files found |
| 289 | + ProgrammingLanguage: The dominant programming language, or UNSUPPORTED if no matching files found |
263 | 290 | """ |
| 291 | + from codegen.sdk.python import PyFile |
| 292 | + from codegen.sdk.typescript.file import TSFile |
| 293 | + |
| 294 | + EXTENSIONS = { |
| 295 | + ProgrammingLanguage.PYTHON: PyFile.get_extensions(), |
| 296 | + ProgrammingLanguage.TYPESCRIPT: TSFile.get_extensions(), |
| 297 | + } |
| 298 | + |
264 | 299 | folder = Path(folder_path) |
265 | 300 | if not folder.exists() or not folder.is_dir(): |
266 | 301 | msg = f"Invalid folder path: {folder_path}" |
|
0 commit comments