Skip to content

Commit 5246abf

Browse files
EdwardJXLi and codegen-bot authored
Add package.json-based repo language detection (#186)
# Motivation
<!-- Why is this change necessary? -->

# Content
<!-- Please include a summary of the change -->

# Testing
<!-- How was the change tested? -->

# Please check the following before marking your PR as ready for review
- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as needed
- [ ] I have read and agree to the [Contributor License Agreement](../CLA.md)

---------

Co-authored-by: codegen-bot <team+codegenbot@codegen.sh>
1 parent 58c7113 commit 5246abf

File tree

1 file changed

+44
-9
lines changed

1 file changed

+44
-9
lines changed

src/codegen/sdk/utils.py

Lines changed: 44 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from collections.abc import Iterable
66
from contextlib import contextmanager
77
from pathlib import Path
8+
from typing import Literal
89
from xml.dom.minidom import parseString
910

1011
import dicttoxml
@@ -242,25 +243,59 @@ def get_language_file_extensions(language: ProgrammingLanguage):
242243
return set(TSFile.get_extensions())
243244

244245

245-
def determine_project_language(folder_path: str):
246-
from codegen.sdk.python import PyFile
247-
from codegen.sdk.typescript.file import TSFile
246+
def determine_project_language(folder_path: str, strategy: Literal["most_common", "package_json"] = "package_json") -> ProgrammingLanguage:
247+
"""Determines the primary programming language of a project.
248248
249-
EXTENSIONS = {
250-
ProgrammingLanguage.PYTHON: PyFile.get_extensions(),
251-
ProgrammingLanguage.TYPESCRIPT: TSFile.get_extensions(),
252-
}
249+
Args:
250+
folder_path (str): Path to the folder to analyze
251+
strategy (Literal["most_common", "package_json"]): Strategy to use for determining language.
252+
"most_common" analyzes file extensions, "package_json" checks for package.json presence.
253+
254+
Returns:
255+
ProgrammingLanguage: The determined programming language
256+
"""
257+
# TODO: Create a new strategy that follows gitignore
258+
if strategy == "most_common":
259+
return _determine_language_by_file_count(folder_path)
260+
elif strategy == "package_json":
261+
return _determine_language_by_package_json(folder_path)
262+
263+
264+
def _determine_language_by_package_json(folder_path: str) -> ProgrammingLanguage:
265+
"""Determines project language by checking for presence of package.json.
266+
Faster but less accurate than file count strategy.
267+
268+
Args:
269+
folder_path (str): Path to the folder to analyze
253270
271+
Returns:
272+
ProgrammingLanguage: TYPESCRIPT if package.json exists, otherwise PYTHON
254273
"""
255-
Analyzes a folder to determine the primary programming language based on file extensions.
274+
package_json_path = Path(folder_path) / "package.json"
275+
if package_json_path.exists():
276+
return ProgrammingLanguage.TYPESCRIPT
277+
else:
278+
return ProgrammingLanguage.PYTHON
279+
280+
281+
def _determine_language_by_file_count(folder_path: str) -> ProgrammingLanguage:
282+
"""Analyzes a folder to determine the primary programming language based on file extensions.
256283
Returns the language with the most matching files.
257284
258285
Args:
259286
folder_path (str): Path to the folder to analyze
260287
261288
Returns:
262-
Optional[ProgrammingLanguage]: The dominant programming language, or None if no matching files found
289+
ProgrammingLanguage: The dominant programming language, or UNSUPPORTED if no matching files found
263290
"""
291+
from codegen.sdk.python import PyFile
292+
from codegen.sdk.typescript.file import TSFile
293+
294+
EXTENSIONS = {
295+
ProgrammingLanguage.PYTHON: PyFile.get_extensions(),
296+
ProgrammingLanguage.TYPESCRIPT: TSFile.get_extensions(),
297+
}
298+
264299
folder = Path(folder_path)
265300
if not folder.exists() or not folder.is_dir():
266301
msg = f"Invalid folder path: {folder_path}"

0 commit comments

Comments
 (0)