diff --git a/src/safeds_stubgen/api_analyzer/_ast_visitor.py b/src/safeds_stubgen/api_analyzer/_ast_visitor.py index de871c24..2be70811 100644 --- a/src/safeds_stubgen/api_analyzer/_ast_visitor.py +++ b/src/safeds_stubgen/api_analyzer/_ast_visitor.py @@ -775,7 +775,7 @@ def _create_inferred_results( if type__ not in result_array[i]: result_array[i].append(type__) - longest_inner_list = max(len(result_array[i]), longest_inner_list) + longest_inner_list = max(longest_inner_list, len(result_array[i])) else: result_array.append([type__]) @@ -1452,18 +1452,12 @@ def _check_publicity_in_reexports(self, name: str, qname: str, parent: Module | continue # If the whole module was reexported we have to check if the name or alias is intern - if module_is_reexported: + if module_is_reexported and not_internal and (isinstance(parent, Module) or parent.is_public): # Check the wildcard imports of the source for wildcard_import in reexport_source.wildcard_imports: - if ( - ( - (is_from_same_package and wildcard_import.module_name == module_name) - or (is_from_another_package and wildcard_import.module_name == module_qname) - ) - and not_internal - and (isinstance(parent, Module) or parent.is_public) - ): + if ((is_from_same_package and wildcard_import.module_name == module_name) + or (is_from_another_package and wildcard_import.module_name == module_qname)): return True # Check the qualified imports of the source @@ -1474,11 +1468,9 @@ def _check_publicity_in_reexports(self, name: str, qname: str, parent: Module | if ( qualified_import.qualified_name in {module_name, module_qname} and ( - (qualified_import.alias is None and not_internal) + qualified_import.alias is None or (qualified_import.alias is not None and not is_internal(qualified_import.alias)) ) - and not_internal - and (isinstance(parent, Module) or parent.is_public) ): # If the module name or alias is not internal, check if the parent is public return True diff --git a/src/safeds_stubgen/docstring_parsing/_docstring_parser.py b/src/safeds_stubgen/docstring_parsing/_docstring_parser.py index 87a06b8e..0dde2aff 100644 --- a/src/safeds_stubgen/docstring_parsing/_docstring_parser.py +++ b/src/safeds_stubgen/docstring_parsing/_docstring_parser.py @@ -3,8 +3,8 @@ import logging from typing import TYPE_CHECKING, Literal +from _griffe import models as griffe_models from griffe import load -from griffe.dataclasses import Docstring from griffe.docstrings.dataclasses import DocstringAttribute, DocstringParameter from griffe.docstrings.utils import parse_annotation from griffe.enumerations import DocstringSectionKind, Parser @@ -25,34 +25,49 @@ if TYPE_CHECKING: from pathlib import Path - from griffe.dataclasses import Object from mypy import nodes class DocstringParser(AbstractDocstringParser): def __init__(self, parser: Parser, package_path: Path): + self.parser = parser + while True: # If a package has no __init__.py file Griffe can't parse it, therefore we check the parent try: - self.griffe_build = load(package_path, docstring_parser=parser) + griffe_build = load(package_path, docstring_parser=parser) break except KeyError: package_path = package_path.parent - self.parser = parser - self.__cached_node: str | None = None - self.__cached_docstring: Docstring | None = None + self.griffe_index: dict[str, griffe_models.Object] = {} + self._recursive_griffe_indexer(griffe_build) + + def _recursive_griffe_indexer(self, griffe_build: griffe_models.Object | griffe_models.Alias) -> None: + for member in griffe_build.all_members.values(): + if isinstance( + member, + griffe_models.Class | griffe_models.Function | griffe_models.Attribute | griffe_models.Alias, + ): + self.griffe_index[member.path] = member + + if isinstance(member, griffe_models.Module | griffe_models.Class): + self._recursive_griffe_indexer(member) def get_class_documentation(self, class_node: nodes.ClassDef) -> ClassDocstring: - griffe_node = self._get_griffe_node(class_node.fullname) + griffe_node = self.griffe_index.get(class_node.fullname, None) if griffe_node is None: # pragma: no cover - raise TypeError(f"Expected a griffe node for {class_node.fullname}, got None.") + msg = ( + f"Something went wrong while searching for the docstring for {class_node.fullname}. Please make sure" + " that all directories with python files have an __init__.py file." + ) + logging.warning(msg) description = "" docstring = "" examples = [] - if griffe_node.docstring is not None: + if griffe_node is not None and griffe_node.docstring is not None: docstring = griffe_node.docstring.value.strip("\n") try: @@ -76,7 +91,7 @@ def get_function_documentation(self, function_node: nodes.FuncDef) -> FunctionDo docstring = "" description = "" examples = [] - griffe_docstring = self.__get_cached_docstring(function_node.fullname) + griffe_docstring = self._get_griffe_docstring(function_node.fullname) if griffe_docstring is not None: docstring = griffe_docstring.value.strip("\n") @@ -110,9 +125,9 @@ def get_parameter_documentation( # For constructors (__init__ functions) the parameters are described on the class if function_name == "__init__" and parent_class_qname: parent_qname = parent_class_qname.replace("/", ".") - griffe_docstring = self.__get_cached_docstring(parent_qname) + griffe_docstring = self._get_griffe_docstring(parent_qname) else: - griffe_docstring = self.__get_cached_docstring(function_qname) + griffe_docstring = self._get_griffe_docstring(function_qname) # Find matching parameter docstrings matching_parameters = [] @@ -123,7 +138,7 @@ def get_parameter_documentation( # https://github.com/Safe-DS/Library-Analyzer/issues/10) if self.parser == Parser.numpy and len(matching_parameters) == 0 and function_name == "__init__": # Get constructor docstring & find matching parameter docstrings - constructor_docstring = self.__get_cached_docstring(function_qname) + constructor_docstring = self._get_griffe_docstring(function_qname) if constructor_docstring is not None: matching_parameters = self._get_matching_docstrings(constructor_docstring, parameter_name, "param") @@ -136,7 +151,7 @@ def get_parameter_documentation( raise TypeError(f"Expected parameter docstring, got {type(last_parameter)}.") if griffe_docstring is None: # pragma: no cover - griffe_docstring = Docstring("") + griffe_docstring = griffe_models.Docstring("") annotation = last_parameter.annotation if annotation is None: @@ -154,19 +169,15 @@ def get_parameter_documentation( description=last_parameter.description.strip("\n") or "", ) - def get_attribute_documentation( - self, - parent_class_qname: str, - attribute_name: str, - ) -> AttributeDocstring: + def get_attribute_documentation(self, parent_class_qname: str, attribute_name: str) -> AttributeDocstring: parent_class_qname = parent_class_qname.replace("/", ".") # Find matching attribute docstrings parent_qname = parent_class_qname - griffe_docstring = self.__get_cached_docstring(parent_qname) + griffe_docstring = self._get_griffe_docstring(parent_qname) if griffe_docstring is None: matching_attributes = [] - griffe_docstring = Docstring("") + griffe_docstring = griffe_models.Docstring("") else: matching_attributes = self._get_matching_docstrings(griffe_docstring, attribute_name, "attr") @@ -174,7 +185,7 @@ def get_attribute_documentation( # (see issue https://github.com/Safe-DS/Library-Analyzer/issues/10) if self.parser == Parser.numpy and len(matching_attributes) == 0: constructor_qname = f"{parent_class_qname}.__init__" - constructor_docstring = self.__get_cached_docstring(constructor_qname) + constructor_docstring = self._get_griffe_docstring(constructor_qname) # Find matching parameter docstrings if constructor_docstring is not None: @@ -198,7 +209,7 @@ def get_attribute_documentation( def get_result_documentation(self, function_qname: str) -> list[ResultDocstring]: # Find matching parameter docstrings - griffe_docstring = self.__get_cached_docstring(function_qname) + griffe_docstring = self._get_griffe_docstring(function_qname) if griffe_docstring is None: return [] @@ -251,7 +262,7 @@ def get_result_documentation(self, function_qname: str) -> list[ResultDocstring] @staticmethod def _get_matching_docstrings( - function_doc: Docstring, + function_doc: griffe_models.Docstring, name: str, type_: Literal["attr", "param"], ) -> list[DocstringAttribute | DocstringParameter]: @@ -278,7 +289,7 @@ def _get_matching_docstrings( def _griffe_annotation_to_api_type( self, annotation: Expr | str, - docstring: Docstring, + docstring: griffe_models.Docstring, ) -> sds_types.AbstractType | None: if isinstance(annotation, ExprName | ExprAttribute): if annotation.canonical_path == "typing.Any": @@ -291,6 +302,8 @@ def _griffe_annotation_to_api_type( return sds_types.NamedType(name="float", qname="builtins.float") elif annotation.canonical_path == "str": return sds_types.NamedType(name="str", qname="builtins.str") + elif annotation.canonical_path == "bytes": + return sds_types.NamedType(name="bytes", qname="builtins.bytes") elif annotation.canonical_path == "list": return sds_types.ListType(types=[]) elif annotation.canonical_path == "tuple": @@ -403,49 +416,15 @@ def _remove_default_from_griffe_annotation(self, annotation: str) -> str: return annotation.split(", default")[0] return annotation - def _get_griffe_node(self, qname: str) -> Object | None: - node_qname_parts = qname.split(".") - griffe_node = self.griffe_build - for part in node_qname_parts: - if part in griffe_node.modules: - griffe_node = griffe_node.modules[part] - elif part in griffe_node.classes: - griffe_node = griffe_node.classes[part] - elif part in griffe_node.functions: - griffe_node = griffe_node.functions[part] - elif part in griffe_node.attributes: - griffe_node = griffe_node.attributes[part] - elif part == "__init__" and griffe_node.is_class: - return None - elif griffe_node.name == part: - continue - else: # pragma: no cover - msg = ( - f"Something went wrong while searching for the docstring for {qname}. Please make sure" - " that all directories with python files have an __init__.py file.", - ) - logging.warning(msg) - - return griffe_node - - def __get_cached_docstring(self, qname: str) -> Docstring | None: - """ - Return the Docstring for the given function node. + def _get_griffe_docstring(self, qname: str) -> griffe_models.Docstring | None: + griffe_node = self.griffe_index.get(qname, None) - It is only recomputed when the function node differs from the previous one that was passed to this function. - This avoids reparsing the docstring for the function itself and all of its parameters. + if griffe_node is not None: + return griffe_node.docstring - On Lars's system this caused a significant performance improvement: Previously, 8.382s were spent inside the - function get_parameter_documentation when parsing sklearn. Afterward, it was only 2.113s. - """ - if self.__cached_node != qname or qname.endswith("__init__"): - self.__cached_node = qname - - griffe_node = self._get_griffe_node(qname) - if griffe_node is not None: - griffe_docstring = griffe_node.docstring - self.__cached_docstring = griffe_docstring - else: - self.__cached_docstring = None - - return self.__cached_docstring + msg = ( + f"Something went wrong while searching for the docstring for {qname}. Please make sure" + " that all directories with python files have an __init__.py file.", + ) + logging.warning(msg) + return None diff --git a/src/safeds_stubgen/stubs_generator/_stub_string_generator.py b/src/safeds_stubgen/stubs_generator/_stub_string_generator.py index 8fd717a6..d6072439 100644 --- a/src/safeds_stubgen/stubs_generator/_stub_string_generator.py +++ b/src/safeds_stubgen/stubs_generator/_stub_string_generator.py @@ -50,8 +50,9 @@ def __init__(self, api: API, convert_identifiers: bool) -> None: self.class_generics: list = [] self.module_imports: set[str] = set() self.currently_creating_reexport_data: bool = False + self.import_index: dict[str, str] = {} - self.api = api + self.api: API = api self.naming_convention = NamingConvention.SAFE_DS if convert_identifiers else NamingConvention.PYTHON self.classes_outside_package: set[str] = set() self.reexport_modules: dict[str, list[Class | Function]] = defaultdict(list) @@ -1012,20 +1013,20 @@ def _has_node_shorter_reexport(self, node: Class | Function) -> bool: return False def _is_path_connected_to_class(self, path: str, class_path: str) -> bool: - if class_path.endswith(path): + if class_path.endswith(f"/{path}") or class_path == path: return True name = path.split("/")[-1] class_name = class_path.split("/")[-1] for reexport in self.api.reexport_map: - if reexport.endswith(name): - for module in self.api.reexport_map[reexport]: + if reexport.endswith(f"/{name}") or reexport == name: + for module in self.api.reexport_map[reexport]: # pragma: no cover # Added "no cover" since I can't recreate this in the tests if ( path.startswith(module.id) and class_path.startswith(module.id) and path.lstrip(module.id).lstrip("/") == name == class_name - ): # pragma: no cover + ): return True return False @@ -1047,28 +1048,49 @@ def _add_to_imports(self, import_qname: str) -> None: module_id = self._get_module_id(get_actual_id=True).replace("/", ".") if module_id not in import_qname: - # We need the full path for an import from the same package, but we sometimes don't get enough information, - # therefore we have to search for the class and get its id - import_qname_path = import_qname.replace(".", "/") - in_package = False qname = "" - for class_id in self.api.classes: - if self._is_path_connected_to_class(import_qname_path, class_id): - qname = class_id.replace("/", ".") - - name = qname.split(".")[-1] - shortest_qname, _ = _get_shortest_public_reexport_and_alias( - reexport_map=self.api.reexport_map, - name=name, - qname=qname, - is_module=False, - ) + module_id_parts = module_id.split(".") + + # First we hope that we already found and indexed the type we are searching + if import_qname in self.import_index: + qname = self.import_index[import_qname] + + # To save performance we next try to build the possible paths the type could originate from + if not qname: + for i in range(1, len(module_id_parts)): + test_id = ".".join(module_id_parts[:-i]) + "." + import_qname + if test_id.replace(".", "/") in self.api.classes: + qname = test_id + break + + # If the tries above did not work we have to use this performance heavy way. + # We need the full path for an import from the same package, but we sometimes don't get enough + # information, therefore we have to search for the class and get its id + if not qname: + import_qname_path = import_qname.replace(".", "/") + import_path_name = import_qname_path.split("/")[-1] + for class_id in self.api.classes: + if (import_path_name == class_id.split("/")[-1] and + self._is_path_connected_to_class(import_qname_path, class_id)): + qname = class_id.replace("/", ".") + break + + in_package = False + if qname: + self.import_index[import_qname] = qname + + name = qname.split(".")[-1] + shortest_qname, _ = _get_shortest_public_reexport_and_alias( + reexport_map=self.api.reexport_map, + name=name, + qname=qname, + is_module=False, + ) - if shortest_qname: - qname = f"{shortest_qname}.{name}" + if shortest_qname: + qname = f"{shortest_qname}.{name}" - in_package = True - break + in_package = True qname = qname or import_qname diff --git a/tests/data/docstring_parser_package/googledoc.py b/tests/data/docstring_parser_package/googledoc.py index 70781cc7..e06aba66 100644 --- a/tests/data/docstring_parser_package/googledoc.py +++ b/tests/data/docstring_parser_package/googledoc.py @@ -175,6 +175,7 @@ class ClassWithVariousParameterTypes: bool_type (bool): str_type (str): float_type (float): + byte_type (bytes): multiple_types (int, bool): list_type_1 (list): list_type_2 (list[str]): @@ -195,9 +196,9 @@ class ClassWithVariousParameterTypes: """ def __init__( - self, no_type, optional_type, none_type, int_type, bool_type, str_type, float_type, multiple_types, list_type_1, - list_type_2, list_type_3, list_type_4, list_type_5, set_type_1, set_type_2, set_type_3, set_type_4, set_type_5, - tuple_type_1, tuple_type_2, tuple_type_3, tuple_type_4, any_type: Any, + self, no_type, optional_type, none_type, int_type, bool_type, str_type, float_type, byte_type, multiple_types, + list_type_1, list_type_2, list_type_3, list_type_4, list_type_5, set_type_1, set_type_2, set_type_3, set_type_4, + set_type_5, tuple_type_1, tuple_type_2, tuple_type_3, tuple_type_4, any_type: Any, optional_type_2: Optional[int], class_type, imported_type ) -> None: pass diff --git a/tests/data/docstring_parser_package/numpydoc.py b/tests/data/docstring_parser_package/numpydoc.py index b066ff52..9a7a1386 100644 --- a/tests/data/docstring_parser_package/numpydoc.py +++ b/tests/data/docstring_parser_package/numpydoc.py @@ -308,6 +308,7 @@ class ClassWithVariousParameterTypes: bool_type : bool str_type : str float_type : float + byte_type : bytes multiple_types : int, bool list_type_1 : list list_type_2 : list[str] @@ -328,9 +329,9 @@ class ClassWithVariousParameterTypes: """ def __init__( - self, no_type, optional_type, none_type, int_type, bool_type, str_type, float_type, multiple_types, list_type_1, - list_type_2, list_type_3, list_type_4, list_type_5, set_type_1, set_type_2, set_type_3, set_type_4, set_type_5, - tuple_type_1, tuple_type_2, tuple_type_3, tuple_type_4, any_type: Any, + self, no_type, optional_type, none_type, int_type, bool_type, str_type, float_type, byte_type, multiple_types, + list_type_1, list_type_2, list_type_3, list_type_4, list_type_5, set_type_1, set_type_2, set_type_3, set_type_4, + set_type_5, tuple_type_1, tuple_type_2, tuple_type_3, tuple_type_4, any_type: Any, optional_type_2: Optional[int], class_type, imported_type ) -> None: pass diff --git a/tests/data/docstring_parser_package/restdoc.py b/tests/data/docstring_parser_package/restdoc.py index 94e34a5b..9ae96883 100644 --- a/tests/data/docstring_parser_package/restdoc.py +++ b/tests/data/docstring_parser_package/restdoc.py @@ -164,6 +164,8 @@ class ClassWithVariousParameterTypes: :type str_type: str :param float_type: :type float_type: float + :param byte_type: + :type byte_type: bytes :param multiple_types: :type multiple_types: int, bool :param list_type_1: @@ -201,9 +203,9 @@ class ClassWithVariousParameterTypes: """ def __init__( - self, no_type, optional_type, none_type, int_type, bool_type, str_type, float_type, multiple_types, list_type_1, - list_type_2, list_type_3, list_type_4, list_type_5, set_type_1, set_type_2, set_type_3, set_type_4, set_type_5, - tuple_type_1, tuple_type_2, tuple_type_3, tuple_type_4, any_type: Any, + self, no_type, optional_type, none_type, int_type, bool_type, str_type, float_type, byte_type, multiple_types, + list_type_1, list_type_2, list_type_3, list_type_4, list_type_5, set_type_1, set_type_2, set_type_3, set_type_4, + set_type_5, tuple_type_1, tuple_type_2, tuple_type_3, tuple_type_4, any_type: Any, optional_type_2: Optional[int], class_type, imported_type ) -> None: pass diff --git a/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[googledoc-GOOGLE].sdsstub b/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[googledoc-GOOGLE].sdsstub index 3846e8fe..b1d7a11d 100644 --- a/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[googledoc-GOOGLE].sdsstub +++ b/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[googledoc-GOOGLE].sdsstub @@ -250,6 +250,7 @@ class ClassWithVariousParameterTypes( @PythonName("bool_type") boolType: Boolean, @PythonName("str_type") strType: String, @PythonName("float_type") floatType: Float, + @PythonName("byte_type") byteType: bytes, @PythonName("multiple_types") multipleTypes: Tuple, @PythonName("list_type_1") listType1: List, @PythonName("list_type_2") listType2: List, diff --git a/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[numpydoc-NUMPYDOC].sdsstub b/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[numpydoc-NUMPYDOC].sdsstub index 94d6886a..69da1e9a 100644 --- a/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[numpydoc-NUMPYDOC].sdsstub +++ b/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[numpydoc-NUMPYDOC].sdsstub @@ -377,6 +377,7 @@ class ClassWithVariousParameterTypes( @PythonName("bool_type") boolType: Boolean, @PythonName("str_type") strType: String, @PythonName("float_type") floatType: Float, + @PythonName("byte_type") byteType: bytes, @PythonName("multiple_types") multipleTypes: Tuple, @PythonName("list_type_1") listType1: List, @PythonName("list_type_2") listType2: List, diff --git a/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[restdoc-REST].sdsstub b/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[restdoc-REST].sdsstub index aa8939ca..748762da 100644 --- a/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[restdoc-REST].sdsstub +++ b/tests/safeds_stubgen/stubs_generator/__snapshots__/test_generate_stubs/test_stub_docstring_creation[restdoc-REST].sdsstub @@ -191,6 +191,7 @@ class ClassWithVariousParameterTypes( @PythonName("bool_type") boolType: Boolean, @PythonName("str_type") strType: String, @PythonName("float_type") floatType: Float, + @PythonName("byte_type") byteType: bytes, @PythonName("multiple_types") multipleTypes: Tuple, @PythonName("list_type_1") listType1: List, @PythonName("list_type_2") listType2: List,