From e0ad9060058a33edd690d3c0ab16ea4645536ee3 Mon Sep 17 00:00:00 2001 From: "Andrew V. Teylu" Date: Wed, 26 Nov 2025 22:09:39 +0000 Subject: [PATCH] Add Python callback provider for custom unit resolution This commit introduces a new callback-based unit provider that allows Python applications to implement custom unit resolution logic entirely in Python, without needing GPR project files or auto provider file lists. The callback provider accepts a user-defined Python function that maps (unit_name, unit_kind) pairs to file paths, enabling dynamic resolution of Ada source files based on application-specific logic. Key features: - Simple Python callback interface with (name, kind) -> filename mapping - Proper memory management across Python/Ada boundary using malloc/free - Comprehensive error handling with graceful exception recovery - Full UTF-8 support for unit names and file paths - Consistent PLE_Root_Index behavior with existing unit providers - Callback references retained to keep ctypes function pointers alive Implementation includes: - Ada implementation (callback_provider.adb/ads) - C FFI bindings (implementation-c-extensions.adb/ads) - Python API (unit_providers/methods, low_level_bindings) - Comprehensive test suite with 7 test scenarios - User manual documentation and examples Signed-off-by: Andrew V. 
Teylu --- .../unit_providers/low_level_bindings | 6 + extensions/python_api/unit_providers/methods | 95 +++++++ .../src/libadalang-callback_provider.adb | 176 +++++++++++++ .../src/libadalang-callback_provider.ads | 109 ++++++++ ...libadalang-implementation-c-extensions.adb | 22 ++ ...libadalang-implementation-c-extensions.ads | 29 +++ .../tests/python/callback_provider/foo.ads | 3 + .../tests/python/callback_provider/pkg.adb | 8 + .../tests/python/callback_provider/pkg.ads | 3 + .../tests/python/callback_provider/test.out | 49 ++++ .../tests/python/callback_provider/test.py | 243 ++++++++++++++++++ .../tests/python/callback_provider/test.yaml | 1 + user_manual/ada_api_unit_providers.rst | 1 + user_manual/python_api_tutorial.rst | 155 ++++++++++- 14 files changed, 897 insertions(+), 3 deletions(-) create mode 100644 extensions/src/libadalang-callback_provider.adb create mode 100644 extensions/src/libadalang-callback_provider.ads create mode 100644 testsuite/tests/python/callback_provider/foo.ads create mode 100644 testsuite/tests/python/callback_provider/pkg.adb create mode 100644 testsuite/tests/python/callback_provider/pkg.ads create mode 100644 testsuite/tests/python/callback_provider/test.out create mode 100644 testsuite/tests/python/callback_provider/test.py create mode 100644 testsuite/tests/python/callback_provider/test.yaml diff --git a/extensions/python_api/unit_providers/low_level_bindings b/extensions/python_api/unit_providers/low_level_bindings index bfa89de9f..386b87be2 100644 --- a/extensions/python_api/unit_providers/low_level_bindings +++ b/extensions/python_api/unit_providers/low_level_bindings @@ -5,3 +5,9 @@ _create_auto_provider = _import_func( [ctypes.POINTER(ctypes.c_char_p), ctypes.c_char_p], _unit_provider ) + +_create_callback_provider = _import_func( + '${capi.get_name("create_callback_provider")}', + [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_char_p], + _unit_provider +) diff --git a/extensions/python_api/unit_providers/methods 
b/extensions/python_api/unit_providers/methods index e1c168363..64c4d347c 100644 --- a/extensions/python_api/unit_providers/methods +++ b/extensions/python_api/unit_providers/methods @@ -33,3 +33,98 @@ c_value = _create_auto_provider(input_files_arg, c_charset) return cls(c_value) + + @classmethod + def from_callback(cls, callback_fn, charset=None): + """ + Return a unit provider that calls back to Python to resolve unit names. + + callback_fn should be a callable that takes (name, kind) and returns + a filename string, or None if the unit is not found. + + :param callback_fn: Callable[[str, str], Optional[str]] + Takes unit_name (str) and kind ("spec" or "body") + Returns filename or None + :param charset: Character encoding for source files (default: ISO-8859-1) + + .. note:: + Callback references are retained for the lifetime of the application + to prevent garbage collection of the ctypes function pointers. If you + create many temporary callback providers in a long-running application, + this may increase memory usage. For typical usage patterns (a small + number of long-lived providers), this is not a concern. 
+ + Example:: + + def my_resolver(name, kind): + # name is typically lowercase (e.g., "ada.text_io") + # kind is "spec" or "body" + if kind == "spec": + return f"runtime/{name.replace('.', '-')}.adas" + else: + return f"runtime/{name.replace('.', '-')}.adab" + + provider = UnitProvider.from_callback(my_resolver) + ctx = AnalysisContext(unit_provider=provider) + """ + + # Cache libc for malloc calls (done once per provider, not per callback) + # Note: Ideally this would be module-level, but Mako template constraints + # make per-provider caching the practical choice + libc = ctypes.CDLL(None) + libc.malloc.argtypes = [ctypes.c_size_t] + libc.malloc.restype = ctypes.c_void_p + + # Create a wrapper that converts from C callback to Python + @ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, + ctypes.c_int) + def c_callback_wrapper(data_ptr, name_ptr, kind_int): + """C callback that bridges to Python""" + try: + # Convert C string to Python + name = ctypes.string_at(name_ptr).decode('utf-8') + + # Convert kind int to string + kind = "spec" if kind_int == 0 else "body" + + # Call Python callback + result = callback_fn(name, kind) + + # If None, return null pointer + if result is None: + return None + + # Convert result to C string allocated with malloc + # Ada will call free() on this pointer after copying the string + result_bytes = result.encode('utf-8') + b'\0' + result_len = len(result_bytes) + + # Allocate memory using C's malloc (libc cached in closure) + ptr = libc.malloc(result_len) + if not ptr: + return None + + # Copy Python bytes to malloc'd memory + ctypes.memmove(ptr, result_bytes, result_len) + + return ptr + + except Exception: + # If Python callback raises, return None + _log_uncaught_error("UnitProvider.from_callback") + return None + + # Keep a reference to prevent garbage collection + if not hasattr(cls, '_callback_refs'): + cls._callback_refs = [] + cls._callback_refs.append(c_callback_wrapper) + + # Create the provider + c_charset 
= _unwrap_charset(charset) + c_value = _create_callback_provider( + c_callback_wrapper, + None, # data pointer (not used in this simple version) + c_charset + ) + + return cls(c_value) diff --git a/extensions/src/libadalang-callback_provider.adb b/extensions/src/libadalang-callback_provider.adb new file mode 100644 index 000000000..72bf35d2b --- /dev/null +++ b/extensions/src/libadalang-callback_provider.adb @@ -0,0 +1,176 @@ +-- +-- Copyright (C) 2025, AdaCore +-- SPDX-License-Identifier: Apache-2.0 +-- + +with Ada.Unchecked_Conversion; +with Interfaces.C; use Interfaces.C; +with Interfaces.C.Strings; use Interfaces.C.Strings; + +package body Libadalang.Callback_Provider is + + function Address_To_Chars_Ptr is new Ada.Unchecked_Conversion + (System.Address, chars_ptr); + + function Chars_Ptr_To_Address is new Ada.Unchecked_Conversion + (chars_ptr, System.Address); + + -- Import C's free() to properly free malloc'd strings + procedure C_Free (Ptr : System.Address) + with Import, Convention => C, External_Name => "free"; + + ----------------------- + -- Get_Unit_Filename -- + ----------------------- + + overriding function Get_Unit_Filename + (Provider : Callback_Unit_Provider; + Name : Text_Type; + Kind : Analysis_Unit_Kind) return String + is + -- Convert unit name to UTF-8 string + Name_UTF8 : constant String := To_UTF8 (Name); + Name_C : chars_ptr := New_String (Name_UTF8); + + -- Convert kind to integer (0 = spec, 1 = body) + Kind_Int : constant int := + (if Kind = Unit_Specification then 0 else 1); + + -- Call Python callback + Result_Addr : System.Address; + Result_Str : Unbounded_String; + Result_C : chars_ptr; + + use type System.Address; + begin + -- Call callback with the C string pointer converted to address + Result_Addr := Provider.Callback + (Provider.Data, Chars_Ptr_To_Address (Name_C), Kind_Int); + Free (Name_C); + + -- If callback returned null, unit not found + if Result_Addr = System.Null_Address then + return ""; + end if; + + -- Convert C 
string address to Ada string + -- Use unchecked conversion to convert Address to chars_ptr + Result_C := Address_To_Chars_Ptr (Result_Addr); + Result_Str := To_Unbounded_String (Value (Result_C)); + + -- Memory Management: + -- Free the returned string. The callback must allocate with malloc(). + -- The Python bindings allocate it with libc's malloc (via ctypes) so + -- that the C_Free call below releases it with the matching free(). + C_Free (Result_Addr); + + return To_String (Result_Str); + end Get_Unit_Filename; + + ----------------------- + -- Get_Unit_Location -- + ----------------------- + + overriding procedure Get_Unit_Location + (Provider : Callback_Unit_Provider; + Name : Text_Type; + Kind : Analysis_Unit_Kind; + Filename : in out Unbounded_String; + PLE_Root_Index : in out Natural) + is + Fn : constant String := Provider.Get_Unit_Filename (Name, Kind); + begin + if Fn = "" then + Filename := Null_Unbounded_String; + PLE_Root_Index := 1; + else + Filename := To_Unbounded_String (Fn); + -- Limitation: PLE_Root_Index is hardcoded to 1, which assumes + -- exactly one compilation unit per file starting at the root. + -- Files with multiple compilation units are not supported. 
+ PLE_Root_Index := 1; + end if; + end Get_Unit_Location; + + -------------- + -- Get_Unit -- + -------------- + + overriding function Get_Unit + (Provider : Callback_Unit_Provider; + Context : Analysis_Context'Class; + Name : Text_Type; + Kind : Analysis_Unit_Kind; + Charset : String := ""; + Reparse : Boolean := False) return Analysis_Unit'Class + is + Fn : constant String := Provider.Get_Unit_Filename (Name, Kind); + Actual_Charset : constant String := + (if Charset'Length = 0 + then To_String (Provider.Charset) + else Charset); + begin + if Fn = "" then + -- Return an empty unit if not found + declare + Empty_Unit : Analysis_Unit'Class := + Get_From_Buffer + (Context => Context, + Filename => To_UTF8 (Name), + Buffer => "", + Charset => Actual_Charset); + begin + return Empty_Unit; + end; + else + return Context.Get_From_File (Fn, Actual_Charset, Reparse); + end if; + end Get_Unit; + + --------------------------- + -- Get_Unit_And_PLE_Root -- + --------------------------- + + overriding procedure Get_Unit_And_PLE_Root + (Provider : Callback_Unit_Provider; + Context : Analysis_Context'Class; + Name : Text_Type; + Kind : Analysis_Unit_Kind; + Charset : String := ""; + Reparse : Boolean := False; + Unit : in out Analysis_Unit'Class; + PLE_Root_Index : in out Natural) + is + begin + Unit := Provider.Get_Unit (Context, Name, Kind, Charset, Reparse); + PLE_Root_Index := 1; + end Get_Unit_And_PLE_Root; + + ------------- + -- Release -- + ------------- + + overriding procedure Release (Provider : in out Callback_Unit_Provider) is + begin + -- Nothing to release - Python manages the callback and data + null; + end Release; + + ------------------------------ + -- Create_Callback_Provider -- + ------------------------------ + + function Create_Callback_Provider + (Callback : Get_Unit_Filename_Callback; + Data : System.Address; + Charset : String := Default_Charset) return Callback_Unit_Provider + is + begin + return Provider : Callback_Unit_Provider do + Provider.Callback 
:= Callback; + Provider.Data := Data; + Provider.Charset := To_Unbounded_String (Charset); + end return; + end Create_Callback_Provider; + +end Libadalang.Callback_Provider; diff --git a/extensions/src/libadalang-callback_provider.ads b/extensions/src/libadalang-callback_provider.ads new file mode 100644 index 000000000..1deabc2f6 --- /dev/null +++ b/extensions/src/libadalang-callback_provider.ads @@ -0,0 +1,109 @@ +-- +-- Copyright (C) 2025, AdaCore +-- SPDX-License-Identifier: Apache-2.0 +-- +-- This package provides a unit provider that calls back into Python +-- to resolve unit names to filenames. This allows Python code to +-- implement custom unit resolution logic without modifying libadalang. +-- +-- Limitation: This provider assumes one compilation unit per file. +-- Files with multiple compilation units are not supported. + +with Ada.Strings.Unbounded; use Ada.Strings.Unbounded; +with Interfaces.C; use Interfaces.C; +with System; + +with Libadalang.Analysis; use Libadalang.Analysis; +with Libadalang.Common; use Libadalang.Common; + +package Libadalang.Callback_Provider is + + use Support.Text; + + type Callback_Unit_Provider is + new Libadalang.Analysis.Unit_Provider_Interface with private; + -- Unit provider that calls back to Python for unit filename resolution + + overriding function Get_Unit_Filename + (Provider : Callback_Unit_Provider; + Name : Text_Type; + Kind : Analysis_Unit_Kind) return String; + + overriding procedure Get_Unit_Location + (Provider : Callback_Unit_Provider; + Name : Text_Type; + Kind : Analysis_Unit_Kind; + Filename : in out Unbounded_String; + PLE_Root_Index : in out Natural); + + overriding function Get_Unit + (Provider : Callback_Unit_Provider; + Context : Analysis_Context'Class; + Name : Text_Type; + Kind : Analysis_Unit_Kind; + Charset : String := ""; + Reparse : Boolean := False) return Analysis_Unit'Class; + + overriding procedure Get_Unit_And_PLE_Root + (Provider : Callback_Unit_Provider; + Context : 
Analysis_Context'Class; + Name : Text_Type; + Kind : Analysis_Unit_Kind; + Charset : String := ""; + Reparse : Boolean := False; + Unit : in out Analysis_Unit'Class; + PLE_Root_Index : in out Natural); + + overriding procedure Release (Provider : in out Callback_Unit_Provider); + + -- Callback function type for language bindings to implement + -- Parameters: + -- Data: Opaque pointer to user data (passed through from Create) + -- Name: Unit name as UTF-8 null-terminated string + -- Kind: 0 for spec, 1 for body + -- Returns: Filename as UTF-8 null-terminated string, or null for "not found" + -- + -- Memory Ownership: + -- The returned string MUST be allocated with malloc(). Ada will call + -- free() on the returned pointer after copying the string value. + -- Returning NULL (System.Null_Address) indicates unit not found. + type Get_Unit_Filename_Callback is access function + (Data : System.Address; + Name : System.Address; + Kind : int) return System.Address + with Convention => C; + + function Create_Callback_Provider + (Callback : Get_Unit_Filename_Callback; + Data : System.Address; + Charset : String := Default_Charset) return Callback_Unit_Provider; + -- Create a unit provider that calls back to Python. 
+ -- + -- Callback: Function pointer to Python callback + -- Data: Opaque pointer to Python object (passed to callback) + -- Charset: Character set for source files + + function Create_Callback_Provider_Reference + (Callback : Get_Unit_Filename_Callback; + Data : System.Address; + Charset : String := Default_Charset) return Unit_Provider_Reference; + -- Wrapper around Create_Callback_Provider to create a unit provider reference + +private + + type Callback_Unit_Provider is + new Libadalang.Analysis.Unit_Provider_Interface + with record + Callback : Get_Unit_Filename_Callback; + Data : System.Address; + Charset : Unbounded_String; + end record; + + function Create_Callback_Provider_Reference + (Callback : Get_Unit_Filename_Callback; + Data : System.Address; + Charset : String := Default_Charset) return Unit_Provider_Reference + is (Create_Unit_Provider_Reference + (Create_Callback_Provider (Callback, Data, Charset))); + +end Libadalang.Callback_Provider; diff --git a/extensions/src/libadalang-implementation-c-extensions.adb b/extensions/src/libadalang-implementation-c-extensions.adb index 948f05f3b..0a05363a0 100644 --- a/extensions/src/libadalang-implementation-c-extensions.adb +++ b/extensions/src/libadalang-implementation-c-extensions.adb @@ -25,6 +25,7 @@ with Langkit_Support.File_Readers; use Langkit_Support.File_Readers; with Libadalang.Analysis; use Libadalang.Analysis; with Libadalang.Auto_Provider; use Libadalang.Auto_Provider; +with Libadalang.Callback_Provider; use Libadalang.Callback_Provider; with Libadalang.Config_Pragmas; use Libadalang.Config_Pragmas; with Libadalang.Implementation.Extensions; with Libadalang.GPR_Impl; use Libadalang.GPR_Impl; @@ -437,6 +438,27 @@ package body Libadalang.Implementation.C.Extensions is end; end ada_create_auto_provider; + ---------------------------------- + -- ada_create_callback_provider -- + ---------------------------------- + + function ada_create_callback_provider + (Callback : ada_get_unit_filename_callback; + 
Data : System.Address; + Charset : chars_ptr) + return ada_unit_provider + is + Actual_Charset : constant String := + (if Charset = Null_Ptr then Default_Charset else Value (Charset)); + + -- Convert C callback type to Ada callback type + Ada_Callback : constant Callback_Provider.Get_Unit_Filename_Callback := + Callback_Provider.Get_Unit_Filename_Callback (Callback); + begin + return To_C_Provider + (Create_Callback_Provider_Reference (Ada_Callback, Data, Actual_Charset)); + end ada_create_callback_provider; + ---------------------------------- -- ada_gpr_project_source_files -- ---------------------------------- diff --git a/extensions/src/libadalang-implementation-c-extensions.ads b/extensions/src/libadalang-implementation-c-extensions.ads index 5fd66971d..d6a2192db 100644 --- a/extensions/src/libadalang-implementation-c-extensions.ads +++ b/extensions/src/libadalang-implementation-c-extensions.ads @@ -141,6 +141,35 @@ package Libadalang.Implementation.C.Extensions is with Export => True, Convention => C; + ---------------------------- + -- Callback unit provider -- + ---------------------------- + + type ada_get_unit_filename_callback is access function + (Data : System.Address; + Name : System.Address; + Kind : int) return System.Address + with Convention => C; + -- Callback function type for Python to implement unit resolution + -- Parameters: + -- Data: Opaque pointer to Python object + -- Name: Unit name as UTF-8 null-terminated string + -- Kind: 0 for spec, 1 for body + -- Returns: Filename as UTF-8 null-terminated string, or null for "not found" + -- + -- Memory Ownership: + -- The returned string MUST be allocated with malloc(). Ada will call + -- free() on the returned pointer after copying the string value. + -- Returning NULL (System.Null_Address) indicates unit not found. 
+ + function ada_create_callback_provider + (Callback : ada_get_unit_filename_callback; + Data : System.Address; + Charset : chars_ptr) return ada_unit_provider + with Export => True, + Convention => C; + -- Create a unit provider that calls back into Python for unit resolution + ------------------ -- Preprocessor -- ------------------ diff --git a/testsuite/tests/python/callback_provider/foo.ads b/testsuite/tests/python/callback_provider/foo.ads new file mode 100644 index 000000000..e73298323 --- /dev/null +++ b/testsuite/tests/python/callback_provider/foo.ads @@ -0,0 +1,3 @@ +package Foo is + X : Integer := 42; +end Foo; diff --git a/testsuite/tests/python/callback_provider/pkg.adb b/testsuite/tests/python/callback_provider/pkg.adb new file mode 100644 index 000000000..84a6b0907 --- /dev/null +++ b/testsuite/tests/python/callback_provider/pkg.adb @@ -0,0 +1,8 @@ +with Ada.Text_IO; + +package body Pkg is + procedure Hello is + begin + Ada.Text_IO.Put_Line ("Hello from Pkg"); + end Hello; +end Pkg; diff --git a/testsuite/tests/python/callback_provider/pkg.ads b/testsuite/tests/python/callback_provider/pkg.ads new file mode 100644 index 000000000..b54d37e5c --- /dev/null +++ b/testsuite/tests/python/callback_provider/pkg.ads @@ -0,0 +1,3 @@ +package Pkg is + procedure Hello; +end Pkg; diff --git a/testsuite/tests/python/callback_provider/test.out b/testsuite/tests/python/callback_provider/test.out new file mode 100644 index 000000000..ba6765c75 --- /dev/null +++ b/testsuite/tests/python/callback_provider/test.out @@ -0,0 +1,49 @@ +== Test: Basic callback resolution == + + Callback: pkg (spec) -> pkg.ads + Success: pkg (spec) -> CompilationUnit + Callback: pkg (body) -> pkg.adb + Success: pkg (body) -> CompilationUnit + Callback: foo (spec) -> foo.ads + Success: foo (spec) -> CompilationUnit + +== Test: Callback returns None (unit not found) == + + Callback: nonexistent (spec) -> None (not found) + Unit has root: True + Unit root kind: CompilationUnitList + Unit 
exists: True + Unit has no diagnostics: True + +== Test: Callback raises exception == + + Callback: error (spec) -> raising exception + Callback was called: True + Unit exists after exception: True + Subsequent call succeeded: True + +== Test: Charset parameter == + + Success with utf-8 charset: CompilationUnit + Success with default charset: CompilationUnit + +== Test: Multiple callback invocations == + + Total callback invocations: 5 + - pkg (spec) + - pkg (body) + - foo (spec) + - foo (body) + - unknown (spec) + +== Test: Memory stress (many invocations) == + + Total invocations: 110 + No crashes or memory issues observed + +== Test: Unicode unit name == + + Callback received: 'tëst_üñít' + UTF-8 handling verified correctly + +All tests completed successfully. diff --git a/testsuite/tests/python/callback_provider/test.py b/testsuite/tests/python/callback_provider/test.py new file mode 100644 index 000000000..0ac2eeaf9 --- /dev/null +++ b/testsuite/tests/python/callback_provider/test.py @@ -0,0 +1,243 @@ +""" +Test suite for UnitProvider.from_callback() + +This tests the callback-based unit provider which allows Python code to +resolve unit names to filenames dynamically. 
+""" +import libadalang as lal + + +SPEC = lal.AnalysisUnitKind.unit_specification +BODY = lal.AnalysisUnitKind.unit_body + + +def test_basic_callback(): + """Test basic callback resolution with simple mapping.""" + print("== Test: Basic callback resolution ==") + print("") + + # Create a simple mapping + file_map = { + ("pkg", "spec"): "pkg.ads", + ("pkg", "body"): "pkg.adb", + ("foo", "spec"): "foo.ads", + } + + def resolver(name, kind): + result = file_map.get((name, kind)) + if result: + print(f" Callback: {name} ({kind}) -> {result}") + else: + print(f" Callback: {name} ({kind}) -> None") + return result + + provider = lal.UnitProvider.from_callback(resolver) + ctx = lal.AnalysisContext(unit_provider=provider) + + # Test successful resolution + unit = ctx.get_from_provider("pkg", SPEC) + if unit.root: + print(f" Success: pkg (spec) -> {unit.root.kind_name}") + else: + print(f" Error: pkg (spec) failed to load") + for d in unit.diagnostics: + print(f" {d}") + + unit = ctx.get_from_provider("pkg", BODY) + if unit.root: + print(f" Success: pkg (body) -> {unit.root.kind_name}") + else: + print(f" Error: pkg (body) failed to load") + + unit = ctx.get_from_provider("foo", SPEC) + if unit.root: + print(f" Success: foo (spec) -> {unit.root.kind_name}") + else: + print(f" Error: foo (spec) failed to load") + + print("") + + +def test_callback_returns_none(): + """Test callback returning None for unit not found.""" + print("== Test: Callback returns None (unit not found) ==") + print("") + + def resolver(name, kind): + # Only resolve "pkg" + if name == "pkg": + return "pkg.ads" if kind == "spec" else "pkg.adb" + print(f" Callback: {name} ({kind}) -> None (not found)") + return None + + provider = lal.UnitProvider.from_callback(resolver) + ctx = lal.AnalysisContext(unit_provider=provider) + + # Try to get a unit that doesn't exist + unit = ctx.get_from_provider("nonexistent", SPEC) + # When callback returns None, libadalang creates an empty unit + print(f" Unit has root: 
{unit.root is not None}") + print(f" Unit root kind: {unit.root.kind_name if unit.root else 'None'}") + + # The unit should exist but be empty + print(f" Unit exists: {unit is not None}") + print(f" Unit has no diagnostics: {len(unit.diagnostics) == 0}") + + print("") + + +def test_callback_exception(): + """Test callback raising an exception (should return None).""" + print("== Test: Callback raises exception ==") + print("") + + call_count = [0] + + def resolver(name, kind): + call_count[0] += 1 + if name == "error": + print(f" Callback: {name} ({kind}) -> raising exception") + raise ValueError("Simulated error in callback") + return "pkg.ads" + + provider = lal.UnitProvider.from_callback(resolver) + ctx = lal.AnalysisContext(unit_provider=provider) + + # This should not crash, callback exception should be caught + unit = ctx.get_from_provider("error", SPEC) + print(f" Callback was called: {call_count[0] > 0}") + print(f" Unit exists after exception: {unit is not None}") + + # Try a successful one to ensure provider still works + unit = ctx.get_from_provider("pkg", SPEC) + print(f" Subsequent call succeeded: {unit.root is not None}") + + print("") + + +def test_charset_parameter(): + """Test that charset parameter is passed through correctly.""" + print("== Test: Charset parameter ==") + print("") + + def resolver(name, kind): + return "pkg.ads" + + # Test with explicit charset + provider = lal.UnitProvider.from_callback(resolver, charset="utf-8") + ctx = lal.AnalysisContext(unit_provider=provider) + + unit = ctx.get_from_provider("pkg", SPEC) + if unit.root: + print(f" Success with utf-8 charset: {unit.root.kind_name}") + + # Test with default charset (iso-8859-1) + provider2 = lal.UnitProvider.from_callback(resolver) + ctx2 = lal.AnalysisContext(unit_provider=provider2) + + unit2 = ctx2.get_from_provider("pkg", SPEC) + if unit2.root: + print(f" Success with default charset: {unit2.root.kind_name}") + + print("") + + +def test_multiple_calls(): + """Test that 
callback can be called multiple times correctly.""" + print("== Test: Multiple callback invocations ==") + print("") + + call_log = [] + + def resolver(name, kind): + call_log.append((name, kind)) + if name == "pkg": + return "pkg.ads" if kind == "spec" else "pkg.adb" + elif name == "foo": + return "foo.ads" if kind == "spec" else None + return None + + provider = lal.UnitProvider.from_callback(resolver) + ctx = lal.AnalysisContext(unit_provider=provider) + + # Make several calls + ctx.get_from_provider("pkg", SPEC) + ctx.get_from_provider("pkg", BODY) + ctx.get_from_provider("foo", SPEC) + ctx.get_from_provider("foo", BODY) + ctx.get_from_provider("unknown", SPEC) + + print(f" Total callback invocations: {len(call_log)}") + for name, kind in call_log: + print(f" - {name} ({kind})") + + print("") + + +def test_callback_memory(): + """Test callback with many invocations (memory stress test).""" + print("== Test: Memory stress (many invocations) ==") + print("") + + invocation_count = [0] + + def resolver(name, kind): + invocation_count[0] += 1 + # Only resolve pkg + if name == "pkg": + return "pkg.ads" if kind == "spec" else "pkg.adb" + return None + + provider = lal.UnitProvider.from_callback(resolver) + ctx = lal.AnalysisContext(unit_provider=provider) + + # Call many times with different unit names + for i in range(100): + ctx.get_from_provider(f"unit{i}", SPEC) + if i % 10 == 0: + # Also successfully resolve pkg occasionally + ctx.get_from_provider("pkg", SPEC) + + print(f" Total invocations: {invocation_count[0]}") + print(f" No crashes or memory issues observed") + + print("") + + +def test_unicode_unit_name(): + """Test callback with non-ASCII characters in unit name.""" + print("== Test: Unicode unit name ==") + print("") + + received_names = [] + + def resolver(name, kind): + received_names.append(name) + print(f" Callback received: {repr(name)}") + return None + + provider = lal.UnitProvider.from_callback(resolver) + ctx = 
lal.AnalysisContext(unit_provider=provider) + + # This likely won't happen in real Ada code, but tests the UTF-8 path + ctx.get_from_provider("tëst_üñít", SPEC) + + # Verify the UTF-8 encoding worked correctly + if received_names and received_names[0] == "tëst_üñít": + print(f" UTF-8 handling verified correctly") + else: + print(f" Warning: UTF-8 may not have round-tripped correctly") + + print("") + + +if __name__ == "__main__": + test_basic_callback() + test_callback_returns_none() + test_callback_exception() + test_charset_parameter() + test_multiple_calls() + test_callback_memory() + test_unicode_unit_name() + + print("All tests completed successfully.") diff --git a/testsuite/tests/python/callback_provider/test.yaml b/testsuite/tests/python/callback_provider/test.yaml new file mode 100644 index 000000000..30423a038 --- /dev/null +++ b/testsuite/tests/python/callback_provider/test.yaml @@ -0,0 +1 @@ +driver: python diff --git a/user_manual/ada_api_unit_providers.rst b/user_manual/ada_api_unit_providers.rst index cf3a5a1eb..ee7fb3ef0 100644 --- a/user_manual/ada_api_unit_providers.rst +++ b/user_manual/ada_api_unit_providers.rst @@ -5,3 +5,4 @@ Unit providers .. include:: generated/libadalang-project_provider.rst .. include:: generated/libadalang-auto_provider.rst +.. include:: generated/libadalang-callback_provider.rst diff --git a/user_manual/python_api_tutorial.rst b/user_manual/python_api_tutorial.rst index 9c9424858..2c2889ed4 100644 --- a/user_manual/python_api_tutorial.rst +++ b/user_manual/python_api_tutorial.rst @@ -250,7 +250,7 @@ compilation units follow the `GNAT naming convention `_ and that all source files are in the current directory. 
-If the organization of your project is not so simple, you have two options +If the organization of your project is not so simple, you have three options currently in Python: * You can use features from the auto-provider, provided by @@ -261,10 +261,16 @@ currently in Python: :meth:`libadalang.GPRProject.create_unit_provider` to use a GNAT project file. +* You can implement custom unit resolution logic in Python using + :meth:`libadalang.UnitProvider.from_callback` to define your own mapping from + unit names to source files. This is useful for custom file naming conventions, + non-standard Ada runtimes, or dynamic resolution strategies. See + :ref:`python-callback-providers` for details. + Be aware though, that because of lack of access to proper Python API to process GNAT project files, the corresponding facilities in Python are limited for the -moment. If the above options are not sufficient for you, we recommend using the -:ref:`Ada API `. +moment. For complex build configurations, we recommend using the :ref:`Ada API +`. In our program, we'll create a simple project unit provider if a project file is provided. If not, we'll use the default settings. @@ -425,3 +431,146 @@ runtime, ...) and just returns the list of source files: print(f"Looking for references to {id}:") for r in id.p_find_all_references(units): print(f"{r.kind}: {r.ref}") + +.. _python-callback-providers: + +Custom Unit Providers with Python Callbacks +============================================ + +You can implement custom unit resolution logic entirely in Python using the +``UnitProvider.from_callback()`` method. This allows you to define arbitrary +name-to-file mappings without modifying Ada code or requiring GPR project files. + +Basic Usage +----------- + +The ``from_callback`` method accepts a Python function that maps unit names to +filenames: + +.. 
code-block:: python + + from libadalang import UnitProvider, AnalysisContext + + def my_resolver(name, kind): + """ + Resolve unit names to file paths. + + :param name: Unit name (e.g., "ada.text_io") + :param kind: Either "spec" or "body" + :return: Path to source file (absolute or relative), or None if not found + """ + # Your custom logic here + if kind == "spec": + return f"/lib/ada/{name.replace('.', '-')}.ads" + else: + return f"/lib/ada/{name.replace('.', '-')}.adb" + + provider = UnitProvider.from_callback(my_resolver) + ctx = AnalysisContext(unit_provider=provider) + +The callback function is called every time libadalang needs to resolve a unit +name during semantic analysis. + +Complete Example: Custom File Extensions +------------------------------------------ + +This example shows how to use callback providers with non-standard file +extensions (for example, ``.adas`` for specs and ``.adab`` for bodies) and +custom naming conventions. + +.. code-block:: python + + from pathlib import Path + import libadalang as lal + + # Build a dictionary mapping unit names to files + unit_map = {} + + # Step 1: Add runtime library files + # Some Ada runtimes use non-standard extensions + runtime_dir = Path("runtime") + for f in runtime_dir.glob("*.adas"): + # File: ada.text_io.adas -> Unit: Ada.Text_IO + name_parts = f.stem.split('.') + unit_name = '.'.join(p.capitalize() for p in name_parts) + unit_map[(unit_name.lower(), "spec")] = str(f.absolute()) + + for f in runtime_dir.glob("*.adab"): + name_parts = f.stem.split('.') + unit_name = '.'.join(p.capitalize() for p in name_parts) + unit_map[(unit_name.lower(), "body")] = str(f.absolute()) + + # Step 2: Add project files + # IMPORTANT: Parse files to get actual unit names, as filenames + # may differ (e.g., "chess-engine.adb" contains unit "Chess.Engine") + # Note: This temporary context has no unit provider, which is sufficient + # for extracting syntactic unit names but may not fully resolve dependencies + temp_ctx 
= lal.AnalysisContext() + for f in Path("src").glob("*.ad?"): + unit = temp_ctx.get_from_file(str(f)) + if unit.root and unit.root.is_a(lal.CompilationUnit): + fqn = unit.root.p_syntactic_fully_qualified_name + if fqn: + unit_name = '.'.join(str(p) for p in fqn) + kind_enum = unit.root.p_unit_kind + kind = "spec" if kind_enum == lal.AnalysisUnitKind.unit_specification else "body" + unit_map[(unit_name.lower(), kind)] = str(f.absolute()) + + # Step 3: Create resolver function + def resolver(name, kind): + """Look up unit in our map""" + return unit_map.get((name.lower(), kind)) + + # Step 4: Create provider and context + provider = lal.UnitProvider.from_callback(resolver) + ctx = lal.AnalysisContext(unit_provider=provider) + + # Now you can analyze files with full name resolution + unit = ctx.get_from_file("src/my_file.adb") + # Name resolution works for both project files and runtime! + +Important Considerations +------------------------ + +Callback Signature +^^^^^^^^^^^^^^^^^^ + +Your callback function must accept exactly two parameters: + +- ``name`` (str): Unit name, typically lowercase with dots (e.g., ``"ada.text_io"``) +- ``kind`` (str): Either ``"spec"`` or ``"body"`` + +It must return: + +- ``str``: Path to source file (absolute or relative to current directory) +- ``None``: Unit not found + +Performance +^^^^^^^^^^^ + +The callback is invoked for every unit resolution. For better performance: + +1. Build your mapping dictionary once, upfront +2. Don't perform expensive operations (network I/O, database queries) in the callback +3. Consider caching results if dynamic lookup is needed + +.. 
code-block:: python + + # Good: Build map once + unit_map = build_mapping() + provider = UnitProvider.from_callback(lambda n, k: unit_map.get((n, k))) + + # Bad: Rebuild map on every call + provider = UnitProvider.from_callback(lambda n, k: build_mapping().get((n, k))) + +Use Cases +--------- + +Use callback providers when: + +- You have custom file naming conventions +- Files are stored in non-standard locations +- You need dynamic or computed resolution +- You're integrating with build systems that don't use GPR + +For more details and advanced patterns, see the Python API reference documentation.