From 63c5d87c12ce5bad51940c05e429e0250bc1dfed Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 01:53:24 +0000 Subject: [PATCH 1/9] Initial plan From 9d520817b645c1523e36ebe9166fe928be307a4b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 01:59:46 +0000 Subject: [PATCH 2/9] Fix stdlib detection to exclude site-packages and add tests Co-authored-by: edvilme <5952839+edvilme@users.noreply.github.com> --- bundled/tool/lsp_utils.py | 14 ++-- .../python_tests/test_stdlib_detection.py | 77 +++++++++++++++++++ 2 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 src/test/python_tests/test_stdlib_detection.py diff --git a/bundled/tool/lsp_utils.py b/bundled/tool/lsp_utils.py index f16a3c7..236fd11 100644 --- a/bundled/tool/lsp_utils.py +++ b/bundled/tool/lsp_utils.py @@ -31,11 +31,11 @@ def as_list(content: Union[Any, List[Any], Tuple[Any]]) -> List[Any]: def _get_sys_config_paths() -> List[str]: - """Returns paths from sysconfig.get_paths().""" + """Returns actual Python standard library paths from sysconfig.get_paths().""" return [ path for group, path in sysconfig.get_paths().items() - if group not in ["data", "platdata", "scripts"] + if group in ["stdlib", "platstdlib"] ] @@ -56,9 +56,7 @@ def _get_extensions_dir() -> List[str]: _stdlib_paths = set( str(pathlib.Path(p).resolve()) for p in ( - as_list(site.getsitepackages()) - + as_list(site.getusersitepackages()) - + _get_sys_config_paths() + _get_sys_config_paths() + _get_extensions_dir() ) ) @@ -85,6 +83,12 @@ def is_current_interpreter(executable) -> bool: def is_stdlib_file(file_path: str) -> bool: """Return True if the file belongs to the standard library.""" normalized_path = normalize_path(file_path, resolve_symlinks=True) + + # Exclude site-packages and dist-packages directories which contain third-party packages + path_parts = pathlib.Path(normalized_path).parts + if 'site-packages' in path_parts or 'dist-packages' in path_parts: + return False + return any(normalized_path.startswith(path) for path in _stdlib_paths) diff --git a/src/test/python_tests/test_stdlib_detection.py b/src/test/python_tests/test_stdlib_detection.py new file mode 100644 index 0000000..76b27c1 --- /dev/null +++ b/src/test/python_tests/test_stdlib_detection.py @@ -0,0 +1,77 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +""" +Test for stdlib file detection. +""" + +import os +import site +import sys +import sysconfig +import tempfile +from pathlib import Path + +# Add bundled tool to path +bundled_path = Path(__file__).parent.parent.parent.parent / "bundled" / "tool" +sys.path.insert(0, str(bundled_path)) + +from lsp_utils import is_stdlib_file + + +def test_stdlib_file_detection(): + """Test that stdlib files are correctly identified.""" + # Test with an actual stdlib file (os module) + os_file = os.__file__ + assert is_stdlib_file(os_file), f"os module file {os_file} should be detected as stdlib" + + # Test with sys module (built-in) + if hasattr(sys, '__file__'): + sys_file = sys.__file__ + assert is_stdlib_file(sys_file), f"sys module file {sys_file} should be detected as stdlib" + + +def test_site_packages_not_stdlib(): + """Test that site-packages files are NOT identified as stdlib.""" + # Get site-packages directories + site_packages = site.getsitepackages() + + for site_pkg_dir in site_packages: + # Create a hypothetical file path in site-packages + test_file = os.path.join(site_pkg_dir, "pytest", "__init__.py") + + # This should NOT be detected as stdlib + result = is_stdlib_file(test_file) + assert not result, f"File in site-packages {test_file} should NOT be detected as stdlib, but got {result}" + + +def test_user_site_packages_not_stdlib(): + """Test that user site-packages files are NOT identified as stdlib.""" + user_site = site.getusersitepackages() + + # Create a hypothetical file path in user site-packages + test_file = os.path.join(user_site, "some_package", "__init__.py") + + # This should NOT be detected as stdlib + result = is_stdlib_file(test_file) + assert not result, f"File in user site-packages {test_file} should NOT be detected as stdlib" + + +def test_random_file_not_stdlib(): + """Test that random user files are NOT identified as stdlib.""" + # Create a temporary file that's definitely not in stdlib + with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as tmp: + tmp_path = tmp.name + + try: + result = is_stdlib_file(tmp_path) + assert not result, f"Temporary file {tmp_path} should NOT be detected as stdlib" + finally: + os.unlink(tmp_path) + + +if __name__ == "__main__": + test_stdlib_file_detection() + test_site_packages_not_stdlib() + test_user_site_packages_not_stdlib() + test_random_file_not_stdlib() + print("All tests passed!") From 1b2f96c727014060d54180ae103fdba2fd08be54 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 02:01:00 +0000 Subject: [PATCH 3/9] Optimize is_stdlib_file to use string check instead of Path.parts Co-authored-by: edvilme <5952839+edvilme@users.noreply.github.com> --- bundled/tool/lsp_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bundled/tool/lsp_utils.py b/bundled/tool/lsp_utils.py index 236fd11..6bd846c 100644 --- a/bundled/tool/lsp_utils.py +++ b/bundled/tool/lsp_utils.py @@ -85,8 +85,7 @@ def is_stdlib_file(file_path: str) -> bool: normalized_path = normalize_path(file_path, resolve_symlinks=True) # Exclude site-packages and dist-packages directories which contain third-party packages - path_parts = pathlib.Path(normalized_path).parts - if 'site-packages' in path_parts or 'dist-packages' in path_parts: + if 'site-packages' in normalized_path or 'dist-packages' in normalized_path: return False return any(normalized_path.startswith(path) for path in _stdlib_paths) From ae270de68ce629bb4db076df1c2c636c2102c240 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 02:02:08 +0000 Subject: [PATCH 4/9] Use path separators for more precise site-packages detection and add false positive test Co-authored-by: edvilme <5952839+edvilme@users.noreply.github.com> --- bundled/tool/lsp_utils.py | 6 +++++- src/test/python_tests/test_stdlib_detection.py | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/bundled/tool/lsp_utils.py b/bundled/tool/lsp_utils.py index 6bd846c..a8a77a5 100644 --- a/bundled/tool/lsp_utils.py +++ b/bundled/tool/lsp_utils.py @@ -85,7 +85,11 @@ def is_stdlib_file(file_path: str) -> bool: normalized_path = normalize_path(file_path, resolve_symlinks=True) # Exclude site-packages and dist-packages directories which contain third-party packages - if 'site-packages' in normalized_path or 'dist-packages' in normalized_path: + # Use os.sep to ensure we match path segments, not arbitrary substrings + if (f'{os.sep}site-packages{os.sep}' in normalized_path or + f'{os.sep}dist-packages{os.sep}' in normalized_path or + normalized_path.endswith(f'{os.sep}site-packages') or + normalized_path.endswith(f'{os.sep}dist-packages')): return False return any(normalized_path.startswith(path) for path in _stdlib_paths) diff --git a/src/test/python_tests/test_stdlib_detection.py b/src/test/python_tests/test_stdlib_detection.py index 76b27c1..2eead98 100644 --- a/src/test/python_tests/test_stdlib_detection.py +++ b/src/test/python_tests/test_stdlib_detection.py @@ -69,9 +69,20 @@ def test_random_file_not_stdlib(): os.unlink(tmp_path) +def test_false_positive_site_packages_in_name(): + """Test that files with 'site-packages' in the project name are not falsely detected.""" + # A hypothetical project with 'site-packages' in its name + test_file = "/home/user/my-site-packages-project/src/main.py" + + # This should NOT be detected as stdlib (even though it has 'site-packages' in the name) + result = is_stdlib_file(test_file) + assert not result, f"User project file {test_file} should NOT be detected as stdlib" + + if __name__ == "__main__": test_stdlib_file_detection() test_site_packages_not_stdlib() test_user_site_packages_not_stdlib() test_random_file_not_stdlib() + test_false_positive_site_packages_in_name() print("All tests passed!") From 08970e37fb83baea13104737ed0abb6da0f282f5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 02:03:00 +0000 Subject: [PATCH 5/9] Handle edge cases: root-level site-packages and cross-platform paths in tests Co-authored-by: edvilme <5952839+edvilme@users.noreply.github.com> --- bundled/tool/lsp_utils.py | 10 +++++----- src/test/python_tests/test_stdlib_detection.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bundled/tool/lsp_utils.py b/bundled/tool/lsp_utils.py index a8a77a5..0d7027d 100644 --- a/bundled/tool/lsp_utils.py +++ b/bundled/tool/lsp_utils.py @@ -86,11 +86,11 @@ def is_stdlib_file(file_path: str) -> bool: # Exclude site-packages and dist-packages directories which contain third-party packages # Use os.sep to ensure we match path segments, not arbitrary substrings - if (f'{os.sep}site-packages{os.sep}' in normalized_path or - f'{os.sep}dist-packages{os.sep}' in normalized_path or - normalized_path.endswith(f'{os.sep}site-packages') or - normalized_path.endswith(f'{os.sep}dist-packages')): - return False + for pkg_dir in ('site-packages', 'dist-packages'): + if (f'{os.sep}{pkg_dir}{os.sep}' in normalized_path or + normalized_path.endswith(f'{os.sep}{pkg_dir}') or + normalized_path.startswith(f'{pkg_dir}{os.sep}')): + return False return any(normalized_path.startswith(path) for path in _stdlib_paths) diff --git a/src/test/python_tests/test_stdlib_detection.py b/src/test/python_tests/test_stdlib_detection.py index 2eead98..9d90ffe 100644 --- a/src/test/python_tests/test_stdlib_detection.py +++ b/src/test/python_tests/test_stdlib_detection.py @@ -71,8 +71,8 @@ def test_random_file_not_stdlib(): def test_false_positive_site_packages_in_name(): """Test that files with 'site-packages' in the project name are not falsely detected.""" - # A hypothetical project with 'site-packages' in its name - test_file = "/home/user/my-site-packages-project/src/main.py" + # A hypothetical project with 'site-packages' in its name (use os.path.join for cross-platform) + test_file = os.path.join(os.sep, "home", "user", "my-site-packages-project", "src", "main.py") # This should NOT be detected as stdlib (even though it has 'site-packages' in the name) result = is_stdlib_file(test_file) From 245a40621b73ad1e1fc4348715b375ff5b1886c6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 02:04:14 +0000 Subject: [PATCH 6/9] Remove unnecessary startswith check and improve false positive test Co-authored-by: edvilme <5952839+edvilme@users.noreply.github.com> --- bundled/tool/lsp_utils.py | 3 +-- src/test/python_tests/test_stdlib_detection.py | 17 +++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/bundled/tool/lsp_utils.py b/bundled/tool/lsp_utils.py index 0d7027d..1bd8c33 100644 --- a/bundled/tool/lsp_utils.py +++ b/bundled/tool/lsp_utils.py @@ -88,8 +88,7 @@ def is_stdlib_file(file_path: str) -> bool: # Use os.sep to ensure we match path segments, not arbitrary substrings for pkg_dir in ('site-packages', 'dist-packages'): if (f'{os.sep}{pkg_dir}{os.sep}' in normalized_path or - normalized_path.endswith(f'{os.sep}{pkg_dir}') or - normalized_path.startswith(f'{pkg_dir}{os.sep}')): + normalized_path.endswith(f'{os.sep}{pkg_dir}')): return False return any(normalized_path.startswith(path) for path in _stdlib_paths) diff --git a/src/test/python_tests/test_stdlib_detection.py b/src/test/python_tests/test_stdlib_detection.py index 9d90ffe..761c90f 100644 --- a/src/test/python_tests/test_stdlib_detection.py +++ b/src/test/python_tests/test_stdlib_detection.py @@ -70,13 +70,18 @@ def test_random_file_not_stdlib(): def test_false_positive_site_packages_in_name(): - """Test that files with 'site-packages' in the project name are not falsely detected.""" - # A hypothetical project with 'site-packages' in its name (use os.path.join for cross-platform) - test_file = os.path.join(os.sep, "home", "user", "my-site-packages-project", "src", "main.py") + """Test that files with 'site-packages' in directory name are correctly handled.""" + # Test 1: A directory with 'site-packages' as part of the name (not a path segment) + # This should NOT be detected as stdlib because it's not in an actual stdlib path + test_file_1 = os.path.join(os.sep, "home", "user", "my-site-packages-project", "src", "main.py") + result_1 = is_stdlib_file(test_file_1) + assert not result_1, f"User project file {test_file_1} should NOT be detected as stdlib" - # This should NOT be detected as stdlib (even though it has 'site-packages' in the name) - result = is_stdlib_file(test_file) - assert not result, f"User project file {test_file} should NOT be detected as stdlib" + # Test 2: A directory literally named 'site-packages-backup' with the substring but not segment + # The path segment logic should NOT match this because it's 'site-packages-backup', not 'site-packages' + test_file_2 = os.path.join(os.sep, "backup", "site-packages-backup", "mymodule.py") + result_2 = is_stdlib_file(test_file_2) + assert not result_2, f"File in site-packages-backup {test_file_2} should NOT be detected as stdlib" if __name__ == "__main__": From cee6868b267a754de9eeaad58328901561c9bcfe Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 02:05:36 +0000 Subject: [PATCH 7/9] Improve tests to validate path segment matching logic Co-authored-by: edvilme <5952839+edvilme@users.noreply.github.com> --- .../python_tests/test_stdlib_detection.py | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/test/python_tests/test_stdlib_detection.py b/src/test/python_tests/test_stdlib_detection.py index 761c90f..addd001 100644 --- a/src/test/python_tests/test_stdlib_detection.py +++ b/src/test/python_tests/test_stdlib_detection.py @@ -70,18 +70,35 @@ def test_random_file_not_stdlib(): def test_false_positive_site_packages_in_name(): - """Test that files with 'site-packages' in directory name are correctly handled.""" - # Test 1: A directory with 'site-packages' as part of the name (not a path segment) - # This should NOT be detected as stdlib because it's not in an actual stdlib path - test_file_1 = os.path.join(os.sep, "home", "user", "my-site-packages-project", "src", "main.py") - result_1 = is_stdlib_file(test_file_1) - assert not result_1, f"User project file {test_file_1} should NOT be detected as stdlib" + """Test that path segment matching works correctly and avoids false positives.""" + import sysconfig - # Test 2: A directory literally named 'site-packages-backup' with the substring but not segment - # The path segment logic should NOT match this because it's 'site-packages-backup', not 'site-packages' - test_file_2 = os.path.join(os.sep, "backup", "site-packages-backup", "mymodule.py") + # Get the actual stdlib path to ensure our test path would match if not for the exclusion + stdlib_path = sysconfig.get_path('stdlib') + if not stdlib_path: + # Fallback for systems where this might not be available + stdlib_path = os.path.join(os.sep, "usr", "lib", "python3.12") + + # Test 1: A file that would be in stdlib EXCEPT it's in a site-packages subdirectory + # This should be EXCLUDED even though it starts with the stdlib path + test_file_in_stdlib_site_packages = os.path.join(stdlib_path, "site-packages", "mymodule.py") + result_1 = is_stdlib_file(test_file_in_stdlib_site_packages) + assert not result_1, f"File in {test_file_in_stdlib_site_packages} should be EXCLUDED (site-packages)" + + # Test 2: A directory with 'site-packages' as part of the name (not a path segment) + # This would NOT match stdlib path anyway, but tests that substring matching doesn't cause issues + test_file_2 = os.path.join(os.sep, "home", "user", "my-site-packages-project", "src", "main.py") result_2 = is_stdlib_file(test_file_2) - assert not result_2, f"File in site-packages-backup {test_file_2} should NOT be detected as stdlib" + assert not result_2, f"User project file {test_file_2} should NOT be detected as stdlib" + + # Test 3: A directory literally named 'site-packages-backup' + # Should NOT match because 'site-packages-backup' is not the same segment as 'site-packages' + test_file_3 = os.path.join(stdlib_path, "site-packages-backup", "mymodule.py") + # This one is tricky - it starts with stdlib path but has 'site-packages-backup' not 'site-packages' + # With our current logic, this would NOT be excluded (which is correct) + result_3 = is_stdlib_file(test_file_3) + # This should be detected as stdlib because 'site-packages-backup' is not 'site-packages' + assert result_3, f"File in {test_file_3} should be detected as stdlib (not in site-packages)" if __name__ == "__main__": From 41788cff68bc86e61488d4c11bb17b4ed95cb41e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 02:06:32 +0000 Subject: [PATCH 8/9] Remove duplicate import and add edge case check for root-level packages Co-authored-by: edvilme <5952839+edvilme@users.noreply.github.com> --- bundled/tool/lsp_utils.py | 3 ++- src/test/python_tests/test_stdlib_detection.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/bundled/tool/lsp_utils.py b/bundled/tool/lsp_utils.py index 1bd8c33..6077017 100644 --- a/bundled/tool/lsp_utils.py +++ b/bundled/tool/lsp_utils.py @@ -88,7 +88,8 @@ def is_stdlib_file(file_path: str) -> bool: # Use os.sep to ensure we match path segments, not arbitrary substrings for pkg_dir in ('site-packages', 'dist-packages'): if (f'{os.sep}{pkg_dir}{os.sep}' in normalized_path or - normalized_path.endswith(f'{os.sep}{pkg_dir}')): + normalized_path.endswith(f'{os.sep}{pkg_dir}') or + normalized_path.startswith(f'{os.sep}{pkg_dir}{os.sep}')): return False return any(normalized_path.startswith(path) for path in _stdlib_paths) diff --git a/src/test/python_tests/test_stdlib_detection.py b/src/test/python_tests/test_stdlib_detection.py index addd001..8e551bc 100644 --- a/src/test/python_tests/test_stdlib_detection.py +++ b/src/test/python_tests/test_stdlib_detection.py @@ -71,8 +71,6 @@ def test_random_file_not_stdlib(): def test_false_positive_site_packages_in_name(): """Test that path segment matching works correctly and avoids false positives.""" - import sysconfig - # Get the actual stdlib path to ensure our test path would match if not for the exclusion stdlib_path = sysconfig.get_path('stdlib') if not stdlib_path: From b18fd09b9de1ddbfbee16ffd516e99a71659edfb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 02:08:20 +0000 Subject: [PATCH 9/9] Remove redundant condition and improve test validation Co-authored-by: edvilme <5952839+edvilme@users.noreply.github.com> --- bundled/tool/lsp_utils.py | 3 +-- src/test/python_tests/test_stdlib_detection.py | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bundled/tool/lsp_utils.py b/bundled/tool/lsp_utils.py index 6077017..1bd8c33 100644 --- a/bundled/tool/lsp_utils.py +++ b/bundled/tool/lsp_utils.py @@ -88,8 +88,7 @@ def is_stdlib_file(file_path: str) -> bool: # Use os.sep to ensure we match path segments, not arbitrary substrings for pkg_dir in ('site-packages', 'dist-packages'): if (f'{os.sep}{pkg_dir}{os.sep}' in normalized_path or - normalized_path.endswith(f'{os.sep}{pkg_dir}') or - normalized_path.startswith(f'{os.sep}{pkg_dir}{os.sep}')): + normalized_path.endswith(f'{os.sep}{pkg_dir}')): return False return any(normalized_path.startswith(path) for path in _stdlib_paths) diff --git a/src/test/python_tests/test_stdlib_detection.py b/src/test/python_tests/test_stdlib_detection.py index 8e551bc..902a2f7 100644 --- a/src/test/python_tests/test_stdlib_detection.py +++ b/src/test/python_tests/test_stdlib_detection.py @@ -92,6 +92,8 @@ def test_false_positive_site_packages_in_name(): # Test 3: A directory literally named 'site-packages-backup' # Should NOT match because 'site-packages-backup' is not the same segment as 'site-packages' test_file_3 = os.path.join(stdlib_path, "site-packages-backup", "mymodule.py") + # Verify the test path would start with stdlib path if not excluded + assert test_file_3.startswith(stdlib_path), f"Test assumption failed: {test_file_3} should start with {stdlib_path}" # This one is tricky - it starts with stdlib path but has 'site-packages-backup' not 'site-packages' # With our current logic, this would NOT be excluded (which is correct) result_3 = is_stdlib_file(test_file_3)