diff --git a/bundled/tool/lsp_utils.py b/bundled/tool/lsp_utils.py
index f16a3c7..1bd8c33 100644
--- a/bundled/tool/lsp_utils.py
+++ b/bundled/tool/lsp_utils.py
@@ -31,11 +31,11 @@ def as_list(content: Union[Any, List[Any], Tuple[Any]]) -> List[Any]:
 
 
 def _get_sys_config_paths() -> List[str]:
-    """Returns paths from sysconfig.get_paths()."""
+    """Returns actual Python standard library paths from sysconfig.get_paths()."""
     return [
         path
         for group, path in sysconfig.get_paths().items()
-        if group not in ["data", "platdata", "scripts"]
+        if group in ["stdlib", "platstdlib"]
     ]
 
 
@@ -56,9 +56,7 @@ def _get_extensions_dir() -> List[str]:
 _stdlib_paths = set(
     str(pathlib.Path(p).resolve())
     for p in (
-        as_list(site.getsitepackages())
-        + as_list(site.getusersitepackages())
-        + _get_sys_config_paths()
+        _get_sys_config_paths()
         + _get_extensions_dir()
     )
 )
@@ -85,6 +83,22 @@ def is_current_interpreter(executable) -> bool:
 def is_stdlib_file(file_path: str) -> bool:
     """Return True if the file belongs to the standard library."""
     normalized_path = normalize_path(file_path, resolve_symlinks=True)
-    return any(normalized_path.startswith(path) for path in _stdlib_paths)
+
+    # Third-party packages live in site-packages/dist-packages directories,
+    # which may sit inside a stdlib directory. Match whole path segments
+    # (via os.sep) so names like "site-packages-backup" are not excluded.
+    for pkg_dir in ("site-packages", "dist-packages"):
+        if (
+            f"{os.sep}{pkg_dir}{os.sep}" in normalized_path
+            or normalized_path.endswith(f"{os.sep}{pkg_dir}")
+        ):
+            return False
+
+    # Require a directory boundary after the stdlib path so a sibling such
+    # as "python3.12-extra" is not mistaken for "python3.12".
+    return any(
+        normalized_path == path or normalized_path.startswith(path + os.sep)
+        for path in _stdlib_paths
+    )
 
 
diff --git a/src/test/python_tests/test_stdlib_detection.py b/src/test/python_tests/test_stdlib_detection.py
new file mode 100644
index 0000000..902a2f7
--- /dev/null
+++ b/src/test/python_tests/test_stdlib_detection.py
@@ -0,0 +1,120 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+"""
+Test for stdlib file detection.
+"""
+
+import os
+import site
+import sys
+import sysconfig
+import tempfile
+from pathlib import Path
+
+# Add bundled tool to path
+bundled_path = Path(__file__).parent.parent.parent.parent / "bundled" / "tool"
+sys.path.insert(0, str(bundled_path))
+
+from lsp_utils import is_stdlib_file
+
+
+def test_stdlib_file_detection():
+    """Test that stdlib files are correctly identified."""
+    # Test with an actual stdlib file (os module)
+    os_file = os.__file__
+    assert is_stdlib_file(os_file), f"os module file {os_file} should be detected as stdlib"
+
+    # Built-in modules such as sys have no __file__, so cover a second
+    # pure-Python stdlib module instead of a branch that never executes.
+    tempfile_file = tempfile.__file__
+    assert is_stdlib_file(tempfile_file), f"tempfile module file {tempfile_file} should be detected as stdlib"
+
+
+def test_site_packages_not_stdlib():
+    """Test that site-packages files are NOT identified as stdlib."""
+    # Get site-packages directories
+    site_packages = site.getsitepackages()
+
+    for site_pkg_dir in site_packages:
+        # Create a hypothetical file path in site-packages
+        test_file = os.path.join(site_pkg_dir, "pytest", "__init__.py")
+
+        # This should NOT be detected as stdlib
+        result = is_stdlib_file(test_file)
+        assert not result, f"File in site-packages {test_file} should NOT be detected as stdlib, but got {result}"
+
+
+def test_user_site_packages_not_stdlib():
+    """Test that user site-packages files are NOT identified as stdlib."""
+    user_site = site.getusersitepackages()
+
+    # Create a hypothetical file path in user site-packages
+    test_file = os.path.join(user_site, "some_package", "__init__.py")
+
+    # This should NOT be detected as stdlib
+    result = is_stdlib_file(test_file)
+    assert not result, f"File in user site-packages {test_file} should NOT be detected as stdlib"
+
+
+def test_random_file_not_stdlib():
+    """Test that random user files are NOT identified as stdlib."""
+    # Create a temporary file that's definitely not in stdlib
+    with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as tmp:
+        tmp_path = tmp.name
+
+    try:
+        result = is_stdlib_file(tmp_path)
+        assert not result, f"Temporary file {tmp_path} should NOT be detected as stdlib"
+    finally:
+        os.unlink(tmp_path)
+
+
+def test_false_positive_site_packages_in_name():
+    """Test that path segment matching works correctly and avoids false positives."""
+    # Get the actual stdlib path to ensure our test path would match if not for the exclusion
+    stdlib_path = sysconfig.get_path('stdlib')
+    if not stdlib_path:
+        # Fallback for systems where this might not be available
+        stdlib_path = os.path.join(os.sep, "usr", "lib", "python3.12")
+
+    # Test 1: A file that would be in stdlib EXCEPT it's in a site-packages subdirectory
+    # This should be EXCLUDED even though it starts with the stdlib path
+    test_file_in_stdlib_site_packages = os.path.join(stdlib_path, "site-packages", "mymodule.py")
+    result_1 = is_stdlib_file(test_file_in_stdlib_site_packages)
+    assert not result_1, f"File in {test_file_in_stdlib_site_packages} should be EXCLUDED (site-packages)"
+
+    # Test 2: A directory with 'site-packages' as part of the name (not a path segment)
+    # This would NOT match stdlib path anyway, but tests that substring matching doesn't cause issues
+    test_file_2 = os.path.join(os.sep, "home", "user", "my-site-packages-project", "src", "main.py")
+    result_2 = is_stdlib_file(test_file_2)
+    assert not result_2, f"User project file {test_file_2} should NOT be detected as stdlib"
+
+    # Test 3: A directory literally named 'site-packages-backup'
+    # Should NOT match because 'site-packages-backup' is not the same segment as 'site-packages'
+    test_file_3 = os.path.join(stdlib_path, "site-packages-backup", "mymodule.py")
+    # Verify the test path would start with stdlib path if not excluded
+    assert test_file_3.startswith(stdlib_path), f"Test assumption failed: {test_file_3} should start with {stdlib_path}"
+    # This one is tricky - it starts with stdlib path but has 'site-packages-backup' not 'site-packages'
+    # With our current logic, this would NOT be excluded (which is correct)
+    result_3 = is_stdlib_file(test_file_3)
+    # This should be detected as stdlib because 'site-packages-backup' is not 'site-packages'
+    assert result_3, f"File in {test_file_3} should be detected as stdlib (not in site-packages)"
+
+
+def test_sibling_directory_not_stdlib():
+    """Test that a directory sharing only a name prefix with stdlib is NOT stdlib."""
+    stdlib_path = sysconfig.get_path("stdlib")
+    # e.g. /usr/lib/python3.12-extra/mymodule.py next to /usr/lib/python3.12
+    test_file = os.path.join(f"{stdlib_path}-extra", "mymodule.py")
+    result = is_stdlib_file(test_file)
+    assert not result, f"File in sibling directory {test_file} should NOT be detected as stdlib"
+
+
+if __name__ == "__main__":
+    test_stdlib_file_detection()
+    test_site_packages_not_stdlib()
+    test_user_site_packages_not_stdlib()
+    test_random_file_not_stdlib()
+    test_false_positive_site_packages_in_name()
+    test_sibling_directory_not_stdlib()
+    print("All tests passed!")