diff --git a/src/McpServer.Services/Ingestion/RepoIngestor.cs b/src/McpServer.Services/Ingestion/RepoIngestor.cs index 2e544c10..f7333cc1 100644 --- a/src/McpServer.Services/Ingestion/RepoIngestor.cs +++ b/src/McpServer.Services/Ingestion/RepoIngestor.cs @@ -123,8 +123,9 @@ private static async IAsyncEnumerable EnumerateAllFilesAsync( { cancellationToken.ThrowIfCancellationRequested(); var name = Path.GetFileName(path); + var relativePath = Path.GetRelativePath(dir, path); if (name.StartsWith('.') || name == "mcp.db" || - path.Contains("bin", StringComparison.Ordinal) || path.Contains("obj", StringComparison.Ordinal)) + IsBuildArtifactPath(relativePath)) { continue; } @@ -133,6 +134,16 @@ private static async IAsyncEnumerable EnumerateAllFilesAsync( await Task.CompletedTask.ConfigureAwait(false); } + private static bool IsBuildArtifactPath(string relativePath) + { + var segments = relativePath + .Split(new[] { '/', '\\' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + + return segments.Any(static segment => + segment.Equals("bin", StringComparison.OrdinalIgnoreCase) || + segment.Equals("obj", StringComparison.OrdinalIgnoreCase)); + } + private static bool MatchesAllowlist(string relativePath, IReadOnlyList patterns) { var normalizedPath = relativePath.Replace('\\', '/'); diff --git a/src/McpServer.Support.Mcp/Ingestion/RepoIngestor.cs b/src/McpServer.Support.Mcp/Ingestion/RepoIngestor.cs index 68a6b01a..d39a351c 100644 --- a/src/McpServer.Support.Mcp/Ingestion/RepoIngestor.cs +++ b/src/McpServer.Support.Mcp/Ingestion/RepoIngestor.cs @@ -123,8 +123,9 @@ private static async IAsyncEnumerable EnumerateAllFilesAsync( { cancellationToken.ThrowIfCancellationRequested(); var name = Path.GetFileName(path); + var relativePath = Path.GetRelativePath(dir, path); if (name.StartsWith('.') || name == "mcp.db" || - path.Contains("bin", StringComparison.Ordinal) || path.Contains("obj", StringComparison.Ordinal)) + IsBuildArtifactPath(relativePath)) { continue; } @@ -133,6 +134,16 @@ private static async IAsyncEnumerable EnumerateAllFilesAsync( await Task.CompletedTask.ConfigureAwait(false); } + private static bool IsBuildArtifactPath(string relativePath) + { + var segments = relativePath + .Split(new[] { '/', '\\' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + + return segments.Any(static segment => + segment.Equals("bin", StringComparison.OrdinalIgnoreCase) || + segment.Equals("obj", StringComparison.OrdinalIgnoreCase)); + } + private static bool MatchesAllowlist(string relativePath, IReadOnlyList patterns) { var normalizedPath = relativePath.Replace('\\', '/'); diff --git a/tests/McpServer.Support.Mcp.Tests/Ingestion/RepoIngestorTests.cs b/tests/McpServer.Support.Mcp.Tests/Ingestion/RepoIngestorTests.cs index 78d69e13..bfc5fa7c 100644 --- a/tests/McpServer.Support.Mcp.Tests/Ingestion/RepoIngestorTests.cs +++ b/tests/McpServer.Support.Mcp.Tests/Ingestion/RepoIngestorTests.cs @@ -94,8 +94,8 @@ public async Task IngestAsync_ContentHash_DeterministicForSameContent() } [Fact] - public async Task IngestAsync_SkipsLargeFiles() - { + public async Task IngestAsync_SkipsLargeFiles() + { var largeContent = new string('x', 2 * 1024 * 1024); // 2MB File.WriteAllText(Path.Combine(_tempDir, "large.txt"), largeContent); File.WriteAllText(Path.Combine(_tempDir, "small.txt"), "small"); @@ -105,7 +105,22 @@ public async Task IngestAsync_SkipsLargeFiles() var results = await sut.IngestAsync().ConfigureAwait(true); - Assert.DoesNotContain(results, r => r.Doc.SourceKey.Contains("large.txt")); - Assert.Contains(results, r => r.Doc.SourceKey.Contains("small.txt")); - } -} + Assert.DoesNotContain(results, r => r.Doc.SourceKey.Contains("large.txt")); + Assert.Contains(results, r => r.Doc.SourceKey.Contains("small.txt")); + } + + [Fact] + public async Task IngestAsync_DoesNotSkipPathsThatContainBinOrObjAsSubstring() + { + var folderWithSubstring = Path.Combine(_tempDir, "binary-assets"); + Directory.CreateDirectory(folderWithSubstring); + File.WriteAllText(Path.Combine(folderWithSubstring, "keep.md"), "# keep"); + + var options = Microsoft.Extensions.Options.Options.Create(new IngestionOptions { RepoRoot = _tempDir }); + var sut = new RepoIngestor(new Chunker(), options, new WorkspaceContext(), NullLogger.Instance); + + var results = await sut.IngestAsync().ConfigureAwait(true); + + Assert.Contains(results, r => r.Doc.SourceKey.Equals("binary-assets/keep.md", StringComparison.Ordinal)); + } +}