diff --git a/src/index.ts b/src/index.ts index 0d1b16d..42848b2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -98,6 +98,7 @@ export { fileReadTool, fileWriteTool, fileEditTool, + globTool, grepTool, } from './tool/built-in/index.js' diff --git a/src/tool/built-in/fs-walk.ts b/src/tool/built-in/fs-walk.ts new file mode 100644 index 0000000..14ec95d --- /dev/null +++ b/src/tool/built-in/fs-walk.ts @@ -0,0 +1,97 @@ +/** + * Shared recursive directory walk for built-in file tools. + * + * Used by {@link grepTool} and {@link globTool} so glob filtering and skip + * rules stay consistent. + */ + +import { readdir, stat } from 'fs/promises' +import { join } from 'path' + +/** Directories that are almost never useful to traverse for code search. */ +export const SKIP_DIRS = new Set([ + '.git', + '.svn', + '.hg', + 'node_modules', + '.next', + 'dist', + 'build', +]) + +export interface CollectFilesOptions { + /** When set, stop collecting once this many paths are gathered. */ + readonly maxFiles?: number +} + +/** + * Recursively walk `dir` and return file paths, honouring {@link SKIP_DIRS} + * and an optional filename glob pattern. + */ +export async function collectFiles( + dir: string, + glob: string | undefined, + signal: AbortSignal | undefined, + options?: CollectFilesOptions, +): Promise { + const results: string[] = [] + await walk(dir, glob, results, signal, options?.maxFiles) + return results +} + +async function walk( + dir: string, + glob: string | undefined, + results: string[], + signal: AbortSignal | undefined, + maxFiles: number | undefined, +): Promise { + if (signal?.aborted === true) return + if (maxFiles !== undefined && results.length >= maxFiles) return + + let entryNames: string[] + try { + entryNames = await readdir(dir, { encoding: 'utf8' }) + } catch { + return + } + + for (const entryName of entryNames) { + if (signal !== undefined && signal.aborted) return + if (maxFiles !== undefined && results.length >= maxFiles) return + + const fullPath = join(dir, entryName) + + let entryInfo: Awaited> + try { + entryInfo = await stat(fullPath) + } catch { + continue + } + + if (entryInfo.isDirectory()) { + if (!SKIP_DIRS.has(entryName)) { + await walk(fullPath, glob, results, signal, maxFiles) + } + } else if (entryInfo.isFile()) { + if (glob === undefined || matchesGlob(entryName, glob)) { + results.push(fullPath) + } + } + } +} +/** + * Minimal glob match supporting `*.ext` and `**` forms. + * +*/ + + +export function matchesGlob(filename: string, glob: string): boolean { + const pattern = glob.startsWith('**/') ? glob.slice(3) : glob + const regexSource = pattern + .replace(/[.+^${}()|[\]\\]/g, '\\$&') + .replace(/\*/g, '.*') + .replace(/\?/g, '.') + const re = new RegExp(`^${regexSource}$`, 'i') + return re.test(filename) +} diff --git a/src/tool/built-in/glob.ts b/src/tool/built-in/glob.ts new file mode 100644 index 0000000..f1c75c3 --- /dev/null +++ b/src/tool/built-in/glob.ts @@ -0,0 +1,99 @@ +/** + * Built-in glob tool. + * + * Lists file paths under a directory matching an optional filename glob. + * Does not read file contents — use {@link grepTool} to search inside files. + */ + +import { stat } from 'fs/promises' +import { basename, relative } from 'path' +import { z } from 'zod' +import type { ToolResult } from '../../types.js' +import { collectFiles, matchesGlob } from './fs-walk.js' +import { defineTool } from '../framework.js' + +const DEFAULT_MAX_FILES = 500 + +export const globTool = defineTool({ + name: 'glob', + description: + 'List file paths under a directory that match an optional filename glob. ' + + 'Does not read file contents — use `grep` to search inside files. ' + + 'Skips common bulky directories (node_modules, .git, dist, etc.). ' + + 'Paths in the result are relative to the process working directory. ' + + 'Results are capped by `maxFiles`.', + + inputSchema: z.object({ + path: z + .string() + .optional() + .describe( + 'Directory to list files under. Defaults to the current working directory.', + ), + pattern: z + .string() + .optional() + .describe( + 'Filename glob (e.g. "*.ts", "**/*.json"). When omitted, every file ' + + 'under the directory is listed (subject to maxFiles and skipped dirs).', + ), + maxFiles: z + .number() + .int() + .positive() + .optional() + .describe( + `Maximum number of file paths to return. Defaults to ${DEFAULT_MAX_FILES}.`, + ), + }), + + execute: async (input, context): Promise => { + const root = input.path ?? process.cwd() + const maxFiles = input.maxFiles ?? DEFAULT_MAX_FILES + const signal = context.abortSignal + + let linesOut: string[] + let truncated = false + + try { + const info = await stat(root) + if (info.isFile()) { + const name = basename(root) + if ( + input.pattern !== undefined && + !matchesGlob(name, input.pattern) + ) { + return { data: 'No files matched.', isError: false } + } + linesOut = [relative(process.cwd(), root) || root] + } else { + const collected = await collectFiles(root, input.pattern, signal, { + maxFiles: maxFiles + 1, + }) + truncated = collected.length > maxFiles + const capped = collected.slice(0, maxFiles) + linesOut = capped.map((f) => relative(process.cwd(), f) || f) + } + } catch (err) { + const message = err instanceof Error ? err.message : 'Unknown error' + return { + data: `Cannot access path "${root}": ${message}`, + isError: true, + } + } + + if (linesOut.length === 0) { + return { data: 'No files matched.', isError: false } + } + + const sorted = [...linesOut].sort((a, b) => a.localeCompare(b)) + const truncationNote = truncated + ? `\n\n(listing capped at ${maxFiles} paths; raise maxFiles for more)` + : '' + + return { + data: sorted.join('\n') + truncationNote, + isError: false, + } + }, +}) diff --git a/src/tool/built-in/grep.ts b/src/tool/built-in/grep.ts index 99bceb8..b75b402 100644 --- a/src/tool/built-in/grep.ts +++ b/src/tool/built-in/grep.ts @@ -8,28 +8,18 @@ */ import { spawn } from 'child_process' -import { readdir, readFile, stat } from 'fs/promises' -// Note: readdir is used with { encoding: 'utf8' } to return string[] directly. -import { join, relative } from 'path' +import { readFile, stat } from 'fs/promises' +import { relative } from 'path' import { z } from 'zod' import type { ToolResult } from '../../types.js' import { defineTool } from '../framework.js' +import { collectFiles } from './fs-walk.js' // --------------------------------------------------------------------------- // Constants // --------------------------------------------------------------------------- const DEFAULT_MAX_RESULTS = 100 -// Directories that are almost never useful to search inside -const SKIP_DIRS = new Set([ - '.git', - '.svn', - '.hg', - 'node_modules', - '.next', - 'dist', - 'build', -]) // --------------------------------------------------------------------------- // Tool definition @@ -42,6 +32,7 @@ export const grepTool = defineTool({ 'Returns matching lines with their file paths and 1-based line numbers. ' + 'Use the `glob` parameter to restrict the search to specific file types ' + '(e.g. "*.ts"). ' + + 'To list matching file paths without reading contents, use the `glob` tool. ' + 'Results are capped by `maxResults` to keep the response manageable.', inputSchema: z.object({ @@ -270,79 +261,6 @@ async function runNodeSearch( } } -// --------------------------------------------------------------------------- -// File collection with glob filtering -// --------------------------------------------------------------------------- - -/** - * Recursively walk `dir` and return file paths, honouring `SKIP_DIRS` and an - * optional glob pattern. - */ -async function collectFiles( - dir: string, - glob: string | undefined, - signal: AbortSignal | undefined, -): Promise { - const results: string[] = [] - await walk(dir, glob, results, signal) - return results -} - -async function walk( - dir: string, - glob: string | undefined, - results: string[], - signal: AbortSignal | undefined, -): Promise { - if (signal?.aborted === true) return - - let entryNames: string[] - try { - // Read as plain strings so we don't have to deal with Buffer Dirent variants. - entryNames = await readdir(dir, { encoding: 'utf8' }) - } catch { - return - } - - for (const entryName of entryNames) { - if (signal !== undefined && signal.aborted) return - - const fullPath = join(dir, entryName) - - let entryInfo: Awaited> - try { - entryInfo = await stat(fullPath) - } catch { - continue - } - - if (entryInfo.isDirectory()) { - if (!SKIP_DIRS.has(entryName)) { - await walk(fullPath, glob, results, signal) - } - } else if (entryInfo.isFile()) { - if (glob === undefined || matchesGlob(entryName, glob)) { - results.push(fullPath) - } - } - } -} - -/** - * Minimal glob match supporting `*.ext` and `**\/` forms. - */ -function matchesGlob(filename: string, glob: string): boolean { - // Strip leading **/ prefix — we already recurse into all directories - const pattern = glob.startsWith('**/') ? glob.slice(3) : glob - // Convert shell glob characters to regex equivalents - const regexSource = pattern - .replace(/[.+^${}()|[\]\\]/g, '\\$&') // escape special regex chars first - .replace(/\*/g, '.*') // * -> .* - .replace(/\?/g, '.') // ? -> . - const re = new RegExp(`^${regexSource}$`, 'i') - return re.test(filename) -} - // --------------------------------------------------------------------------- // ripgrep availability check (cached per process) // --------------------------------------------------------------------------- diff --git a/src/tool/built-in/index.ts b/src/tool/built-in/index.ts index 06ff764..b9c0977 100644 --- a/src/tool/built-in/index.ts +++ b/src/tool/built-in/index.ts @@ -11,9 +11,10 @@ import { bashTool } from './bash.js' import { fileEditTool } from './file-edit.js' import { fileReadTool } from './file-read.js' import { fileWriteTool } from './file-write.js' +import { globTool } from './glob.js' import { grepTool } from './grep.js' -export { bashTool, fileEditTool, fileReadTool, fileWriteTool, grepTool } +export { bashTool, fileEditTool, fileReadTool, fileWriteTool, globTool, grepTool } /** * The ordered list of all built-in tools. Import this when you need to @@ -29,6 +30,7 @@ export const BUILT_IN_TOOLS: ToolDefinition[] = [ fileWriteTool, fileEditTool, grepTool, + globTool, ] /** diff --git a/tests/built-in-tools.test.ts b/tests/built-in-tools.test.ts index 440fd42..e644726 100644 --- a/tests/built-in-tools.test.ts +++ b/tests/built-in-tools.test.ts @@ -6,6 +6,7 @@ import { fileReadTool } from '../src/tool/built-in/file-read.js' import { fileWriteTool } from '../src/tool/built-in/file-write.js' import { fileEditTool } from '../src/tool/built-in/file-edit.js' import { bashTool } from '../src/tool/built-in/bash.js' +import { globTool } from '../src/tool/built-in/glob.js' import { grepTool } from '../src/tool/built-in/grep.js' import { registerBuiltInTools, BUILT_IN_TOOLS } from '../src/tool/built-in/index.js' import { ToolRegistry } from '../src/tool/framework.js' @@ -34,7 +35,7 @@ afterEach(async () => { // =========================================================================== describe('registerBuiltInTools', () => { - it('registers all 5 built-in tools', () => { + it('registers all 6 built-in tools', () => { const registry = new ToolRegistry() registerBuiltInTools(registry) @@ -43,10 +44,11 @@ describe('registerBuiltInTools', () => { expect(registry.get('file_write')).toBeDefined() expect(registry.get('file_edit')).toBeDefined() expect(registry.get('grep')).toBeDefined() + expect(registry.get('glob')).toBeDefined() }) it('BUILT_IN_TOOLS has correct length', () => { - expect(BUILT_IN_TOOLS).toHaveLength(5) + expect(BUILT_IN_TOOLS).toHaveLength(6) }) }) @@ -305,6 +307,102 @@ describe('bash', () => { }) }) +// =========================================================================== +// glob +// =========================================================================== + +describe('glob', () => { + it('lists files matching a pattern without reading contents', async () => { + await writeFile(join(tmpDir, 'a.ts'), 'SECRET_CONTENT_SHOULD_NOT_APPEAR') + await writeFile(join(tmpDir, 'b.md'), 'also secret') + + const result = await globTool.execute( + { path: tmpDir, pattern: '*.ts' }, + defaultContext, + ) + + expect(result.isError).toBe(false) + expect(result.data).toContain('.ts') + expect(result.data).not.toContain('SECRET') + expect(result.data).not.toContain('b.md') + }) + + it('lists all files when pattern is omitted', async () => { + await writeFile(join(tmpDir, 'x.txt'), 'x') + await writeFile(join(tmpDir, 'y.txt'), 'y') + + const result = await globTool.execute({ path: tmpDir }, defaultContext) + + expect(result.isError).toBe(false) + expect(result.data).toContain('x.txt') + expect(result.data).toContain('y.txt') + }) + + it('lists a single file when path is a file', async () => { + const filePath = join(tmpDir, 'only.ts') + await writeFile(filePath, 'body') + + const result = await globTool.execute({ path: filePath }, defaultContext) + + expect(result.isError).toBe(false) + expect(result.data).toContain('only.ts') + }) + + it('returns no match when single file does not match pattern', async () => { + const filePath = join(tmpDir, 'readme.md') + await writeFile(filePath, '# doc') + + const result = await globTool.execute( + { path: filePath, pattern: '*.ts' }, + defaultContext, + ) + + expect(result.isError).toBe(false) + expect(result.data).toContain('No files matched') + }) + + it('recurses into subdirectories', async () => { + const sub = join(tmpDir, 'nested') + const { mkdir } = await import('fs/promises') + await mkdir(sub, { recursive: true }) + await writeFile(join(sub, 'deep.ts'), '') + + const result = await globTool.execute( + { path: tmpDir, pattern: '*.ts' }, + defaultContext, + ) + + expect(result.isError).toBe(false) + expect(result.data).toContain('deep.ts') + }) + + it('errors on inaccessible path', async () => { + const result = await globTool.execute( + { path: '/nonexistent/path/xyz' }, + defaultContext, + ) + + expect(result.isError).toBe(true) + expect(result.data).toContain('Cannot access path') + }) + + it('notes truncation when maxFiles is exceeded', async () => { + for (let i = 0; i < 5; i++) { + await writeFile(join(tmpDir, `f${i}.txt`), '') + } + + const result = await globTool.execute( + { path: tmpDir, pattern: '*.txt', maxFiles: 3 }, + defaultContext, + ) + + expect(result.isError).toBe(false) + const lines = (result.data as string).split('\n').filter((l) => l.endsWith('.txt')) + expect(lines).toHaveLength(3) + expect(result.data).toContain('capped at 3') + }) +}) + // =========================================================================== // grep (Node.js fallback — tests do not depend on ripgrep availability) // =========================================================================== diff --git a/tests/tool-filtering.test.ts b/tests/tool-filtering.test.ts index 4f3b6f0..f42b8e1 100644 --- a/tests/tool-filtering.test.ts +++ b/tests/tool-filtering.test.ts @@ -61,6 +61,13 @@ function createTestTools() { execute: async () => ({ data: 'matches', isError: false }), })) + registry.register(defineTool({ + name: 'glob', + description: 'List paths', + inputSchema: z.object({ path: z.string().optional() }), + execute: async () => ({ data: 'paths', isError: false }), + })) + registry.register(defineTool({ name: 'bash', description: 'Run shell command', @@ -110,7 +117,15 @@ describe('Tool filtering', () => { const tools = (runner as any).resolveTools() as LLMToolDef[] const toolNames = tools.map((t: LLMToolDef) => t.name).sort() - expect(toolNames).toEqual(['bash', 'custom_tool', 'file_edit', 'file_read', 'file_write', 'grep']) + expect(toolNames).toEqual([ + 'bash', + 'custom_tool', + 'file_edit', + 'file_read', + 'file_write', + 'glob', + 'grep', + ]) }) }) @@ -124,7 +139,7 @@ describe('Tool filtering', () => { const tools = (runner as any).resolveTools() as LLMToolDef[] const toolNames = tools.map((t: LLMToolDef) => t.name).sort() - expect(toolNames).toEqual(['custom_tool', 'file_read', 'grep']) + expect(toolNames).toEqual(['custom_tool', 'file_read', 'glob', 'grep']) }) it('readwrite preset filters correctly', () => { @@ -136,7 +151,14 @@ describe('Tool filtering', () => { const tools = (runner as any).resolveTools() as LLMToolDef[] const toolNames = tools.map((t: LLMToolDef) => t.name).sort() - expect(toolNames).toEqual(['custom_tool', 'file_edit', 'file_read', 'file_write', 'grep']) + expect(toolNames).toEqual([ + 'custom_tool', + 'file_edit', + 'file_read', + 'file_write', + 'glob', + 'grep', + ]) }) it('full preset filters correctly', () => { @@ -148,7 +170,15 @@ describe('Tool filtering', () => { const tools = (runner as any).resolveTools() as LLMToolDef[] const toolNames = tools.map((t: LLMToolDef) => t.name).sort() - expect(toolNames).toEqual(['bash', 'custom_tool', 'file_edit', 'file_read', 'file_write', 'grep']) + expect(toolNames).toEqual([ + 'bash', + 'custom_tool', + 'file_edit', + 'file_read', + 'file_write', + 'glob', + 'grep', + ]) }) }) @@ -186,7 +216,14 @@ describe('Tool filtering', () => { const tools = (runner as any).resolveTools() as LLMToolDef[] const toolNames = tools.map((t: LLMToolDef) => t.name).sort() - expect(toolNames).toEqual(['custom_tool', 'file_edit', 'file_read', 'file_write', 'grep']) + expect(toolNames).toEqual([ + 'custom_tool', + 'file_edit', + 'file_read', + 'file_write', + 'glob', + 'grep', + ]) }) it('empty denylist returns all tools', () => { @@ -196,13 +233,13 @@ describe('Tool filtering', () => { }) const tools = (runner as any).resolveTools() - expect(tools).toHaveLength(6) // All registered tools + expect(tools).toHaveLength(7) // All registered tools }) }) describe('resolveTools - combined filtering (preset + allowlist + denylist)', () => { it('preset + allowlist + denylist work together', () => { - // Start with readwrite preset: ['file_read', 'file_write', 'file_edit', 'grep'] + // Start with readwrite preset: ['file_read', 'file_write', 'file_edit', 'grep', 'glob'] // Then allowlist: intersect with ['file_read', 'file_write', 'grep'] = ['file_read', 'file_write', 'grep'] // Then denylist: subtract ['file_write'] = ['file_read', 'grep'] const runner = new AgentRunner(mockAdapter, registry, executor, { @@ -219,7 +256,7 @@ describe('Tool filtering', () => { }) it('preset filters first, then allowlist intersects, then denylist subtracts', () => { - // Start with readonly preset: ['file_read', 'grep'] + // Start with readonly preset: ['file_read', 'grep', 'glob'] // Allowlist intersect with ['file_read', 'bash']: ['file_read'] // Denylist subtract ['file_read']: [] const runner = new AgentRunner(mockAdapter, registry, executor, {