diff options
Diffstat (limited to 'packages/cli/src/tools/grep.tool.ts')
| -rw-r--r-- | packages/cli/src/tools/grep.tool.ts | 493 |
1 files changed, 493 insertions, 0 deletions
// From diff: packages/cli/src/tools/grep.tool.ts (new file mode 100644, index 00000000..50a62c47, @@ -0,0 +1,493 @@)
import fs from 'fs'; // Used for sync checks in validation
import fsPromises from 'fs/promises'; // Used for async operations in fallback
import path from 'path';
import { EOL } from 'os'; // Retained import; output parsing now splits on /\r?\n/ instead
import { spawn } from 'child_process'; // Used for git grep and system grep
import fastGlob from 'fast-glob'; // Used for JS fallback file searching
import { ToolResult } from './ToolResult.js';
import { BaseTool } from './BaseTool.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { makeRelative, shortenPath } from '../utils/paths.js';

// --- Interfaces (kept separate for clarity) ---

/**
 * Parameters for the GrepTool
 */
export interface GrepToolParams {
  /**
   * The regular expression pattern to search for in file contents
   */
  pattern: string;

  /**
   * The directory to search in (optional, defaults to current directory relative to root)
   */
  path?: string;

  /**
   * File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")
   */
  include?: string;
}

/**
 * Result object for a single grep match
 */
interface GrepMatch {
  /** Path of the matching file, relative to the directory that was searched. */
  filePath: string;
  /** 1-based line number of the match. */
  lineNumber: number;
  /** Full text of the matching line. */
  line: string;
}

/**
 * Result from the GrepTool
 */
export interface GrepToolResult extends ToolResult {
}

/**
 * Raw outcome of a finished child process.
 */
interface CommandOutput {
  /** Exit code, or null when the process was terminated by a signal. */
  code: number | null;
  stdout: string;
  stderr: string;
}

// --- Module-private helpers ---

/** Version-control / dependency directories skipped by the system grep and JS strategies. */
const EXCLUDED_DIRECTORIES = ['.git', 'node_modules', 'bower_components', '.svn', '.hg'];

/** Extracts a printable message from an arbitrary thrown value. */
function getErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Returns the string `code` property of a thrown value (e.g. 'ENOENT'), if it has one. */
function getErrorCode(error: unknown): string | undefined {
  if (typeof error === 'object' && error !== null && 'code' in error) {
    const code = (error as { code?: unknown }).code;
    return typeof code === 'string' ? code : undefined;
  }
  return undefined;
}

// --- GrepTool Class ---

/**
 * Implementation of the GrepTool that searches file contents using git grep, system grep, or JS fallback.
 *
 * All three strategies search case-insensitively and report paths relative to the searched directory.
 */
export class GrepTool extends BaseTool<GrepToolParams, GrepToolResult> {
  private readonly rootDirectory: string;

  /**
   * Creates a new instance of the GrepTool
   * @param rootDirectory Root directory to ground this tool in. All operations will be restricted to this directory.
   */
  constructor(rootDirectory: string) {
    super(
      'search_file_content',
      'SearchText',
      'Searches for a regular expression pattern within the content of files in a specified directory (or current working directory). Can filter files by a glob pattern. Returns the lines containing matches, along with their file paths and line numbers.',
      {
        properties: {
          pattern: {
            description: 'The regular expression (regex) pattern to search for within file contents (e.g., \'function\\s+myFunction\', \'import\\s+\\{.*\\}\\s+from\\s+.*\').',
            type: 'string'
          },
          path: {
            description: 'Optional: The absolute path to the directory to search within. If omitted, searches the current working directory.',
            type: 'string'
          },
          include: {
            description: 'Optional: A glob pattern to filter which files are searched (e.g., \'*.js\', \'*.{ts,tsx}\', \'src/**\'). If omitted, searches all files (respecting potential global ignores).',
            type: 'string'
          }
        },
        required: ['pattern'],
        type: 'object'
      }
    );
    // Ensure rootDirectory is absolute and normalized
    this.rootDirectory = path.resolve(rootDirectory);
  }

  // --- Validation Methods ---

  /**
   * Checks if a path is within the root directory and resolves it.
   * @param relativePath Path relative to the root directory (or undefined for root).
   * @returns The absolute path if valid and exists.
   * @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
   */
  private resolveAndValidatePath(relativePath?: string): string {
    const targetPath = path.resolve(this.rootDirectory, relativePath || '.');

    // Security Check: Ensure the resolved path is still within the root directory.
    // A plain string-prefix test is not enough: "/root/project-evil" starts with
    // "/root/project". Comparing via path.relative only accepts real descendants.
    const relativeToRoot = path.relative(this.rootDirectory, targetPath);
    const escapesRoot =
      relativeToRoot === '..' ||
      relativeToRoot.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativeToRoot); // e.g. a different drive on Windows
    if (escapesRoot) {
      throw new Error(`Path validation failed: Attempted path "${relativePath || '.'}" resolves outside the allowed root directory "${this.rootDirectory}".`);
    }

    // Check existence and type after resolving
    let isDirectory: boolean;
    try {
      isDirectory = fs.statSync(targetPath).isDirectory();
    } catch (err: unknown) {
      if (getErrorCode(err) === 'ENOENT') {
        throw new Error(`Path does not exist: ${targetPath}`);
      }
      throw new Error(`Failed to access path stats for ${targetPath}: ${getErrorMessage(err)}`);
    }
    // Kept outside the try block so this error is not re-wrapped as a stat failure.
    if (!isDirectory) {
      throw new Error(`Path is not a directory: ${targetPath}`);
    }

    return targetPath;
  }

  /**
   * Validates the parameters for the tool
   * @param params Parameters to validate
   * @returns An error message string if invalid, null otherwise
   */
  invalidParams(params: GrepToolParams): string | null {
    if (this.schema.parameters && !SchemaValidator.validate(this.schema.parameters as Record<string, unknown>, params)) {
      return "Parameters failed schema validation.";
    }

    try {
      new RegExp(params.pattern);
    } catch (error: unknown) {
      return `Invalid regular expression pattern provided: ${params.pattern}. Error: ${getErrorMessage(error)}`;
    }

    try {
      this.resolveAndValidatePath(params.path);
    } catch (error: unknown) {
      return getErrorMessage(error);
    }

    return null; // Parameters are valid
  }


  // --- Core Execution ---

  /**
   * Executes the grep search with the given parameters
   * @param params Parameters for the grep search
   * @returns Result of the grep search; failures are reported in the result rather than thrown
   */
  async execute(params: GrepToolParams): Promise<GrepToolResult> {
    const validationError = this.invalidParams(params);
    if (validationError) {
      console.error(`GrepTool Parameter Validation Failed: ${validationError}`);
      return {
        llmContent: `Error: Invalid parameters provided. Reason: ${validationError}`,
        returnDisplay: `**Error:** Failed to execute tool.`
      };
    }

    try {
      const searchDirAbs = this.resolveAndValidatePath(params.path);
      const searchDirDisplay = params.path || '.';
      const filterSuffix = params.include ? ` (filter: "${params.include}")` : '';

      const matches: GrepMatch[] = await this.performGrepSearch({
        pattern: params.pattern,
        path: searchDirAbs,
        include: params.include,
      });

      if (matches.length === 0) {
        const noMatchMsg = `No matches found for pattern "${params.pattern}" in path "${searchDirDisplay}"${filterSuffix}.`;
        const noMatchUser = `No matches found`;
        return { llmContent: noMatchMsg, returnDisplay: noMatchUser };
      }

      // Group by file. A Map keeps first-seen order even for numeric-looking file names,
      // which a plain object iterated with for...in would reorder.
      const matchesByFile = new Map<string, GrepMatch[]>();
      for (const match of matches) {
        const relativeFilePath = path.relative(searchDirAbs, path.resolve(searchDirAbs, match.filePath)) || path.basename(match.filePath);
        const fileMatches = matchesByFile.get(relativeFilePath);
        if (fileMatches) {
          fileMatches.push(match);
        } else {
          matchesByFile.set(relativeFilePath, [match]);
        }
      }

      let llmContent = `Found ${matches.length} match(es) for pattern "${params.pattern}" in path "${searchDirDisplay}"${filterSuffix}:\n---\n`;

      for (const [filePath, fileMatches] of matchesByFile) {
        // Sort once per file instead of after every insertion.
        fileMatches.sort((a, b) => a.lineNumber - b.lineNumber);
        llmContent += `File: ${filePath}\n`;
        for (const match of fileMatches) {
          llmContent += `L${match.lineNumber}: ${match.line.trim()}\n`;
        }
        llmContent += '---\n';
      }

      return { llmContent: llmContent.trim(), returnDisplay: `Found ${matches.length} match(es)` };

    } catch (error: unknown) {
      console.error(`Error during GrepTool execution: ${error}`);
      const errorMessage = getErrorMessage(error);
      return {
        llmContent: `Error during grep search operation: ${errorMessage}`,
        returnDisplay: errorMessage
      };
    }
  }


  // --- Inlined Grep Logic and Helpers ---

  /**
   * Checks if a command is available in the system's PATH.
   * @param command The command name (e.g., 'git', 'grep').
   * @returns True if the command is available, false otherwise.
   */
  private isCommandAvailable(command: string): Promise<boolean> {
    return new Promise((resolve) => {
      const isWindows = process.platform === 'win32';
      // `command` is a shell builtin, so on POSIX it has to be run through `sh`.
      // The name is passed as a positional argument ($1) rather than interpolated
      // into the script, so it is never parsed as shell syntax.
      const checkCommand = isWindows ? 'where' : 'sh';
      const checkArgs = isWindows ? [command] : ['-c', 'command -v "$1"', 'sh', command];
      try {
        const child = spawn(checkCommand, checkArgs, { stdio: 'ignore', shell: isWindows });
        child.on('close', (code) => resolve(code === 0));
        child.on('error', () => resolve(false));
      } catch (e: unknown) {
        resolve(false);
      }
    });
  }

  /**
   * Checks if a directory or its parent directories contain a .git folder.
   * @param dirPath Absolute path to the directory to check.
   * @returns True if it's a Git repository, false otherwise.
   */
  private async isGitRepository(dirPath: string): Promise<boolean> {
    let currentPath = path.resolve(dirPath);
    const root = path.parse(currentPath).root;

    for (;;) {
      const gitPath = path.join(currentPath, '.git');
      try {
        const stats = await fsPromises.stat(gitPath);
        // `.git` may be a directory or a regular file.
        return stats.isDirectory() || stats.isFile();
      } catch (err: unknown) {
        // ENOENT just means "not at this level"; anything else aborts the lookup.
        if (getErrorCode(err) !== 'ENOENT') {
          console.error(`Error checking for .git in ${currentPath}: ${getErrorMessage(err)}`);
          return false;
        }
      }

      if (currentPath === root) {
        return false;
      }
      currentPath = path.dirname(currentPath);
    }
  }

  /**
   * Parses the standard output of grep-like commands (git grep, system grep).
   * Expects format: filePath:lineNumber:lineContent
   * The path ends at the first ":<digits>:" separator, so colons in the line content
   * (and colons in a path that are not followed by a number) are handled.
   * @param output The raw stdout string.
   * @param basePath The absolute directory the search was run from, for relative paths.
   * @returns Array of match objects. Lines that do not fit the format are skipped.
   */
  private parseGrepOutput(output: string, basePath: string): GrepMatch[] {
    const results: GrepMatch[] = [];
    if (!output) return results;

    const matchLinePattern = /^(.+?):(\d+):([\s\S]*)$/;

    // Accept both LF and CRLF regardless of the host OS: the child process decides
    // the line ending, not os.EOL.
    for (const line of output.split(/\r?\n/)) {
      if (!line.trim()) continue;

      const parsed = matchLinePattern.exec(line);
      if (!parsed) {
        // Malformed line (no file:line:content structure). Skip.
        continue;
      }
      const [, filePathRaw, lineNumberStr, lineContent] = parsed;

      // Resolve the raw path relative to the base path where grep ran
      const absoluteFilePath = path.resolve(basePath, filePathRaw);
      // Make the final path relative to the basePath for consistency
      const relativeFilePath = path.relative(basePath, absoluteFilePath);

      results.push({
        // Use relative path, or just the filename if it's in the base path itself
        filePath: relativeFilePath || path.basename(absoluteFilePath),
        lineNumber: parseInt(lineNumberStr, 10),
        line: lineContent, // Use the full extracted line content
      });
    }
    return results;
  }

  /**
   * Gets a description of the grep operation
   * @param params Parameters for the grep operation
   * @returns A string describing the grep
   */
  getDescription(params: GrepToolParams): string {
    let description = `'${params.pattern}'`;

    if (params.include) {
      description += ` in ${params.include}`;
    }

    if (params.path) {
      const relativePath = makeRelative(params.path, this.rootDirectory);
      description += ` within ${shortenPath(relativePath || './')}`;
    }

    return description;
  }

  /**
   * Runs a command to completion and collects its output.
   * @param label Human-readable strategy name used in the start-failure message.
   * @param command Executable to run.
   * @param args Arguments passed to the executable (no shell involved).
   * @param cwd Working directory for the child process.
   * @returns Exit code plus decoded stdout/stderr. Rejects only if the process cannot be started.
   */
  private runSearchCommand(label: string, command: string, args: string[], cwd: string): Promise<CommandOutput> {
    return new Promise<CommandOutput>((resolve, reject) => {
      const child = spawn(command, args, { cwd, windowsHide: true });
      const stdoutChunks: Buffer[] = [];
      const stderrChunks: Buffer[] = [];

      child.stdout.on('data', (chunk: Buffer) => { stdoutChunks.push(chunk); });
      child.stderr.on('data', (chunk: Buffer) => { stderrChunks.push(chunk); });

      child.on('error', (err) => reject(new Error(`Failed to start ${label}: ${err.message}`)));

      child.on('close', (code) => {
        resolve({
          code,
          stdout: Buffer.concat(stdoutChunks).toString('utf8'),
          stderr: Buffer.concat(stderrChunks).toString('utf8'),
        });
      });
    });
  }

  /** Strategy 1: searches with `git grep` (includes untracked files). Throws on exit codes other than 0/1. */
  private async searchWithGitGrep(pattern: string, absolutePath: string, include?: string): Promise<GrepMatch[]> {
    // `-e` keeps a pattern that starts with '-' from being parsed as an option.
    const gitArgs = ['grep', '--untracked', '-n', '-E', '--ignore-case', '-e', pattern];
    if (include) {
      gitArgs.push('--', include);
    }

    const { code, stdout, stderr } = await this.runSearchCommand('git grep', 'git', gitArgs, absolutePath);
    if (code === 0) return this.parseGrepOutput(stdout, absolutePath);
    if (code === 1) return []; // No matches is not an error
    throw new Error(`git grep exited with code ${code}: ${stderr}`);
  }

  /** Strategy 2: searches with the system `grep -r`. Throws only when grep reports a non-benign error. */
  private async searchWithSystemGrep(pattern: string, absolutePath: string, include?: string): Promise<GrepMatch[]> {
    // `-i` matches the case-insensitive behavior of the git grep and JavaScript strategies.
    const grepArgs = ['-r', '-n', '-H', '-E', '-i'];
    for (const dir of EXCLUDED_DIRECTORIES) {
      grepArgs.push(`--exclude-dir=${dir}`);
    }
    if (include) {
      grepArgs.push(`--include=${include}`);
    }
    // `-e` keeps a pattern that starts with '-' from being parsed as an option.
    grepArgs.push('-e', pattern);
    grepArgs.push('.');

    const { code, stdout, stderr } = await this.runSearchCommand('system grep', 'grep', grepArgs, absolutePath);
    if (code === 0) return this.parseGrepOutput(stdout, absolutePath);
    if (code === 1) return []; // No matches

    // Unreadable files and directories are expected noise during a recursive search.
    const significantStderr = stderr
      .split(/\r?\n/)
      .filter((line) => line.trim() !== '' && !line.includes('Permission denied') && !/grep:.*: Is a directory/i.test(line))
      .join('\n')
      .trim();
    if (significantStderr) {
      throw new Error(`System grep exited with code ${code}: ${significantStderr}`);
    }
    // Only benign errors occurred: keep whatever matches grep did produce.
    return this.parseGrepOutput(stdout, absolutePath);
  }

  /** Strategy 3: pure JavaScript search — globs for files and tests each line against the pattern. */
  private async searchWithJavaScript(pattern: string, absolutePath: string, include?: string): Promise<GrepMatch[]> {
    const globPattern = include ? include : '**/*';
    // The bare names cover the top level; the `**/name/**` forms cover nested occurrences.
    const ignorePatterns = EXCLUDED_DIRECTORIES.flatMap((dir) => [dir, `**/${dir}/**`]);

    const filesStream = fastGlob.stream(globPattern, {
      cwd: absolutePath,
      dot: true,
      ignore: ignorePatterns,
      absolute: true,
      onlyFiles: true,
      suppressErrors: true,
      stats: false,
      // A slash-less include such as "*.js" should match at any depth, like
      // grep's --include and git's pathspec do.
      baseNameMatch: true,
    });

    const regex = new RegExp(pattern, 'i');
    const allMatches: GrepMatch[] = [];

    for await (const filePath of filesStream) {
      const fileAbsolutePath = filePath as string;
      try {
        const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
        const relativeFilePath = path.relative(absolutePath, fileAbsolutePath) || path.basename(fileAbsolutePath);
        const lines = content.split(/\r?\n/);
        for (let index = 0; index < lines.length; index++) {
          if (regex.test(lines[index])) {
            allMatches.push({
              filePath: relativeFilePath,
              lineNumber: index + 1,
              line: lines[index],
            });
          }
        }
      } catch (readError: unknown) {
        // A file that vanished between globbing and reading is not worth reporting.
        if (getErrorCode(readError) !== 'ENOENT') {
          console.error(`GrepTool: Could not read or process file ${fileAbsolutePath}: ${getErrorMessage(readError)}`);
        }
      }
    }

    return allMatches;
  }

  /**
   * Performs the actual search using the prioritized strategies:
   * git grep (inside a Git repository), then system grep, then the JavaScript fallback.
   * A strategy that fails is logged and the next one is tried.
   * @param options Search options including pattern, absolute path, and include glob.
   * @returns A promise resolving to an array of match objects.
   */
  private async performGrepSearch(options: {
    pattern: string;
    path: string; // Expects absolute path
    include?: string;
  }): Promise<GrepMatch[]> {
    const { pattern, path: absolutePath, include } = options;
    let strategyUsed = 'none'; // Keep track for potential error reporting

    try {
      // --- Strategy 1: git grep ---
      const isGit = await this.isGitRepository(absolutePath);
      const gitAvailable = isGit && await this.isCommandAvailable('git');

      if (gitAvailable) {
        strategyUsed = 'git grep';
        try {
          return await this.searchWithGitGrep(pattern, absolutePath, include);
        } catch (gitError: unknown) {
          console.error(`GrepTool: git grep strategy failed: ${getErrorMessage(gitError)}. Falling back...`);
        }
      }

      // --- Strategy 2: System grep ---
      const grepAvailable = await this.isCommandAvailable('grep');
      if (grepAvailable) {
        strategyUsed = 'system grep';
        try {
          return await this.searchWithSystemGrep(pattern, absolutePath, include);
        } catch (grepError: unknown) {
          console.error(`GrepTool: System grep strategy failed: ${getErrorMessage(grepError)}. Falling back...`);
        }
      }

      // --- Strategy 3: Pure JavaScript Fallback ---
      strategyUsed = 'javascript fallback';
      return await this.searchWithJavaScript(pattern, absolutePath, include);

    } catch (error: unknown) {
      console.error(`GrepTool: Error during performGrepSearch (Strategy: ${strategyUsed}): ${getErrorMessage(error)}`);
      throw error; // Re-throw to be caught by the execute method's handler
    }
  }
}
\ No newline at end of file |
