summaryrefslogtreecommitdiff
path: root/packages/server/src
diff options
context:
space:
mode:
authorAllen Hutchison <[email protected]>2025-04-23 17:25:47 -0700
committerGitHub <[email protected]>2025-04-23 17:25:47 -0700
commitcf92ffab346b3844ed3922f5647bd03170d3e448 (patch)
tree416423a2870df44d77415b20c034a7212f0e84b8 /packages/server/src
parentd771dcbdb975664647ebc555f6adfcbba5a2b7da (diff)
Add concatenation tool (#130)
* Adding a tool inspired by files-to-prompt that will recursively read through all the files in a directory (guarded by targetDir) and concatenate those files for the model. Ignores common build artifacts and non-text files. * Migrated glob logic to fast-glob. Buffed the tool description to give more guidance to the model. Incorporated review feedback. * lint and error checking.
Diffstat (limited to 'packages/server/src')
-rw-r--r--packages/server/src/config/config.ts2
-rw-r--r--packages/server/src/tools/read-many-files.ts386
2 files changed, 388 insertions, 0 deletions
diff --git a/packages/server/src/config/config.ts b/packages/server/src/config/config.ts
index 2cb05318..d24fad4e 100644
--- a/packages/server/src/config/config.ts
+++ b/packages/server/src/config/config.ts
@@ -17,6 +17,7 @@ import { EditTool } from '../tools/edit.js';
import { TerminalTool } from '../tools/terminal.js';
import { WriteFileTool } from '../tools/write-file.js';
import { WebFetchTool } from '../tools/web-fetch.js';
+import { ReadManyFilesTool } from '../tools/read-many-files.js';
const DEFAULT_PASSTHROUGH_COMMANDS = ['ls', 'git', 'npm'];
@@ -130,6 +131,7 @@ function createToolRegistry(config: Config): ToolRegistry {
new TerminalTool(targetDir, config),
new WriteFileTool(targetDir),
new WebFetchTool(), // Note: WebFetchTool takes no arguments
+ new ReadManyFilesTool(targetDir),
];
for (const tool of tools) {
registry.registerTool(tool);
diff --git a/packages/server/src/tools/read-many-files.ts b/packages/server/src/tools/read-many-files.ts
new file mode 100644
index 00000000..eb06d35a
--- /dev/null
+++ b/packages/server/src/tools/read-many-files.ts
@@ -0,0 +1,386 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { BaseTool, ToolResult } from './tools.js';
+import { SchemaValidator } from '../utils/schemaValidator.js';
+import { getErrorMessage } from '../utils/errors.js';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import fg from 'fast-glob';
+
+/**
+ * Input accepted by the ReadManyFilesTool.
+ */
+export interface ReadManyFilesParams {
+ /**
+ * File paths or glob patterns to search for.
+ * Each entry is interpreted relative to the tool's configured target
+ * directory, which is used as the working directory of the glob search.
+ */
+ paths: string[];
+
+ /**
+ * Optional. Additional glob patterns to search for.
+ * They are appended to `paths` and searched in the same glob call.
+ * Example: ["*.ts", "docs/*.md"]
+ */
+ include?: string[];
+
+ /**
+ * Optional. Glob patterns for files/directories to leave out.
+ * They are passed to the glob search as ignore patterns, in addition to
+ * the default excludes when those are enabled.
+ * Example: ["*.log", "dist/**"]
+ */
+ exclude?: string[];
+
+ /**
+ * Optional. Recursion hint.
+ * Recursion inside glob patterns is expressed with `**` in the pattern
+ * itself.
+ */
+ recursive?: boolean;
+
+ /**
+ * Optional. Whether the built-in default exclusion patterns are applied.
+ * Treated as true when omitted.
+ */
+ useDefaultExcludes?: boolean;
+}
+
+/**
+ * Glob ignore patterns applied by default: commonly ignored directories,
+ * binary/compiled/archive/media/office file types, and a few sensitive or
+ * OS-generated files.
+ * TODO(adh): Consider making this configurable or extendable through a command line argument.
+ * TODO(adh): Look into sharing this list with the glob tool.
+ */
+const DEFAULT_EXCLUDES: string[] = [
+  // Directories that are ignored wherever they appear in the tree.
+  ...[
+    'node_modules',
+    '.git',
+    '.vscode',
+    '.idea',
+    'dist',
+    'build',
+    'coverage',
+    '__pycache__',
+  ].map((directory) => `**/${directory}/**`),
+  // File suffixes that are ignored wherever they appear in the tree.
+  ...[
+    // Compiled code and native binaries
+    'pyc',
+    'pyo',
+    'bin',
+    'exe',
+    'dll',
+    'so',
+    'dylib',
+    'class',
+    // Archives
+    'jar',
+    'war',
+    'zip',
+    'tar',
+    'gz',
+    'bz2',
+    'rar',
+    '7z',
+    // Images
+    'png',
+    'jpg',
+    'jpeg',
+    'gif',
+    'bmp',
+    'tiff',
+    'ico',
+    // Documents
+    'pdf',
+    'doc',
+    'docx',
+    'xls',
+    'xlsx',
+    'ppt',
+    'pptx',
+    'odt',
+    'ods',
+    'odp',
+    // macOS folder metadata
+    'DS_Store',
+  ].map((suffix) => `**/*.${suffix}`),
+  // Environment files
+  '**/.env',
+];
+
+// Text encoding used to decode file contents, and the template for the header
+// line written before each file ({filePath} is replaced by the relative path).
+const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
+const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---';
+
+/**
+ * Tool that finds multiple text files beneath a target directory and returns
+ * their contents concatenated into a single string, each file preceded by a
+ * `--- {filePath} ---` separator line.
+ *
+ * Files are located with fast-glob and read through `fs/promises`, so the tool
+ * must run in an environment with access to the local file system (e.g., a
+ * Node.js backend).
+ */
+export class ReadManyFilesTool extends BaseTool<
+  ReadManyFilesParams,
+  ToolResult
+> {
+  static readonly Name: string = 'readManyFiles';
+  readonly targetDir: string;
+
+  /**
+   * Creates an instance of ReadManyFilesTool.
+   * @param targetDir Root directory the tool operates in. It is resolved to an
+   *   absolute path, used as the working directory of every glob search, and
+   *   matched files that resolve outside of it are skipped.
+   */
+  constructor(targetDir: string) {
+    const parameterSchema: Record<string, unknown> = {
+      type: 'object',
+      properties: {
+        paths: {
+          type: 'array',
+          items: { type: 'string' },
+          description:
+            "Required. An array of glob patterns or paths relative to the tool's target directory. Examples: ['src/**/*.ts'], ['README.md', 'docs/']",
+        },
+        include: {
+          type: 'array',
+          items: { type: 'string' },
+          description:
+            'Optional. Additional glob patterns to include. These are merged with `paths`. Example: ["*.test.ts"] to specifically add test files if they were broadly excluded.',
+          default: [],
+        },
+        exclude: {
+          type: 'array',
+          items: { type: 'string' },
+          description:
+            'Optional. Glob patterns for files/directories to exclude. Added to default excludes if useDefaultExcludes is true. Example: ["**/*.log", "temp/"]',
+          default: [],
+        },
+        recursive: {
+          type: 'boolean',
+          description:
+            'Optional. Whether to search recursively (primarily controlled by `**` in glob patterns). Defaults to true.',
+          default: true,
+        },
+        useDefaultExcludes: {
+          type: 'boolean',
+          description:
+            'Optional. Whether to apply a list of default exclusion patterns (e.g., node_modules, .git, binary files). Defaults to true.',
+          default: true,
+        },
+      },
+      required: ['paths'],
+    };
+
+    super(
+      ReadManyFilesTool.Name,
+      'Read Many Files',
+      `Reads content from multiple text files specified by paths or glob patterns within a configured target directory and concatenates them into a single string.
+This tool is useful when you need to understand or analyze a collection of files, such as:
+- Getting an overview of a codebase or parts of it (e.g., all TypeScript files in the 'src' directory).
+- Finding where specific functionality is implemented if the user asks broad questions about code.
+- Reviewing documentation files (e.g., all Markdown files in the 'docs' directory).
+- Gathering context from multiple configuration files.
+- When the user asks to "read all files in X directory" or "show me the content of all Y files".
+
+Use this tool when the user's query implies needing the content of several files simultaneously for context, analysis, or summarization.
+It uses default UTF-8 encoding and a '--- {filePath} ---' separator between file contents.
+Ensure paths are relative to the target directory. Glob patterns like 'src/**/*.js' are supported.
+Avoid using for single files if a more specific single-file reading tool is available, unless the user specifically requests to process a list containing just one file via this tool.
+This tool should NOT be used for binary files; it attempts to skip them.
+Default excludes apply to common non-text files and large dependency directories unless 'useDefaultExcludes' is false.`,
+      parameterSchema,
+    );
+    this.targetDir = path.resolve(targetDir);
+  }
+
+  /**
+   * Validates tool parameters against the declared schema plus a few explicit
+   * shape checks.
+   * @param params Parameters supplied for a tool invocation.
+   * @returns A message describing the first problem found, or `null` when the
+   *   parameters are acceptable.
+   */
+  validateParams(params: ReadManyFilesParams): string | null {
+    if (
+      this.schema.parameters &&
+      !SchemaValidator.validate(
+        this.schema.parameters as Record<string, unknown>,
+        params,
+      )
+    ) {
+      if (
+        !params.paths ||
+        !Array.isArray(params.paths) ||
+        params.paths.length === 0
+      ) {
+        return 'The "paths" parameter is required and must be a non-empty array of strings/glob patterns.';
+      }
+      return 'Parameters failed schema validation. Ensure "paths" is a non-empty array and other parameters match their expected types.';
+    }
+    // Checked independently of the schema so the loop below never iterates a
+    // missing or non-array value (which would throw instead of reporting).
+    if (!Array.isArray(params.paths)) {
+      return 'The "paths" parameter is required and must be a non-empty array of strings/glob patterns.';
+    }
+    for (const p of params.paths) {
+      if (typeof p !== 'string' || p.trim() === '') {
+        return 'Each item in "paths" must be a non-empty string/glob pattern.';
+      }
+    }
+    if (
+      params.include &&
+      (!Array.isArray(params.include) ||
+        !params.include.every((item) => typeof item === 'string'))
+    ) {
+      return 'If provided, "include" must be an array of strings/glob patterns.';
+    }
+    if (
+      params.exclude &&
+      (!Array.isArray(params.exclude) ||
+        !params.exclude.every((item) => typeof item === 'string'))
+    ) {
+      return 'If provided, "exclude" must be an array of strings/glob patterns.';
+    }
+    return null;
+  }
+
+  /**
+   * Builds a one-line, human-readable summary of what an invocation will do:
+   * the search patterns, a preview of the exclusion patterns, the encoding and
+   * the separator format.
+   * @param params Parameters supplied for a tool invocation.
+   * @returns The summary text.
+   */
+  getDescription(params: ReadManyFilesParams): string {
+    const allPatterns = [...params.paths, ...(params.include || [])];
+    const pathDesc = `using patterns: \`${allPatterns.join('`, `')}\` (within target directory: \`${this.targetDir}\`)`;
+
+    const effectiveExcludes = [
+      ...(params.useDefaultExcludes !== false ? DEFAULT_EXCLUDES : []),
+      ...(params.exclude ?? []),
+    ];
+
+    // No trailing period here: the sentence terminator is added once below.
+    let excludeDesc =
+      'Excluding: none explicitly (beyond default non-text file avoidance)';
+    if (effectiveExcludes.length > 0) {
+      // Only the first two patterns are previewed to keep the summary short.
+      const preview = effectiveExcludes.slice(0, 2).join('`, `');
+      const ellipsis = effectiveExcludes.length > 2 ? '...' : '';
+      excludeDesc = `Excluding: patterns like \`${preview}${ellipsis}\``;
+    }
+
+    return `Will attempt to read and concatenate files ${pathDesc}. ${excludeDesc}. File encoding: ${DEFAULT_ENCODING}. Separator: "${DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace('{filePath}', 'path/to/file.ext')}".`;
+  }
+
+  /**
+   * Finds the files matching the requested patterns, reads every one that
+   * does not look binary, and concatenates their contents.
+   * @param params Parameters supplied for a tool invocation.
+   * @returns `llmContent` holds the concatenated file contents (or an
+   *   error/informational message); `returnDisplay` holds a Markdown summary
+   *   of processed and skipped files. Failures are reported through these
+   *   fields rather than by throwing.
+   */
+  async execute(params: ReadManyFilesParams): Promise<ToolResult> {
+    const validationError = this.validateParams(params);
+    if (validationError) {
+      return {
+        llmContent: `Error: Invalid parameters for ${this.displayName}. Reason: ${validationError}`,
+        returnDisplay: `## Parameter Error\n\n${validationError}`,
+      };
+    }
+
+    const {
+      paths: inputPatterns,
+      include = [],
+      exclude = [],
+      recursive = true,
+      useDefaultExcludes = true,
+    } = params;
+
+    const skippedFiles: Array<{ path: string; reason: string }> = [];
+    const processedFilesRelativePaths: string[] = [];
+
+    const effectiveExcludes = useDefaultExcludes
+      ? [...DEFAULT_EXCLUDES, ...exclude]
+      : [...exclude];
+
+    const requestedPatterns = [...inputPatterns, ...include];
+    if (requestedPatterns.length === 0) {
+      return {
+        llmContent: 'No search paths or include patterns provided.',
+        returnDisplay: `## Information\n\nNo search paths or include patterns were specified. Nothing to read or concatenate.`,
+      };
+    }
+
+    const filesToConsider = new Set<string>();
+    try {
+      // A bare directory never matches when `onlyFiles` is set, so directory
+      // paths are first rewritten into patterns that match their contents.
+      const searchPatterns = await this.expandDirectoryPatterns(
+        requestedPatterns,
+        recursive,
+      );
+
+      // `cwd` scopes relative patterns to the target directory, `ignore`
+      // applies the exclusions, `onlyFiles` drops directories, `dot` lets
+      // dotfiles match (they can still be excluded), and `absolute` yields
+      // absolute paths for consistent handling.
+      const entries = await fg(searchPatterns, {
+        cwd: this.targetDir,
+        ignore: effectiveExcludes,
+        onlyFiles: true,
+        dot: true,
+        absolute: true,
+        caseSensitiveMatch: false,
+      });
+
+      for (const entry of entries) {
+        // Normalize to the platform's separators before comparing with
+        // `targetDir`; fast-glob reports paths with forward slashes.
+        const absoluteFilePath = path.resolve(entry);
+        // Patterns containing `..` or absolute paths can match files outside
+        // the target directory, so every result is verified.
+        if (!this.isWithinTargetDir(absoluteFilePath)) {
+          skippedFiles.push({
+            path: absoluteFilePath,
+            reason: `Security: Glob library returned path outside target directory. Base: ${this.targetDir}, Path: ${absoluteFilePath}`,
+          });
+          continue;
+        }
+        filesToConsider.add(absoluteFilePath);
+      }
+    } catch (error) {
+      return {
+        llmContent: `Error during file search: ${getErrorMessage(error)}`,
+        returnDisplay: `## File Search Error\n\nAn error occurred while searching for files:\n\`\`\`\n${getErrorMessage(error)}\n\`\`\``,
+      };
+    }
+
+    // Sorted so the output order is deterministic.
+    const sortedFiles = Array.from(filesToConsider).sort();
+    const contentParts: string[] = [];
+
+    // Files are read one at a time; a failure on one file is recorded and the
+    // remaining files are still processed.
+    for (const filePath of sortedFiles) {
+      const relativePathForDisplay = path
+        .relative(this.targetDir, filePath)
+        .replace(/\\/g, '/');
+      try {
+        const contentBuffer = await fs.readFile(filePath);
+        // Basic binary detection: a null byte within the first 1KB.
+        const sample = contentBuffer.subarray(
+          0,
+          Math.min(contentBuffer.length, 1024),
+        );
+        if (sample.includes(0)) {
+          skippedFiles.push({
+            path: relativePathForDisplay,
+            reason: 'Skipped (appears to be binary)',
+          });
+          continue;
+        }
+        const fileContent = contentBuffer.toString(DEFAULT_ENCODING);
+        const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
+          '{filePath}',
+          relativePathForDisplay,
+        );
+        contentParts.push(`${separator}\n\n${fileContent}\n\n`);
+        processedFilesRelativePaths.push(relativePathForDisplay);
+      } catch (error) {
+        skippedFiles.push({
+          path: relativePathForDisplay,
+          reason: `Read error: ${getErrorMessage(error)}`,
+        });
+      }
+    }
+
+    let displayMessage = `### Read Many Files Result (Target Dir: \`${this.targetDir}\`)\n\n`;
+    if (processedFilesRelativePaths.length > 0) {
+      displayMessage += `Successfully read and concatenated content from **${processedFilesRelativePaths.length} file(s)**.\n`;
+      displayMessage += `\n**Processed Files (up to 10 shown):**\n`;
+      for (const processedPath of processedFilesRelativePaths.slice(0, 10)) {
+        displayMessage += `- \`${processedPath}\`\n`;
+      }
+      if (processedFilesRelativePaths.length > 10) {
+        displayMessage += `- ...and ${processedFilesRelativePaths.length - 10} more.\n`;
+      }
+    } else {
+      displayMessage += `No files were read and concatenated based on the criteria.\n`;
+    }
+
+    if (skippedFiles.length > 0) {
+      displayMessage += `\n**Skipped ${skippedFiles.length} item(s) (up to 5 shown):**\n`;
+      for (const skipped of skippedFiles.slice(0, 5)) {
+        displayMessage += `- \`${skipped.path}\` (Reason: ${skipped.reason})\n`;
+      }
+      if (skippedFiles.length > 5) {
+        displayMessage += `- ...and ${skippedFiles.length - 5} more.\n`;
+      }
+    }
+
+    let concatenatedContent = contentParts.join('');
+    if (
+      concatenatedContent.length === 0 &&
+      processedFilesRelativePaths.length === 0
+    ) {
+      concatenatedContent =
+        'No files matching the criteria were found or all were skipped.';
+    }
+
+    return {
+      llmContent: concatenatedContent,
+      returnDisplay: displayMessage,
+    };
+  }
+
+  /**
+   * Rewrites every pattern that names an existing directory (relative to the
+   * target directory) into a glob matching the files inside it: `<dir>/**`
+   * when `recursive` is true, `<dir>/*` otherwise. All other patterns are
+   * returned unchanged.
+   */
+  private async expandDirectoryPatterns(
+    patterns: string[],
+    recursive: boolean,
+  ): Promise<string[]> {
+    const contentsGlob = recursive ? '**' : '*';
+    return Promise.all(
+      patterns.map(async (pattern) => {
+        try {
+          const stats = await fs.stat(path.resolve(this.targetDir, pattern));
+          if (stats.isDirectory()) {
+            // Glob patterns use forward slashes; drop trailing separators so
+            // the suffix is appended exactly once.
+            const directory = pattern.replace(/\\/g, '/').replace(/\/+$/, '');
+            if (directory !== '') {
+              return `${directory}/${contentsGlob}`;
+            }
+          }
+        } catch {
+          // Not an existing path (typically a real glob pattern): keep as is.
+        }
+        return pattern;
+      }),
+    );
+  }
+
+  /**
+   * Reports whether an absolute path lies inside the target directory. The
+   * comparison is made on path segments, so a sibling directory whose name
+   * merely starts with the target directory's name is not accepted.
+   */
+  private isWithinTargetDir(absolutePath: string): boolean {
+    const relativePath = path.relative(this.targetDir, absolutePath);
+    return (
+      relativePath !== '..' &&
+      !relativePath.startsWith(`..${path.sep}`) &&
+      // `path.relative` returns an absolute path when no relative route
+      // exists (e.g. a different drive on Windows).
+      !path.isAbsolute(relativePath)
+    );
+  }
+}