/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import Parser from 'tree-sitter';
import Python from 'tree-sitter-python';
import Java from 'tree-sitter-java';
import Go from 'tree-sitter-go';
import CSharp from 'tree-sitter-c-sharp';
import TreeSitterTypeScript from 'tree-sitter-typescript';
import Rust from 'tree-sitter-rust'; // Added
import fs from 'fs/promises';
import path from 'path';
import { BaseTool, ToolResult, ToolCallConfirmationDetails } from './tools.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { makeRelative, shortenPath } from '../utils/paths.js'; // Removed isWithinRoot
import { Config } from '../config/config.js';
// Grammar object type accepted by `Parser.setLanguage`, derived from that
// method's first parameter so it always matches the tree-sitter typings.
type TreeSitterLanguage = Parameters<typeof Parser.prototype.setLanguage>[0];
/** Parameters accepted by CodeParserTool. */
export interface CodeParserToolParams {
/** Path of the directory or file to parse; validateToolParams rejects non-absolute paths and paths outside the tool's root directory. */
path: string;
/** NOTE(review): declared here but not read by the CodeParserTool code in this file, and absent from its parameter schema — confirm whether ignore patterns are meant to be applied. */
ignore?: string[];
/** Optional language names to restrict parsing to (e.g. "python", "rust"); when omitted or empty, execute falls back to its default language list. */
languages?: string[];
}
/**
 * Tool that parses source files with tree-sitter and returns their syntax
 * trees as indented S-expressions.
 *
 * The target may be a single file or a directory. For a directory only its
 * immediate entries are examined (there is no recursion). The language of a
 * file is chosen from its extension.
 */
export class CodeParserTool extends BaseTool<CodeParserToolParams, ToolResult> {
  static readonly Name = 'code_parser';

  /** Single tree-sitter parser instance, re-targeted per file via setLanguage. */
  private parser: Parser;

  /**
   * @param rootDirectory Directory every requested path must live under; it
   *   is resolved to an absolute path.
   * @param config Configuration object; stored as a private member and not
   *   otherwise read by this class.
   */
  constructor(
    private rootDirectory: string,
    private config: Config,
  ) {
    super(
      CodeParserTool.Name,
      'CodeParser',
      'Parses the code in the specified directory path or a single file to generate AST representations. This should be used to get a better understanding of the codebase when refactoring and building out new features.',
      {
        properties: {
          path: {
            type: 'string',
            description:
              'The absolute path to the directory or file to parse (must be absolute, not relative)',
          },
          languages: {
            type: 'array',
            description:
              'Optional: specific languages to parse (e.g., ["python", "java", "go", "csharp", "typescript", "tsx", "javascript", "rust"]). Defaults to supported languages.',
            items: {
              type: 'string',
            },
          },
        },
        required: ['path'],
        type: 'object',
      },
    );
    this.rootDirectory = path.resolve(rootDirectory);
    this.parser = new Parser();
  }

  /**
   * Reports whether `dirpath` is the root directory itself or lies beneath it.
   *
   * Both paths are normalized first, and the prefix test uses the root with a
   * trailing separator so that a sibling such as `/root-other` is not
   * mistaken for a child of `/root`.
   */
  private isWithinRoot(dirpath: string): boolean {
    const normalizedPath = path.normalize(dirpath);
    const normalizedRoot = path.normalize(this.rootDirectory);
    const rootWithSep = normalizedRoot.endsWith(path.sep)
      ? normalizedRoot
      : normalizedRoot + path.sep;
    return (
      normalizedPath === normalizedRoot ||
      normalizedPath.startsWith(rootWithSep)
    );
  }

  /**
   * Maps a language name (case-insensitive) to its tree-sitter grammar.
   *
   * @returns The grammar, or `undefined` (after logging a warning) when the
   *   language is not supported.
   */
  private getLanguageParser(language: string): TreeSitterLanguage | undefined {
    switch (language.toLowerCase()) {
      case 'python':
        return Python;
      case 'java':
        return Java;
      case 'go':
        return Go;
      case 'csharp':
        return CSharp;
      case 'typescript':
        return TreeSitterTypeScript.typescript;
      case 'tsx':
        return TreeSitterTypeScript.tsx;
      case 'javascript':
        // JavaScript is parsed with the TypeScript grammar.
        return TreeSitterTypeScript.typescript;
      case 'rust':
        return Rust;
      default:
        console.warn(
          `Language '${language}' is not supported by the CodeParserTool.`,
        );
        return undefined;
    }
  }

  /**
   * Validates tool parameters.
   *
   * Checks, in order: the JSON schema, that `path` is absolute, that `path`
   * is inside the root directory, and that `languages` (when given) is an
   * array of strings.
   *
   * @returns A human-readable reason when validation fails, otherwise `null`.
   */
  validateToolParams(params: CodeParserToolParams): string | null {
    if (
      this.schema.parameters &&
      !SchemaValidator.validate(
        this.schema.parameters as Record<string, unknown>,
        params,
      )
    ) {
      return 'Parameters failed schema validation.';
    }
    if (!path.isAbsolute(params.path)) {
      return `Path must be absolute: ${params.path}`;
    }
    if (!this.isWithinRoot(params.path)) {
      return `Path must be within the root directory (${this.rootDirectory}): ${params.path}`;
    }
    if (
      params.languages &&
      (!Array.isArray(params.languages) ||
        !params.languages.every((lang) => typeof lang === 'string'))
    ) {
      return 'Languages parameter must be an array of strings.';
    }
    return null;
  }

  /** Returns a short description of the call: the target path relative to the root, shortened. */
  getDescription(params: CodeParserToolParams): string {
    const relativePath = makeRelative(params.path, this.rootDirectory);
    return `Parse ${shortenPath(relativePath)}`;
  }

  /**
   * Builds a failure result.
   *
   * @param llmContent Full message placed in `llmContent` unchanged.
   * @param returnDisplay Short message; it is prefixed with `Error: `.
   */
  private errorResult(llmContent: string, returnDisplay: string): ToolResult {
    return {
      llmContent,
      returnDisplay: `Error: ${returnDisplay}`,
    };
  }

  /**
   * Reads and parses one file, returning its formatted syntax tree.
   *
   * @param filePath File to parse.
   * @param language Language name understood by getLanguageParser.
   * @param maxFileSize Optional size limit in bytes; larger files are skipped
   *   with a warning. A value of 0 or `undefined` disables the check.
   * @returns The formatted tree, or `null` when the language is unsupported,
   *   the file is too large, or reading/parsing throws (the error is logged).
   */
  private async parseFile(
    filePath: string,
    language: string,
    maxFileSize?: number,
  ): Promise<string | null> {
    const langParser = this.getLanguageParser(language);
    if (!langParser) {
      return null;
    }
    try {
      const stats = await fs.stat(filePath);
      if (maxFileSize && stats.size > maxFileSize) {
        console.warn(
          `File ${filePath} exceeds maxFileSize (${stats.size} > ${maxFileSize}), skipping.`,
        );
        return null;
      }
      const fileContent = await fs.readFile(filePath, 'utf8');
      // The parser instance is shared, so the grammar must be selected
      // immediately before the synchronous parse with no `await` in between.
      // Selecting it before the file reads would let an overlapping parseFile
      // call on this instance switch the grammar while this call is suspended.
      this.parser.setLanguage(langParser);
      // NOTE(review): some versions of the Node tree-sitter binding reject
      // string inputs larger than their default read buffer unless a
      // `bufferSize` option is passed — confirm that files approaching
      // maxFileSize parse successfully.
      const tree = this.parser.parse(fileContent);
      return this.formatTree(tree.rootNode, 0);
    } catch (error) {
      console.error(
        `Error parsing file ${filePath} with language ${language}:`,
        error,
      );
      return null;
    }
  }

  /**
   * Renders a syntax node as indented text.
   *
   * A node whose S-expression is shorter than 100 characters is written on a
   * single line. Otherwise the node is opened as `(type`, each named child is
   * rendered recursively one level deeper, and the node is closed with `)`.
   * Unnamed children are not emitted in the expanded form.
   *
   * @param node Node to render.
   * @param level Nesting depth; one indent character is emitted per level.
   */
  private formatTree(node: Parser.SyntaxNode, level: number): string {
    let formattedTree = '';
    const indent = ' '.repeat(level);
    // SyntaxNode.toString() yields the node's S-expression.
    const sexp = node.toString();
    const maxLength = 100;
    if (sexp.length < maxLength) {
      formattedTree += `${indent}${sexp}\n`;
      return formattedTree;
    }
    formattedTree += `${indent}(${node.type}\n`;
    for (const child of node.namedChildren) {
      formattedTree += this.formatTree(child, level + 1);
    }
    formattedTree += `${indent})\n`;
    return formattedTree;
  }

  /**
   * Derives a language name from a file's extension (case-insensitive).
   *
   * @returns The language name, or `undefined` for unrecognized extensions.
   */
  private getFileLanguage(filePath: string): string | undefined {
    const extension = path.extname(filePath).toLowerCase();
    switch (extension) {
      case '.py':
        return 'python';
      case '.java':
        return 'java';
      case '.go':
        return 'go';
      case '.cs':
        return 'csharp';
      case '.ts':
        return 'typescript';
      case '.tsx':
        return 'tsx';
      case '.js':
        return 'javascript';
      case '.jsx':
        // JSX files are parsed with the TSX grammar.
        return 'tsx';
      case '.mjs':
        return 'javascript';
      case '.cjs':
        return 'javascript';
      case '.rs':
        return 'rust';
      default:
        return undefined;
    }
  }

  /**
   * Parses the requested file, or every matching file directly inside the
   * requested directory, and returns the concatenated syntax trees.
   *
   * Failures are reported through the returned ToolResult rather than thrown:
   * invalid parameters, an inaccessible path, no supported language among
   * those requested, an unsupported or unparsable single file, and a
   * directory that cannot be listed.
   *
   * @param params Validated inside this method via validateToolParams.
   * @param _signal Abort signal; not consulted by this implementation.
   */
  async execute(
    params: CodeParserToolParams,
    _signal: AbortSignal,
  ): Promise<ToolResult> {
    const validationError = this.validateToolParams(params);
    if (validationError) {
      return this.errorResult(
        `Error: Invalid parameters provided. Reason: ${validationError}`,
        'Failed to execute tool.',
      );
    }
    const targetPath = params.path;
    let stats;
    try {
      stats = await fs.stat(targetPath);
    } catch (error) {
      if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
        return this.errorResult(
          `Error: Path not found or inaccessible: ${targetPath}`,
          'Path not found or inaccessible.',
        );
      }
      return this.errorResult(
        `Error: Cannot access path: ${(error as Error).message}`,
        'Cannot access path.',
      );
    }
    const defaultLanguages = [
      'python',
      'java',
      'go',
      'csharp',
      'typescript',
      'tsx',
      'javascript',
      'rust',
    ];
    // An absent or empty `languages` list means "use every default language".
    const languagesToParse = (
      params.languages && params.languages.length > 0
        ? params.languages
        : defaultLanguages
    ).map((lang) => lang.toLowerCase());
    const maxFileSize = 1024 * 1024; // 1MB
    // Drop requested languages that have no grammar.
    const supportedLanguagesToParse = languagesToParse.filter((lang) =>
      this.getLanguageParser(lang),
    );
    if (supportedLanguagesToParse.length === 0) {
      const availableLangs =
        defaultLanguages
          .filter((lang) => this.getLanguageParser(lang))
          .join(', ') || 'none configured';
      return this.errorResult(
        `Error: No supported languages specified for parsing. Requested: ${languagesToParse.join(', ') || 'default'}. Available: ${availableLangs}.`,
        'No supported languages to parse.',
      );
    }
    let parsedCodeOutput = '';
    let filesProcessedCount = 0;
    if (stats.isDirectory()) {
      try {
        const files = await fs.readdir(targetPath);
        if (files.length === 0) {
          return {
            llmContent: `Directory ${targetPath} is empty.`,
            returnDisplay: 'Directory is empty.',
          };
        }
        for (const file of files) {
          const filePath = path.join(targetPath, file);
          let fileStats;
          try {
            fileStats = await fs.stat(filePath);
          } catch {
            // Best effort: an entry that cannot be inspected is skipped.
            console.warn(`Could not stat file ${filePath}, skipping.`);
            continue;
          }
          if (fileStats.isFile()) {
            const fileLang = this.getFileLanguage(filePath);
            if (fileLang && supportedLanguagesToParse.includes(fileLang)) {
              const ast = await this.parseFile(filePath, fileLang, maxFileSize);
              // In directory mode a file that fails to parse is silently
              // omitted from the output rather than failing the whole call.
              if (ast) {
                parsedCodeOutput += `-------------${filePath}-------------\n`;
                parsedCodeOutput += ast + '\n';
                filesProcessedCount++;
              }
            }
          }
        }
      } catch (error) {
        return this.errorResult(
          `Error listing or processing directory ${targetPath}: ${(error as Error).message}`,
          'Failed to process directory.',
        );
      }
    } else if (stats.isFile()) {
      const fileLang = this.getFileLanguage(targetPath);
      if (fileLang && supportedLanguagesToParse.includes(fileLang)) {
        const ast = await this.parseFile(targetPath, fileLang, maxFileSize);
        if (ast) {
          parsedCodeOutput += `-------------${targetPath}-------------\n`;
          parsedCodeOutput += ast + '\n';
          filesProcessedCount++;
        } else {
          // In single-file mode a failed parse is reported as an error.
          return this.errorResult(
            `Error: Could not parse file ${targetPath}. Language '${fileLang}' is supported but parsing failed. Check logs.`,
            'Failed to parse file.',
          );
        }
      } else {
        return this.errorResult(
          `Error: File ${targetPath} is not of a supported language type for parsing or language not specified. Supported: ${supportedLanguagesToParse.join(', ')}. Detected extension for language: ${fileLang || 'unknown'}.`,
          'Unsupported file type or language.',
        );
      }
    } else {
      return this.errorResult(
        `Error: Path is not a file or directory: ${targetPath}`,
        'Path is not a file or directory.',
      );
    }
    if (filesProcessedCount === 0) {
      return {
        llmContent: `No files were parsed in ${targetPath}. Ensure files match supported languages (${supportedLanguagesToParse.join(', ')}), are not empty or too large, and are not ignored.`,
        returnDisplay: 'No files parsed.',
      };
    }
    const returnDisplay = `Parsed ${filesProcessedCount} file(s).`;
    return {
      llmContent: `Parsed code from ${targetPath}:\n${parsedCodeOutput}`,
      returnDisplay,
    };
  }

  /** This tool never asks for user confirmation; it always resolves to `null`. */
  async requiresConfirmation(
    _params: CodeParserToolParams,
  ): Promise<ToolCallConfirmationDetails | null> {
    return null;
  }
}