author     JaeHo Jang <[email protected]>  2025-08-22 03:21:04 +0900
committer  GitHub <[email protected]>  2025-08-21 18:21:04 +0000
commit     1e5ead6960d531c51593be25c8665e4e8f118562 (patch)
tree       ad77be43cb289ab7a3705e8abd423432eb1ac161
parent     714b3dab73bb2a6e0f3c66ac8142db9ea7cc5fc7 (diff)
perf(core): parallelize memory discovery file operations for a performance gain (#5751)
Co-authored-by: Jacob Richman <[email protected]>
-rw-r--r--  packages/core/src/utils/memoryDiscovery.test.ts   71
-rw-r--r--  packages/core/src/utils/memoryDiscovery.ts        120
2 files changed, 154 insertions(+), 37 deletions(-)
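
The change replaces two sequential awaited loops (one over directories during discovery, one over context files during reading) with batched parallel processing: directories are handled 10 at a time and file reads 20 at a time via Promise.allSettled, capping concurrency so large workspaces do not exhaust file descriptors (EMFILE). A minimal sketch of the batching pattern, assuming a hypothetical mapInBatches helper; the actual patch inlines the slice/allSettled loop at both call sites rather than factoring it out:

async function mapInBatches<T, R>(
  items: readonly T[],
  limit: number,
  fn: (item: T) => Promise<R>,
): Promise<PromiseSettledResult<R>[]> {
  const settled: PromiseSettledResult<R>[] = [];
  for (let i = 0; i < items.length; i += limit) {
    // Launch at most `limit` promises per batch so a large workspace
    // cannot exhaust file descriptors (EMFILE).
    const batch = items.slice(i, i + limit);
    settled.push(...(await Promise.allSettled(batch.map(fn))));
  }
  return settled;
}
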
diff --git a/packages/core/src/utils/memoryDiscovery.test.ts b/packages/core/src/utils/memoryDiscovery.test.ts
index d8f3ccc5..c67427e9 100644
--- a/packages/core/src/utils/memoryDiscovery.test.ts
+++ b/packages/core/src/utils/memoryDiscovery.test.ts
@@ -368,4 +368,75 @@ describe('loadServerHierarchicalMemory', () => {
fileCount: 1,
});
});
+
+ it('should handle multiple directories and files in parallel correctly', async () => {
+ // Create multiple test directories with GEMINI.md files
+ const numDirs = 5;
+ const createdFiles: string[] = [];
+
+ for (let i = 0; i < numDirs; i++) {
+ const dirPath = await createEmptyDir(
+ path.join(testRootDir, `project-${i}`),
+ );
+ const filePath = await createTestFile(
+ path.join(dirPath, DEFAULT_CONTEXT_FILENAME),
+ `Content from project ${i}`,
+ );
+ createdFiles.push(filePath);
+ }
+
+ // Load memory from all directories
+ const result = await loadServerHierarchicalMemory(
+ cwd,
+ createdFiles.map((f) => path.dirname(f)),
+ false,
+ new FileDiscoveryService(projectRoot),
+ );
+
+ // Should have loaded all files
+ expect(result.fileCount).toBe(numDirs);
+
+ // Content should include all project contents
+ for (let i = 0; i < numDirs; i++) {
+ expect(result.memoryContent).toContain(`Content from project ${i}`);
+ }
+ });
+
+ it('should preserve order and prevent duplicates when processing multiple directories', async () => {
+ // Create overlapping directory structure
+ const parentDir = await createEmptyDir(path.join(testRootDir, 'parent'));
+ const childDir = await createEmptyDir(path.join(parentDir, 'child'));
+
+ await createTestFile(
+ path.join(parentDir, DEFAULT_CONTEXT_FILENAME),
+ 'Parent content',
+ );
+ await createTestFile(
+ path.join(childDir, DEFAULT_CONTEXT_FILENAME),
+ 'Child content',
+ );
+
+ // Include both parent and child directories
+ const result = await loadServerHierarchicalMemory(
+ parentDir,
+ [childDir, parentDir], // Deliberately include duplicates
+ false,
+ new FileDiscoveryService(projectRoot),
+ );
+
+ // Should have both files without duplicates
+ expect(result.fileCount).toBe(2);
+ expect(result.memoryContent).toContain('Parent content');
+ expect(result.memoryContent).toContain('Child content');
+
+ // Check that files are not duplicated
+ const parentOccurrences = (
+ result.memoryContent.match(/Parent content/g) || []
+ ).length;
+ const childOccurrences = (
+ result.memoryContent.match(/Child content/g) || []
+ ).length;
+ expect(parentOccurrences).toBe(1);
+ expect(childOccurrences).toBe(1);
+ });
});
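
The de-duplication asserted by the second test falls out of the final Array.from(new Set<string>(paths)) in the discovery code below: a Set keeps only the first occurrence of each path and iterates in insertion order, so overlapping include directories collapse without reordering. A tiny illustration with made-up paths:

const paths = [
  '/repo/parent/GEMINI.md',
  '/repo/parent/child/GEMINI.md',
  '/repo/parent/GEMINI.md', // duplicate from overlapping include dirs
];
const unique = Array.from(new Set(paths));
// unique => ['/repo/parent/GEMINI.md', '/repo/parent/child/GEMINI.md']
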
diff --git a/packages/core/src/utils/memoryDiscovery.ts b/packages/core/src/utils/memoryDiscovery.ts
index d3c24baf..d2eff39c 100644
--- a/packages/core/src/utils/memoryDiscovery.ts
+++ b/packages/core/src/utils/memoryDiscovery.ts
@@ -96,19 +96,41 @@ async function getGeminiMdFilePathsInternal(
...includeDirectoriesToReadGemini,
currentWorkingDirectory,
]);
- const paths = [];
- for (const dir of dirs) {
- const pathsByDir = await getGeminiMdFilePathsInternalForEachDir(
- dir,
- userHomePath,
- debugMode,
- fileService,
- extensionContextFilePaths,
- fileFilteringOptions,
- maxDirs,
+
+ // Process directories in parallel with concurrency limit to prevent EMFILE errors
+ const CONCURRENT_LIMIT = 10;
+ const dirsArray = Array.from(dirs);
+ const pathsArrays: string[][] = [];
+
+ for (let i = 0; i < dirsArray.length; i += CONCURRENT_LIMIT) {
+ const batch = dirsArray.slice(i, i + CONCURRENT_LIMIT);
+ const batchPromises = batch.map((dir) =>
+ getGeminiMdFilePathsInternalForEachDir(
+ dir,
+ userHomePath,
+ debugMode,
+ fileService,
+ extensionContextFilePaths,
+ fileFilteringOptions,
+ maxDirs,
+ ),
);
- paths.push(...pathsByDir);
+
+ const batchResults = await Promise.allSettled(batchPromises);
+
+ for (const result of batchResults) {
+ if (result.status === 'fulfilled') {
+ pathsArrays.push(result.value);
+ } else {
+ const error = result.reason;
+ const message = error instanceof Error ? error.message : String(error);
+ logger.error(`Error discovering files in directory: ${message}`);
+ // Continue processing other directories
+ }
+ }
}
+
+ const paths = pathsArrays.flat();
return Array.from(new Set<string>(paths));
}
@@ -226,39 +248,63 @@ async function readGeminiMdFiles(
debugMode: boolean,
importFormat: 'flat' | 'tree' = 'tree',
): Promise<GeminiFileContent[]> {
+ // Process files in parallel with concurrency limit to prevent EMFILE errors
+ const CONCURRENT_LIMIT = 20; // Higher limit for file reads as they're typically faster
const results: GeminiFileContent[] = [];
- for (const filePath of filePaths) {
- try {
- const content = await fs.readFile(filePath, 'utf-8');
- // Process imports in the content
- const processedResult = await processImports(
- content,
- path.dirname(filePath),
- debugMode,
- undefined,
- undefined,
- importFormat,
- );
+ for (let i = 0; i < filePaths.length; i += CONCURRENT_LIMIT) {
+ const batch = filePaths.slice(i, i + CONCURRENT_LIMIT);
+ const batchPromises = batch.map(
+ async (filePath): Promise<GeminiFileContent> => {
+ try {
+ const content = await fs.readFile(filePath, 'utf-8');
- results.push({ filePath, content: processedResult.content });
- if (debugMode)
- logger.debug(
- `Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
- );
- } catch (error: unknown) {
- const isTestEnv =
- process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
- if (!isTestEnv) {
+ // Process imports in the content
+ const processedResult = await processImports(
+ content,
+ path.dirname(filePath),
+ debugMode,
+ undefined,
+ undefined,
+ importFormat,
+ );
+ if (debugMode)
+ logger.debug(
+ `Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
+ );
+
+ return { filePath, content: processedResult.content };
+ } catch (error: unknown) {
+ const isTestEnv =
+ process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
+ if (!isTestEnv) {
+ const message =
+ error instanceof Error ? error.message : String(error);
+ logger.warn(
+ `Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`,
+ );
+ }
+ if (debugMode) logger.debug(`Failed to read: ${filePath}`);
+ return { filePath, content: null }; // Still include it with null content
+ }
+ },
+ );
+
+ const batchResults = await Promise.allSettled(batchPromises);
+
+ for (const result of batchResults) {
+ if (result.status === 'fulfilled') {
+ results.push(result.value);
+ } else {
+ // This case shouldn't happen since we catch all errors above,
+ // but handle it for completeness
+ const error = result.reason;
const message = error instanceof Error ? error.message : String(error);
- logger.warn(
- `Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`,
- );
+ logger.error(`Unexpected error processing file: ${message}`);
}
- results.push({ filePath, content: null }); // Still include it with null content
- if (debugMode) logger.debug(`Failed to read: ${filePath}`);
}
}
+
return results;
}
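
One design note on the batching approach used in both hunks: each batch is awaited as a whole, so a single slow directory or file delays the start of the next batch, and the rejected branch of Promise.allSettled in readGeminiMdFiles is effectively defensive, since the per-file callback already catches its own errors and resolves with content: null. A common alternative that keeps the concurrency limit saturated is a small worker pool; the sketch below is illustrative only and not what this patch does:

// Keep `limit` tasks in flight at all times instead of fixed batches.
// Illustrative alternative only; these names are not part of the change.
async function mapWithPool<T, R>(
  items: readonly T[],
  limit: number,
  fn: (item: T) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);
  let next = 0;
  const worker = async () => {
    // Each worker repeatedly claims the next index until the list is drained.
    while (next < items.length) {
      const i = next++;
      results[i] = await fn(items[i]);
    }
  };
  await Promise.all(
    Array.from({ length: Math.min(limit, items.length) }, worker),
  );
  return results;
}
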