summary refs log tree commit diff
path: root/packages/core/src/utils
diff options
context:
space:
mode:
author Allen Hutchison <[email protected]> 2025-06-11 09:21:23 -0700
committer GitHub <[email protected]> 2025-06-11 09:21:23 -0700
commit 1d7090b8ac9396a652eedf4fe7744111f81cfe7e (patch)
tree 64fda810d803dced6aafaa208fd0b701b4408357 /packages/core/src/utils
parent e2d689ff2f377ff0f2b1e3d61f9577ef5c2d085e (diff)
feat(core): Create BFS file search utility (#903)
Diffstat (limited to 'packages/core/src/utils')
-rw-r--r-- packages/core/src/utils/bfsFileSearch.test.ts 145
-rw-r--r-- packages/core/src/utils/bfsFileSearch.ts 97
-rw-r--r-- packages/core/src/utils/memoryDiscovery.test.ts 14
-rw-r--r-- packages/core/src/utils/memoryDiscovery.ts 105
4 files changed, 256 insertions, 105 deletions
diff --git a/packages/core/src/utils/bfsFileSearch.test.ts b/packages/core/src/utils/bfsFileSearch.test.ts
new file mode 100644
index 00000000..679700ca
--- /dev/null
+++ b/packages/core/src/utils/bfsFileSearch.test.ts
@@ -0,0 +1,145 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { Dirent, PathLike } from 'fs';
+import { vi, describe, it, expect, beforeEach } from 'vitest';
+import * as fs from 'fs/promises';
+import * as gitUtils from './gitUtils.js';
+import { bfsFileSearch } from './bfsFileSearch.js';
+
+vi.mock('fs/promises');
+vi.mock('./gitUtils.js');
+
+// Builds a minimal fs.Dirent stub for mocking readdir results. `isFile`
+// controls whether the entry reports itself as a file (true) or as a
+// directory (false) — the two predicates bfsFileSearch inspects.
+const createMockDirent = (name: string, isFile: boolean): Dirent => {
+  const dirent = new Dirent();
+  dirent.name = name;
+  // Override the type predicates so the search treats this entry as intended.
+  dirent.isFile = () => isFile;
+  dirent.isDirectory = () => !isFile;
+  return dirent;
+};
+
+// Narrowed signature for the one `fs.readdir` overload these tests use
+// (`withFileTypes: true`, resolving to Dirent[]). Casting the mock to this
+// type avoids wrestling with readdir's full overload set in mockImplementation.
+type ReaddirWithFileTypes = (
+  path: PathLike,
+  options: { withFileTypes: true },
+) => Promise<Dirent[]>;
+
+describe('bfsFileSearch', () => {
+  // Fresh mocks per test so readdir/readFile implementations don't leak.
+  beforeEach(() => {
+    vi.resetAllMocks();
+  });
+
+  // Flat layout: /test holds two files; only the requested name is returned.
+  it('should find a file in the root directory', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockResolvedValue([
+      createMockDirent('file1.txt', true),
+      createMockDirent('file2.txt', true),
+    ]);
+
+    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });
+    expect(result).toEqual(['/test/file1.txt']);
+  });
+
+  // Nested layout: the match lives one level down, in /test/subdir.
+  it('should find a file in a subdirectory', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
+      if (dir === '/test') {
+        return [createMockDirent('subdir', false)];
+      }
+      if (dir === '/test/subdir') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      return [];
+    });
+
+    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });
+    expect(result).toEqual(['/test/subdir/file1.txt']);
+  });
+
+  // Both subdirs contain a match; ignoreDirs prunes subdir2 before descent.
+  it('should ignore specified directories', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
+      if (dir === '/test') {
+        return [
+          createMockDirent('subdir1', false),
+          createMockDirent('subdir2', false),
+        ];
+      }
+      if (dir === '/test/subdir1') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      if (dir === '/test/subdir2') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      return [];
+    });
+
+    const result = await bfsFileSearch('/test', {
+      fileName: 'file1.txt',
+      ignoreDirs: ['subdir2'],
+    });
+    expect(result).toEqual(['/test/subdir1/file1.txt']);
+  });
+
+  // With maxDirs: 2, BFS scans /test then /test/subdir1 and stops before
+  // /test/subdir2, so only subdir1's match is reported.
+  it('should respect maxDirs limit', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
+      if (dir === '/test') {
+        return [
+          createMockDirent('subdir1', false),
+          createMockDirent('subdir2', false),
+        ];
+      }
+      if (dir === '/test/subdir1') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      if (dir === '/test/subdir2') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      return [];
+    });
+
+    const result = await bfsFileSearch('/test', {
+      fileName: 'file1.txt',
+      maxDirs: 2,
+    });
+    expect(result).toEqual(['/test/subdir1/file1.txt']);
+  });
+
+  // isGitRepository is forced true and readFile yields a .gitignore listing
+  // 'subdir2', so the gitignore filter excludes that branch of the tree.
+  it('should respect .gitignore files', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockGitUtils = vi.mocked(gitUtils);
+    mockGitUtils.isGitRepository.mockReturnValue(true);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
+      if (dir === '/test') {
+        return [
+          createMockDirent('.gitignore', true),
+          createMockDirent('subdir1', false),
+          createMockDirent('subdir2', false),
+        ];
+      }
+      if (dir === '/test/subdir1') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      if (dir === '/test/subdir2') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      return [];
+    });
+    mockFs.readFile.mockResolvedValue('subdir2');
+
+    const result = await bfsFileSearch('/test', {
+      fileName: 'file1.txt',
+      respectGitIgnore: true,
+    });
+    expect(result).toEqual(['/test/subdir1/file1.txt']);
+  });
+});
diff --git a/packages/core/src/utils/bfsFileSearch.ts b/packages/core/src/utils/bfsFileSearch.ts
new file mode 100644
index 00000000..6b05526f
--- /dev/null
+++ b/packages/core/src/utils/bfsFileSearch.ts
@@ -0,0 +1,97 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { GitIgnoreParser, GitIgnoreFilter } from './gitIgnoreParser.js';
+import { isGitRepository } from './gitUtils.js';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import { Dirent } from 'fs';
+
+// Simple console logger for now.
+// TODO: Integrate with a more robust server-side logger.
+const logger = {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  debug: (...args: any[]) => console.debug('[DEBUG] [BfsFileSearch]', ...args),
+};
+
+// Options accepted by bfsFileSearch (defaults applied in its destructuring).
+interface BfsFileSearchOptions {
+  fileName: string; // exact entry name to match (no globbing)
+  ignoreDirs?: string[]; // directory names pruned from traversal (default [])
+  maxDirs?: number; // cap on directories scanned (default Infinity)
+  debug?: boolean; // when true, log each directory as it is scanned
+  respectGitIgnore?: boolean; // honor .gitignore inside git repos (default true)
+  projectRoot?: string; // root for gitignore resolution (defaults to rootDir)
+}
+
+/**
+ * Performs a breadth-first search for a specific file within a directory structure.
+ *
+ * Directories listed in `options.ignoreDirs` are pruned, unreadable
+ * directories are skipped silently, and — when `respectGitIgnore` is set and
+ * the project is a git repository — gitignored paths are excluded. The scan
+ * stops once `maxDirs` directories have been visited.
+ *
+ * @param rootDir The directory to start the search from.
+ * @param options Configuration for the search.
+ * @returns A promise that resolves to an array of paths where the file was found.
+ */
+export async function bfsFileSearch(
+  rootDir: string,
+  options: BfsFileSearchOptions,
+): Promise<string[]> {
+  const {
+    fileName,
+    ignoreDirs = [],
+    maxDirs = Infinity,
+    debug = false,
+    respectGitIgnore = true,
+    projectRoot = rootDir,
+  } = options;
+  const foundFiles: string[] = [];
+  const queue: string[] = [rootDir];
+  // Guards against re-scanning a directory that was queued more than once.
+  const visited = new Set<string>();
+  let scannedDirCount = 0;
+
+  // Build the gitignore filter only when requested and inside a git repo;
+  // otherwise the filter stays null and no paths are excluded.
+  let gitIgnoreFilter: GitIgnoreFilter | null = null;
+  if (respectGitIgnore && isGitRepository(projectRoot)) {
+    const parser = new GitIgnoreParser(projectRoot);
+    await parser.initialize();
+    gitIgnoreFilter = parser;
+  }
+
+  // BFS: FIFO queue, so shallower directories are always scanned first.
+  while (queue.length > 0 && scannedDirCount < maxDirs) {
+    const currentDir = queue.shift()!;
+    if (visited.has(currentDir)) {
+      continue;
+    }
+    visited.add(currentDir);
+    scannedDirCount++;
+
+    if (debug) {
+      logger.debug(`Scanning [${scannedDirCount}/${maxDirs}]: ${currentDir}`);
+    }
+
+    let entries: Dirent[];
+    try {
+      entries = await fs.readdir(currentDir, { withFileTypes: true });
+    } catch {
+      // Ignore errors for directories we can't read (e.g., permissions)
+      continue;
+    }
+
+    for (const entry of entries) {
+      const fullPath = path.join(currentDir, entry.name);
+      // Gitignored paths are neither reported nor descended into.
+      if (gitIgnoreFilter?.isIgnored(fullPath)) {
+        continue;
+      }
+
+      if (entry.isDirectory()) {
+        if (!ignoreDirs.includes(entry.name)) {
+          queue.push(fullPath);
+        }
+      } else if (entry.isFile() && entry.name === fileName) {
+        foundFiles.push(fullPath);
+      }
+    }
+  }
+
+  return foundFiles;
+}
diff --git a/packages/core/src/utils/memoryDiscovery.test.ts b/packages/core/src/utils/memoryDiscovery.test.ts
index a9d34bf3..5329a15b 100644
--- a/packages/core/src/utils/memoryDiscovery.test.ts
+++ b/packages/core/src/utils/memoryDiscovery.test.ts
@@ -512,13 +512,7 @@ describe('loadServerHierarchicalMemory', () => {
] as Dirent[];
}
if (p === ignoredDir) {
- return [
- {
- name: ORIGINAL_GEMINI_MD_FILENAME_CONST_FOR_TEST,
- isFile: () => true,
- isDirectory: () => false,
- } as Dirent,
- ] as Dirent[];
+ return [] as Dirent[];
}
return [] as Dirent[];
}) as unknown as typeof fsPromises.readdir);
@@ -565,10 +559,8 @@ describe('loadServerHierarchicalMemory', () => {
await loadServerHierarchicalMemory(CWD, true);
expect(consoleDebugSpy).toHaveBeenCalledWith(
- expect.stringContaining('[DEBUG] [MemoryDiscovery]'),
- expect.stringContaining(
- 'Max directory scan limit (200) reached. Stopping downward scan at:',
- ),
+ expect.stringContaining('[DEBUG] [BfsFileSearch]'),
+ expect.stringContaining('Scanning [200/200]:'),
);
consoleDebugSpy.mockRestore();
});
diff --git a/packages/core/src/utils/memoryDiscovery.ts b/packages/core/src/utils/memoryDiscovery.ts
index 221bf2c6..6e822145 100644
--- a/packages/core/src/utils/memoryDiscovery.ts
+++ b/packages/core/src/utils/memoryDiscovery.ts
@@ -8,6 +8,7 @@ import * as fs from 'fs/promises';
import * as fsSync from 'fs';
import * as path from 'path';
import { homedir } from 'os';
+import { bfsFileSearch } from './bfsFileSearch.js';
import {
GEMINI_CONFIG_DIR,
getCurrentGeminiMdFilename,
@@ -26,19 +27,6 @@ const logger = {
console.error('[ERROR] [MemoryDiscovery]', ...args),
};
-// TODO(adh): Refactor to use a shared ignore list with other tools like glob and read-many-files.
-const DEFAULT_IGNORE_DIRECTORIES = [
- 'node_modules',
- '.git',
- 'dist',
- 'build',
- 'out',
- 'coverage',
- '.vscode',
- '.idea',
- '.DS_Store',
-];
-
const MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY = 200;
interface GeminiFileContent {
@@ -89,76 +77,6 @@ async function findProjectRoot(startDir: string): Promise<string | null> {
}
}
-async function collectDownwardGeminiFiles(
- directory: string,
- debugMode: boolean,
- ignoreDirs: string[],
- scannedDirCount: { count: number },
- maxScanDirs: number,
-): Promise<string[]> {
- if (scannedDirCount.count >= maxScanDirs) {
- if (debugMode)
- logger.debug(
- `Max directory scan limit (${maxScanDirs}) reached. Stopping downward scan at: ${directory}`,
- );
- return [];
- }
- scannedDirCount.count++;
-
- if (debugMode)
- logger.debug(
- `Scanning downward for ${getCurrentGeminiMdFilename()} files in: ${directory} (scanned: ${scannedDirCount.count}/${maxScanDirs})`,
- );
- const collectedPaths: string[] = [];
- try {
- const entries = await fs.readdir(directory, { withFileTypes: true });
- for (const entry of entries) {
- const fullPath = path.join(directory, entry.name);
- if (entry.isDirectory()) {
- if (ignoreDirs.includes(entry.name)) {
- if (debugMode)
- logger.debug(`Skipping ignored directory: ${fullPath}`);
- continue;
- }
- const subDirPaths = await collectDownwardGeminiFiles(
- fullPath,
- debugMode,
- ignoreDirs,
- scannedDirCount,
- maxScanDirs,
- );
- collectedPaths.push(...subDirPaths);
- } else if (
- entry.isFile() &&
- entry.name === getCurrentGeminiMdFilename()
- ) {
- try {
- await fs.access(fullPath, fsSync.constants.R_OK);
- collectedPaths.push(fullPath);
- if (debugMode)
- logger.debug(
- `Found readable downward ${getCurrentGeminiMdFilename()}: ${fullPath}`,
- );
- } catch {
- if (debugMode)
- logger.debug(
- `Downward ${getCurrentGeminiMdFilename()} not readable, skipping: ${fullPath}`,
- );
- }
- }
- }
- } catch (error) {
- // Only log warnings in non-test environments
- const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
- if (!isTestEnv) {
- const message = error instanceof Error ? error.message : String(error);
- logger.warn(`Error scanning directory ${directory}: ${message}`);
- }
- if (debugMode) logger.debug(`Failed to scan directory: ${directory}`);
- }
- return collectedPaths;
-}
-
async function getGeminiMdFilePathsInternal(
currentWorkingDirectory: string,
userHomePath: string,
@@ -256,20 +174,19 @@ async function getGeminiMdFilePathsInternal(
}
paths.push(...upwardPaths);
- if (debugMode)
- logger.debug(`Starting downward scan from CWD: ${resolvedCwd}`);
- const scannedDirCount = { count: 0 };
- const downwardPaths = await collectDownwardGeminiFiles(
- resolvedCwd,
- debugMode,
- DEFAULT_IGNORE_DIRECTORIES,
- scannedDirCount,
- MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
- );
+ const downwardPaths = await bfsFileSearch(resolvedCwd, {
+ fileName: getCurrentGeminiMdFilename(),
+ maxDirs: MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
+ debug: debugMode,
+ respectGitIgnore: true,
+ projectRoot: projectRoot || resolvedCwd,
+ });
downwardPaths.sort(); // Sort for consistent ordering, though hierarchy might be more complex
if (debugMode && downwardPaths.length > 0)
logger.debug(
- `Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(downwardPaths)}`,
+ `Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(
+ downwardPaths,
+ )}`,
);
// Add downward paths only if they haven't been included already (e.g. from upward scan)
for (const dPath of downwardPaths) {