author    Bryant Chandler <[email protected]>  2025-08-18 13:43:24 -0700
committer GitHub <[email protected]>  2025-08-18 20:43:24 +0000
commit    465ac9f547d0d684439886d1466c1a1133da611d
tree      b94f00730118784b5b07800db71224816b444bfe
parent    d66ddcd82e09d7b6fbc0226e31d73d38db5cff2a
feat(filesearch): Introduce non-recursive file search strategy (#6087)
Co-authored-by: Jacob Richman <[email protected]>
Co-authored-by: Bryant Chandler <[email protected]>
Diffstat (limited to 'packages/core/src')
-rw-r--r--  packages/core/src/utils/filesearch/crawler.test.ts     573
-rw-r--r--  packages/core/src/utils/filesearch/crawler.ts            85
-rw-r--r--  packages/core/src/utils/filesearch/fileSearch.test.ts   345
-rw-r--r--  packages/core/src/utils/filesearch/fileSearch.ts        217
-rw-r--r--  packages/core/src/utils/filesearch/ignore.test.ts        99
-rw-r--r--  packages/core/src/utils/filesearch/ignore.ts             38
6 files changed, 968 insertions(+), 389 deletions(-)
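
At a high level, this commit introduces a FileSearchFactory that chooses between two strategies: the existing recursive crawl-and-fuzzy-match engine, and the new non-recursive, per-directory search. A minimal usage sketch, based on the APIs added below; the project root and import path are hypothetical, while the option names mirror the new FileSearchOptions interface:

// Import path is illustrative; the tests below import from './fileSearch.js'.
import { FileSearchFactory } from './fileSearch.js';

const fileSearch = FileSearchFactory.create({
  projectRoot: '/path/to/project', // hypothetical root
  useGitignore: true,
  useGeminiignore: true,
  ignoreDirs: [],
  cache: false,
  cacheTtl: 0,
  enableRecursiveFileSearch: false, // opt into the new non-recursive strategy
});
await fileSearch.initialize();
const results = await fileSearch.search('src/*.js');
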
diff --git a/packages/core/src/utils/filesearch/crawler.test.ts b/packages/core/src/utils/filesearch/crawler.test.ts
new file mode 100644
index 00000000..baa4d19a
--- /dev/null
+++ b/packages/core/src/utils/filesearch/crawler.test.ts
@@ -0,0 +1,573 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, afterEach, vi, beforeEach } from 'vitest';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import * as cache from './crawlCache.js';
+import { crawl } from './crawler.js';
+import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils';
+import { Ignore, loadIgnoreRules } from './ignore.js';
+
+describe('crawler', () => {
+ let tmpDir: string;
+ afterEach(async () => {
+ if (tmpDir) {
+ await cleanupTmpDir(tmpDir);
+ }
+ vi.restoreAllMocks();
+ });
+
+ it('should use .geminiignore rules', async () => {
+ tmpDir = await createTmpDir({
+ '.geminiignore': 'dist/',
+ dist: ['ignored.js'],
+ src: ['not-ignored.js'],
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: true,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining([
+ '.',
+ 'src/',
+ '.geminiignore',
+ 'src/not-ignored.js',
+ ]),
+ );
+ });
+
+ it('should combine .gitignore and .geminiignore rules', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': 'dist/',
+ '.geminiignore': 'build/',
+ dist: ['ignored-by-git.js'],
+ build: ['ignored-by-gemini.js'],
+ src: ['not-ignored.js'],
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: true,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining([
+ '.',
+ 'src/',
+ '.geminiignore',
+ '.gitignore',
+ 'src/not-ignored.js',
+ ]),
+ );
+ });
+
+ it('should use ignoreDirs option', async () => {
+ tmpDir = await createTmpDir({
+ logs: ['some.log'],
+ src: ['main.js'],
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: ['logs'],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining(['.', 'src/', 'src/main.js']),
+ );
+ });
+
+ it('should handle negated directories', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': ['build/**', '!build/public', '!build/public/**'].join(
+ '\n',
+ ),
+ build: {
+ 'private.js': '',
+ public: ['index.html'],
+ },
+ src: ['main.js'],
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining([
+ '.',
+ 'build/',
+ 'build/public/',
+ 'src/',
+ '.gitignore',
+ 'build/public/index.html',
+ 'src/main.js',
+ ]),
+ );
+ });
+
+ it('should handle root-level file negation', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': ['*.mk', '!Foo.mk'].join('\n'),
+ 'bar.mk': '',
+ 'Foo.mk': '',
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining(['.', '.gitignore', 'Foo.mk', 'bar.mk']),
+ );
+ });
+
+ it('should handle directory negation with glob', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': [
+ 'third_party/**',
+ '!third_party/foo',
+ '!third_party/foo/bar',
+ '!third_party/foo/bar/baz_buffer',
+ ].join('\n'),
+ third_party: {
+ foo: {
+ bar: {
+ baz_buffer: '',
+ },
+ },
+ ignore_this: '',
+ },
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining([
+ '.',
+ 'third_party/',
+ 'third_party/foo/',
+ 'third_party/foo/bar/',
+ '.gitignore',
+ 'third_party/foo/bar/baz_buffer',
+ ]),
+ );
+ });
+
+ it('should correctly handle negated patterns in .gitignore', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': ['dist/**', '!dist/keep.js'].join('\n'),
+ dist: ['ignore.js', 'keep.js'],
+ src: ['main.js'],
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining([
+ '.',
+ 'dist/',
+ 'src/',
+ '.gitignore',
+ 'dist/keep.js',
+ 'src/main.js',
+ ]),
+ );
+ });
+
+ it('should initialize correctly when ignore files are missing', async () => {
+ tmpDir = await createTmpDir({
+ src: ['file1.js'],
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: true,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+ expect(results).toEqual(
+ expect.arrayContaining(['.', 'src/', 'src/file1.js']),
+ );
+ });
+
+ it('should handle empty or commented-only ignore files', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': '# This is a comment\n\n \n',
+ src: ['main.js'],
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining(['.', 'src/', '.gitignore', 'src/main.js']),
+ );
+ });
+
+ it('should always ignore the .git directory', async () => {
+ tmpDir = await createTmpDir({
+ '.git': ['config', 'HEAD'],
+ src: ['main.js'],
+ });
+
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+
+ const results = await crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ });
+
+ expect(results).toEqual(
+ expect.arrayContaining(['.', 'src/', 'src/main.js']),
+ );
+ });
+
+ describe('with in-memory cache', () => {
+ beforeEach(() => {
+ cache.clear();
+ vi.useFakeTimers();
+ });
+
+ afterEach(() => {
+ vi.useRealTimers();
+ });
+
+ it('should hit the cache for subsequent crawls', async () => {
+ tmpDir = await createTmpDir({ 'file1.js': '' });
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+ const options = {
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: true,
+ cacheTtl: 10,
+ };
+
+ const crawlSpy = vi.spyOn(cache, 'read');
+
+ await crawl(options);
+ expect(crawlSpy).toHaveBeenCalledTimes(1);
+
+ await crawl(options);
+ expect(crawlSpy).toHaveBeenCalledTimes(2);
+ // fdir should not have been called a second time.
+ // We can't spy on it directly, but we can check the cache was hit.
+ const cacheKey = cache.getCacheKey(
+ options.crawlDirectory,
+ options.ignore.getFingerprint(),
+ undefined,
+ );
+ expect(cache.read(cacheKey)).toBeDefined();
+ });
+
+ it('should miss the cache when ignore rules change', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': 'a.txt',
+ 'a.txt': '',
+ 'b.txt': '',
+ });
+ const getIgnore = () =>
+ loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+ const getOptions = (ignore: Ignore) => ({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: true,
+ cacheTtl: 10000,
+ });
+
+ // Initial crawl to populate the cache
+ const ignore1 = getIgnore();
+ const results1 = await crawl(getOptions(ignore1));
+ expect(results1).toEqual(
+ expect.arrayContaining(['.', '.gitignore', 'b.txt']),
+ );
+
+ // Modify the ignore file
+ await fs.writeFile(path.join(tmpDir, '.gitignore'), 'b.txt');
+
+ // Second crawl should miss the cache and trigger a recrawl
+ const ignore2 = getIgnore();
+ const results2 = await crawl(getOptions(ignore2));
+ expect(results2).toEqual(
+ expect.arrayContaining(['.', '.gitignore', 'a.txt']),
+ );
+ });
+
+ it('should miss the cache after TTL expires', async () => {
+ tmpDir = await createTmpDir({ 'file1.js': '' });
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+ const options = {
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: true,
+ cacheTtl: 10, // 10 seconds
+ };
+
+ const readSpy = vi.spyOn(cache, 'read');
+ const writeSpy = vi.spyOn(cache, 'write');
+
+ await crawl(options);
+ expect(readSpy).toHaveBeenCalledTimes(1);
+ expect(writeSpy).toHaveBeenCalledTimes(1);
+
+ // Advance time past the TTL
+ await vi.advanceTimersByTimeAsync(11000);
+
+ await crawl(options);
+ expect(readSpy).toHaveBeenCalledTimes(2);
+ expect(writeSpy).toHaveBeenCalledTimes(2);
+ });
+
+ it('should miss the cache when maxDepth changes', async () => {
+ tmpDir = await createTmpDir({ 'file1.js': '' });
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+ const getOptions = (maxDepth?: number) => ({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: true,
+ cacheTtl: 10000,
+ maxDepth,
+ });
+
+ const readSpy = vi.spyOn(cache, 'read');
+ const writeSpy = vi.spyOn(cache, 'write');
+
+ // 1. First crawl with maxDepth: 1
+ await crawl(getOptions(1));
+ expect(readSpy).toHaveBeenCalledTimes(1);
+ expect(writeSpy).toHaveBeenCalledTimes(1);
+
+ // 2. Second crawl with maxDepth: 2, should be a cache miss
+ await crawl(getOptions(2));
+ expect(readSpy).toHaveBeenCalledTimes(2);
+ expect(writeSpy).toHaveBeenCalledTimes(2);
+
+ // 3. Third crawl with maxDepth: 1 again, should be a cache hit.
+ await crawl(getOptions(1));
+ expect(readSpy).toHaveBeenCalledTimes(3);
+ expect(writeSpy).toHaveBeenCalledTimes(2); // No new write
+ });
+ });
+
+ describe('with maxDepth', () => {
+ beforeEach(async () => {
+ tmpDir = await createTmpDir({
+ 'file-root.txt': '',
+ level1: {
+ 'file-level1.txt': '',
+ level2: {
+ 'file-level2.txt': '',
+ level3: {
+ 'file-level3.txt': '',
+ },
+ },
+ },
+ });
+ });
+
+ const getCrawlResults = (maxDepth?: number) => {
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+ return crawl({
+ crawlDirectory: tmpDir,
+ cwd: tmpDir,
+ ignore,
+ cache: false,
+ cacheTtl: 0,
+ maxDepth,
+ });
+ };
+
+ it('should only crawl top-level files when maxDepth is 0', async () => {
+ const results = await getCrawlResults(0);
+ expect(results).toEqual(
+ expect.arrayContaining(['.', 'level1/', 'file-root.txt']),
+ );
+ });
+
+ it('should crawl one level deep when maxDepth is 1', async () => {
+ const results = await getCrawlResults(1);
+ expect(results).toEqual(
+ expect.arrayContaining([
+ '.',
+ 'level1/',
+ 'level1/level2/',
+ 'file-root.txt',
+ 'level1/file-level1.txt',
+ ]),
+ );
+ });
+
+ it('should crawl two levels deep when maxDepth is 2', async () => {
+ const results = await getCrawlResults(2);
+ expect(results).toEqual(
+ expect.arrayContaining([
+ '.',
+ 'level1/',
+ 'level1/level2/',
+ 'level1/level2/level3/',
+ 'file-root.txt',
+ 'level1/file-level1.txt',
+ 'level1/level2/file-level2.txt',
+ ]),
+ );
+ });
+
+ it('should perform a full recursive crawl when maxDepth is undefined', async () => {
+ const results = await getCrawlResults(undefined);
+ expect(results).toEqual(
+ expect.arrayContaining([
+ '.',
+ 'level1/',
+ 'level1/level2/',
+ 'level1/level2/level3/',
+ 'file-root.txt',
+ 'level1/file-level1.txt',
+ 'level1/level2/file-level2.txt',
+ 'level1/level2/level3/file-level3.txt',
+ ]),
+ );
+ });
+ });
+});
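
The caching tests above hinge on how the cache key is derived: it folds together the crawl directory, the ignore-rule fingerprint, and maxDepth, so changing any one of them forces a recrawl. A sketch of that relationship using only the crawlCache and ignore calls that appear in this diff; the paths are hypothetical, and per crawler.ts the TTL passed to write() is in milliseconds:

import * as cache from './crawlCache.js';
import { loadIgnoreRules } from './ignore.js';

const ignore = loadIgnoreRules({
  projectRoot: '/path/to/project', // hypothetical
  useGitignore: true,
  useGeminiignore: false,
  ignoreDirs: [],
});

// Same directory + same fingerprint + same maxDepth => same key.
const key = cache.getCacheKey(
  '/path/to/project',
  ignore.getFingerprint(),
  undefined, // maxDepth; a different value yields a different key
);

const hit = cache.read(key); // undefined on a miss
if (!hit) {
  cache.write(key, ['.', 'src/', 'src/main.js'], 10 * 1000); // TTL in ms
}
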
diff --git a/packages/core/src/utils/filesearch/crawler.ts b/packages/core/src/utils/filesearch/crawler.ts
new file mode 100644
index 00000000..7e422b06
--- /dev/null
+++ b/packages/core/src/utils/filesearch/crawler.ts
@@ -0,0 +1,85 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import path from 'node:path';
+import { fdir } from 'fdir';
+import { Ignore } from './ignore.js';
+import * as cache from './crawlCache.js';
+
+export interface CrawlOptions {
+ // The directory to start the crawl from.
+ crawlDirectory: string;
+ // The project's root directory, for path relativity.
+ cwd: string;
+ // The fdir maxDepth option.
+ maxDepth?: number;
+ // A pre-configured Ignore instance.
+ ignore: Ignore;
+ // Caching options.
+ cache: boolean;
+ cacheTtl: number;
+}
+
+function toPosixPath(p: string) {
+ return p.split(path.sep).join(path.posix.sep);
+}
+
+export async function crawl(options: CrawlOptions): Promise<string[]> {
+ if (options.cache) {
+ const cacheKey = cache.getCacheKey(
+ options.crawlDirectory,
+ options.ignore.getFingerprint(),
+ options.maxDepth,
+ );
+ const cachedResults = cache.read(cacheKey);
+
+ if (cachedResults) {
+ return cachedResults;
+ }
+ }
+
+ const posixCwd = toPosixPath(options.cwd);
+ const posixCrawlDirectory = toPosixPath(options.crawlDirectory);
+
+ let results: string[];
+ try {
+ const dirFilter = options.ignore.getDirectoryFilter();
+ const api = new fdir()
+ .withRelativePaths()
+ .withDirs()
+ .withPathSeparator('/') // Always use unix style paths
+ .exclude((_, dirPath) => {
+ const relativePath = path.posix.relative(posixCrawlDirectory, dirPath);
+ return dirFilter(`${relativePath}/`);
+ });
+
+ if (options.maxDepth !== undefined) {
+ api.withMaxDepth(options.maxDepth);
+ }
+
+ results = await api.crawl(options.crawlDirectory).withPromise();
+ } catch (_e) {
+ // The directory probably doesn't exist.
+ return [];
+ }
+
+ const relativeToCrawlDir = path.posix.relative(posixCwd, posixCrawlDirectory);
+
+ const relativeToCwdResults = results.map((p) =>
+ path.posix.join(relativeToCrawlDir, p),
+ );
+
+ if (options.cache) {
+ const cacheKey = cache.getCacheKey(
+ options.crawlDirectory,
+ options.ignore.getFingerprint(),
+ options.maxDepth,
+ );
+ cache.write(cacheKey, relativeToCwdResults, options.cacheTtl * 1000);
+ }
+
+ return relativeToCwdResults;
+}
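
Note how crawl() rebases its results: fdir returns paths relative to crawlDirectory, and the path.posix.join() map shifts them to be relative to cwd, always with POSIX separators. This is what later lets DirectoryFileSearch crawl a subdirectory while still reporting project-relative paths. A worked sketch of just the rebasing step, with hypothetical directory names:

import path from 'node:path';

const posixCwd = '/repo';
const posixCrawlDirectory = '/repo/src';

// What fdir might yield, relative to the crawl root:
const raw = ['.', 'utils/', 'utils/a.ts'];

const relativeToCrawlDir = path.posix.relative(posixCwd, posixCrawlDirectory); // 'src'
const rebased = raw.map((p) => path.posix.join(relativeToCrawlDir, p));
// => ['src', 'src/utils/', 'src/utils/a.ts'], relative to cwd
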
diff --git a/packages/core/src/utils/filesearch/fileSearch.test.ts b/packages/core/src/utils/filesearch/fileSearch.test.ts
index 38657492..2deea82d 100644
--- a/packages/core/src/utils/filesearch/fileSearch.test.ts
+++ b/packages/core/src/utils/filesearch/fileSearch.test.ts
@@ -4,17 +4,10 @@
* SPDX-License-Identifier: Apache-2.0
*/
-import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
-import * as fs from 'fs/promises';
-import * as path from 'path';
-import * as cache from './crawlCache.js';
-import { FileSearch, AbortError, filter } from './fileSearch.js';
+import { describe, it, expect, afterEach, vi } from 'vitest';
+import { FileSearchFactory, AbortError, filter } from './fileSearch.js';
import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils';
-type FileSearchWithPrivateMethods = FileSearch & {
- performCrawl: () => Promise<void>;
-};
-
describe('FileSearch', () => {
let tmpDir: string;
afterEach(async () => {
@@ -31,13 +24,14 @@ describe('FileSearch', () => {
src: ['not-ignored.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: true,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -55,13 +49,14 @@ describe('FileSearch', () => {
src: ['not-ignored.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: true,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -81,13 +76,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: ['logs'],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -108,13 +104,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -139,13 +136,14 @@ describe('FileSearch', () => {
},
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -161,13 +159,14 @@ describe('FileSearch', () => {
'Foo.mk': '',
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -194,13 +193,14 @@ describe('FileSearch', () => {
},
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -222,13 +222,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -250,13 +251,14 @@ describe('FileSearch', () => {
src: ['file1.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: true,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
// Expect no errors to be thrown during initialization
@@ -275,13 +277,14 @@ describe('FileSearch', () => {
},
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -299,13 +302,14 @@ describe('FileSearch', () => {
},
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -319,13 +323,14 @@ describe('FileSearch', () => {
src: ['file1.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -346,170 +351,21 @@ describe('FileSearch', () => {
await expect(filterPromise).rejects.toThrow(AbortError);
});
- describe('with in-memory cache', () => {
- beforeEach(() => {
- cache.clear();
- });
-
- afterEach(() => {
- vi.useRealTimers();
- });
-
- it('should throw an error if search is called before initialization', async () => {
- tmpDir = await createTmpDir({});
- const fileSearch = new FileSearch({
- projectRoot: tmpDir,
- useGitignore: false,
- useGeminiignore: false,
- ignoreDirs: [],
- cache: false,
- cacheTtl: 0,
- });
-
- await expect(fileSearch.search('')).rejects.toThrow(
- 'Engine not initialized. Call initialize() first.',
- );
- });
-
- it('should hit the cache for subsequent searches', async () => {
- tmpDir = await createTmpDir({ 'file1.js': '' });
- const getOptions = () => ({
- projectRoot: tmpDir,
- useGitignore: false,
- useGeminiignore: false,
- ignoreDirs: [],
- cache: true,
- cacheTtl: 10,
- });
-
- const fs1 = new FileSearch(getOptions());
- const crawlSpy1 = vi.spyOn(
- fs1 as FileSearchWithPrivateMethods,
- 'performCrawl',
- );
- await fs1.initialize();
- expect(crawlSpy1).toHaveBeenCalledTimes(1);
-
- // Second search should hit the cache because the options are identical
- const fs2 = new FileSearch(getOptions());
- const crawlSpy2 = vi.spyOn(
- fs2 as FileSearchWithPrivateMethods,
- 'performCrawl',
- );
- await fs2.initialize();
- expect(crawlSpy2).not.toHaveBeenCalled();
- });
-
- it('should miss the cache when ignore rules change', async () => {
- tmpDir = await createTmpDir({
- '.gitignore': 'a.txt',
- 'a.txt': '',
- 'b.txt': '',
- });
- const options = {
- projectRoot: tmpDir,
- useGitignore: true,
- useGeminiignore: false,
- ignoreDirs: [],
- cache: true,
- cacheTtl: 10000,
- };
-
- // Initial search to populate the cache
- const fs1 = new FileSearch(options);
- const crawlSpy1 = vi.spyOn(
- fs1 as FileSearchWithPrivateMethods,
- 'performCrawl',
- );
- await fs1.initialize();
- const results1 = await fs1.search('');
- expect(crawlSpy1).toHaveBeenCalledTimes(1);
- expect(results1).toEqual(['.gitignore', 'b.txt']);
-
- // Modify the ignore file
- await fs.writeFile(path.join(tmpDir, '.gitignore'), 'b.txt');
-
- // Second search should miss the cache and trigger a recrawl
- const fs2 = new FileSearch(options);
- const crawlSpy2 = vi.spyOn(
- fs2 as FileSearchWithPrivateMethods,
- 'performCrawl',
- );
- await fs2.initialize();
- const results2 = await fs2.search('');
- expect(crawlSpy2).toHaveBeenCalledTimes(1);
- expect(results2).toEqual(['.gitignore', 'a.txt']);
- });
-
- it('should miss the cache after TTL expires', async () => {
- vi.useFakeTimers();
- tmpDir = await createTmpDir({ 'file1.js': '' });
- const options = {
- projectRoot: tmpDir,
- useGitignore: false,
- useGeminiignore: false,
- ignoreDirs: [],
- cache: true,
- cacheTtl: 10, // 10 seconds
- };
-
- // Initial search to populate the cache
- const fs1 = new FileSearch(options);
- await fs1.initialize();
-
- // Advance time past the TTL
- await vi.advanceTimersByTimeAsync(11000);
-
- // Second search should miss the cache and trigger a recrawl
- const fs2 = new FileSearch(options);
- const crawlSpy = vi.spyOn(
- fs2 as FileSearchWithPrivateMethods,
- 'performCrawl',
- );
- await fs2.initialize();
-
- expect(crawlSpy).toHaveBeenCalledTimes(1);
+ it('should throw an error if search is called before initialization', async () => {
+ tmpDir = await createTmpDir({});
+ const fileSearch = FileSearchFactory.create({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ cache: false,
+ cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
- it('should miss the cache when maxDepth changes', async () => {
- tmpDir = await createTmpDir({ 'file1.js': '' });
- const getOptions = (maxDepth?: number) => ({
- projectRoot: tmpDir,
- useGitignore: false,
- useGeminiignore: false,
- ignoreDirs: [],
- cache: true,
- cacheTtl: 10000,
- maxDepth,
- });
-
- // 1. First search with maxDepth: 1, should trigger a crawl.
- const fs1 = new FileSearch(getOptions(1));
- const crawlSpy1 = vi.spyOn(
- fs1 as FileSearchWithPrivateMethods,
- 'performCrawl',
- );
- await fs1.initialize();
- expect(crawlSpy1).toHaveBeenCalledTimes(1);
-
- // 2. Second search with maxDepth: 2, should be a cache miss and trigger a crawl.
- const fs2 = new FileSearch(getOptions(2));
- const crawlSpy2 = vi.spyOn(
- fs2 as FileSearchWithPrivateMethods,
- 'performCrawl',
- );
- await fs2.initialize();
- expect(crawlSpy2).toHaveBeenCalledTimes(1);
-
- // 3. Third search with maxDepth: 1 again, should be a cache hit.
- const fs3 = new FileSearch(getOptions(1));
- const crawlSpy3 = vi.spyOn(
- fs3 as FileSearchWithPrivateMethods,
- 'performCrawl',
- );
- await fs3.initialize();
- expect(crawlSpy3).not.toHaveBeenCalled();
- });
+ await expect(fileSearch.search('')).rejects.toThrow(
+ 'Engine not initialized. Call initialize() first.',
+ );
});
it('should handle empty or commented-only ignore files', async () => {
@@ -518,13 +374,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -539,13 +396,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false, // Explicitly disable .gitignore to isolate this rule
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -561,13 +419,14 @@ describe('FileSearch', () => {
}
tmpDir = await createTmpDir(largeDir);
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -596,13 +455,14 @@ describe('FileSearch', () => {
},
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: true, // Enable caching for this test
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -634,13 +494,14 @@ describe('FileSearch', () => {
'other.txt': '',
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -676,13 +537,14 @@ describe('FileSearch', () => {
'file5.js': '',
});
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: true, // Ensure caching is enabled
cacheTtl: 10000,
+ enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -704,108 +566,97 @@ describe('FileSearch', () => {
expect(limitedResults).toEqual(['file1.js', 'file2.js']);
});
- describe('with maxDepth', () => {
- beforeEach(async () => {
+ describe('DirectoryFileSearch', () => {
+ it('should search for files in the current directory', async () => {
tmpDir = await createTmpDir({
- 'file-root.txt': '',
- level1: {
- 'file-level1.txt': '',
- level2: {
- 'file-level2.txt': '',
- level3: {
- 'file-level3.txt': '',
- },
- },
- },
+ 'file1.js': '',
+ 'file2.ts': '',
+ 'file3.js': '',
});
- });
- it('should only search top-level files when maxDepth is 0', async () => {
- const fileSearch = new FileSearch({
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
- maxDepth: 0,
+ enableRecursiveFileSearch: false,
});
await fileSearch.initialize();
- const results = await fileSearch.search('');
-
- expect(results).toEqual(['level1/', 'file-root.txt']);
+ const results = await fileSearch.search('*.js');
+ expect(results).toEqual(['file1.js', 'file3.js']);
});
- it('should search one level deep when maxDepth is 1', async () => {
- const fileSearch = new FileSearch({
+ it('should search for files in a subdirectory', async () => {
+ tmpDir = await createTmpDir({
+ 'file1.js': '',
+ src: {
+ 'file2.js': '',
+ 'file3.ts': '',
+ },
+ });
+
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
- maxDepth: 1,
+ enableRecursiveFileSearch: false,
});
await fileSearch.initialize();
- const results = await fileSearch.search('');
-
- expect(results).toEqual([
- 'level1/',
- 'level1/level2/',
- 'file-root.txt',
- 'level1/file-level1.txt',
- ]);
+ const results = await fileSearch.search('src/*.js');
+ expect(results).toEqual(['src/file2.js']);
});
- it('should search two levels deep when maxDepth is 2', async () => {
- const fileSearch = new FileSearch({
+ it('should list all files in a directory', async () => {
+ tmpDir = await createTmpDir({
+ 'file1.js': '',
+ src: {
+ 'file2.js': '',
+ 'file3.ts': '',
+ },
+ });
+
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
- maxDepth: 2,
+ enableRecursiveFileSearch: false,
});
await fileSearch.initialize();
- const results = await fileSearch.search('');
-
- expect(results).toEqual([
- 'level1/',
- 'level1/level2/',
- 'level1/level2/level3/',
- 'file-root.txt',
- 'level1/file-level1.txt',
- 'level1/level2/file-level2.txt',
- ]);
+ const results = await fileSearch.search('src/');
+ expect(results).toEqual(['src/file2.js', 'src/file3.ts']);
});
- it('should perform a full recursive search when maxDepth is undefined', async () => {
- const fileSearch = new FileSearch({
+ it('should respect ignore rules', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': '*.js',
+ 'file1.js': '',
+ 'file2.ts': '',
+ });
+
+ const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
- useGitignore: false,
+ useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
- maxDepth: undefined, // Explicitly undefined
+ enableRecursiveFileSearch: false,
});
await fileSearch.initialize();
- const results = await fileSearch.search('');
-
- expect(results).toEqual([
- 'level1/',
- 'level1/level2/',
- 'level1/level2/level3/',
- 'file-root.txt',
- 'level1/file-level1.txt',
- 'level1/level2/file-level2.txt',
- 'level1/level2/level3/file-level3.txt',
- ]);
+ const results = await fileSearch.search('*');
+ expect(results).toEqual(['.gitignore', 'file2.ts']);
});
});
});
diff --git a/packages/core/src/utils/filesearch/fileSearch.ts b/packages/core/src/utils/filesearch/fileSearch.ts
index dff8d0ec..fa36dab4 100644
--- a/packages/core/src/utils/filesearch/fileSearch.ts
+++ b/packages/core/src/utils/filesearch/fileSearch.ts
@@ -5,23 +5,22 @@
*/
import path from 'node:path';
-import fs from 'node:fs';
-import { fdir } from 'fdir';
import picomatch from 'picomatch';
-import { Ignore } from './ignore.js';
+import { Ignore, loadIgnoreRules } from './ignore.js';
import { ResultCache } from './result-cache.js';
-import * as cache from './crawlCache.js';
+import { crawl } from './crawler.js';
import { AsyncFzf, FzfResultItem } from 'fzf';
-export type FileSearchOptions = {
+export interface FileSearchOptions {
projectRoot: string;
ignoreDirs: string[];
useGitignore: boolean;
useGeminiignore: boolean;
cache: boolean;
cacheTtl: number;
+ enableRecursiveFileSearch: boolean;
maxDepth?: number;
-};
+}
export class AbortError extends Error {
constructor(message = 'Search aborted') {
@@ -78,54 +77,42 @@ export async function filter(
return results;
}
-export type SearchOptions = {
+export interface SearchOptions {
signal?: AbortSignal;
maxResults?: number;
-};
+}
-/**
- * Provides a fast and efficient way to search for files within a project,
- * respecting .gitignore and .geminiignore rules, and utilizing caching
- * for improved performance.
- */
-export class FileSearch {
- private readonly absoluteDir: string;
- private readonly ignore: Ignore = new Ignore();
+export interface FileSearch {
+ initialize(): Promise<void>;
+ search(pattern: string, options?: SearchOptions): Promise<string[]>;
+}
+
+class RecursiveFileSearch implements FileSearch {
+ private ignore: Ignore | undefined;
private resultCache: ResultCache | undefined;
private allFiles: string[] = [];
private fzf: AsyncFzf<string[]> | undefined;
- /**
- * Constructs a new `FileSearch` instance.
- * @param options Configuration options for the file search.
- */
- constructor(private readonly options: FileSearchOptions) {
- this.absoluteDir = path.resolve(options.projectRoot);
- }
+ constructor(private readonly options: FileSearchOptions) {}
- /**
- * Initializes the file search engine by loading ignore rules, crawling the
- * file system, and building the in-memory cache. This method must be called
- * before performing any searches.
- */
async initialize(): Promise<void> {
- this.loadIgnoreRules();
- await this.crawlFiles();
+ this.ignore = loadIgnoreRules(this.options);
+ this.allFiles = await crawl({
+ crawlDirectory: this.options.projectRoot,
+ cwd: this.options.projectRoot,
+ ignore: this.ignore,
+ cache: this.options.cache,
+ cacheTtl: this.options.cacheTtl,
+ maxDepth: this.options.maxDepth,
+ });
this.buildResultCache();
}
- /**
- * Searches for files matching a given pattern.
- * @param pattern The picomatch pattern to search for (e.g., '*.js', 'src/**').
- * @param options Search options, including an AbortSignal and maxResults.
- * @returns A promise that resolves to a list of matching file paths, relative
- * to the project root.
- */
async search(
pattern: string,
options: SearchOptions = {},
): Promise<string[]> {
- if (!this.resultCache || !this.fzf) {
+ if (!this.resultCache || !this.fzf || !this.ignore) {
throw new Error('Engine not initialized. Call initialize() first.');
}
@@ -159,21 +146,9 @@ export class FileSearch {
}
}
- // Trade-off: We apply a two-stage filtering process.
- // 1. During the file system crawl (`performCrawl`), we only apply directory-level
- // ignore rules (e.g., `node_modules/`, `dist/`). This is because applying
- // a full ignore filter (which includes file-specific patterns like `*.log`)
- // during the crawl can significantly slow down `fdir`.
- // 2. Here, in the `search` method, we apply the full ignore filter
- // (including file patterns) to the `filteredCandidates` (which have already
- // been filtered by the user's search pattern and sorted). For autocomplete,
- // the number of displayed results is small (MAX_SUGGESTIONS_TO_SHOW),
- // so applying the full filter to this truncated list is much more efficient
- // than applying it to every file during the initial crawl.
const fileFilter = this.ignore.getFileFilter();
const results: string[] = [];
for (const [i, candidate] of filteredCandidates.entries()) {
- // Yield to the event loop to avoid blocking on large result sets.
if (i % 1000 === 0) {
await new Promise((resolve) => setImmediate(resolve));
if (options.signal?.aborted) {
@@ -184,7 +159,6 @@ export class FileSearch {
if (results.length >= (options.maxResults ?? Infinity)) {
break;
}
- // The `ignore` library throws an error if the path is '.', so we skip it.
if (candidate === '.') {
continue;
}
@@ -195,106 +169,69 @@ export class FileSearch {
return results;
}
- /**
- * Loads ignore rules from .gitignore and .geminiignore files, and applies
- * any additional ignore directories specified in the options.
- */
- private loadIgnoreRules(): void {
- if (this.options.useGitignore) {
- const gitignorePath = path.join(this.absoluteDir, '.gitignore');
- if (fs.existsSync(gitignorePath)) {
- this.ignore.add(fs.readFileSync(gitignorePath, 'utf8'));
- }
- }
-
- if (this.options.useGeminiignore) {
- const geminiignorePath = path.join(this.absoluteDir, '.geminiignore');
- if (fs.existsSync(geminiignorePath)) {
- this.ignore.add(fs.readFileSync(geminiignorePath, 'utf8'));
- }
- }
-
- const ignoreDirs = ['.git', ...this.options.ignoreDirs];
- this.ignore.add(
- ignoreDirs.map((dir) => {
- if (dir.endsWith('/')) {
- return dir;
- }
- return `${dir}/`;
- }),
- );
+ private buildResultCache(): void {
+ this.resultCache = new ResultCache(this.allFiles);
+ // The v1 algorithm is much faster since it only looks at the first
+    // occurrence of the pattern. We use it for search spaces that have >20k
+ // files, because the v2 algorithm is just too slow in those cases.
+ this.fzf = new AsyncFzf(this.allFiles, {
+ fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
+ });
}
+}
- /**
- * Crawls the file system to get a list of all files and directories,
- * optionally using a cache for faster initialization.
- */
- private async crawlFiles(): Promise<void> {
- if (this.options.cache) {
- const cacheKey = cache.getCacheKey(
- this.absoluteDir,
- this.ignore.getFingerprint(),
- this.options.maxDepth,
- );
- const cachedResults = cache.read(cacheKey);
+class DirectoryFileSearch implements FileSearch {
+ private ignore: Ignore | undefined;
- if (cachedResults) {
- this.allFiles = cachedResults;
- return;
- }
- }
+ constructor(private readonly options: FileSearchOptions) {}
- this.allFiles = await this.performCrawl();
+ async initialize(): Promise<void> {
+ this.ignore = loadIgnoreRules(this.options);
+ }
- if (this.options.cache) {
- const cacheKey = cache.getCacheKey(
- this.absoluteDir,
- this.ignore.getFingerprint(),
- this.options.maxDepth,
- );
- cache.write(cacheKey, this.allFiles, this.options.cacheTtl * 1000);
+ async search(
+ pattern: string,
+ options: SearchOptions = {},
+ ): Promise<string[]> {
+ if (!this.ignore) {
+ throw new Error('Engine not initialized. Call initialize() first.');
}
- }
+ pattern = pattern || '*';
- /**
- * Performs the actual file system crawl using `fdir`, applying directory
- * ignore rules.
- * @returns A promise that resolves to a list of all files and directories.
- */
- private async performCrawl(): Promise<string[]> {
- const dirFilter = this.ignore.getDirectoryFilter();
+ const dir = pattern.endsWith('/') ? pattern : path.dirname(pattern);
+ const results = await crawl({
+ crawlDirectory: path.join(this.options.projectRoot, dir),
+ cwd: this.options.projectRoot,
+ maxDepth: 0,
+ ignore: this.ignore,
+ cache: this.options.cache,
+ cacheTtl: this.options.cacheTtl,
+ });
- // We use `fdir` for fast file system traversal. A key performance
- // optimization for large workspaces is to exclude entire directories
- // early in the traversal process. This is why we apply directory-specific
- // ignore rules (e.g., `node_modules/`, `dist/`) directly to `fdir`'s
- // exclude filter.
- const api = new fdir()
- .withRelativePaths()
- .withDirs()
- .withPathSeparator('/') // Always use unix style paths
- .exclude((_, dirPath) => {
- const relativePath = path.relative(this.absoluteDir, dirPath);
- return dirFilter(`${relativePath}/`);
- });
+ const filteredResults = await filter(results, pattern, options.signal);
- if (this.options.maxDepth !== undefined) {
- api.withMaxDepth(this.options.maxDepth);
+ const fileFilter = this.ignore.getFileFilter();
+ const finalResults: string[] = [];
+ for (const candidate of filteredResults) {
+ if (finalResults.length >= (options.maxResults ?? Infinity)) {
+ break;
+ }
+ if (candidate === '.') {
+ continue;
+ }
+ if (!fileFilter(candidate)) {
+ finalResults.push(candidate);
+ }
}
-
- return api.crawl(this.absoluteDir).withPromise();
+ return finalResults;
}
+}
- /**
- * Builds the in-memory cache for fast pattern matching.
- */
- private buildResultCache(): void {
- this.resultCache = new ResultCache(this.allFiles);
- // The v1 algorithm is much faster since it only looks at the first
- // occurence of the pattern. We use it for search spaces that have >20k
- // files, because the v2 algorithm is just too slow in those cases.
- this.fzf = new AsyncFzf(this.allFiles, {
- fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
- });
+export class FileSearchFactory {
+ static create(options: FileSearchOptions): FileSearch {
+ if (options.enableRecursiveFileSearch) {
+ return new RecursiveFileSearch(options);
+ }
+ return new DirectoryFileSearch(options);
}
}
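
The behavioral split between the two strategies is concentrated in initialize() and search(). A hedged sketch of both modes side by side, assuming FileSearchFactory is imported as above and the project root is hypothetical:

const common = {
  projectRoot: '/path/to/project', // hypothetical
  useGitignore: true,
  useGeminiignore: false,
  ignoreDirs: [],
  cache: false,
  cacheTtl: 0,
};

// Recursive: initialize() crawls the whole tree once and builds an fzf index;
// search() then fuzzy-matches against the in-memory file list.
const recursive = FileSearchFactory.create({ ...common, enableRecursiveFileSearch: true });
await recursive.initialize();
await recursive.search('main');

// Non-recursive: initialize() only loads ignore rules; each search() performs a
// maxDepth: 0 crawl of the directory named by the pattern, then applies the glob.
const perDir = FileSearchFactory.create({ ...common, enableRecursiveFileSearch: false });
await perDir.initialize();
await perDir.search('src/*.js');
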
diff --git a/packages/core/src/utils/filesearch/ignore.test.ts b/packages/core/src/utils/filesearch/ignore.test.ts
index ff375e3f..f65ecd72 100644
--- a/packages/core/src/utils/filesearch/ignore.test.ts
+++ b/packages/core/src/utils/filesearch/ignore.test.ts
@@ -4,8 +4,9 @@
* SPDX-License-Identifier: Apache-2.0
*/
-import { describe, it, expect } from 'vitest';
-import { Ignore } from './ignore.js';
+import { describe, it, expect, afterEach } from 'vitest';
+import { Ignore, loadIgnoreRules } from './ignore.js';
+import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils';
describe('Ignore', () => {
describe('getDirectoryFilter', () => {
@@ -63,3 +64,97 @@ describe('Ignore', () => {
expect(ig1.getFingerprint()).not.toBe(ig2.getFingerprint());
});
});
+
+describe('loadIgnoreRules', () => {
+ let tmpDir: string;
+
+ afterEach(async () => {
+ if (tmpDir) {
+ await cleanupTmpDir(tmpDir);
+ }
+ });
+
+ it('should load rules from .gitignore', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': '*.log',
+ });
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+ const fileFilter = ignore.getFileFilter();
+ expect(fileFilter('test.log')).toBe(true);
+ expect(fileFilter('test.txt')).toBe(false);
+ });
+
+ it('should load rules from .geminiignore', async () => {
+ tmpDir = await createTmpDir({
+ '.geminiignore': '*.log',
+ });
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: true,
+ ignoreDirs: [],
+ });
+ const fileFilter = ignore.getFileFilter();
+ expect(fileFilter('test.log')).toBe(true);
+ expect(fileFilter('test.txt')).toBe(false);
+ });
+
+ it('should combine rules from .gitignore and .geminiignore', async () => {
+ tmpDir = await createTmpDir({
+ '.gitignore': '*.log',
+ '.geminiignore': '*.txt',
+ });
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: true,
+ ignoreDirs: [],
+ });
+ const fileFilter = ignore.getFileFilter();
+ expect(fileFilter('test.log')).toBe(true);
+ expect(fileFilter('test.txt')).toBe(true);
+ expect(fileFilter('test.md')).toBe(false);
+ });
+
+ it('should add ignoreDirs', async () => {
+ tmpDir = await createTmpDir({});
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: ['logs/'],
+ });
+ const dirFilter = ignore.getDirectoryFilter();
+ expect(dirFilter('logs/')).toBe(true);
+ expect(dirFilter('src/')).toBe(false);
+ });
+
+ it('should handle missing ignore files gracefully', async () => {
+ tmpDir = await createTmpDir({});
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: true,
+ useGeminiignore: true,
+ ignoreDirs: [],
+ });
+ const fileFilter = ignore.getFileFilter();
+ expect(fileFilter('anyfile.txt')).toBe(false);
+ });
+
+ it('should always add .git to the ignore list', async () => {
+ tmpDir = await createTmpDir({});
+ const ignore = loadIgnoreRules({
+ projectRoot: tmpDir,
+ useGitignore: false,
+ useGeminiignore: false,
+ ignoreDirs: [],
+ });
+ const dirFilter = ignore.getDirectoryFilter();
+ expect(dirFilter('.git/')).toBe(true);
+ });
+});
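
One contract worth calling out in these tests: directory paths passed to getDirectoryFilter() carry a trailing slash. loadIgnoreRules() normalizes every ignoreDirs entry to end in '/', and crawler.ts appends the slash itself before invoking the filter. A small sketch of the convention; the directories-only behavior follows standard gitignore semantics as implemented by the underlying ignore package:

const ignore = loadIgnoreRules({
  projectRoot: '/path/to/project', // hypothetical
  useGitignore: false,
  useGeminiignore: false,
  ignoreDirs: ['logs'], // normalized internally to 'logs/'
});
const dirFilter = ignore.getDirectoryFilter();

dirFilter('logs/'); // true: the trailing slash marks it as a directory
dirFilter('src/');  // false: not ignored
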
diff --git a/packages/core/src/utils/filesearch/ignore.ts b/packages/core/src/utils/filesearch/ignore.ts
index 9f756f93..a39066f5 100644
--- a/packages/core/src/utils/filesearch/ignore.ts
+++ b/packages/core/src/utils/filesearch/ignore.ts
@@ -4,11 +4,49 @@
* SPDX-License-Identifier: Apache-2.0
*/
+import fs from 'node:fs';
+import path from 'node:path';
import ignore from 'ignore';
import picomatch from 'picomatch';
const hasFileExtension = picomatch('**/*[*.]*');
+export interface LoadIgnoreRulesOptions {
+ projectRoot: string;
+ useGitignore: boolean;
+ useGeminiignore: boolean;
+ ignoreDirs: string[];
+}
+
+export function loadIgnoreRules(options: LoadIgnoreRulesOptions): Ignore {
+ const ignorer = new Ignore();
+ if (options.useGitignore) {
+ const gitignorePath = path.join(options.projectRoot, '.gitignore');
+ if (fs.existsSync(gitignorePath)) {
+ ignorer.add(fs.readFileSync(gitignorePath, 'utf8'));
+ }
+ }
+
+ if (options.useGeminiignore) {
+ const geminiignorePath = path.join(options.projectRoot, '.geminiignore');
+ if (fs.existsSync(geminiignorePath)) {
+ ignorer.add(fs.readFileSync(geminiignorePath, 'utf8'));
+ }
+ }
+
+ const ignoreDirs = ['.git', ...options.ignoreDirs];
+ ignorer.add(
+ ignoreDirs.map((dir) => {
+ if (dir.endsWith('/')) {
+ return dir;
+ }
+ return `${dir}/`;
+ }),
+ );
+
+ return ignorer;
+}
+
export class Ignore {
private readonly allPatterns: string[] = [];
private dirIgnorer = ignore();