summaryrefslogtreecommitdiff
path: root/packages/core/src/utils/filesearch/fileSearch.ts
diff options
context:
space:
mode:
authorBryant Chandler <[email protected]>2025-08-18 13:43:24 -0700
committerGitHub <[email protected]>2025-08-18 20:43:24 +0000
commit465ac9f547d0d684439886d1466c1a1133da611d (patch)
treeb94f00730118784b5b07800db71224816b444bfe /packages/core/src/utils/filesearch/fileSearch.ts
parentd66ddcd82e09d7b6fbc0226e31d73d38db5cff2a (diff)
feat(filesearch): Introduce non-recursive file search strategy (#6087)
Co-authored-by: Jacob Richman <[email protected]> Co-authored-by: Bryant Chandler <[email protected]>
Diffstat (limited to 'packages/core/src/utils/filesearch/fileSearch.ts')
-rw-r--r--packages/core/src/utils/filesearch/fileSearch.ts217
1 files changed, 77 insertions, 140 deletions
diff --git a/packages/core/src/utils/filesearch/fileSearch.ts b/packages/core/src/utils/filesearch/fileSearch.ts
index dff8d0ec..fa36dab4 100644
--- a/packages/core/src/utils/filesearch/fileSearch.ts
+++ b/packages/core/src/utils/filesearch/fileSearch.ts
@@ -5,23 +5,22 @@
*/
import path from 'node:path';
-import fs from 'node:fs';
-import { fdir } from 'fdir';
import picomatch from 'picomatch';
-import { Ignore } from './ignore.js';
+import { Ignore, loadIgnoreRules } from './ignore.js';
import { ResultCache } from './result-cache.js';
-import * as cache from './crawlCache.js';
+import { crawl } from './crawler.js';
import { AsyncFzf, FzfResultItem } from 'fzf';
-export type FileSearchOptions = {
+export interface FileSearchOptions {
projectRoot: string;
ignoreDirs: string[];
useGitignore: boolean;
useGeminiignore: boolean;
cache: boolean;
cacheTtl: number;
+ enableRecursiveFileSearch: boolean;
maxDepth?: number;
-};
+}
export class AbortError extends Error {
constructor(message = 'Search aborted') {
@@ -78,54 +77,42 @@ export async function filter(
return results;
}
-export type SearchOptions = {
+export interface SearchOptions {
signal?: AbortSignal;
maxResults?: number;
-};
+}
-/**
- * Provides a fast and efficient way to search for files within a project,
- * respecting .gitignore and .geminiignore rules, and utilizing caching
- * for improved performance.
- */
-export class FileSearch {
- private readonly absoluteDir: string;
- private readonly ignore: Ignore = new Ignore();
+export interface FileSearch {
+ initialize(): Promise<void>;
+ search(pattern: string, options?: SearchOptions): Promise<string[]>;
+}
+
+class RecursiveFileSearch implements FileSearch {
+ private ignore: Ignore | undefined;
private resultCache: ResultCache | undefined;
private allFiles: string[] = [];
private fzf: AsyncFzf<string[]> | undefined;
- /**
- * Constructs a new `FileSearch` instance.
- * @param options Configuration options for the file search.
- */
- constructor(private readonly options: FileSearchOptions) {
- this.absoluteDir = path.resolve(options.projectRoot);
- }
+ constructor(private readonly options: FileSearchOptions) {}
- /**
- * Initializes the file search engine by loading ignore rules, crawling the
- * file system, and building the in-memory cache. This method must be called
- * before performing any searches.
- */
async initialize(): Promise<void> {
- this.loadIgnoreRules();
- await this.crawlFiles();
+ this.ignore = loadIgnoreRules(this.options);
+ this.allFiles = await crawl({
+ crawlDirectory: this.options.projectRoot,
+ cwd: this.options.projectRoot,
+ ignore: this.ignore,
+ cache: this.options.cache,
+ cacheTtl: this.options.cacheTtl,
+ maxDepth: this.options.maxDepth,
+ });
this.buildResultCache();
}
- /**
- * Searches for files matching a given pattern.
- * @param pattern The picomatch pattern to search for (e.g., '*.js', 'src/**').
- * @param options Search options, including an AbortSignal and maxResults.
- * @returns A promise that resolves to a list of matching file paths, relative
- * to the project root.
- */
async search(
pattern: string,
options: SearchOptions = {},
): Promise<string[]> {
- if (!this.resultCache || !this.fzf) {
+ if (!this.resultCache || !this.fzf || !this.ignore) {
throw new Error('Engine not initialized. Call initialize() first.');
}
@@ -159,21 +146,9 @@ export class FileSearch {
}
}
- // Trade-off: We apply a two-stage filtering process.
- // 1. During the file system crawl (`performCrawl`), we only apply directory-level
- // ignore rules (e.g., `node_modules/`, `dist/`). This is because applying
- // a full ignore filter (which includes file-specific patterns like `*.log`)
- // during the crawl can significantly slow down `fdir`.
- // 2. Here, in the `search` method, we apply the full ignore filter
- // (including file patterns) to the `filteredCandidates` (which have already
- // been filtered by the user's search pattern and sorted). For autocomplete,
- // the number of displayed results is small (MAX_SUGGESTIONS_TO_SHOW),
- // so applying the full filter to this truncated list is much more efficient
- // than applying it to every file during the initial crawl.
const fileFilter = this.ignore.getFileFilter();
const results: string[] = [];
for (const [i, candidate] of filteredCandidates.entries()) {
- // Yield to the event loop to avoid blocking on large result sets.
if (i % 1000 === 0) {
await new Promise((resolve) => setImmediate(resolve));
if (options.signal?.aborted) {
@@ -184,7 +159,6 @@ export class FileSearch {
if (results.length >= (options.maxResults ?? Infinity)) {
break;
}
- // The `ignore` library throws an error if the path is '.', so we skip it.
if (candidate === '.') {
continue;
}
@@ -195,106 +169,69 @@ export class FileSearch {
return results;
}
- /**
- * Loads ignore rules from .gitignore and .geminiignore files, and applies
- * any additional ignore directories specified in the options.
- */
- private loadIgnoreRules(): void {
- if (this.options.useGitignore) {
- const gitignorePath = path.join(this.absoluteDir, '.gitignore');
- if (fs.existsSync(gitignorePath)) {
- this.ignore.add(fs.readFileSync(gitignorePath, 'utf8'));
- }
- }
-
- if (this.options.useGeminiignore) {
- const geminiignorePath = path.join(this.absoluteDir, '.geminiignore');
- if (fs.existsSync(geminiignorePath)) {
- this.ignore.add(fs.readFileSync(geminiignorePath, 'utf8'));
- }
- }
-
- const ignoreDirs = ['.git', ...this.options.ignoreDirs];
- this.ignore.add(
- ignoreDirs.map((dir) => {
- if (dir.endsWith('/')) {
- return dir;
- }
- return `${dir}/`;
- }),
- );
+ private buildResultCache(): void {
+ this.resultCache = new ResultCache(this.allFiles);
+ // The v1 algorithm is much faster since it only looks at the first
+ // occurence of the pattern. We use it for search spaces that have >20k
+ // files, because the v2 algorithm is just too slow in those cases.
+ this.fzf = new AsyncFzf(this.allFiles, {
+ fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
+ });
}
+}
- /**
- * Crawls the file system to get a list of all files and directories,
- * optionally using a cache for faster initialization.
- */
- private async crawlFiles(): Promise<void> {
- if (this.options.cache) {
- const cacheKey = cache.getCacheKey(
- this.absoluteDir,
- this.ignore.getFingerprint(),
- this.options.maxDepth,
- );
- const cachedResults = cache.read(cacheKey);
+class DirectoryFileSearch implements FileSearch {
+ private ignore: Ignore | undefined;
- if (cachedResults) {
- this.allFiles = cachedResults;
- return;
- }
- }
+ constructor(private readonly options: FileSearchOptions) {}
- this.allFiles = await this.performCrawl();
+ async initialize(): Promise<void> {
+ this.ignore = loadIgnoreRules(this.options);
+ }
- if (this.options.cache) {
- const cacheKey = cache.getCacheKey(
- this.absoluteDir,
- this.ignore.getFingerprint(),
- this.options.maxDepth,
- );
- cache.write(cacheKey, this.allFiles, this.options.cacheTtl * 1000);
+ async search(
+ pattern: string,
+ options: SearchOptions = {},
+ ): Promise<string[]> {
+ if (!this.ignore) {
+ throw new Error('Engine not initialized. Call initialize() first.');
}
- }
+ pattern = pattern || '*';
- /**
- * Performs the actual file system crawl using `fdir`, applying directory
- * ignore rules.
- * @returns A promise that resolves to a list of all files and directories.
- */
- private async performCrawl(): Promise<string[]> {
- const dirFilter = this.ignore.getDirectoryFilter();
+ const dir = pattern.endsWith('/') ? pattern : path.dirname(pattern);
+ const results = await crawl({
+ crawlDirectory: path.join(this.options.projectRoot, dir),
+ cwd: this.options.projectRoot,
+ maxDepth: 0,
+ ignore: this.ignore,
+ cache: this.options.cache,
+ cacheTtl: this.options.cacheTtl,
+ });
- // We use `fdir` for fast file system traversal. A key performance
- // optimization for large workspaces is to exclude entire directories
- // early in the traversal process. This is why we apply directory-specific
- // ignore rules (e.g., `node_modules/`, `dist/`) directly to `fdir`'s
- // exclude filter.
- const api = new fdir()
- .withRelativePaths()
- .withDirs()
- .withPathSeparator('/') // Always use unix style paths
- .exclude((_, dirPath) => {
- const relativePath = path.relative(this.absoluteDir, dirPath);
- return dirFilter(`${relativePath}/`);
- });
+ const filteredResults = await filter(results, pattern, options.signal);
- if (this.options.maxDepth !== undefined) {
- api.withMaxDepth(this.options.maxDepth);
+ const fileFilter = this.ignore.getFileFilter();
+ const finalResults: string[] = [];
+ for (const candidate of filteredResults) {
+ if (finalResults.length >= (options.maxResults ?? Infinity)) {
+ break;
+ }
+ if (candidate === '.') {
+ continue;
+ }
+ if (!fileFilter(candidate)) {
+ finalResults.push(candidate);
+ }
}
-
- return api.crawl(this.absoluteDir).withPromise();
+ return finalResults;
}
+}
- /**
- * Builds the in-memory cache for fast pattern matching.
- */
- private buildResultCache(): void {
- this.resultCache = new ResultCache(this.allFiles);
- // The v1 algorithm is much faster since it only looks at the first
- // occurence of the pattern. We use it for search spaces that have >20k
- // files, because the v2 algorithm is just too slow in those cases.
- this.fzf = new AsyncFzf(this.allFiles, {
- fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
- });
+export class FileSearchFactory {
+ static create(options: FileSearchOptions): FileSearch {
+ if (options.enableRecursiveFileSearch) {
+ return new RecursiveFileSearch(options);
+ }
+ return new DirectoryFileSearch(options);
}
}