packages/core/src/utils/filesearch/fileSearch.ts


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import path from 'node:path';
import fs from 'node:fs';
import { fdir } from 'fdir';
import picomatch from 'picomatch';
import { Ignore } from './ignore.js';
import { ResultCache } from './result-cache.js';
import * as cache from './crawlCache.js';
import { Fzf, FzfResultItem } from 'fzf';

/**
 * Configuration accepted by the `FileSearch` constructor.
 */
export type FileSearchOptions = {
  /** Root directory to crawl; it is passed through `path.resolve` before use. */
  projectRoot: string;
  /**
   * Extra directories to ignore. `.git` is always prepended to this list and
   * every entry is given a trailing `/` before being added to the ignore rules.
   */
  ignoreDirs: string[];
  /** When true, rules from `<projectRoot>/.gitignore` are loaded if the file exists. */
  useGitignore: boolean;
  /** When true, rules from `<projectRoot>/.geminiignore` are loaded if the file exists. */
  useGeminiignore: boolean;
  /** When true, crawl results are read from and written to the crawl cache. */
  cache: boolean;
  /**
   * Lifetime of a crawl-cache entry. The value is multiplied by 1000 before
   * being handed to the crawl cache — presumably seconds converted to
   * milliseconds; confirm against `crawlCache.write`.
   */
  cacheTtl: number;
  /**
   * Optional crawl depth limit. When defined it is forwarded to `fdir`'s
   * `withMaxDepth` and is also part of the crawl-cache key.
   */
  maxDepth?: number;
};

/**
 * Error thrown when a search is cancelled through an `AbortSignal`.
 *
 * Instances report `name === 'AbortError'` so callers can tell a cancellation
 * apart from a genuine failure.
 */
export class AbortError extends Error {
  /** Overrides the default `Error` name on every instance. */
  override name = 'AbortError';

  /**
   * @param message Human-readable reason; defaults to `'Search aborted'`.
   */
  constructor(message = 'Search aborted') {
    super(message);
  }
}

/**
 * Selects the paths that match a picomatch pattern and orders the matches.
 *
 * Matching uses the picomatch options `dot`, `contains` and `nocase`, i.e.
 * dotfiles are eligible, the pattern may match anywhere inside a path, and
 * matching ignores case.
 *
 * The scan hands control back to the event loop before every 1000th path
 * (including the very first one) and checks `signal` at that moment.
 *
 * @param allPaths The list of paths to scan. It is not modified.
 * @param pattern The picomatch pattern to filter by.
 * @param signal Optional AbortSignal used to cancel the scan.
 * @returns A promise resolving to the matching paths, with entries ending in
 *          `/` (directories) first and each group in ascending string order.
 * @throws AbortError if `signal` is found aborted at one of the periodic checks.
 */
export async function filter(
  allPaths: string[],
  pattern: string,
  signal: AbortSignal | undefined,
): Promise<string[]> {
  const isMatch = picomatch(pattern, {
    dot: true,
    contains: true,
    nocase: true,
  });

  // Resolves on the next `setImmediate` tick so pending I/O and timers can run.
  const yieldToEventLoop = () =>
    new Promise((resolve) => setImmediate(resolve));

  // Directories (trailing '/') sort ahead of files; ties fall back to a plain
  // `<` / `>` string comparison. That comparison is case-sensitive (it orders
  // by code unit); the original author reports it as roughly 40% faster than
  // `localeCompare`, whose main benefit would be locale-aware ordering.
  const compareDirsFirst = (left: string, right: string): number => {
    const leftIsDir = left.endsWith('/');
    const rightIsDir = right.endsWith('/');
    if (leftIsDir !== rightIsDir) {
      return leftIsDir ? -1 : 1;
    }
    if (left < right) {
      return -1;
    }
    if (left > right) {
      return 1;
    }
    return 0;
  };

  const matched: string[] = [];
  let position = 0;
  for (const candidate of allPaths) {
    // Every 1000 paths: let the event loop breathe, then honour cancellation.
    if (position % 1000 === 0) {
      await yieldToEventLoop();
      if (signal?.aborted) {
        throw new AbortError();
      }
    }
    position += 1;

    if (isMatch(candidate)) {
      matched.push(candidate);
    }
  }

  matched.sort(compareDirsFirst);
  return matched;
}

/**
 * Runs an fzf search over a list of paths.
 *
 * A fresh `Fzf` finder is built over `allPaths` on every call, and the
 * matched strings are unwrapped from the result entries.
 *
 * @param allPaths The list of all paths to search.
 * @param pattern The fzf query to match against the paths.
 * @returns The matched paths, in the order `Fzf.find` returned them.
 */
function filterByFzf(allPaths: string[], pattern: string) {
  const finder = new Fzf(allPaths);
  const matches: FzfResultItem[] = finder.find(pattern);
  return matches.map(({ item }) => item);
}

/**
 * Per-call options for `FileSearch.search`.
 */
export type SearchOptions = {
  /**
   * Optional cancellation signal. It is polled periodically while candidates
   * are processed; when found aborted, the search throws an `AbortError`.
   */
  signal?: AbortSignal;
  /** Upper bound on the number of returned paths; unlimited when omitted. */
  maxResults?: number;
};

/**
 * Provides a fast and efficient way to search for files within a project,
 * respecting .gitignore and .geminiignore rules, and utilizing caching
 * for improved performance.
 *
 * Typical use: construct, `await initialize()`, then call `search()` as often
 * as needed.
 */
export class FileSearch {
  /** `options.projectRoot` resolved to an absolute path. */
  private readonly absoluteDir: string;
  /** Accumulated ignore rules (ignore files plus `ignoreDirs`). */
  private readonly ignore: Ignore = new Ignore();
  /** Per-pattern result cache; stays undefined until `initialize()` completes. */
  private resultCache: ResultCache | undefined;
  /** Every path found by the crawl (or restored from the crawl cache). */
  private allFiles: string[] = [];

  /**
   * Constructs a new `FileSearch` instance.
   * @param options Configuration options for the file search.
   */
  constructor(private readonly options: FileSearchOptions) {
    this.absoluteDir = path.resolve(options.projectRoot);
  }

  /**
   * Initializes the file search engine by loading ignore rules, crawling the
   * file system, and building the in-memory cache. This method must be called
   * before performing any searches.
   */
  async initialize(): Promise<void> {
    this.loadIgnoreRules();
    await this.crawlFiles();
    this.buildResultCache();
  }

  /**
   * Searches for files matching a given pattern.
   *
   * An empty pattern is treated as `'*'`. Patterns containing `*` are matched
   * as picomatch globs against the candidates supplied by the result cache;
   * any other pattern is matched with fzf against the full crawled list.
   * Freshly computed matches are stored in the result cache, and file-level
   * ignore rules are applied afterwards while collecting the final results.
   *
   * @param pattern The search pattern (e.g., '*.js', 'src/**', or plain text
   *                for an fzf search).
   * @param options Search options, including an AbortSignal and maxResults.
   * @returns A promise that resolves to a list of matching file paths, relative
   *          to the project root.
   * @throws Error if `initialize()` has not completed yet.
   * @throws AbortError if `options.signal` is found aborted at one of the
   *         periodic checks.
   */
  async search(
    pattern: string,
    options: SearchOptions = {},
  ): Promise<string[]> {
    // Bind the cache once so the lookup and the store below always hit the
    // same instance and no non-null assertions are needed.
    const resultCache = this.resultCache;
    if (!resultCache) {
      throw new Error('Engine not initialized. Call initialize() first.');
    }

    // `||` is deliberate: an empty string must fall back to the match-all glob.
    const effectivePattern = pattern || '*';

    const { files: candidates, isExactMatch } =
      await resultCache.get(effectivePattern);

    let filteredCandidates: string[];
    if (isExactMatch) {
      // The cache already holds the finished match list for this pattern.
      filteredCandidates = candidates;
    } else {
      // Glob patterns narrow the cached candidates with picomatch; anything
      // else is an fzf search over the full file list.
      filteredCandidates = effectivePattern.includes('*')
        ? await filter(candidates, effectivePattern, options.signal)
        : filterByFzf(this.allFiles, effectivePattern);
      resultCache.set(effectivePattern, filteredCandidates);
    }

    // Trade-off: ignore rules are applied in two stages.
    // 1. During the file system crawl (`performCrawl`) only directory-level
    //    ignore rules (e.g., `node_modules/`, `dist/`) are applied, because a
    //    full ignore filter (including file patterns like `*.log`) during the
    //    crawl can significantly slow down `fdir`.
    // 2. Here the full ignore filter is applied lazily to candidates that
    //    already matched the user's pattern. The loop stops as soon as
    //    `maxResults` entries are collected, so when the caller asks for few
    //    results (e.g., autocomplete) only a few candidates get tested.
    const fileFilter = this.ignore.getFileFilter();
    const maxResults = options.maxResults ?? Infinity;
    const results: string[] = [];
    for (const [i, candidate] of filteredCandidates.entries()) {
      // Yield to the event loop to avoid blocking on large result sets.
      if (i % 1000 === 0) {
        await new Promise((resolve) => setImmediate(resolve));
        if (options.signal?.aborted) {
          throw new AbortError();
        }
      }

      if (results.length >= maxResults) {
        break;
      }
      // The `ignore` library throws an error if the path is '.', so we skip it.
      if (candidate === '.') {
        continue;
      }
      // `fileFilter` returns true for paths that must be ignored.
      if (!fileFilter(candidate)) {
        results.push(candidate);
      }
    }
    return results;
  }

  /**
   * Loads ignore rules from .gitignore and .geminiignore files, and applies
   * any additional ignore directories specified in the options.
   */
  private loadIgnoreRules(): void {
    if (this.options.useGitignore) {
      this.addIgnoreFileIfPresent('.gitignore');
    }

    if (this.options.useGeminiignore) {
      this.addIgnoreFileIfPresent('.geminiignore');
    }

    // `.git` is always ignored. Every entry gets a trailing slash so it is
    // registered as a directory rule.
    const ignoreDirs = ['.git', ...this.options.ignoreDirs];
    this.ignore.add(
      ignoreDirs.map((dir) => (dir.endsWith('/') ? dir : `${dir}/`)),
    );
  }

  /**
   * Adds the contents of an ignore file located directly in the project root
   * to the ignore rules. Does nothing when the file does not exist.
   * @param fileName Name of the ignore file, e.g. `.gitignore`.
   */
  private addIgnoreFileIfPresent(fileName: string): void {
    const filePath = path.join(this.absoluteDir, fileName);
    if (fs.existsSync(filePath)) {
      this.ignore.add(fs.readFileSync(filePath, 'utf8'));
    }
  }

  /**
   * Crawls the file system to get a list of all files and directories,
   * optionally using a cache for faster initialization.
   */
  private async crawlFiles(): Promise<void> {
    // The key depends only on values that do not change during the crawl, so
    // it is computed once and shared by the read and the write.
    const cacheKey = this.options.cache
      ? cache.getCacheKey(
          this.absoluteDir,
          this.ignore.getFingerprint(),
          this.options.maxDepth,
        )
      : undefined;

    if (cacheKey !== undefined) {
      const cachedResults = cache.read(cacheKey);
      if (cachedResults) {
        this.allFiles = cachedResults;
        return;
      }
    }

    this.allFiles = await this.performCrawl();

    if (cacheKey !== undefined) {
      // `cacheTtl` is scaled by 1000 before being passed on (presumably
      // seconds to milliseconds — confirm against `crawlCache.write`).
      cache.write(cacheKey, this.allFiles, this.options.cacheTtl * 1000);
    }
  }

  /**
   * Performs the actual file system crawl using `fdir`, applying directory
   * ignore rules.
   * @returns A promise that resolves to a list of all files and directories.
   */
  private async performCrawl(): Promise<string[]> {
    const dirFilter = this.ignore.getDirectoryFilter();

    // We use `fdir` for fast file system traversal. A key performance
    // optimization for large workspaces is to exclude entire directories
    // early in the traversal process. This is why we apply directory-specific
    // ignore rules (e.g., `node_modules/`, `dist/`) directly to `fdir`'s
    // exclude filter.
    const api = new fdir()
      .withRelativePaths()
      .withDirs()
      .withPathSeparator('/') // Always use unix style paths
      .exclude((_, dirPath) => {
        // The filter is queried with the directory's path relative to the
        // project root, plus a trailing slash.
        const relativePath = path.relative(this.absoluteDir, dirPath);
        return dirFilter(`${relativePath}/`);
      });

    if (this.options.maxDepth !== undefined) {
      api.withMaxDepth(this.options.maxDepth);
    }

    return api.crawl(this.absoluteDir).withPromise();
  }

  /**
   * Builds the in-memory cache for fast pattern matching.
   */
  private buildResultCache(): void {
    this.resultCache = new ResultCache(this.allFiles, this.absoluteDir);
  }
}