summaryrefslogtreecommitdiff
path: root/packages/core/src/utils/filesearch/crawler.ts
diff options
context:
space:
mode:
authorBryant Chandler <[email protected]>2025-08-18 13:43:24 -0700
committerGitHub <[email protected]>2025-08-18 20:43:24 +0000
commit465ac9f547d0d684439886d1466c1a1133da611d (patch)
treeb94f00730118784b5b07800db71224816b444bfe /packages/core/src/utils/filesearch/crawler.ts
parentd66ddcd82e09d7b6fbc0226e31d73d38db5cff2a (diff)
feat(filesearch): Introduce non-recursive file search strategy (#6087)
Co-authored-by: Jacob Richman <[email protected]> Co-authored-by: Bryant Chandler <[email protected]>
Diffstat (limited to 'packages/core/src/utils/filesearch/crawler.ts')
-rw-r--r--packages/core/src/utils/filesearch/crawler.ts85
1 file changed, 85 insertions, 0 deletions
diff --git a/packages/core/src/utils/filesearch/crawler.ts b/packages/core/src/utils/filesearch/crawler.ts
new file mode 100644
index 00000000..7e422b06
--- /dev/null
+++ b/packages/core/src/utils/filesearch/crawler.ts
@@ -0,0 +1,85 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import path from 'node:path';
+import { fdir } from 'fdir';
+import { Ignore } from './ignore.js';
+import * as cache from './crawlCache.js';
+
/** Options controlling a single file-system crawl (see {@link crawl}). */
export interface CrawlOptions {
  // The directory to start the crawl from.
  crawlDirectory: string;
  // The project's root directory, for path relativity.
  cwd: string;
  // The fdir maxDepth option.
  maxDepth?: number;
  // A pre-configured Ignore instance.
  ignore: Ignore;
  // Caching options.
  // Whether to read/write the shared crawl cache.
  cache: boolean;
  // Cache time-to-live; crawl() multiplies this by 1000 before passing it to
  // cache.write, so presumably expressed in seconds — confirm against cache API.
  cacheTtl: number;
}
+
+function toPosixPath(p: string) {
+ return p.split(path.sep).join(path.posix.sep);
+}
+
+export async function crawl(options: CrawlOptions): Promise<string[]> {
+ if (options.cache) {
+ const cacheKey = cache.getCacheKey(
+ options.crawlDirectory,
+ options.ignore.getFingerprint(),
+ options.maxDepth,
+ );
+ const cachedResults = cache.read(cacheKey);
+
+ if (cachedResults) {
+ return cachedResults;
+ }
+ }
+
+ const posixCwd = toPosixPath(options.cwd);
+ const posixCrawlDirectory = toPosixPath(options.crawlDirectory);
+
+ let results: string[];
+ try {
+ const dirFilter = options.ignore.getDirectoryFilter();
+ const api = new fdir()
+ .withRelativePaths()
+ .withDirs()
+ .withPathSeparator('/') // Always use unix style paths
+ .exclude((_, dirPath) => {
+ const relativePath = path.posix.relative(posixCrawlDirectory, dirPath);
+ return dirFilter(`${relativePath}/`);
+ });
+
+ if (options.maxDepth !== undefined) {
+ api.withMaxDepth(options.maxDepth);
+ }
+
+ results = await api.crawl(options.crawlDirectory).withPromise();
+ } catch (_e) {
+ // The directory probably doesn't exist.
+ return [];
+ }
+
+ const relativeToCrawlDir = path.posix.relative(posixCwd, posixCrawlDirectory);
+
+ const relativeToCwdResults = results.map((p) =>
+ path.posix.join(relativeToCrawlDir, p),
+ );
+
+ if (options.cache) {
+ const cacheKey = cache.getCacheKey(
+ options.crawlDirectory,
+ options.ignore.getFingerprint(),
+ options.maxDepth,
+ );
+ cache.write(cacheKey, relativeToCwdResults, options.cacheTtl * 1000);
+ }
+
+ return relativeToCwdResults;
+}