summary | refs | log | tree | commit | diff
path: root/packages/core/src/utils/fileUtils.ts
diff options
context:
space:
mode:
author Tommaso Sciortino <[email protected]> 2025-05-30 18:25:47 -0700
committer GitHub <[email protected]> 2025-05-30 18:25:47 -0700
commit 21fba832d1b4ea7af43fb887d9b2b38fcf8210d0 (patch)
tree 7200d2fac3a55c385e0a2dac34b5282c942364bc /packages/core/src/utils/fileUtils.ts
parent c81148a0cc8489f657901c2cc7247c0834075e1a (diff)
Rename server->core (#638)
Diffstat (limited to 'packages/core/src/utils/fileUtils.ts')
-rw-r--r-- packages/core/src/utils/fileUtils.ts 280
1 files changed, 280 insertions, 0 deletions
diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts
new file mode 100644
index 00000000..d726c053
--- /dev/null
+++ b/packages/core/src/utils/fileUtils.ts
@@ -0,0 +1,280 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'fs';
+import path from 'path';
+import { PartUnion } from '@google/genai';
+import mime from 'mime-types';
+
+// Limits applied to text files by processSingleFileContent.
+const DEFAULT_MAX_LINES_TEXT_FILE = 2000; // number of lines returned when the caller passes no `limit`
+const MAX_LINE_LENGTH_TEXT_FILE = 2000; // characters kept per line before '... [truncated]' is appended
+
+// Default text encoding (exported constant).
+export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
+
+/**
+ * Checks if a path is within a given root directory.
+ *
+ * Both paths are normalized (collapsing `.`/`..` segments and duplicate
+ * separators) and then compared as strings; the filesystem is never touched,
+ * so symlinks are not resolved. A trailing separator on either argument is
+ * ignored, so `/a/b` and `/a/b/` name the same directory.
+ *
+ * @param pathToCheck The absolute path to check.
+ * @param rootDirectory The absolute root directory.
+ * @returns True if the path is the root directory itself or lies beneath it,
+ *   false otherwise.
+ */
+export function isWithinRoot(
+  pathToCheck: string,
+  rootDirectory: string,
+): boolean {
+  // Normalize, then drop a single trailing separator unless the whole path is
+  // a filesystem root (e.g. '/' or 'C:\'), so both sides share one form.
+  const canonicalize = (rawPath: string): string => {
+    const normalized = path.normalize(rawPath);
+    const fsRoot = path.parse(normalized).root;
+    return normalized.length > fsRoot.length && normalized.endsWith(path.sep)
+      ? normalized.slice(0, -1)
+      : normalized;
+  };
+
+  const target = canonicalize(pathToCheck);
+  const root = canonicalize(rootDirectory);
+  if (target === root) {
+    return true;
+  }
+
+  // Compare against the root plus a separator so that '/a/b' does not
+  // accidentally match a sibling such as '/a/bc'. A filesystem root already
+  // ends with the separator and is used as-is.
+  const rootWithSeparator = root.endsWith(path.sep) ? root : root + path.sep;
+  return target.startsWith(rootWithSeparator);
+}
+
+/**
+ * Determines if a file is likely binary based on content sampling.
+ *
+ * Reads at most the first 4KB of the file. A NUL byte anywhere in the sample
+ * marks the file as binary immediately; otherwise the file is binary when more
+ * than 30% of the sampled bytes are control characters other than
+ * tab/LF/VT/FF/CR (byte values 9-13).
+ *
+ * @param filePath Path to the file.
+ * @returns True if the file appears to be binary. Returns false for empty
+ *   files and whenever the file cannot be opened or read, leaving
+ *   existence/permission errors for higher-level functions to report.
+ */
+export function isBinaryFile(filePath: string): boolean {
+  let fd: number | undefined;
+  try {
+    fd = fs.openSync(filePath, 'r');
+
+    // Read up to 4KB or file size, whichever is smaller
+    const fileSize = fs.fstatSync(fd).size;
+    if (fileSize === 0) {
+      // Empty file is not considered binary for content checking
+      return false;
+    }
+    const buffer = Buffer.alloc(Math.min(4096, fileSize));
+    const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
+    if (bytesRead === 0) {
+      return false;
+    }
+
+    let nonPrintableCount = 0;
+    for (const byte of buffer.subarray(0, bytesRead)) {
+      if (byte === 0) {
+        return true; // Null byte is a strong indicator
+      }
+      if (byte < 9 || (byte > 13 && byte < 32)) {
+        nonPrintableCount++;
+      }
+    }
+    // If >30% non-printable characters, consider it binary
+    return nonPrintableCount / bytesRead > 0.3;
+  } catch {
+    // If any error occurs (e.g. file not found, permissions),
+    // treat as not binary here; let higher-level functions handle existence/access errors.
+    return false;
+  } finally {
+    // Always release the descriptor, including when fstat/read threw above.
+    if (fd !== undefined) {
+      try {
+        fs.closeSync(fd);
+      } catch {
+        // A failed close must not change the classification result.
+      }
+    }
+  }
+}
+
+// Extensions classified as binary from the name alone, before any content
+// sampling; mime-types often does not cover these or may misidentify them.
+const KNOWN_BINARY_EXTENSIONS: ReadonlySet<string> = new Set([
+  '.zip',
+  '.tar',
+  '.gz',
+  '.exe',
+  '.dll',
+  '.so',
+  '.class',
+  '.jar',
+  '.war',
+  '.7z',
+  '.doc',
+  '.docx',
+  '.xls',
+  '.xlsx',
+  '.ppt',
+  '.pptx',
+  '.odt',
+  '.ods',
+  '.odp',
+  '.bin',
+  '.dat',
+  '.obj',
+  '.o',
+  '.a',
+  '.lib',
+  '.wasm',
+  '.pyc',
+  '.pyo',
+]);
+
+/**
+ * Classifies a file using its name first and its content last.
+ *
+ * Order of checks: a MIME type (looked up from the path) starting with
+ * `image/` yields 'image'; `application/pdf` yields 'pdf'; a lower-cased
+ * extension found in the known-binary list yields 'binary'; otherwise the
+ * result of `isBinaryFile` decides between 'binary' and 'text'.
+ *
+ * @param filePath Path to the file.
+ * @returns 'text', 'image', 'pdf', or 'binary'.
+ */
+export function detectFileType(
+  filePath: string,
+): 'text' | 'image' | 'pdf' | 'binary' {
+  // mime.lookup yields the MIME type string, or false when nothing matches.
+  const mimeType = mime.lookup(filePath);
+  if (typeof mimeType === 'string') {
+    if (mimeType.startsWith('image/')) {
+      return 'image';
+    }
+    if (mimeType === 'application/pdf') {
+      return 'pdf';
+    }
+  }
+
+  const extension = path.extname(filePath).toLowerCase();
+  if (KNOWN_BINARY_EXTENSIONS.has(extension)) {
+    return 'binary';
+  }
+
+  // Neither the MIME type nor the extension was conclusive: sample the content.
+  return isBinaryFile(filePath) ? 'binary' : 'text';
+}
+
+export interface ProcessedFileReadResult { // Result returned by processSingleFileContent
+ llmContent: PartUnion; // Content for the LLM: a string for text, binary-skip and error results; an inlineData part for image/pdf
+ returnDisplay: string; // Short status string for display, e.g. '(truncated)' or 'File not found.'
+ error?: string; // Optional error message for the LLM if file processing failed
+ isTruncated?: boolean; // For text files: true if lines past the returned range were omitted or over-long lines were cut
+ originalLineCount?: number; // For text files: total number of lines in the file
+ linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
+}
+
+/**
+ * Reads and processes a single file, handling text, images, and PDFs.
+ *
+ * Failures (missing path, path is a directory, read errors) are reported
+ * through the `error` field of the result instead of being thrown.
+ *
+ * Text files are split on '\n'; at most `limit` lines starting at `offset` are
+ * returned, and lines longer than the per-line maximum are cut and suffixed
+ * with '... [truncated]'. Images and PDFs are returned as base64 inline data.
+ * Files detected as binary are not read; a placeholder message is returned.
+ *
+ * @param filePath Absolute path to the file.
+ * @param rootDirectory Absolute path to the project root for relative path display.
+ * @param offset Optional offset for text files (0-based line number). Negative
+ *   values are treated as 0 and fractional values are rounded down.
+ * @param limit Optional limit for text files (number of lines to read).
+ *   Defaults to the module's maximum line count; negative values are treated
+ *   as 0 and fractional values are rounded down.
+ * @returns ProcessedFileReadResult object.
+ */
+export async function processSingleFileContent(
+  filePath: string,
+  rootDirectory: string,
+  offset?: number,
+  limit?: number,
+): Promise<ProcessedFileReadResult> {
+  try {
+    if (!fs.existsSync(filePath)) {
+      // Sync check is acceptable before async read
+      return {
+        llmContent: '',
+        returnDisplay: 'File not found.',
+        error: `File not found: ${filePath}`,
+      };
+    }
+    const stats = fs.statSync(filePath); // Sync check
+    if (stats.isDirectory()) {
+      return {
+        llmContent: '',
+        returnDisplay: 'Path is a directory.',
+        error: `Path is a directory, not a file: ${filePath}`,
+      };
+    }
+
+    const fileType = detectFileType(filePath);
+    // Display paths always use forward slashes, regardless of platform.
+    const relativePathForDisplay = path
+      .relative(rootDirectory, filePath)
+      .replace(/\\/g, '/');
+
+    switch (fileType) {
+      case 'binary': {
+        return {
+          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
+          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
+        };
+      }
+      case 'text': {
+        const content = await fs.promises.readFile(filePath, 'utf8');
+        const lines = content.split('\n');
+        const originalLineCount = lines.length;
+
+        // Clamp offset/limit to non-negative integers. A negative offset would
+        // otherwise make slice() count from the end of the file while the
+        // reported line numbers claimed a range starting at or below zero.
+        const startLine =
+          offset !== undefined && offset > 0 ? Math.floor(offset) : 0;
+        let effectiveLimit = DEFAULT_MAX_LINES_TEXT_FILE;
+        if (limit !== undefined) {
+          effectiveLimit = limit > 0 ? Math.floor(limit) : 0;
+        }
+
+        // Keep both ends of the window inside the file.
+        const actualStartLine = Math.min(startLine, originalLineCount);
+        const endLine = Math.min(
+          actualStartLine + effectiveLimit,
+          originalLineCount,
+        );
+        const selectedLines = lines.slice(actualStartLine, endLine);
+
+        let linesWereTruncatedInLength = false;
+        const formattedLines = selectedLines.map((line) => {
+          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
+            linesWereTruncatedInLength = true;
+            return (
+              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
+            );
+          }
+          return line;
+        });
+
+        // Range truncation only tracks lines omitted after the window.
+        const contentRangeTruncated = endLine < originalLineCount;
+        const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;
+
+        // Prefix a notice for the LLM; the range notice takes precedence over
+        // the long-line notice when both apply.
+        let llmTextContent = '';
+        if (contentRangeTruncated) {
+          llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`;
+        } else if (linesWereTruncatedInLength) {
+          llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`;
+        }
+        llmTextContent += formattedLines.join('\n');
+
+        return {
+          llmContent: llmTextContent,
+          returnDisplay: isTruncated ? '(truncated)' : '',
+          isTruncated,
+          originalLineCount,
+          linesShown: [actualStartLine + 1, endLine],
+        };
+      }
+      case 'image':
+      case 'pdf': {
+        const contentBuffer = await fs.promises.readFile(filePath);
+        const base64Data = contentBuffer.toString('base64');
+        return {
+          llmContent: {
+            inlineData: {
+              data: base64Data,
+              mimeType: mime.lookup(filePath) || 'application/octet-stream',
+            },
+          },
+          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
+        };
+      }
+      default: {
+        // Should not happen with current detectFileType logic; the `never`
+        // assignment makes the compiler flag any newly added file type.
+        const exhaustiveCheck: never = fileType;
+        return {
+          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
+          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
+          error: `Unhandled file type for ${filePath}`,
+        };
+      }
+    }
+  } catch (error) {
+    // Covers races between the existence check and the read, permission
+    // errors, and any other I/O failure.
+    const errorMessage = error instanceof Error ? error.message : String(error);
+    const displayPath = path
+      .relative(rootDirectory, filePath)
+      .replace(/\\/g, '/');
+    return {
+      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
+      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
+      error: `Error reading file ${filePath}: ${errorMessage}`,
+    };
+  }
+}