summaryrefslogtreecommitdiff
path: root/packages/server/src/utils/fileUtils.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/server/src/utils/fileUtils.ts')
-rw-r--r--packages/server/src/utils/fileUtils.ts280
1 files changed, 0 insertions, 280 deletions
diff --git a/packages/server/src/utils/fileUtils.ts b/packages/server/src/utils/fileUtils.ts
deleted file mode 100644
index d726c053..00000000
--- a/packages/server/src/utils/fileUtils.ts
+++ /dev/null
@@ -1,280 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import fs from 'fs';
-import path from 'path';
-import { PartUnion } from '@google/genai';
-import mime from 'mime-types';
-
-// Constants for text file processing
-const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
-const MAX_LINE_LENGTH_TEXT_FILE = 2000;
-
-// Default values for encoding and separator format
-export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
-
-/**
- * Checks if a path is within a given root directory.
- * @param pathToCheck The absolute path to check.
- * @param rootDirectory The absolute root directory.
- * @returns True if the path is within the root directory, false otherwise.
- */
-export function isWithinRoot(
- pathToCheck: string,
- rootDirectory: string,
-): boolean {
- const normalizedPathToCheck = path.normalize(pathToCheck);
- const normalizedRootDirectory = path.normalize(rootDirectory);
-
- // Ensure the rootDirectory path ends with a separator for correct startsWith comparison,
- // unless it's the root path itself (e.g., '/' or 'C:\').
- const rootWithSeparator =
- normalizedRootDirectory === path.sep ||
- normalizedRootDirectory.endsWith(path.sep)
- ? normalizedRootDirectory
- : normalizedRootDirectory + path.sep;
-
- return (
- normalizedPathToCheck === normalizedRootDirectory ||
- normalizedPathToCheck.startsWith(rootWithSeparator)
- );
-}
-
-/**
- * Determines if a file is likely binary based on content sampling.
- * @param filePath Path to the file.
- * @returns True if the file appears to be binary.
- */
-export function isBinaryFile(filePath: string): boolean {
- try {
- const fd = fs.openSync(filePath, 'r');
- // Read up to 4KB or file size, whichever is smaller
- const fileSize = fs.fstatSync(fd).size;
- if (fileSize === 0) {
- // Empty file is not considered binary for content checking
- fs.closeSync(fd);
- return false;
- }
- const bufferSize = Math.min(4096, fileSize);
- const buffer = Buffer.alloc(bufferSize);
- const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
- fs.closeSync(fd);
-
- if (bytesRead === 0) return false;
-
- let nonPrintableCount = 0;
- for (let i = 0; i < bytesRead; i++) {
- if (buffer[i] === 0) return true; // Null byte is a strong indicator
- if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) {
- nonPrintableCount++;
- }
- }
- // If >30% non-printable characters, consider it binary
- return nonPrintableCount / bytesRead > 0.3;
- } catch {
- // If any error occurs (e.g. file not found, permissions),
- // treat as not binary here; let higher-level functions handle existence/access errors.
- return false;
- }
-}
-
-/**
- * Detects the type of file based on extension and content.
- * @param filePath Path to the file.
- * @returns 'text', 'image', 'pdf', or 'binary'.
- */
-export function detectFileType(
- filePath: string,
-): 'text' | 'image' | 'pdf' | 'binary' {
- const ext = path.extname(filePath).toLowerCase();
- const lookedUpMimeType = mime.lookup(filePath); // Returns false if not found, or the mime type string
-
- if (lookedUpMimeType && lookedUpMimeType.startsWith('image/')) {
- return 'image';
- }
- if (lookedUpMimeType && lookedUpMimeType === 'application/pdf') {
- return 'pdf';
- }
-
- // Stricter binary check for common non-text extensions before content check
- // These are often not well-covered by mime-types or might be misidentified.
- if (
- [
- '.zip',
- '.tar',
- '.gz',
- '.exe',
- '.dll',
- '.so',
- '.class',
- '.jar',
- '.war',
- '.7z',
- '.doc',
- '.docx',
- '.xls',
- '.xlsx',
- '.ppt',
- '.pptx',
- '.odt',
- '.ods',
- '.odp',
- '.bin',
- '.dat',
- '.obj',
- '.o',
- '.a',
- '.lib',
- '.wasm',
- '.pyc',
- '.pyo',
- ].includes(ext)
- ) {
- return 'binary';
- }
-
- // Fallback to content-based check if mime type wasn't conclusive for image/pdf
- // and it's not a known binary extension.
- if (isBinaryFile(filePath)) {
- return 'binary';
- }
-
- return 'text';
-}
-
-export interface ProcessedFileReadResult {
- llmContent: PartUnion; // string for text, Part for image/pdf/unreadable binary
- returnDisplay: string;
- error?: string; // Optional error message for the LLM if file processing failed
- isTruncated?: boolean; // For text files, indicates if content was truncated
- originalLineCount?: number; // For text files
- linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
-}
-
-/**
- * Reads and processes a single file, handling text, images, and PDFs.
- * @param filePath Absolute path to the file.
- * @param rootDirectory Absolute path to the project root for relative path display.
- * @param offset Optional offset for text files (0-based line number).
- * @param limit Optional limit for text files (number of lines to read).
- * @returns ProcessedFileReadResult object.
- */
-export async function processSingleFileContent(
- filePath: string,
- rootDirectory: string,
- offset?: number,
- limit?: number,
-): Promise<ProcessedFileReadResult> {
- try {
- if (!fs.existsSync(filePath)) {
- // Sync check is acceptable before async read
- return {
- llmContent: '',
- returnDisplay: 'File not found.',
- error: `File not found: ${filePath}`,
- };
- }
- const stats = fs.statSync(filePath); // Sync check
- if (stats.isDirectory()) {
- return {
- llmContent: '',
- returnDisplay: 'Path is a directory.',
- error: `Path is a directory, not a file: ${filePath}`,
- };
- }
-
- const fileType = detectFileType(filePath);
- const relativePathForDisplay = path
- .relative(rootDirectory, filePath)
- .replace(/\\/g, '/');
-
- switch (fileType) {
- case 'binary': {
- return {
- llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
- returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
- };
- }
- case 'text': {
- const content = await fs.promises.readFile(filePath, 'utf8');
- const lines = content.split('\n');
- const originalLineCount = lines.length;
-
- const startLine = offset || 0;
- const effectiveLimit =
- limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit;
- // Ensure endLine does not exceed originalLineCount
- const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
- // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
- const actualStartLine = Math.min(startLine, originalLineCount);
- const selectedLines = lines.slice(actualStartLine, endLine);
-
- let linesWereTruncatedInLength = false;
- const formattedLines = selectedLines.map((line) => {
- if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
- linesWereTruncatedInLength = true;
- return (
- line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
- );
- }
- return line;
- });
-
- const contentRangeTruncated = endLine < originalLineCount;
- const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;
-
- let llmTextContent = '';
- if (contentRangeTruncated) {
- llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`;
- } else if (linesWereTruncatedInLength) {
- llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`;
- }
- llmTextContent += formattedLines.join('\n');
-
- return {
- llmContent: llmTextContent,
- returnDisplay: isTruncated ? '(truncated)' : '',
- isTruncated,
- originalLineCount,
- linesShown: [actualStartLine + 1, endLine],
- };
- }
- case 'image':
- case 'pdf': {
- const contentBuffer = await fs.promises.readFile(filePath);
- const base64Data = contentBuffer.toString('base64');
- return {
- llmContent: {
- inlineData: {
- data: base64Data,
- mimeType: mime.lookup(filePath) || 'application/octet-stream',
- },
- },
- returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
- };
- }
- default: {
- // Should not happen with current detectFileType logic
- const exhaustiveCheck: never = fileType;
- return {
- llmContent: `Unhandled file type: ${exhaustiveCheck}`,
- returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
- error: `Unhandled file type for ${filePath}`,
- };
- }
- }
- } catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error);
- const displayPath = path
- .relative(rootDirectory, filePath)
- .replace(/\\/g, '/');
- return {
- llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
- returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
- error: `Error reading file ${filePath}: ${errorMessage}`,
- };
- }
-}