diff options
Diffstat (limited to 'packages/core/src')
| -rw-r--r-- | packages/core/src/utils/memoryDiscovery.ts | 13 | ||||
| -rw-r--r-- | packages/core/src/utils/memoryImportProcessor.test.ts | 257 | ||||
| -rw-r--r-- | packages/core/src/utils/memoryImportProcessor.ts | 214 |
3 files changed, 482 insertions, 2 deletions
// ---------------------------------------------------------------------------
// Diff hunk for packages/core/src/utils/memoryDiscovery.ts, preserved verbatim.
// Only a fragment of readGeminiMdFiles() is visible in this diff, so it is
// recorded here rather than edited.
//
// diff --git a/packages/core/src/utils/memoryDiscovery.ts b/packages/core/src/utils/memoryDiscovery.ts
// index 47d9f9a1..ab240ea8 100644
// --- a/packages/core/src/utils/memoryDiscovery.ts
// +++ b/packages/core/src/utils/memoryDiscovery.ts
// @@ -14,6 +14,7 @@ import {
//    getAllGeminiMdFilenames,
//  } from '../tools/memoryTool.js';
//  import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
// +import { processImports } from './memoryImportProcessor.js';
//
//  // Simple console logger, similar to the one previously in CLI's config.ts
//  // TODO: Integrate with a more robust server-side logger if available/appropriate.
// @@ -223,10 +224,18 @@ async function readGeminiMdFiles(
//    for (const filePath of filePaths) {
//      try {
//        const content = await fs.readFile(filePath, 'utf-8');
// -      results.push({ filePath, content });
// +
// +      // Process imports in the content
// +      const processedContent = await processImports(
// +        content,
// +        path.dirname(filePath),
// +        debugMode,
// +      );
// +
// +      results.push({ filePath, content: processedContent });
//        if (debugMode)
//          logger.debug(
// -          `Successfully read: ${filePath} (Length: ${content.length})`,
// +          `Successfully read and processed imports: ${filePath} (Length: ${processedContent.length})`,
//          );
//      } catch (error: unknown) {
//        const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
//
// diff --git a/packages/core/src/utils/memoryImportProcessor.test.ts b/packages/core/src/utils/memoryImportProcessor.test.ts
// new file mode 100644 index 00000000..2f23dd2e --- /dev/null +++ b/packages/core/src/utils/memoryImportProcessor.test.ts @@ -0,0 +1,257 @@
// ---------------------------------------------------------------------------
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs/promises';
import * as path from 'path';
import { processImports, validateImportPath } from './memoryImportProcessor.js';

// Auto-mock fs/promises so no test touches the real file system.
vi.mock('fs/promises');
const mockedFs = vi.mocked(fs);

describe('memoryImportProcessor', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Silence and capture console output. Spies (rather than reassigning
    // console.*) let vitest put the real methods back by itself.
    vi.spyOn(console, 'warn').mockImplementation(() => {});
    vi.spyOn(console, 'error').mockImplementation(() => {});
    vi.spyOn(console, 'debug').mockImplementation(() => {});
  });

  afterEach(() => {
    // Restores the console spies installed in beforeEach.
    vi.restoreAllMocks();
  });

  describe('processImports', () => {
    it('should process basic md file imports', async () => {
      const content = 'Some content @./test.md more content';
      const basePath = '/test/path';
      const importedContent = '# Imported Content\nThis is imported.';

      mockedFs.access.mockResolvedValue(undefined);
      mockedFs.readFile.mockResolvedValue(importedContent);

      const result = await processImports(content, basePath, true);

      expect(result).toContain('<!-- Imported from: ./test.md -->');
      expect(result).toContain(importedContent);
      expect(result).toContain('<!-- End of import from: ./test.md -->');
      expect(mockedFs.readFile).toHaveBeenCalledWith(
        path.resolve(basePath, './test.md'),
        'utf-8',
      );
    });

    it('should warn and fail for non-md file imports', async () => {
      const content = 'Some content @./instructions.txt more content';
      const basePath = '/test/path';

      const result = await processImports(content, basePath, true);

      expect(console.warn).toHaveBeenCalledWith(
        '[WARN] [ImportProcessor]',
        'Import processor only supports .md files. Attempting to import non-md file: ./instructions.txt. This will fail.',
      );
      expect(result).toContain(
        '<!-- Import failed: ./instructions.txt - Only .md files are supported -->',
      );
      expect(mockedFs.readFile).not.toHaveBeenCalled();
    });

    it('should handle circular imports', async () => {
      const content = 'Content @./circular.md more content';
      const basePath = '/test/path';
      const circularContent = 'Circular @./main.md content';

      mockedFs.access.mockResolvedValue(undefined);
      // Every read returns the same text, so main.md ends up importing itself.
      mockedFs.readFile.mockResolvedValue(circularContent);

      // Seed the state as if main.md were the file being processed. Built
      // with path.resolve so it matches what the processor computes on any OS.
      const importState = {
        processedFiles: new Set<string>(),
        maxDepth: 10,
        currentDepth: 0,
        currentFile: path.resolve(basePath, 'main.md'),
      };

      const result = await processImports(content, basePath, true, importState);

      // circular.md -> main.md -> main.md: the self-import is reported once
      // the processor is inside main.md (two levels down).
      expect(result).toContain('<!-- Circular import detected: ./main.md -->');
    });

    it('should handle file not found errors', async () => {
      const content = 'Content @./nonexistent.md more content';
      const basePath = '/test/path';

      mockedFs.access.mockRejectedValue(new Error('File not found'));

      const result = await processImports(content, basePath, true);

      expect(result).toContain(
        '<!-- Import failed: ./nonexistent.md - File not found -->',
      );
      expect(console.error).toHaveBeenCalledWith(
        '[ERROR] [ImportProcessor]',
        'Failed to import ./nonexistent.md: File not found',
      );
    });

    it('should respect max depth limit', async () => {
      const content = 'Content @./deep.md more content';
      const basePath = '/test/path';
      const deepContent = 'Deep @./deeper.md content';

      mockedFs.access.mockResolvedValue(undefined);
      mockedFs.readFile.mockResolvedValue(deepContent);

      // currentDepth already equals maxDepth, so nothing may be expanded.
      const importState = {
        processedFiles: new Set<string>(),
        maxDepth: 1,
        currentDepth: 1,
      };

      const result = await processImports(content, basePath, true, importState);

      expect(console.warn).toHaveBeenCalledWith(
        '[WARN] [ImportProcessor]',
        'Maximum import depth (1) reached. Stopping import processing.',
      );
      expect(result).toBe(content);
    });

    it('should handle nested imports recursively', async () => {
      const content = 'Main @./nested.md content';
      const basePath = '/test/path';
      const nestedContent = 'Nested @./inner.md content';
      const innerContent = 'Inner content';

      mockedFs.access.mockResolvedValue(undefined);
      mockedFs.readFile
        .mockResolvedValueOnce(nestedContent)
        .mockResolvedValueOnce(innerContent);

      const result = await processImports(content, basePath, true);

      expect(result).toContain('<!-- Imported from: ./nested.md -->');
      expect(result).toContain('<!-- Imported from: ./inner.md -->');
      expect(result).toContain(innerContent);
    });

    it('should reject absolute paths outside the base directory', async () => {
      const content = 'Content @/absolute/path/file.md more content';
      const basePath = '/test/path';
      const importedContent = 'Absolute path content';

      mockedFs.access.mockResolvedValue(undefined);
      mockedFs.readFile.mockResolvedValue(importedContent);

      const result = await processImports(content, basePath, true);

      expect(result).toContain(
        '<!-- Import failed: /absolute/path/file.md - Path traversal attempt -->',
      );
    });

    it('should handle multiple imports in same content', async () => {
      const content = 'Start @./first.md middle @./second.md end';
      const basePath = '/test/path';
      const firstContent = 'First content';
      const secondContent = 'Second content';

      mockedFs.access.mockResolvedValue(undefined);
      mockedFs.readFile
        .mockResolvedValueOnce(firstContent)
        .mockResolvedValueOnce(secondContent);

      const result = await processImports(content, basePath, true);

      expect(result).toContain('<!-- Imported from: ./first.md -->');
      expect(result).toContain('<!-- Imported from: ./second.md -->');
      expect(result).toContain(firstContent);
      expect(result).toContain(secondContent);
    });
  });

  describe('validateImportPath', () => {
    it('should reject URLs', () => {
      expect(
        validateImportPath('https://example.com/file.md', '/base', [
          '/allowed',
        ]),
      ).toBe(false);
      expect(
        validateImportPath('http://example.com/file.md', '/base', ['/allowed']),
      ).toBe(false);
      expect(
        validateImportPath('file:///path/to/file.md', '/base', ['/allowed']),
      ).toBe(false);
    });

    it('should only allow paths within allowed directories', () => {
      expect(validateImportPath('./file.md', '/base', ['/base'])).toBe(true);
      expect(validateImportPath('../file.md', '/base', ['/allowed'])).toBe(
        false,
      );
      expect(
        validateImportPath('/allowed/sub/file.md', '/base', ['/allowed']),
      ).toBe(true);
    });

    it('should reject paths outside allowed directories', () => {
      expect(
        validateImportPath('/forbidden/file.md', '/base', ['/allowed']),
      ).toBe(false);
      expect(validateImportPath('../../../file.md', '/base', ['/base'])).toBe(
        false,
      );
    });

    it('should handle multiple allowed directories', () => {
      expect(
        validateImportPath('./file.md', '/base', ['/allowed1', '/allowed2']),
      ).toBe(false);
      expect(
        validateImportPath('/allowed1/file.md', '/base', [
          '/allowed1',
          '/allowed2',
        ]),
      ).toBe(true);
      expect(
        validateImportPath('/allowed2/file.md', '/base', [
          '/allowed1',
          '/allowed2',
        ]),
      ).toBe(true);
    });

    it('should handle relative paths correctly', () => {
      expect(validateImportPath('file.md', '/base', ['/base'])).toBe(true);
      expect(validateImportPath('./file.md', '/base', ['/base'])).toBe(true);
      expect(validateImportPath('../file.md', '/base', ['/parent'])).toBe(
        false,
      );
    });

    it('should handle absolute paths correctly', () => {
      expect(
        validateImportPath('/allowed/file.md', '/base', ['/allowed']),
      ).toBe(true);
      expect(
        validateImportPath('/forbidden/file.md', '/base', ['/allowed']),
      ).toBe(false);
    });
  });
});
// diff --git (the header of the next file in this diff follows)
// a/packages/core/src/utils/memoryImportProcessor.ts b/packages/core/src/utils/memoryImportProcessor.ts
// new file mode 100644 index 00000000..2128cbcc --- /dev/null +++ b/packages/core/src/utils/memoryImportProcessor.ts @@ -0,0 +1,214 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import * as fs from 'fs/promises';
import * as path from 'path';

// Simple console logger for import processing
const logger = {
  debug: (...args: unknown[]) =>
    console.debug('[DEBUG] [ImportProcessor]', ...args),
  warn: (...args: unknown[]) => console.warn('[WARN] [ImportProcessor]', ...args),
  error: (...args: unknown[]) =>
    console.error('[ERROR] [ImportProcessor]', ...args),
};

/**
 * Interface for tracking import processing state to prevent circular imports
 */
interface ImportState {
  /** Absolute paths of the files already expanded along the current chain. */
  processedFiles: Set<string>;
  /** Recursion stops once `currentDepth` reaches this value. */
  maxDepth: number;
  /** Nesting level of the content currently being processed (0 = root). */
  currentDepth: number;
  /** Absolute path of the file whose content is being processed, if known. */
  currentFile?: string;
}

/**
 * Matches `@path/to/file.ext` tokens (any extension). Only ever used through
 * `String.prototype.matchAll`, which works on a private copy of the regex, so
 * the shared `lastIndex` is never advanced by recursive calls.
 */
const IMPORT_PATTERN = /@([./]?[^\s\n]+\.[^\s\n]+)/g;

/**
 * Processes import statements in GEMINI.md content.
 * Supports `@path/to/file.md` syntax for importing content from other files.
 *
 * Every import token is replaced by the (recursively processed) content of
 * the referenced file wrapped in `<!-- Imported from: … -->` markers, or by
 * an HTML comment describing why the import was not performed. An `@` only
 * starts an import when it begins a token (start of content or preceded by
 * whitespace), so text such as `user@example.com` is left untouched.
 *
 * @param content - The content to process for imports
 * @param basePath - The directory path where the current file is located
 * @param debugMode - Whether to enable debug logging
 * @param importState - State tracking for circular import prevention
 * @returns Processed content with imports resolved
 */
export async function processImports(
  content: string,
  basePath: string,
  debugMode: boolean = false,
  importState: ImportState = {
    processedFiles: new Set(),
    maxDepth: 10,
    currentDepth: 0,
  },
): Promise<string> {
  if (importState.currentDepth >= importState.maxDepth) {
    if (debugMode) {
      logger.warn(
        `Maximum import depth (${importState.maxDepth}) reached. Stopping import processing.`,
      );
    }
    return content;
  }

  // The output is assembled from slices of the original content plus the
  // replacement texts. Splicing on match offsets (instead of calling
  // String.replace with a string) means `$&`, `$$`, `$'` … inside an imported
  // file are copied literally, and exactly the matched occurrence is replaced.
  const pieces: string[] = [];
  let cursor = 0;

  for (const match of content.matchAll(IMPORT_PATTERN)) {
    const start = match.index ?? 0;

    // Ignore an '@' in the middle of a word (e-mail addresses and the like).
    if (start > 0 && !/\s/.test(content.charAt(start - 1))) {
      continue;
    }

    const replacement = await resolveImport(
      match[1],
      basePath,
      debugMode,
      importState,
    );
    pieces.push(content.slice(cursor, start), replacement);
    cursor = start + match[0].length;
  }

  pieces.push(content.slice(cursor));
  return pieces.join('');
}

/**
 * Produces the text that replaces a single `@importPath` token: either the
 * wrapped, recursively processed file content or an explanatory HTML comment.
 */
async function resolveImport(
  importPath: string,
  basePath: string,
  debugMode: boolean,
  importState: ImportState,
): Promise<string> {
  // Validate import path to prevent path traversal attacks
  if (!validateImportPath(importPath, basePath, [basePath])) {
    return `<!-- Import failed: ${importPath} - Path traversal attempt -->`;
  }

  // Only markdown files may be imported; anything else is reported.
  if (!importPath.endsWith('.md')) {
    logger.warn(
      `Import processor only supports .md files. Attempting to import non-md file: ${importPath}. This will fail.`,
    );
    return `<!-- Import failed: ${importPath} - Only .md files are supported -->`;
  }

  const fullPath = path.resolve(basePath, importPath);
  if (debugMode) {
    logger.debug(`Processing import: ${importPath} -> ${fullPath}`);
  }

  const { currentFile, processedFiles } = importState;
  const circular = (): string => {
    if (debugMode) {
      logger.warn(`Circular import detected: ${importPath}`);
    }
    return `<!-- Circular import detected: ${importPath} -->`;
  };

  // The file being processed imports itself.
  if (currentFile === fullPath) {
    return circular();
  }

  // The file is already part of the current import chain.
  if (processedFiles.has(fullPath)) {
    if (debugMode) {
      logger.warn(`File already processed in this chain: ${importPath}`);
    }
    return `<!-- File already processed: ${importPath} -->`;
  }

  // Same self-import check, resolved against the current file's own directory
  // (this differs from the check above only when a caller supplies a
  // `currentFile` that does not live in `basePath`).
  if (
    currentFile &&
    path.resolve(path.dirname(currentFile), importPath) === currentFile
  ) {
    return circular();
  }

  try {
    // Check if the file exists
    await fs.access(fullPath);

    // Read the imported file content
    const importedContent = await fs.readFile(fullPath, 'utf-8');
    if (debugMode) {
      logger.debug(`Successfully read imported file: ${fullPath}`);
    }

    // Recursively process imports in the imported content. The child gets its
    // own copy of the chain so sibling imports do not affect one another.
    const processedImportedContent = await processImports(
      importedContent,
      path.dirname(fullPath),
      debugMode,
      {
        ...importState,
        processedFiles: new Set([...processedFiles, fullPath]),
        currentDepth: importState.currentDepth + 1,
        currentFile: fullPath,
      },
    );

    return `<!-- Imported from: ${importPath} -->\n${processedImportedContent}\n<!-- End of import from: ${importPath} -->`;
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    if (debugMode) {
      logger.error(`Failed to import ${importPath}: ${errorMessage}`);
    }
    return `<!-- Import failed: ${importPath} - ${errorMessage} -->`;
  }
}

/**
 * Validates import paths to ensure they are safe and within allowed directories.
 *
 * Containment is decided with `path.relative`, so a sibling directory that
 * merely shares a name prefix with an allowed directory (`/base-evil` versus
 * `/base`) is rejected. The check is purely lexical; it does not resolve
 * symbolic links.
 *
 * @param importPath - The import path to validate
 * @param basePath - The base directory for resolving relative paths
 * @param allowedDirectories - Array of allowed directory paths
 * @returns Whether the import path is valid
 */
export function validateImportPath(
  importPath: string,
  basePath: string,
  allowedDirectories: string[],
): boolean {
  // Reject URLs (scheme matched case-insensitively)
  if (/^(file|https?):\/\//i.test(importPath)) {
    return false;
  }

  const resolvedPath = path.resolve(basePath, importPath);

  return allowedDirectories.some((allowedDir) => {
    const relative = path.relative(path.resolve(allowedDir), resolvedPath);
    if (relative === '') {
      return true; // the allowed directory itself
    }
    // An absolute result means a different root/drive; a leading '..' segment
    // means the target is above the allowed directory.
    return (
      !path.isAbsolute(relative) &&
      relative !== '..' &&
      !relative.startsWith(`..${path.sep}`)
    );
  });
}
