summary | refs | log | tree | commit | diff
path: root/packages/core/src
diff options
context:
space:
mode:
author: Niladri Das <[email protected]> 2025-06-30 04:21:47 +0530
committer: GitHub <[email protected]> 2025-06-29 22:51:47 +0000
commit: f848d3575822703e7cedb3ae987a0e7341b6b390 (patch)
tree: 094512c57072c975248f138c4c1d8ce6fc91475e /packages/core/src
parent: ada4061a458c78c9241439e5a8cfa4893cb4ab5a (diff)
feat: modular GEMINI.md imports with @file.md syntax (#1585) (#2230)
Diffstat (limited to 'packages/core/src')
-rw-r--r-- packages/core/src/utils/memoryDiscovery.ts | 13
-rw-r--r-- packages/core/src/utils/memoryImportProcessor.test.ts | 257
-rw-r--r-- packages/core/src/utils/memoryImportProcessor.ts | 214
3 files changed, 482 insertions, 2 deletions
diff --git a/packages/core/src/utils/memoryDiscovery.ts b/packages/core/src/utils/memoryDiscovery.ts
index 47d9f9a1..ab240ea8 100644
--- a/packages/core/src/utils/memoryDiscovery.ts
+++ b/packages/core/src/utils/memoryDiscovery.ts
@@ -14,6 +14,7 @@ import {
getAllGeminiMdFilenames,
} from '../tools/memoryTool.js';
import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
+import { processImports } from './memoryImportProcessor.js';
// Simple console logger, similar to the one previously in CLI's config.ts
// TODO: Integrate with a more robust server-side logger if available/appropriate.
@@ -223,10 +224,18 @@ async function readGeminiMdFiles(
for (const filePath of filePaths) {
try {
const content = await fs.readFile(filePath, 'utf-8');
- results.push({ filePath, content });
+
+ // Process imports in the content
+ const processedContent = await processImports(
+ content,
+ path.dirname(filePath),
+ debugMode,
+ );
+
+ results.push({ filePath, content: processedContent });
if (debugMode)
logger.debug(
- `Successfully read: ${filePath} (Length: ${content.length})`,
+ `Successfully read and processed imports: ${filePath} (Length: ${processedContent.length})`,
);
} catch (error: unknown) {
const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
diff --git a/packages/core/src/utils/memoryImportProcessor.test.ts b/packages/core/src/utils/memoryImportProcessor.test.ts
new file mode 100644
index 00000000..2f23dd2e
--- /dev/null
+++ b/packages/core/src/utils/memoryImportProcessor.test.ts
@@ -0,0 +1,257 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import { processImports, validateImportPath } from './memoryImportProcessor.js';
+
+// Mock fs/promises so the tests stub access/readFile instead of touching real files
+vi.mock('fs/promises');
+const mockedFs = vi.mocked(fs); // typed handle used by each test to program the stubs
+
+// Keep references to the real console methods; afterEach puts them back after beforeEach swaps in spies
+const originalConsoleWarn = console.warn;
+const originalConsoleError = console.error;
+const originalConsoleDebug = console.debug;
+
+describe('memoryImportProcessor', () => {
+ beforeEach(() => {
+ vi.clearAllMocks(); // start every test with empty call history on the mocks
+ // Swap the console methods for fresh spies so tests can assert on the processor's log output
+ console.warn = vi.fn();
+ console.error = vi.fn();
+ console.debug = vi.fn();
+ });
+
+ afterEach(() => {
+ // Restore the real console methods saved at module scope
+ console.warn = originalConsoleWarn;
+ console.error = originalConsoleError;
+ console.debug = originalConsoleDebug;
+ });
+
+ describe('processImports', () => {
+ it('should process basic md file imports', async () => {
+ const content = 'Some content @./test.md more content';
+ const basePath = '/test/path';
+ const importedContent = '# Imported Content\nThis is imported.';
+
+ mockedFs.access.mockResolvedValue(undefined); // access() resolving means the existence check passes
+ mockedFs.readFile.mockResolvedValue(importedContent);
+
+ const result = await processImports(content, basePath, true);
+
+ expect(result).toContain('<!-- Imported from: ./test.md -->');
+ expect(result).toContain(importedContent);
+ expect(result).toContain('<!-- End of import from: ./test.md -->');
+ expect(mockedFs.readFile).toHaveBeenCalledWith(
+ path.resolve(basePath, './test.md'),
+ 'utf-8',
+ );
+ });
+
+ it('should warn and fail for non-md file imports', async () => {
+ const content = 'Some content @./instructions.txt more content';
+ const basePath = '/test/path';
+
+ const result = await processImports(content, basePath, true);
+
+ expect(console.warn).toHaveBeenCalledWith(
+ '[WARN] [ImportProcessor]',
+ 'Import processor only supports .md files. Attempting to import non-md file: ./instructions.txt. This will fail.',
+ );
+ expect(result).toContain(
+ '<!-- Import failed: ./instructions.txt - Only .md files are supported -->',
+ );
+ expect(mockedFs.readFile).not.toHaveBeenCalled(); // the extension check rejects the import before any file access
+ });
+
+ it('should handle circular imports', async () => {
+ const content = 'Content @./circular.md more content';
+ const basePath = '/test/path';
+ const circularContent = 'Circular @./main.md content';
+
+ mockedFs.access.mockResolvedValue(undefined);
+ mockedFs.readFile.mockResolvedValue(circularContent); // NOTE: every path, main.md included, reads back this same text
+
+ // Set up the import state to simulate we're already processing main.md
+ const importState = {
+ processedFiles: new Set<string>(),
+ maxDepth: 10,
+ currentDepth: 0,
+ currentFile: '/test/path/main.md', // Simulate we're processing main.md
+ };
+
+ const result = await processImports(content, basePath, true, importState);
+
+ // With the stub above, the marker is emitted when the stubbed main.md ('Circular @./main.md content') imports itself
+ expect(result).toContain('<!-- Circular import detected: ./main.md -->');
+ });
+
+ it('should handle file not found errors', async () => {
+ const content = 'Content @./nonexistent.md more content';
+ const basePath = '/test/path';
+
+ mockedFs.access.mockRejectedValue(new Error('File not found')); // the error message is echoed into the failure comment
+
+ const result = await processImports(content, basePath, true);
+
+ expect(result).toContain(
+ '<!-- Import failed: ./nonexistent.md - File not found -->',
+ );
+ expect(console.error).toHaveBeenCalledWith(
+ '[ERROR] [ImportProcessor]',
+ 'Failed to import ./nonexistent.md: File not found',
+ );
+ });
+
+ it('should respect max depth limit', async () => {
+ const content = 'Content @./deep.md more content';
+ const basePath = '/test/path';
+ const deepContent = 'Deep @./deeper.md content';
+
+ mockedFs.access.mockResolvedValue(undefined);
+ mockedFs.readFile.mockResolvedValue(deepContent);
+
+ const importState = {
+ processedFiles: new Set<string>(),
+ maxDepth: 1,
+ currentDepth: 1, // already equal to maxDepth, so processing stops before any import is resolved
+ };
+
+ const result = await processImports(content, basePath, true, importState);
+
+ expect(console.warn).toHaveBeenCalledWith(
+ '[WARN] [ImportProcessor]',
+ 'Maximum import depth (1) reached. Stopping import processing.',
+ );
+ expect(result).toBe(content); // content comes back untouched, directive included
+ });
+
+ it('should handle nested imports recursively', async () => {
+ const content = 'Main @./nested.md content';
+ const basePath = '/test/path';
+ const nestedContent = 'Nested @./inner.md content';
+ const innerContent = 'Inner content';
+
+ mockedFs.access.mockResolvedValue(undefined);
+ mockedFs.readFile
+ .mockResolvedValueOnce(nestedContent) // first read: ./nested.md
+ .mockResolvedValueOnce(innerContent); // second read: ./inner.md, reached while expanding nested.md
+
+ const result = await processImports(content, basePath, true);
+
+ expect(result).toContain('<!-- Imported from: ./nested.md -->');
+ expect(result).toContain('<!-- Imported from: ./inner.md -->');
+ expect(result).toContain(innerContent);
+ });
+
+ it('should handle absolute paths in imports', async () => {
+ const content = 'Content @/absolute/path/file.md more content';
+ const basePath = '/test/path';
+ const importedContent = 'Absolute path content';
+
+ mockedFs.access.mockResolvedValue(undefined);
+ mockedFs.readFile.mockResolvedValue(importedContent);
+
+ const result = await processImports(content, basePath, true); // basePath is the only allowed directory, so this absolute path is rejected
+
+ expect(result).toContain(
+ '<!-- Import failed: /absolute/path/file.md - Path traversal attempt -->',
+ );
+ });
+
+ it('should handle multiple imports in same content', async () => {
+ const content = 'Start @./first.md middle @./second.md end';
+ const basePath = '/test/path';
+ const firstContent = 'First content';
+ const secondContent = 'Second content';
+
+ mockedFs.access.mockResolvedValue(undefined);
+ mockedFs.readFile
+ .mockResolvedValueOnce(firstContent) // imports are resolved in document order
+ .mockResolvedValueOnce(secondContent);
+
+ const result = await processImports(content, basePath, true);
+
+ expect(result).toContain('<!-- Imported from: ./first.md -->');
+ expect(result).toContain('<!-- Imported from: ./second.md -->');
+ expect(result).toContain(firstContent);
+ expect(result).toContain(secondContent);
+ });
+ });
+
+ describe('validateImportPath', () => {
+ it('should reject URLs', () => {
+ expect(
+ validateImportPath('https://example.com/file.md', '/base', [
+ '/allowed',
+ ]),
+ ).toBe(false);
+ expect(
+ validateImportPath('http://example.com/file.md', '/base', ['/allowed']),
+ ).toBe(false);
+ expect(
+ validateImportPath('file:///path/to/file.md', '/base', ['/allowed']),
+ ).toBe(false);
+ });
+
+ it('should allow paths within allowed directories', () => {
+ expect(validateImportPath('./file.md', '/base', ['/base'])).toBe(true);
+ expect(validateImportPath('../file.md', '/base', ['/allowed'])).toBe(
+ false, // negative control: '../file.md' leaves /base and is not under /allowed
+ );
+ expect(
+ validateImportPath('/allowed/sub/file.md', '/base', ['/allowed']),
+ ).toBe(true);
+ });
+
+ it('should reject paths outside allowed directories', () => {
+ expect(
+ validateImportPath('/forbidden/file.md', '/base', ['/allowed']),
+ ).toBe(false);
+ expect(validateImportPath('../../../file.md', '/base', ['/base'])).toBe(
+ false, // the '..' segments climb above /base
+ );
+ });
+
+ it('should handle multiple allowed directories', () => {
+ expect(
+ validateImportPath('./file.md', '/base', ['/allowed1', '/allowed2']),
+ ).toBe(false); // basePath is only used for resolution; /base is not in the allowed list
+ expect(
+ validateImportPath('/allowed1/file.md', '/base', [
+ '/allowed1',
+ '/allowed2',
+ ]),
+ ).toBe(true);
+ expect(
+ validateImportPath('/allowed2/file.md', '/base', [
+ '/allowed1',
+ '/allowed2',
+ ]),
+ ).toBe(true);
+ });
+
+ it('should handle relative paths correctly', () => {
+ expect(validateImportPath('file.md', '/base', ['/base'])).toBe(true);
+ expect(validateImportPath('./file.md', '/base', ['/base'])).toBe(true);
+ expect(validateImportPath('../file.md', '/base', ['/parent'])).toBe(
+ false, // resolves beside /base, which is not under /parent
+ );
+ });
+
+ it('should handle absolute paths correctly', () => {
+ expect(
+ validateImportPath('/allowed/file.md', '/base', ['/allowed']),
+ ).toBe(true);
+ expect(
+ validateImportPath('/forbidden/file.md', '/base', ['/allowed']),
+ ).toBe(false);
+ });
+ });
+});
diff --git a/packages/core/src/utils/memoryImportProcessor.ts b/packages/core/src/utils/memoryImportProcessor.ts
new file mode 100644
index 00000000..2128cbcc
--- /dev/null
+++ b/packages/core/src/utils/memoryImportProcessor.ts
@@ -0,0 +1,214 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import * as fs from 'fs/promises';
+import * as path from 'path';
+
+// Simple console logger for import processing.
+// Each method prefixes its output with a level tag and "[ImportProcessor]".
+// console.* is looked up at call time (not captured here), so spies that
+// replace console.warn/error/debug after module load still observe the calls.
+const logger = {
+  debug: (...args: unknown[]) =>
+    console.debug('[DEBUG] [ImportProcessor]', ...args),
+  warn: (...args: unknown[]) =>
+    console.warn('[WARN] [ImportProcessor]', ...args),
+  error: (...args: unknown[]) =>
+    console.error('[ERROR] [ImportProcessor]', ...args),
+};
+
+/**
+ * Interface for tracking import processing state to prevent circular imports
+ * and unbounded recursion.
+ *
+ * processImports does not mutate the object it receives: for each nested
+ * import it passes down a derived copy with currentDepth incremented,
+ * processedFiles extended by the imported file, and currentFile set to it.
+ */
+interface ImportState {
+ processedFiles: Set<string>; // Resolved paths of files already imported along the current chain
+ maxDepth: number; // Import processing stops once currentDepth reaches this value
+ currentDepth: number; // Nesting level of the content being processed (0 by default for the root)
+ currentFile?: string; // Track the current file being processed
+}
+
+/**
+ * Processes import statements in GEMINI.md content.
+ * Supports @path/to/file.md syntax for importing content from other files.
+ *
+ * Every directive is replaced in place, either by the imported file's content
+ * (processed recursively and wrapped in `<!-- Imported from: … -->` /
+ * `<!-- End of import from: … -->` markers) or by an HTML comment naming the
+ * reason the import was skipped. File access errors are caught and reported
+ * in the returned text rather than thrown.
+ *
+ * The output is assembled from slices of the original content by match
+ * position. Imported text is therefore inserted verbatim (sequences such as
+ * `$&` or `$1` are not interpreted) and each directive is replaced exactly
+ * where it was found, even if the same text also occurs elsewhere.
+ *
+ * The root file is only recognised as part of a cycle when the caller
+ * supplies its path as `importState.currentFile`; with the default state an
+ * import chain that leads back to the root re-reads it once before the
+ * chain is cut off.
+ *
+ * @param content - The content to process for imports
+ * @param basePath - The directory path where the current file is located
+ * @param debugMode - Whether to enable debug logging
+ * @param importState - State tracking for circular import prevention
+ * @returns Processed content with imports resolved
+ */
+export async function processImports(
+  content: string,
+  basePath: string,
+  debugMode: boolean = false,
+  importState: ImportState = {
+    processedFiles: new Set(),
+    maxDepth: 10,
+    currentDepth: 0,
+  },
+): Promise<string> {
+  if (importState.currentDepth >= importState.maxDepth) {
+    if (debugMode) {
+      logger.warn(
+        `Maximum import depth (${importState.maxDepth}) reached. Stopping import processing.`,
+      );
+    }
+    return content;
+  }
+
+  // Matches @path/to/file and @./path/to/file with any extension; non-.md
+  // targets are rejected later with an explanatory comment.
+  // The lookbehind ignores an `@` glued to a preceding word character so that
+  // e-mail addresses (user@example.com) are not mistaken for imports.
+  const importRegex = /(?<!\w)@([./]?[^\s\n]+\.[^\s\n]+)/g;
+
+  const segments: string[] = [];
+  let cursor = 0;
+  let match: RegExpExecArray | null;
+
+  // Walk the matches in document order, copying the untouched text between
+  // directives and substituting each directive with its expansion.
+  while ((match = importRegex.exec(content)) !== null) {
+    segments.push(content.slice(cursor, match.index));
+    cursor = match.index + match[0].length;
+    segments.push(
+      await expandImport(match[1], basePath, debugMode, importState),
+    );
+  }
+  segments.push(content.slice(cursor));
+
+  return segments.join('');
+}
+
+/**
+ * Resolves one import directive to the text that replaces it.
+ *
+ * @param importPath - The path written after `@` in the directive
+ * @param basePath - Directory against which importPath is resolved
+ * @param debugMode - Whether to enable debug logging
+ * @param importState - State of the enclosing processImports call
+ * @returns The wrapped, recursively processed file content, or an HTML
+ *   comment describing why the import was not performed
+ */
+async function expandImport(
+  importPath: string,
+  basePath: string,
+  debugMode: boolean,
+  importState: ImportState,
+): Promise<string> {
+  // Validate import path to prevent path traversal attacks
+  if (!validateImportPath(importPath, basePath, [basePath])) {
+    return `<!-- Import failed: ${importPath} - Path traversal attempt -->`;
+  }
+
+  // Only markdown is inlined. Unlike the other diagnostics, this warning is
+  // emitted regardless of debugMode.
+  if (!importPath.endsWith('.md')) {
+    logger.warn(
+      `Import processor only supports .md files. Attempting to import non-md file: ${importPath}. This will fail.`,
+    );
+    return `<!-- Import failed: ${importPath} - Only .md files are supported -->`;
+  }
+
+  const fullPath = path.resolve(basePath, importPath);
+
+  if (debugMode) {
+    logger.debug(`Processing import: ${importPath} -> ${fullPath}`);
+  }
+
+  // The file currently being expanded imports itself
+  if (importState.currentFile === fullPath) {
+    if (debugMode) {
+      logger.warn(`Circular import detected: ${importPath}`);
+    }
+    return `<!-- Circular import detected: ${importPath} -->`;
+  }
+
+  // The target was already imported earlier in this import chain
+  if (importState.processedFiles.has(fullPath)) {
+    if (debugMode) {
+      logger.warn(`File already processed in this chain: ${importPath}`);
+    }
+    return `<!-- File already processed: ${importPath} -->`;
+  }
+
+  // Same self-import test, resolved against the current file's own directory;
+  // this differs from the check above only when basePath is not that directory
+  // (e.g. a caller-supplied importState).
+  if (importState.currentFile) {
+    const currentFileDir = path.dirname(importState.currentFile);
+    const potentialCircularPath = path.resolve(currentFileDir, importPath);
+    if (potentialCircularPath === importState.currentFile) {
+      if (debugMode) {
+        logger.warn(`Circular import detected: ${importPath}`);
+      }
+      return `<!-- Circular import detected: ${importPath} -->`;
+    }
+  }
+
+  try {
+    // Check if the file exists
+    await fs.access(fullPath);
+
+    // Read the imported file content
+    const importedContent = await fs.readFile(fullPath, 'utf-8');
+
+    if (debugMode) {
+      logger.debug(`Successfully read imported file: ${fullPath}`);
+    }
+
+    // Recursively process imports in the imported content. The state is
+    // copied, so sibling imports do not see each other's processedFiles.
+    const processedImportedContent = await processImports(
+      importedContent,
+      path.dirname(fullPath),
+      debugMode,
+      {
+        ...importState,
+        processedFiles: new Set([...importState.processedFiles, fullPath]),
+        currentDepth: importState.currentDepth + 1,
+        currentFile: fullPath, // Set the current file being processed
+      },
+    );
+
+    return `<!-- Imported from: ${importPath} -->\n${processedImportedContent}\n<!-- End of import from: ${importPath} -->`;
+  } catch (error: unknown) {
+    const errorMessage = error instanceof Error ? error.message : String(error);
+    if (debugMode) {
+      logger.error(`Failed to import ${importPath}: ${errorMessage}`);
+    }
+    return `<!-- Import failed: ${importPath} - ${errorMessage} -->`;
+  }
+}
+
+/**
+ * Validates import paths to ensure they are safe and within allowed directories.
+ *
+ * The import path is resolved against basePath and accepted only if the
+ * result is one of the allowed directories or lies beneath one. Containment
+ * is decided per path segment, so a sibling that merely shares a name prefix
+ * with an allowed directory (e.g. `/base-evil` vs `/base`) is rejected.
+ * Paths starting with `file://`, `http://` or `https://` are always rejected.
+ *
+ * @param importPath - The import path to validate
+ * @param basePath - The base directory for resolving relative paths
+ * @param allowedDirectories - Array of allowed directory paths
+ * @returns Whether the import path is valid
+ */
+export function validateImportPath(
+  importPath: string,
+  basePath: string,
+  allowedDirectories: string[],
+): boolean {
+  // Reject URLs
+  if (/^(file|https?):\/\//.test(importPath)) {
+    return false;
+  }
+
+  const resolvedPath = path.resolve(basePath, importPath);
+
+  return allowedDirectories.some((allowedDir) => {
+    const relative = path.relative(path.resolve(allowedDir), resolvedPath);
+    // Outside when the relative route starts by going up a level, or when no
+    // relative route exists at all (path.relative then returns an absolute
+    // path, e.g. for a different drive on Windows). A name that only begins
+    // with two dots, such as `..notes.md`, is still inside.
+    const escapes =
+      relative === '..' ||
+      relative.startsWith(`..${path.sep}`) ||
+      path.isAbsolute(relative);
+    return !escapes;
+  });
+}