From 804c181ac4a3dc1c4971a5b8a643421bbe697f3d Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Tue, 12 Aug 2025 09:19:09 -0700 Subject: chore(integration-tests): refactor to typescript (#5645) --- integration-tests/file-system.test.js | 89 ---- integration-tests/file-system.test.ts | 89 ++++ integration-tests/google_web_search.test.js | 74 ---- integration-tests/google_web_search.test.ts | 77 ++++ integration-tests/list_directory.test.js | 62 --- integration-tests/list_directory.test.ts | 62 +++ integration-tests/read_many_files.test.js | 50 --- integration-tests/read_many_files.test.ts | 50 +++ integration-tests/replace.test.js | 66 --- integration-tests/replace.test.ts | 66 +++ integration-tests/run-tests.js | 8 +- integration-tests/run_shell_command.test.js | 63 --- integration-tests/run_shell_command.test.ts | 63 +++ integration-tests/save_memory.test.js | 41 -- integration-tests/save_memory.test.ts | 41 ++ integration-tests/simple-mcp-server.test.js | 211 --------- integration-tests/simple-mcp-server.test.ts | 208 +++++++++ integration-tests/test-helper.js | 597 -------------------------- integration-tests/test-helper.ts | 642 ++++++++++++++++++++++++++++ integration-tests/tsconfig.json | 8 + integration-tests/write_file.test.js | 68 --- integration-tests/write_file.test.ts | 68 +++ 22 files changed, 1378 insertions(+), 1325 deletions(-) delete mode 100644 integration-tests/file-system.test.js create mode 100644 integration-tests/file-system.test.ts delete mode 100644 integration-tests/google_web_search.test.js create mode 100644 integration-tests/google_web_search.test.ts delete mode 100644 integration-tests/list_directory.test.js create mode 100644 integration-tests/list_directory.test.ts delete mode 100644 integration-tests/read_many_files.test.js create mode 100644 integration-tests/read_many_files.test.ts delete mode 100644 integration-tests/replace.test.js create mode 100644 integration-tests/replace.test.ts delete mode 100644 integration-tests/run_shell_command.test.js create mode 100644 integration-tests/run_shell_command.test.ts delete mode 100644 integration-tests/save_memory.test.js create mode 100644 integration-tests/save_memory.test.ts delete mode 100644 integration-tests/simple-mcp-server.test.js create mode 100644 integration-tests/simple-mcp-server.test.ts delete mode 100644 integration-tests/test-helper.js create mode 100644 integration-tests/test-helper.ts create mode 100644 integration-tests/tsconfig.json delete mode 100644 integration-tests/write_file.test.js create mode 100644 integration-tests/write_file.test.ts (limited to 'integration-tests') diff --git a/integration-tests/file-system.test.js b/integration-tests/file-system.test.js deleted file mode 100644 index d43f047f..00000000 --- a/integration-tests/file-system.test.js +++ /dev/null @@ -1,89 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { strict as assert } from 'assert'; -import { test } from 'node:test'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; - -test('should be able to read a file', async () => { - const rig = new TestRig(); - await rig.setup('should be able to read a file'); - rig.createFile('test.txt', 'hello world'); - - const result = await rig.run( - `read the file test.txt and show me its contents`, - ); - - const foundToolCall = await rig.waitForToolCall('read_file'); - - // Add debugging information - if (!foundToolCall || !result.includes('hello world')) { - printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains hello world': result.includes('hello world'), - }); - } - - assert.ok(foundToolCall, 'Expected to find a read_file tool call'); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, 'hello world', 'File read test'); -}); - -test('should be able to write a file', async () => { - const rig = new TestRig(); - await rig.setup('should be able to write a file'); - rig.createFile('test.txt', ''); - - const result = await rig.run(`edit test.txt to have a hello world message`); - - // Accept multiple valid tools for editing files - const foundToolCall = await rig.waitForAnyToolCall([ - 'write_file', - 'edit', - 'replace', - ]); - - // Add debugging information - if (!foundToolCall) { - printDebugInfo(rig, result); - } - - assert.ok( - foundToolCall, - 'Expected to find a write_file, edit, or replace tool call', - ); - - // Validate model output - will throw if no output - validateModelOutput(result, null, 'File write test'); - - const fileContent = rig.readFile('test.txt'); - - // Add debugging for file content - if (!fileContent.toLowerCase().includes('hello')) { - const writeCalls = rig - .readToolLogs() - .filter((t) => t.toolRequest.name === 'write_file') - .map((t) => t.toolRequest.args); - - printDebugInfo(rig, result, { - 'File content mismatch': true, - 'Expected to contain': 'hello', - 'Actual content': fileContent, - 'Write tool calls': JSON.stringify(writeCalls), - }); - } - - assert.ok( - fileContent.toLowerCase().includes('hello'), - 'Expected file to contain hello', - ); - - // Log success info if verbose - if (process.env.VERBOSE === 'true') { - console.log('File written successfully with hello message.'); - } -}); diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts new file mode 100644 index 00000000..d43f047f --- /dev/null +++ b/integration-tests/file-system.test.ts @@ -0,0 +1,89 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { strict as assert } from 'assert'; +import { test } from 'node:test'; +import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; + +test('should be able to read a file', async () => { + const rig = new TestRig(); + await rig.setup('should be able to read a file'); + rig.createFile('test.txt', 'hello world'); + + const result = await rig.run( + `read the file test.txt and show me its contents`, + ); + + const foundToolCall = await rig.waitForToolCall('read_file'); + + // Add debugging information + if (!foundToolCall || !result.includes('hello world')) { + printDebugInfo(rig, result, { + 'Found tool call': foundToolCall, + 'Contains hello world': result.includes('hello world'), + }); + } + + assert.ok(foundToolCall, 'Expected to find a read_file tool call'); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, 'hello world', 'File read test'); +}); + +test('should be able to write a file', async () => { + const rig = new TestRig(); + await rig.setup('should be able to write a file'); + rig.createFile('test.txt', ''); + + const result = await rig.run(`edit test.txt to have a hello world message`); + + // Accept multiple valid tools for editing files + const foundToolCall = await rig.waitForAnyToolCall([ + 'write_file', + 'edit', + 'replace', + ]); + + // Add debugging information + if (!foundToolCall) { + printDebugInfo(rig, result); + } + + assert.ok( + foundToolCall, + 'Expected to find a write_file, edit, or replace tool call', + ); + + // Validate model output - will throw if no output + validateModelOutput(result, null, 'File write test'); + + const fileContent = rig.readFile('test.txt'); + + // Add debugging for file content + if (!fileContent.toLowerCase().includes('hello')) { + const writeCalls = rig + .readToolLogs() + .filter((t) => t.toolRequest.name === 'write_file') + .map((t) => t.toolRequest.args); + + printDebugInfo(rig, result, { + 'File content mismatch': true, + 'Expected to contain': 'hello', + 'Actual content': fileContent, + 'Write tool calls': JSON.stringify(writeCalls), + }); + } + + assert.ok( + fileContent.toLowerCase().includes('hello'), + 'Expected file to contain hello', + ); + + // Log success info if verbose + if (process.env.VERBOSE === 'true') { + console.log('File written successfully with hello message.'); + } +}); diff --git a/integration-tests/google_web_search.test.js b/integration-tests/google_web_search.test.js deleted file mode 100644 index 31747421..00000000 --- a/integration-tests/google_web_search.test.js +++ /dev/null @@ -1,74 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { test } from 'node:test'; -import { strict as assert } from 'assert'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; - -test('should be able to search the web', async () => { - const rig = new TestRig(); - await rig.setup('should be able to search the web'); - - let result; - try { - result = await rig.run(`what is the weather in London`); - } catch (error) { - // Network errors can occur in CI environments - if ( - error.message.includes('network') || - error.message.includes('timeout') - ) { - console.warn('Skipping test due to network error:', error.message); - return; // Skip the test - } - throw error; // Re-throw if not a network error - } - - const foundToolCall = await rig.waitForToolCall('google_web_search'); - - // Add debugging information - if (!foundToolCall) { - const allTools = printDebugInfo(rig, result); - - // Check if the tool call failed due to network issues - const failedSearchCalls = allTools.filter( - (t) => - t.toolRequest.name === 'google_web_search' && !t.toolRequest.success, - ); - if (failedSearchCalls.length > 0) { - console.warn( - 'google_web_search tool was called but failed, possibly due to network issues', - ); - console.warn( - 'Failed calls:', - failedSearchCalls.map((t) => t.toolRequest.args), - ); - return; // Skip the test if network issues - } - } - - assert.ok(foundToolCall, 'Expected to find a call to google_web_search'); - - // Validate model output - will throw if no output, warn if missing expected content - const hasExpectedContent = validateModelOutput( - result, - ['weather', 'london'], - 'Google web search test', - ); - - // If content was missing, log the search queries used - if (!hasExpectedContent) { - const searchCalls = rig - .readToolLogs() - .filter((t) => t.toolRequest.name === 'google_web_search'); - if (searchCalls.length > 0) { - console.warn( - 'Search queries used:', - searchCalls.map((t) => t.toolRequest.args), - ); - } - } -}); diff --git a/integration-tests/google_web_search.test.ts b/integration-tests/google_web_search.test.ts new file mode 100644 index 00000000..6fb365a0 --- /dev/null +++ b/integration-tests/google_web_search.test.ts @@ -0,0 +1,77 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { test } from 'node:test'; +import { strict as assert } from 'assert'; +import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; + +test('should be able to search the web', async () => { + const rig = new TestRig(); + await rig.setup('should be able to search the web'); + + let result; + try { + result = await rig.run(`what is the weather in London`); + } catch (error) { + // Network errors can occur in CI environments + if ( + error instanceof Error && + (error.message.includes('network') || error.message.includes('timeout')) + ) { + console.warn( + 'Skipping test due to network error:', + (error as Error).message, + ); + return; // Skip the test + } + throw error; // Re-throw if not a network error + } + + const foundToolCall = await rig.waitForToolCall('google_web_search'); + + // Add debugging information + if (!foundToolCall) { + const allTools = printDebugInfo(rig, result); + + // Check if the tool call failed due to network issues + const failedSearchCalls = allTools.filter( + (t) => + t.toolRequest.name === 'google_web_search' && !t.toolRequest.success, + ); + if (failedSearchCalls.length > 0) { + console.warn( + 'google_web_search tool was called but failed, possibly due to network issues', + ); + console.warn( + 'Failed calls:', + failedSearchCalls.map((t) => t.toolRequest.args), + ); + return; // Skip the test if network issues + } + } + + assert.ok(foundToolCall, 'Expected to find a call to google_web_search'); + + // Validate model output - will throw if no output, warn if missing expected content + const hasExpectedContent = validateModelOutput( + result, + ['weather', 'london'], + 'Google web search test', + ); + + // If content was missing, log the search queries used + if (!hasExpectedContent) { + const searchCalls = rig + .readToolLogs() + .filter((t) => t.toolRequest.name === 'google_web_search'); + if (searchCalls.length > 0) { + console.warn( + 'Search queries used:', + searchCalls.map((t) => t.toolRequest.args), + ); + } + } +}); diff --git a/integration-tests/list_directory.test.js b/integration-tests/list_directory.test.js deleted file mode 100644 index 16f49f4b..00000000 --- a/integration-tests/list_directory.test.js +++ /dev/null @@ -1,62 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { test } from 'node:test'; -import { strict as assert } from 'assert'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; -import { existsSync } from 'fs'; -import { join } from 'path'; - -test('should be able to list a directory', async () => { - const rig = new TestRig(); - await rig.setup('should be able to list a directory'); - rig.createFile('file1.txt', 'file 1 content'); - rig.mkdir('subdir'); - rig.sync(); - - // Poll for filesystem changes to propagate in containers - await rig.poll( - () => { - // Check if the files exist in the test directory - const file1Path = join(rig.testDir, 'file1.txt'); - const subdirPath = join(rig.testDir, 'subdir'); - return existsSync(file1Path) && existsSync(subdirPath); - }, - 1000, // 1 second max wait - 50, // check every 50ms - ); - - const prompt = `Can you list the files in the current directory. Display them in the style of 'ls'`; - - const result = await rig.run(prompt); - - const foundToolCall = await rig.waitForToolCall('list_directory'); - - // Add debugging information - if ( - !foundToolCall || - !result.includes('file1.txt') || - !result.includes('subdir') - ) { - const allTools = printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains file1.txt': result.includes('file1.txt'), - 'Contains subdir': result.includes('subdir'), - }); - - console.error( - 'List directory calls:', - allTools - .filter((t) => t.toolRequest.name === 'list_directory') - .map((t) => t.toolRequest.args), - ); - } - - assert.ok(foundToolCall, 'Expected to find a list_directory tool call'); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test'); -}); diff --git a/integration-tests/list_directory.test.ts b/integration-tests/list_directory.test.ts new file mode 100644 index 00000000..023eca12 --- /dev/null +++ b/integration-tests/list_directory.test.ts @@ -0,0 +1,62 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { test } from 'node:test'; +import { strict as assert } from 'assert'; +import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; +import { existsSync } from 'fs'; +import { join } from 'path'; + +test('should be able to list a directory', async () => { + const rig = new TestRig(); + await rig.setup('should be able to list a directory'); + rig.createFile('file1.txt', 'file 1 content'); + rig.mkdir('subdir'); + rig.sync(); + + // Poll for filesystem changes to propagate in containers + await rig.poll( + () => { + // Check if the files exist in the test directory + const file1Path = join(rig.testDir!, 'file1.txt'); + const subdirPath = join(rig.testDir!, 'subdir'); + return existsSync(file1Path) && existsSync(subdirPath); + }, + 1000, // 1 second max wait + 50, // check every 50ms + ); + + const prompt = `Can you list the files in the current directory. Display them in the style of 'ls'`; + + const result = await rig.run(prompt); + + const foundToolCall = await rig.waitForToolCall('list_directory'); + + // Add debugging information + if ( + !foundToolCall || + !result.includes('file1.txt') || + !result.includes('subdir') + ) { + const allTools = printDebugInfo(rig, result, { + 'Found tool call': foundToolCall, + 'Contains file1.txt': result.includes('file1.txt'), + 'Contains subdir': result.includes('subdir'), + }); + + console.error( + 'List directory calls:', + allTools + .filter((t) => t.toolRequest.name === 'list_directory') + .map((t) => t.toolRequest.args), + ); + } + + assert.ok(foundToolCall, 'Expected to find a list_directory tool call'); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test'); +}); diff --git a/integration-tests/read_many_files.test.js b/integration-tests/read_many_files.test.js deleted file mode 100644 index 74d2f358..00000000 --- a/integration-tests/read_many_files.test.js +++ /dev/null @@ -1,50 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { test } from 'node:test'; -import { strict as assert } from 'assert'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; - -test('should be able to read multiple files', async () => { - const rig = new TestRig(); - await rig.setup('should be able to read multiple files'); - rig.createFile('file1.txt', 'file 1 content'); - rig.createFile('file2.txt', 'file 2 content'); - - const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`; - - const result = await rig.run(prompt); - - // Check for either read_many_files or multiple read_file calls - const allTools = rig.readToolLogs(); - const readManyFilesCall = await rig.waitForToolCall('read_many_files'); - const readFileCalls = allTools.filter( - (t) => t.toolRequest.name === 'read_file', - ); - - // Accept either read_many_files OR at least 2 read_file calls - const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2; - - // Add debugging information - if (!foundValidPattern) { - printDebugInfo(rig, result, { - 'read_many_files called': readManyFilesCall, - 'read_file calls': readFileCalls.length, - }); - } - - assert.ok( - foundValidPattern, - 'Expected to find either read_many_files or multiple read_file tool calls', - ); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput( - result, - ['file 1 content', 'file 2 content'], - 'Read many files test', - ); -}); diff --git a/integration-tests/read_many_files.test.ts b/integration-tests/read_many_files.test.ts new file mode 100644 index 00000000..74d2f358 --- /dev/null +++ b/integration-tests/read_many_files.test.ts @@ -0,0 +1,50 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { test } from 'node:test'; +import { strict as assert } from 'assert'; +import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; + +test('should be able to read multiple files', async () => { + const rig = new TestRig(); + await rig.setup('should be able to read multiple files'); + rig.createFile('file1.txt', 'file 1 content'); + rig.createFile('file2.txt', 'file 2 content'); + + const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`; + + const result = await rig.run(prompt); + + // Check for either read_many_files or multiple read_file calls + const allTools = rig.readToolLogs(); + const readManyFilesCall = await rig.waitForToolCall('read_many_files'); + const readFileCalls = allTools.filter( + (t) => t.toolRequest.name === 'read_file', + ); + + // Accept either read_many_files OR at least 2 read_file calls + const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2; + + // Add debugging information + if (!foundValidPattern) { + printDebugInfo(rig, result, { + 'read_many_files called': readManyFilesCall, + 'read_file calls': readFileCalls.length, + }); + } + + assert.ok( + foundValidPattern, + 'Expected to find either read_many_files or multiple read_file tool calls', + ); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput( + result, + ['file 1 content', 'file 2 content'], + 'Read many files test', + ); +}); diff --git a/integration-tests/replace.test.js b/integration-tests/replace.test.js deleted file mode 100644 index 1ac6f5a4..00000000 --- a/integration-tests/replace.test.js +++ /dev/null @@ -1,66 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { test } from 'node:test'; -import { strict as assert } from 'assert'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; - -test('should be able to replace content in a file', async () => { - const rig = new TestRig(); - await rig.setup('should be able to replace content in a file'); - - const fileName = 'file_to_replace.txt'; - const originalContent = 'original content'; - const expectedContent = 'replaced content'; - - rig.createFile(fileName, originalContent); - const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`; - - const result = await rig.run(prompt); - - const foundToolCall = await rig.waitForToolCall('replace'); - - // Add debugging information - if (!foundToolCall) { - printDebugInfo(rig, result); - } - - assert.ok(foundToolCall, 'Expected to find a replace tool call'); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput( - result, - ['replaced', 'file_to_replace.txt'], - 'Replace content test', - ); - - const newFileContent = rig.readFile(fileName); - - // Add debugging for file content - if (newFileContent !== expectedContent) { - console.error('File content mismatch - Debug info:'); - console.error('Expected:', expectedContent); - console.error('Actual:', newFileContent); - console.error( - 'Tool calls:', - rig.readToolLogs().map((t) => ({ - name: t.toolRequest.name, - args: t.toolRequest.args, - })), - ); - } - - assert.strictEqual( - newFileContent, - expectedContent, - 'File content should be updated correctly', - ); - - // Log success info if verbose - if (process.env.VERBOSE === 'true') { - console.log('File replaced successfully. New content:', newFileContent); - } -}); diff --git a/integration-tests/replace.test.ts b/integration-tests/replace.test.ts new file mode 100644 index 00000000..1ac6f5a4 --- /dev/null +++ b/integration-tests/replace.test.ts @@ -0,0 +1,66 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { test } from 'node:test'; +import { strict as assert } from 'assert'; +import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; + +test('should be able to replace content in a file', async () => { + const rig = new TestRig(); + await rig.setup('should be able to replace content in a file'); + + const fileName = 'file_to_replace.txt'; + const originalContent = 'original content'; + const expectedContent = 'replaced content'; + + rig.createFile(fileName, originalContent); + const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`; + + const result = await rig.run(prompt); + + const foundToolCall = await rig.waitForToolCall('replace'); + + // Add debugging information + if (!foundToolCall) { + printDebugInfo(rig, result); + } + + assert.ok(foundToolCall, 'Expected to find a replace tool call'); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput( + result, + ['replaced', 'file_to_replace.txt'], + 'Replace content test', + ); + + const newFileContent = rig.readFile(fileName); + + // Add debugging for file content + if (newFileContent !== expectedContent) { + console.error('File content mismatch - Debug info:'); + console.error('Expected:', expectedContent); + console.error('Actual:', newFileContent); + console.error( + 'Tool calls:', + rig.readToolLogs().map((t) => ({ + name: t.toolRequest.name, + args: t.toolRequest.args, + })), + ); + } + + assert.strictEqual( + newFileContent, + expectedContent, + 'File content should be updated correctly', + ); + + // Log success info if verbose + if (process.env.VERBOSE === 'true') { + console.log('File replaced successfully. New content:', newFileContent); + } +}); diff --git a/integration-tests/run-tests.js b/integration-tests/run-tests.js index 05fb349e..b33e1afa 100644 --- a/integration-tests/run-tests.js +++ b/integration-tests/run-tests.js @@ -52,13 +52,13 @@ async function main() { const testPatterns = args.length > 0 - ? args.map((arg) => `integration-tests/${arg}.test.js`) - : ['integration-tests/*.test.js']; + ? args.map((arg) => `integration-tests/${arg}.test.ts`) + : ['integration-tests/*.test.ts']; const testFiles = glob.sync(testPatterns, { cwd: rootDir, absolute: true }); for (const testFile of testFiles) { const testFileName = basename(testFile); - console.log(`\tFound test file: ${testFileName}`); + console.log(` Found test file: ${testFileName}`); } const MAX_RETRIES = 3; @@ -92,7 +92,7 @@ async function main() { } nodeArgs.push(testFile); - const child = spawn('node', nodeArgs, { + const child = spawn('npx', ['tsx', ...nodeArgs], { stdio: 'pipe', env: { ...process.env, diff --git a/integration-tests/run_shell_command.test.js b/integration-tests/run_shell_command.test.js deleted file mode 100644 index 2a5f9ed4..00000000 --- a/integration-tests/run_shell_command.test.js +++ /dev/null @@ -1,63 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { test } from 'node:test'; -import { strict as assert } from 'assert'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; - -test('should be able to run a shell command', async () => { - const rig = new TestRig(); - await rig.setup('should be able to run a shell command'); - - const prompt = `Please run the command "echo hello-world" and show me the output`; - - const result = await rig.run(prompt); - - const foundToolCall = await rig.waitForToolCall('run_shell_command'); - - // Add debugging information - if (!foundToolCall || !result.includes('hello-world')) { - printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains hello-world': result.includes('hello-world'), - }); - } - - assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call'); - - // Validate model output - will throw if no output, warn if missing expected content - // Model often reports exit code instead of showing output - validateModelOutput( - result, - ['hello-world', 'exit code 0'], - 'Shell command test', - ); -}); - -test('should be able to run a shell command via stdin', async () => { - const rig = new TestRig(); - await rig.setup('should be able to run a shell command via stdin'); - - const prompt = `Please run the command "echo test-stdin" and show me what it outputs`; - - const result = await rig.run({ stdin: prompt }); - - const foundToolCall = await rig.waitForToolCall('run_shell_command'); - - // Add debugging information - if (!foundToolCall || !result.includes('test-stdin')) { - printDebugInfo(rig, result, { - 'Test type': 'Stdin test', - 'Found tool call': foundToolCall, - 'Contains test-stdin': result.includes('test-stdin'), - }); - } - - assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call'); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, 'test-stdin', 'Shell command stdin test'); -}); diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts new file mode 100644 index 00000000..2a5f9ed4 --- /dev/null +++ b/integration-tests/run_shell_command.test.ts @@ -0,0 +1,63 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { test } from 'node:test'; +import { strict as assert } from 'assert'; +import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; + +test('should be able to run a shell command', async () => { + const rig = new TestRig(); + await rig.setup('should be able to run a shell command'); + + const prompt = `Please run the command "echo hello-world" and show me the output`; + + const result = await rig.run(prompt); + + const foundToolCall = await rig.waitForToolCall('run_shell_command'); + + // Add debugging information + if (!foundToolCall || !result.includes('hello-world')) { + printDebugInfo(rig, result, { + 'Found tool call': foundToolCall, + 'Contains hello-world': result.includes('hello-world'), + }); + } + + assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call'); + + // Validate model output - will throw if no output, warn if missing expected content + // Model often reports exit code instead of showing output + validateModelOutput( + result, + ['hello-world', 'exit code 0'], + 'Shell command test', + ); +}); + +test('should be able to run a shell command via stdin', async () => { + const rig = new TestRig(); + await rig.setup('should be able to run a shell command via stdin'); + + const prompt = `Please run the command "echo test-stdin" and show me what it outputs`; + + const result = await rig.run({ stdin: prompt }); + + const foundToolCall = await rig.waitForToolCall('run_shell_command'); + + // Add debugging information + if (!foundToolCall || !result.includes('test-stdin')) { + printDebugInfo(rig, result, { + 'Test type': 'Stdin test', + 'Found tool call': foundToolCall, + 'Contains test-stdin': result.includes('test-stdin'), + }); + } + + assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call'); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, 'test-stdin', 'Shell command stdin test'); +}); diff --git a/integration-tests/save_memory.test.js b/integration-tests/save_memory.test.js deleted file mode 100644 index 3ec641d4..00000000 --- a/integration-tests/save_memory.test.js +++ /dev/null @@ -1,41 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { test } from 'node:test'; -import { strict as assert } from 'assert'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; - -test('should be able to save to memory', async () => { - const rig = new TestRig(); - await rig.setup('should be able to save to memory'); - - const prompt = `remember that my favorite color is blue. - - what is my favorite color? tell me that and surround it with $ symbol`; - const result = await rig.run(prompt); - - const foundToolCall = await rig.waitForToolCall('save_memory'); - - // Add debugging information - if (!foundToolCall || !result.toLowerCase().includes('blue')) { - const allTools = printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains blue': result.toLowerCase().includes('blue'), - }); - - console.error( - 'Memory tool calls:', - allTools - .filter((t) => t.toolRequest.name === 'save_memory') - .map((t) => t.toolRequest.args), - ); - } - - assert.ok(foundToolCall, 'Expected to find a save_memory tool call'); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, 'blue', 'Save memory test'); -}); diff --git a/integration-tests/save_memory.test.ts b/integration-tests/save_memory.test.ts new file mode 100644 index 00000000..3ec641d4 --- /dev/null +++ b/integration-tests/save_memory.test.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { test } from 'node:test'; +import { strict as assert } from 'assert'; +import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; + +test('should be able to save to memory', async () => { + const rig = new TestRig(); + await rig.setup('should be able to save to memory'); + + const prompt = `remember that my favorite color is blue. + + what is my favorite color? tell me that and surround it with $ symbol`; + const result = await rig.run(prompt); + + const foundToolCall = await rig.waitForToolCall('save_memory'); + + // Add debugging information + if (!foundToolCall || !result.toLowerCase().includes('blue')) { + const allTools = printDebugInfo(rig, result, { + 'Found tool call': foundToolCall, + 'Contains blue': result.toLowerCase().includes('blue'), + }); + + console.error( + 'Memory tool calls:', + allTools + .filter((t) => t.toolRequest.name === 'save_memory') + .map((t) => t.toolRequest.args), + ); + } + + assert.ok(foundToolCall, 'Expected to find a save_memory tool call'); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, 'blue', 'Save memory test'); +}); diff --git a/integration-tests/simple-mcp-server.test.js b/integration-tests/simple-mcp-server.test.js deleted file mode 100644 index 987f69d2..00000000 --- a/integration-tests/simple-mcp-server.test.js +++ /dev/null @@ -1,211 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -/** - * This test verifies MCP (Model Context Protocol) server integration. - * It uses a minimal MCP server implementation that doesn't require - * external dependencies, making it compatible with Docker sandbox mode. - */ - -import { test, describe, before } from 'node:test'; -import { strict as assert } from 'node:assert'; -import { TestRig, validateModelOutput } from './test-helper.js'; -import { join } from 'path'; -import { fileURLToPath } from 'url'; -import { writeFileSync } from 'fs'; - -const __dirname = fileURLToPath(new URL('.', import.meta.url)); - -// Create a minimal MCP server that doesn't require external dependencies -// This implements the MCP protocol directly using Node.js built-ins -const serverScript = `#!/usr/bin/env node -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -const readline = require('readline'); -const fs = require('fs'); - -// Debug logging to stderr (only when MCP_DEBUG or VERBOSE is set) -const debugEnabled = process.env.MCP_DEBUG === 'true' || process.env.VERBOSE === 'true'; -function debug(msg) { - if (debugEnabled) { - fs.writeSync(2, \`[MCP-DEBUG] \${msg}\\n\`); - } -} - -debug('MCP server starting...'); - -// Simple JSON-RPC implementation for MCP -class SimpleJSONRPC { - constructor() { - this.handlers = new Map(); - this.rl = readline.createInterface({ - input: process.stdin, - output: process.stdout, - terminal: false - }); - - this.rl.on('line', (line) => { - debug(\`Received line: \${line}\`); - try { - const message = JSON.parse(line); - debug(\`Parsed message: \${JSON.stringify(message)}\`); - this.handleMessage(message); - } catch (e) { - debug(\`Parse error: \${e.message}\`); - } - }); - } - - send(message) { - const msgStr = JSON.stringify(message); - debug(\`Sending message: \${msgStr}\`); - process.stdout.write(msgStr + '\\n'); - } - - async handleMessage(message) { - if (message.method && this.handlers.has(message.method)) { - try { - const result = await this.handlers.get(message.method)(message.params || {}); - if (message.id !== undefined) { - this.send({ - jsonrpc: '2.0', - id: message.id, - result - }); - } - } catch (error) { - if (message.id !== undefined) { - this.send({ - jsonrpc: '2.0', - id: message.id, - error: { - code: -32603, - message: error.message - } - }); - } - } - } else if (message.id !== undefined) { - this.send({ - jsonrpc: '2.0', - id: message.id, - error: { - code: -32601, - message: 'Method not found' - } - }); - } - } - - on(method, handler) { - this.handlers.set(method, handler); - } -} - -// Create MCP server -const rpc = new SimpleJSONRPC(); - -// Handle initialize -rpc.on('initialize', async (params) => { - debug('Handling initialize request'); - return { - protocolVersion: '2024-11-05', - capabilities: { - tools: {} - }, - serverInfo: { - name: 'addition-server', - version: '1.0.0' - } - }; -}); - -// Handle tools/list -rpc.on('tools/list', async () => { - debug('Handling tools/list request'); - return { - tools: [{ - name: 'add', - description: 'Add two numbers', - inputSchema: { - type: 'object', - properties: { - a: { type: 'number', description: 'First number' }, - b: { type: 'number', description: 'Second number' } - }, - required: ['a', 'b'] - } - }] - }; -}); - -// Handle tools/call -rpc.on('tools/call', async (params) => { - debug(\`Handling tools/call request for tool: \${params.name}\`); - if (params.name === 'add') { - const { a, b } = params.arguments; - return { - content: [{ - type: 'text', - text: String(a + b) - }] - }; - } - throw new Error('Unknown tool: ' + params.name); -}); - -// Send initialization notification -rpc.send({ - jsonrpc: '2.0', - method: 'initialized' -}); -`; - -describe('simple-mcp-server', () => { - const rig = new TestRig(); - - before(async () => { - // Setup test directory with MCP server configuration - await rig.setup('simple-mcp-server', { - settings: { - mcpServers: { - 'addition-server': { - command: 'node', - args: ['mcp-server.cjs'], - }, - }, - }, - }); - - // Create server script in the test directory - const testServerPath = join(rig.testDir, 'mcp-server.cjs'); - writeFileSync(testServerPath, serverScript); - - // Make the script executable (though running with 'node' should work anyway) - if (process.platform !== 'win32') { - const { chmodSync } = await import('fs'); - chmodSync(testServerPath, 0o755); - } - }); - - test('should add two numbers', async () => { - // Test directory is already set up in before hook - // Just run the command - MCP server config is in settings.json - const output = await rig.run('add 5 and 10'); - - const foundToolCall = await rig.waitForToolCall('add'); - - assert.ok(foundToolCall, 'Expected to find an add tool call'); - - // Validate model output - will throw if no output, fail if missing expected content - validateModelOutput(output, '15', 'MCP server test'); - assert.ok(output.includes('15'), 'Expected output to contain the sum (15)'); - }); -}); diff --git a/integration-tests/simple-mcp-server.test.ts b/integration-tests/simple-mcp-server.test.ts new file mode 100644 index 00000000..c4191078 --- /dev/null +++ b/integration-tests/simple-mcp-server.test.ts @@ -0,0 +1,208 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * This test verifies MCP (Model Context Protocol) server integration. + * It uses a minimal MCP server implementation that doesn't require + * external dependencies, making it compatible with Docker sandbox mode. + */ + +import { test, describe, before } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { TestRig, validateModelOutput } from './test-helper.js'; +import { join } from 'path'; +import { writeFileSync } from 'fs'; + +// Create a minimal MCP server that doesn't require external dependencies +// This implements the MCP protocol directly using Node.js built-ins +const serverScript = `#!/usr/bin/env node +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +const readline = require('readline'); +const fs = require('fs'); + +// Debug logging to stderr (only when MCP_DEBUG or VERBOSE is set) +const debugEnabled = process.env.MCP_DEBUG === 'true' || process.env.VERBOSE === 'true'; +function debug(msg) { + if (debugEnabled) { + fs.writeSync(2, \`[MCP-DEBUG] \${msg}\\n\`); + } +} + +debug('MCP server starting...'); + +// Simple JSON-RPC implementation for MCP +class SimpleJSONRPC { + constructor() { + this.handlers = new Map(); + this.rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + terminal: false + }); + + this.rl.on('line', (line) => { + debug(\`Received line: \${line}\`); + try { + const message = JSON.parse(line); + debug(\`Parsed message: \${JSON.stringify(message)}\`); + this.handleMessage(message); + } catch (e) { + debug(\`Parse error: \${e.message}\`); + } + }); + } + + send(message) { + const msgStr = JSON.stringify(message); + debug(\`Sending message: \${msgStr}\`); + process.stdout.write(msgStr + '\\n'); + } + + async handleMessage(message) { + if (message.method && this.handlers.has(message.method)) { + try { + const result = await this.handlers.get(message.method)(message.params || {}); + if (message.id !== undefined) { + this.send({ + jsonrpc: '2.0', + id: message.id, + result + }); + } + } catch (error) { + if (message.id !== undefined) { + this.send({ + jsonrpc: '2.0', + id: message.id, + error: { + code: -32603, + message: error.message + } + }); + } + } + } else if (message.id !== undefined) { + this.send({ + jsonrpc: '2.0', + id: message.id, + error: { + code: -32601, + message: 'Method not found' + } + }); + } + } + + on(method, handler) { + this.handlers.set(method, handler); + } +} + +// Create MCP server +const rpc = new SimpleJSONRPC(); + +// Handle initialize +rpc.on('initialize', async (params) => { + debug('Handling initialize request'); + return { + protocolVersion: '2024-11-05', + capabilities: { + tools: {} + }, + serverInfo: { + name: 'addition-server', + version: '1.0.0' + } + }; +}); + +// Handle tools/list +rpc.on('tools/list', async () => { + debug('Handling tools/list request'); + return { + tools: [{ + name: 'add', + description: 'Add two numbers', + inputSchema: { + type: 'object', + properties: { + a: { type: 'number', description: 'First number' }, + b: { type: 'number', description: 'Second number' } + }, + required: ['a', 'b'] + } + }] + }; +}); + +// Handle tools/call +rpc.on('tools/call', async (params) => { + debug(\`Handling tools/call request for tool: \${params.name}\`); + if (params.name === 'add') { + const { a, b } = params.arguments; + return { + content: [{ + type: 'text', + text: String(a + b) + }] + }; + } + throw new Error('Unknown tool: ' + params.name); +}); + +// Send initialization notification +rpc.send({ + jsonrpc: '2.0', + method: 'initialized' +}); +`; + +describe('simple-mcp-server', () => { + const rig = new TestRig(); + + before(async () => { + // Setup test directory with MCP server configuration + await rig.setup('simple-mcp-server', { + settings: { + mcpServers: { + 'addition-server': { + command: 'node', + args: ['mcp-server.cjs'], + }, + }, + }, + }); + + // Create server script in the test directory + const testServerPath = join(rig.testDir!, 'mcp-server.cjs'); + writeFileSync(testServerPath, serverScript); + + // Make the script executable (though running with 'node' should work anyway) + if (process.platform !== 'win32') { + const { chmodSync } = await import('fs'); + chmodSync(testServerPath, 0o755); + } + }); + + test('should add two numbers', async () => { + // Test directory is already set up in before hook + // Just run the command - MCP server config is in settings.json + const output = await rig.run('add 5 and 10'); + + const foundToolCall = await rig.waitForToolCall('add'); + + assert.ok(foundToolCall, 'Expected to find an add tool call'); + + // Validate model output - will throw if no output, fail if missing expected content + validateModelOutput(output, '15', 'MCP server test'); + assert.ok(output.includes('15'), 'Expected output to contain the sum (15)'); + }); +}); diff --git a/integration-tests/test-helper.js b/integration-tests/test-helper.js deleted file mode 100644 index d1125a78..00000000 --- a/integration-tests/test-helper.js +++ /dev/null @@ -1,597 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { execSync, spawn } from 'child_process'; -import { parse } from 'shell-quote'; -import { mkdirSync, writeFileSync, readFileSync } from 'fs'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; -import { env } from 'process'; -import { fileExists } from '../scripts/telemetry_utils.js'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); - -function sanitizeTestName(name) { - return name - .toLowerCase() - .replace(/[^a-z0-9]/g, '-') - .replace(/-+/g, '-'); -} - -// Helper to create detailed error messages -export function createToolCallErrorMessage(expectedTools, foundTools, result) { - const expectedStr = Array.isArray(expectedTools) - ? expectedTools.join(' or ') - : expectedTools; - return ( - `Expected to find ${expectedStr} tool call(s). ` + - `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` + - `Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}` - ); -} - -// Helper to print debug information when tests fail -export function printDebugInfo(rig, result, context = {}) { - console.error('Test failed - Debug info:'); - console.error('Result length:', result.length); - console.error('Result (first 500 chars):', result.substring(0, 500)); - console.error( - 'Result (last 500 chars):', - result.substring(result.length - 500), - ); - - // Print any additional context provided - Object.entries(context).forEach(([key, value]) => { - console.error(`${key}:`, value); - }); - - // Check what tools were actually called - const allTools = rig.readToolLogs(); - console.error( - 'All tool calls found:', - allTools.map((t) => t.toolRequest.name), - ); - - return allTools; -} - -// Helper to validate model output and warn about unexpected content -export function validateModelOutput( - result, - expectedContent = null, - testName = '', -) { - // First, check if there's any output at all (this should fail the test if missing) - if (!result || result.trim().length === 0) { - throw new Error('Expected LLM to return some output'); - } - - // If expectedContent is provided, check for it and warn if missing - if (expectedContent) { - const contents = Array.isArray(expectedContent) - ? expectedContent - : [expectedContent]; - const missingContent = contents.filter((content) => { - if (typeof content === 'string') { - return !result.toLowerCase().includes(content.toLowerCase()); - } else if (content instanceof RegExp) { - return !content.test(result); - } - return false; - }); - - if (missingContent.length > 0) { - console.warn( - `Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`, - 'This is not ideal but not a test failure.', - ); - console.warn( - 'The tool was called successfully, which is the main requirement.', - ); - return false; - } else if (process.env.VERBOSE === 'true') { - console.log(`${testName}: Model output validated successfully.`); - } - return true; - } - - return true; -} - -export class TestRig { - constructor() { - this.bundlePath = join(__dirname, '..', 'bundle/gemini.js'); - this.testDir = null; - } - - // Get timeout based on environment - getDefaultTimeout() { - if (env.CI) return 60000; // 1 minute in CI - if (env.GEMINI_SANDBOX) return 30000; // 30s in containers - return 15000; // 15s locally - } - - setup(testName, options = {}) { - this.testName = testName; - const sanitizedName = sanitizeTestName(testName); - this.testDir = join(env.INTEGRATION_TEST_FILE_DIR, sanitizedName); - mkdirSync(this.testDir, { recursive: true }); - - // Create a settings file to point the CLI to the local collector - const geminiDir = join(this.testDir, '.gemini'); - mkdirSync(geminiDir, { recursive: true }); - // In sandbox mode, use an absolute path for telemetry inside the container - // The container mounts the test directory at the same path as the host - const telemetryPath = - env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' - ? join(this.testDir, 'telemetry.log') // Absolute path in test directory - : env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox - - const settings = { - telemetry: { - enabled: true, - target: 'local', - otlpEndpoint: '', - outfile: telemetryPath, - }, - sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false, - ...options.settings, // Allow tests to override/add settings - }; - writeFileSync( - join(geminiDir, 'settings.json'), - JSON.stringify(settings, null, 2), - ); - } - - createFile(fileName, content) { - const filePath = join(this.testDir, fileName); - writeFileSync(filePath, content); - return filePath; - } - - mkdir(dir) { - mkdirSync(join(this.testDir, dir), { recursive: true }); - } - - sync() { - // ensure file system is done before spawning - execSync('sync', { cwd: this.testDir }); - } - - run(promptOrOptions, ...args) { - let command = `node ${this.bundlePath} --yolo`; - const execOptions = { - cwd: this.testDir, - encoding: 'utf-8', - }; - - if (typeof promptOrOptions === 'string') { - command += ` --prompt ${JSON.stringify(promptOrOptions)}`; - } else if ( - typeof promptOrOptions === 'object' && - promptOrOptions !== null - ) { - if (promptOrOptions.prompt) { - command += ` --prompt ${JSON.stringify(promptOrOptions.prompt)}`; - } - if (promptOrOptions.stdin) { - execOptions.input = promptOrOptions.stdin; - } - } - - command += ` ${args.join(' ')}`; - - const commandArgs = parse(command); - const node = commandArgs.shift(); - - const child = spawn(node, commandArgs, { - cwd: this.testDir, - stdio: 'pipe', - }); - - let stdout = ''; - let stderr = ''; - - // Handle stdin if provided - if (execOptions.input) { - child.stdin.write(execOptions.input); - child.stdin.end(); - } - - child.stdout.on('data', (data) => { - stdout += data; - if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { - process.stdout.write(data); - } - }); - - child.stderr.on('data', (data) => { - stderr += data; - if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { - process.stderr.write(data); - } - }); - - const promise = new Promise((resolve, reject) => { - child.on('close', (code) => { - if (code === 0) { - // Store the raw stdout for Podman telemetry parsing - this._lastRunStdout = stdout; - - // Filter out telemetry output when running with Podman - // Podman seems to output telemetry to stdout even when writing to file - let result = stdout; - if (env.GEMINI_SANDBOX === 'podman') { - // Remove telemetry JSON objects from output - // They are multi-line JSON objects that start with { and contain telemetry fields - const lines = result.split('\n'); - const filteredLines = []; - let inTelemetryObject = false; - let braceDepth = 0; - - for (const line of lines) { - if (!inTelemetryObject && line.trim() === '{') { - // Check if this might be start of telemetry object - inTelemetryObject = true; - braceDepth = 1; - } else if (inTelemetryObject) { - // Count braces to track nesting - for (const char of line) { - if (char === '{') braceDepth++; - else if (char === '}') braceDepth--; - } - - // Check if we've closed all braces - if (braceDepth === 0) { - inTelemetryObject = false; - // Skip this line (the closing brace) - continue; - } - } else { - // Not in telemetry object, keep the line - filteredLines.push(line); - } - } - - result = filteredLines.join('\n'); - } - // If we have stderr output, include that also - if (stderr) { - result += `\n\nStdErr:\n${stderr}`; - } - - resolve(result); - } else { - reject(new Error(`Process exited with code ${code}:\n${stderr}`)); - } - }); - }); - - return promise; - } - - readFile(fileName) { - const content = readFileSync(join(this.testDir, fileName), 'utf-8'); - if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { - const testId = `${env.TEST_FILE_NAME.replace( - '.test.js', - '', - )}:${this.testName.replace(/ /g, '-')}`; - console.log(`--- FILE: ${testId}/${fileName} ---`); - console.log(content); - console.log(`--- END FILE: ${testId}/${fileName} ---`); - } - return content; - } - - async cleanup() { - // Clean up test directory - if (this.testDir && !env.KEEP_OUTPUT) { - try { - execSync(`rm -rf ${this.testDir}`); - } catch (error) { - // Ignore cleanup errors - if (env.VERBOSE === 'true') { - console.warn('Cleanup warning:', error.message); - } - } - } - } - - async waitForTelemetryReady() { - // In sandbox mode, telemetry is written to a relative path in the test directory - const logFilePath = - env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' - ? join(this.testDir, 'telemetry.log') - : env.TELEMETRY_LOG_FILE; - - if (!logFilePath) return; - - // Wait for telemetry file to exist and have content - await this.poll( - () => { - if (!fileExists(logFilePath)) return false; - try { - const content = readFileSync(logFilePath, 'utf-8'); - // Check if file has meaningful content (at least one complete JSON object) - return content.includes('"event.name"'); - } catch (_e) { - return false; - } - }, - 2000, // 2 seconds max - reduced since telemetry should flush on exit now - 100, // check every 100ms - ); - } - - async waitForToolCall(toolName, timeout) { - // Use environment-specific timeout - if (!timeout) { - timeout = this.getDefaultTimeout(); - } - - // Wait for telemetry to be ready before polling for tool calls - await this.waitForTelemetryReady(); - - return this.poll( - () => { - const toolLogs = this.readToolLogs(); - return toolLogs.some((log) => log.toolRequest.name === toolName); - }, - timeout, - 100, - ); - } - - async waitForAnyToolCall(toolNames, timeout) { - // Use environment-specific timeout - if (!timeout) { - timeout = this.getDefaultTimeout(); - } - - // Wait for telemetry to be ready before polling for tool calls - await this.waitForTelemetryReady(); - - return this.poll( - () => { - const toolLogs = this.readToolLogs(); - return toolNames.some((name) => - toolLogs.some((log) => log.toolRequest.name === name), - ); - }, - timeout, - 100, - ); - } - - async poll(predicate, timeout, interval) { - const startTime = Date.now(); - let attempts = 0; - while (Date.now() - startTime < timeout) { - attempts++; - const result = predicate(); - if (env.VERBOSE === 'true' && attempts % 5 === 0) { - console.log( - `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`, - ); - } - if (result) { - return true; - } - await new Promise((resolve) => setTimeout(resolve, interval)); - } - if (env.VERBOSE === 'true') { - console.log(`Poll timed out after ${attempts} attempts`); - } - return false; - } - - _parseToolLogsFromStdout(stdout) { - const logs = []; - - // The console output from Podman is JavaScript object notation, not JSON - // Look for tool call events in the output - // Updated regex to handle tool names with hyphens and underscores - const toolCallPattern = - /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g; - const matches = [...stdout.matchAll(toolCallPattern)]; - - for (const match of matches) { - const toolName = match[1]; - const success = match[2] === 'true'; - const duration = parseInt(match[3], 10); - - // Try to find function_args nearby - const matchIndex = match.index || 0; - const contextStart = Math.max(0, matchIndex - 500); - const contextEnd = Math.min(stdout.length, matchIndex + 500); - const context = stdout.substring(contextStart, contextEnd); - - // Look for function_args in the context - let args = '{}'; - const argsMatch = context.match(/function_args:\s*'([^']+)'/); - if (argsMatch) { - args = argsMatch[1]; - } - - // Also try to find function_name to double-check - // Updated regex to handle tool names with hyphens and underscores - const nameMatch = context.match(/function_name:\s*'([\w-]+)'/); - const actualToolName = nameMatch ? nameMatch[1] : toolName; - - logs.push({ - timestamp: Date.now(), - toolRequest: { - name: actualToolName, - args: args, - success: success, - duration_ms: duration, - }, - }); - } - - // If no matches found with the simple pattern, try the JSON parsing approach - // in case the format changes - if (logs.length === 0) { - const lines = stdout.split('\n'); - let currentObject = ''; - let inObject = false; - let braceDepth = 0; - - for (const line of lines) { - if (!inObject && line.trim() === '{') { - inObject = true; - braceDepth = 1; - currentObject = line + '\n'; - } else if (inObject) { - currentObject += line + '\n'; - - // Count braces - for (const char of line) { - if (char === '{') braceDepth++; - else if (char === '}') braceDepth--; - } - - // If we've closed all braces, try to parse the object - if (braceDepth === 0) { - inObject = false; - try { - const obj = JSON.parse(currentObject); - - // Check for tool call in different formats - if ( - obj.body && - obj.body.includes('Tool call:') && - obj.attributes - ) { - const bodyMatch = obj.body.match(/Tool call: (\w+)\./); - if (bodyMatch) { - logs.push({ - timestamp: obj.timestamp || Date.now(), - toolRequest: { - name: bodyMatch[1], - args: obj.attributes.function_args || '{}', - success: obj.attributes.success !== false, - duration_ms: obj.attributes.duration_ms || 0, - }, - }); - } - } else if ( - obj.attributes && - obj.attributes['event.name'] === 'gemini_cli.tool_call' - ) { - logs.push({ - timestamp: obj.attributes['event.timestamp'], - toolRequest: { - name: obj.attributes.function_name, - args: obj.attributes.function_args, - success: obj.attributes.success, - duration_ms: obj.attributes.duration_ms, - }, - }); - } - } catch (_e) { - // Not valid JSON - } - currentObject = ''; - } - } - } - } - - return logs; - } - - readToolLogs() { - // For Podman, first check if telemetry file exists and has content - // If not, fall back to parsing from stdout - if (env.GEMINI_SANDBOX === 'podman') { - // Try reading from file first - const logFilePath = join(this.testDir, 'telemetry.log'); - - if (fileExists(logFilePath)) { - try { - const content = readFileSync(logFilePath, 'utf-8'); - if (content && content.includes('"event.name"')) { - // File has content, use normal file parsing - // Continue to the normal file parsing logic below - } else if (this._lastRunStdout) { - // File exists but is empty or doesn't have events, parse from stdout - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } catch (_e) { - // Error reading file, fall back to stdout - if (this._lastRunStdout) { - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } - } else if (this._lastRunStdout) { - // No file exists, parse from stdout - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } - - // In sandbox mode, telemetry is written to a relative path in the test directory - const logFilePath = - env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' - ? join(this.testDir, 'telemetry.log') - : env.TELEMETRY_LOG_FILE; - - if (!logFilePath) { - console.warn(`TELEMETRY_LOG_FILE environment variable not set`); - return []; - } - - // Check if file exists, if not return empty array (file might not be created yet) - if (!fileExists(logFilePath)) { - return []; - } - - const content = readFileSync(logFilePath, 'utf-8'); - - // Split the content into individual JSON objects - // They are separated by "}\n{" pattern - const jsonObjects = content - .split(/}\s*\n\s*{/) - .map((obj, index, array) => { - // Add back the braces we removed during split - if (index > 0) obj = '{' + obj; - if (index < array.length - 1) obj = obj + '}'; - return obj.trim(); - }) - .filter((obj) => obj); - - const logs = []; - - for (const jsonStr of jsonObjects) { - try { - const logData = JSON.parse(jsonStr); - // Look for tool call logs - if ( - logData.attributes && - logData.attributes['event.name'] === 'gemini_cli.tool_call' - ) { - const toolName = logData.attributes.function_name; - logs.push({ - toolRequest: { - name: toolName, - args: logData.attributes.function_args, - success: logData.attributes.success, - duration_ms: logData.attributes.duration_ms, - }, - }); - } - } catch (_e) { - // Skip objects that aren't valid JSON - if (env.VERBOSE === 'true') { - console.error('Failed to parse telemetry object:', _e.message); - } - } - } - - return logs; - } -} diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts new file mode 100644 index 00000000..33443aaf --- /dev/null +++ b/integration-tests/test-helper.ts @@ -0,0 +1,642 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { execSync, spawn } from 'child_process'; +import { parse } from 'shell-quote'; +import { mkdirSync, writeFileSync, readFileSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; +import { env } from 'process'; +import { fileExists } from '../scripts/telemetry_utils.js'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +function sanitizeTestName(name: string) { + return name + .toLowerCase() + .replace(/[^a-z0-9]/g, '-') + .replace(/-+/g, '-'); +} + +// Helper to create detailed error messages +export function createToolCallErrorMessage( + expectedTools: string | string[], + foundTools: string[], + result: string, +) { + const expectedStr = Array.isArray(expectedTools) + ? expectedTools.join(' or ') + : expectedTools; + return ( + `Expected to find ${expectedStr} tool call(s). ` + + `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` + + `Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}` + ); +} + +// Helper to print debug information when tests fail +export function printDebugInfo( + rig: TestRig, + result: string, + context: Record = {}, +) { + console.error('Test failed - Debug info:'); + console.error('Result length:', result.length); + console.error('Result (first 500 chars):', result.substring(0, 500)); + console.error( + 'Result (last 500 chars):', + result.substring(result.length - 500), + ); + + // Print any additional context provided + Object.entries(context).forEach(([key, value]) => { + console.error(`${key}:`, value); + }); + + // Check what tools were actually called + const allTools = rig.readToolLogs(); + console.error( + 'All tool calls found:', + allTools.map((t) => t.toolRequest.name), + ); + + return allTools; +} + +// Helper to validate model output and warn about unexpected content +export function validateModelOutput( + result: string, + expectedContent: string | (string | RegExp)[] | null = null, + testName = '', +) { + // First, check if there's any output at all (this should fail the test if missing) + if (!result || result.trim().length === 0) { + throw new Error('Expected LLM to return some output'); + } + + // If expectedContent is provided, check for it and warn if missing + if (expectedContent) { + const contents = Array.isArray(expectedContent) + ? expectedContent + : [expectedContent]; + const missingContent = contents.filter((content) => { + if (typeof content === 'string') { + return !result.toLowerCase().includes(content.toLowerCase()); + } else if (content instanceof RegExp) { + return !content.test(result); + } + return false; + }); + + if (missingContent.length > 0) { + console.warn( + `Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`, + 'This is not ideal but not a test failure.', + ); + console.warn( + 'The tool was called successfully, which is the main requirement.', + ); + return false; + } else if (process.env.VERBOSE === 'true') { + console.log(`${testName}: Model output validated successfully.`); + } + return true; + } + + return true; +} + +export class TestRig { + bundlePath: string; + testDir: string | null; + testName?: string; + _lastRunStdout?: string; + + constructor() { + this.bundlePath = join(__dirname, '..', 'bundle/gemini.js'); + this.testDir = null; + } + + // Get timeout based on environment + getDefaultTimeout() { + if (env.CI) return 60000; // 1 minute in CI + if (env.GEMINI_SANDBOX) return 30000; // 30s in containers + return 15000; // 15s locally + } + + setup( + testName: string, + options: { settings?: Record } = {}, + ) { + this.testName = testName; + const sanitizedName = sanitizeTestName(testName); + this.testDir = join(env.INTEGRATION_TEST_FILE_DIR!, sanitizedName); + mkdirSync(this.testDir, { recursive: true }); + + // Create a settings file to point the CLI to the local collector + const geminiDir = join(this.testDir, '.gemini'); + mkdirSync(geminiDir, { recursive: true }); + // In sandbox mode, use an absolute path for telemetry inside the container + // The container mounts the test directory at the same path as the host + const telemetryPath = + env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' + ? join(this.testDir, 'telemetry.log') // Absolute path in test directory + : env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox + + const settings = { + telemetry: { + enabled: true, + target: 'local', + otlpEndpoint: '', + outfile: telemetryPath, + }, + sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false, + ...options.settings, // Allow tests to override/add settings + }; + writeFileSync( + join(geminiDir, 'settings.json'), + JSON.stringify(settings, null, 2), + ); + } + + createFile(fileName: string, content: string) { + const filePath = join(this.testDir!, fileName); + writeFileSync(filePath, content); + return filePath; + } + + mkdir(dir: string) { + mkdirSync(join(this.testDir!, dir), { recursive: true }); + } + + sync() { + // ensure file system is done before spawning + execSync('sync', { cwd: this.testDir! }); + } + + run( + promptOrOptions: string | { prompt?: string; stdin?: string }, + ...args: string[] + ): Promise { + let command = `node ${this.bundlePath} --yolo`; + const execOptions: { + cwd: string; + encoding: 'utf-8'; + input?: string; + } = { + cwd: this.testDir!, + encoding: 'utf-8', + }; + + if (typeof promptOrOptions === 'string') { + command += ` --prompt ${JSON.stringify(promptOrOptions)}`; + } else if ( + typeof promptOrOptions === 'object' && + promptOrOptions !== null + ) { + if (promptOrOptions.prompt) { + command += ` --prompt ${JSON.stringify(promptOrOptions.prompt)}`; + } + if (promptOrOptions.stdin) { + execOptions.input = promptOrOptions.stdin; + } + } + + command += ` ${args.join(' ')}`; + + const commandArgs = parse(command); + const node = commandArgs.shift() as string; + + const child = spawn(node, commandArgs as string[], { + cwd: this.testDir!, + stdio: 'pipe', + }); + + let stdout = ''; + let stderr = ''; + + // Handle stdin if provided + if (execOptions.input) { + child.stdin!.write(execOptions.input); + child.stdin!.end(); + } + + child.stdout!.on('data', (data: Buffer) => { + stdout += data; + if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { + process.stdout.write(data); + } + }); + + child.stderr!.on('data', (data: Buffer) => { + stderr += data; + if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { + process.stderr.write(data); + } + }); + + const promise = new Promise((resolve, reject) => { + child.on('close', (code: number) => { + if (code === 0) { + // Store the raw stdout for Podman telemetry parsing + this._lastRunStdout = stdout; + + // Filter out telemetry output when running with Podman + // Podman seems to output telemetry to stdout even when writing to file + let result = stdout; + if (env.GEMINI_SANDBOX === 'podman') { + // Remove telemetry JSON objects from output + // They are multi-line JSON objects that start with { and contain telemetry fields + const lines = result.split('\n'); + const filteredLines = []; + let inTelemetryObject = false; + let braceDepth = 0; + + for (const line of lines) { + if (!inTelemetryObject && line.trim() === '{') { + // Check if this might be start of telemetry object + inTelemetryObject = true; + braceDepth = 1; + } else if (inTelemetryObject) { + // Count braces to track nesting + for (const char of line) { + if (char === '{') braceDepth++; + else if (char === '}') braceDepth--; + } + + // Check if we've closed all braces + if (braceDepth === 0) { + inTelemetryObject = false; + // Skip this line (the closing brace) + continue; + } + } else { + // Not in telemetry object, keep the line + filteredLines.push(line); + } + } + + result = filteredLines.join('\n'); + } + // If we have stderr output, include that also + if (stderr) { + result += `\n\nStdErr:\n${stderr}`; + } + + resolve(result); + } else { + reject(new Error(`Process exited with code ${code}:\n${stderr}`)); + } + }); + }); + + return promise; + } + + readFile(fileName: string) { + const content = readFileSync(join(this.testDir!, fileName), 'utf-8'); + if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { + const testId = `${env.TEST_FILE_NAME!.replace( + '.test.js', + '', + )}:${this.testName!.replace(/ /g, '-')}`; + console.log(`--- FILE: ${testId}/${fileName} ---`); + console.log(content); + console.log(`--- END FILE: ${testId}/${fileName} ---`); + } + return content; + } + + async cleanup() { + // Clean up test directory + if (this.testDir && !env.KEEP_OUTPUT) { + try { + execSync(`rm -rf ${this.testDir}`); + } catch (error) { + // Ignore cleanup errors + if (env.VERBOSE === 'true') { + console.warn('Cleanup warning:', (error as Error).message); + } + } + } + } + + async waitForTelemetryReady() { + // In sandbox mode, telemetry is written to a relative path in the test directory + const logFilePath = + env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' + ? join(this.testDir!, 'telemetry.log') + : env.TELEMETRY_LOG_FILE; + + if (!logFilePath) return; + + // Wait for telemetry file to exist and have content + await this.poll( + () => { + if (!fileExists(logFilePath)) return false; + try { + const content = readFileSync(logFilePath, 'utf-8'); + // Check if file has meaningful content (at least one complete JSON object) + return content.includes('"event.name"'); + } catch { + return false; + } + }, + 2000, // 2 seconds max - reduced since telemetry should flush on exit now + 100, // check every 100ms + ); + } + + async waitForToolCall(toolName: string, timeout?: number) { + // Use environment-specific timeout + if (!timeout) { + timeout = this.getDefaultTimeout(); + } + + // Wait for telemetry to be ready before polling for tool calls + await this.waitForTelemetryReady(); + + return this.poll( + () => { + const toolLogs = this.readToolLogs(); + return toolLogs.some((log) => log.toolRequest.name === toolName); + }, + timeout, + 100, + ); + } + + async waitForAnyToolCall(toolNames: string[], timeout?: number) { + // Use environment-specific timeout + if (!timeout) { + timeout = this.getDefaultTimeout(); + } + + // Wait for telemetry to be ready before polling for tool calls + await this.waitForTelemetryReady(); + + return this.poll( + () => { + const toolLogs = this.readToolLogs(); + return toolNames.some((name) => + toolLogs.some((log) => log.toolRequest.name === name), + ); + }, + timeout, + 100, + ); + } + + async poll( + predicate: () => boolean, + timeout: number, + interval: number, + ): Promise { + const startTime = Date.now(); + let attempts = 0; + while (Date.now() - startTime < timeout) { + attempts++; + const result = predicate(); + if (env.VERBOSE === 'true' && attempts % 5 === 0) { + console.log( + `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`, + ); + } + if (result) { + return true; + } + await new Promise((resolve) => setTimeout(resolve, interval)); + } + if (env.VERBOSE === 'true') { + console.log(`Poll timed out after ${attempts} attempts`); + } + return false; + } + + _parseToolLogsFromStdout(stdout: string) { + const logs: { + timestamp: number; + toolRequest: { + name: string; + args: string; + success: boolean; + duration_ms: number; + }; + }[] = []; + + // The console output from Podman is JavaScript object notation, not JSON + // Look for tool call events in the output + // Updated regex to handle tool names with hyphens and underscores + const toolCallPattern = + /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g; + const matches = [...stdout.matchAll(toolCallPattern)]; + + for (const match of matches) { + const toolName = match[1]; + const success = match[2] === 'true'; + const duration = parseInt(match[3], 10); + + // Try to find function_args nearby + const matchIndex = match.index || 0; + const contextStart = Math.max(0, matchIndex - 500); + const contextEnd = Math.min(stdout.length, matchIndex + 500); + const context = stdout.substring(contextStart, contextEnd); + + // Look for function_args in the context + let args = '{}'; + const argsMatch = context.match(/function_args:\s*'([^']+)'/); + if (argsMatch) { + args = argsMatch[1]; + } + + // Also try to find function_name to double-check + // Updated regex to handle tool names with hyphens and underscores + const nameMatch = context.match(/function_name:\s*'([\w-]+)'/); + const actualToolName = nameMatch ? nameMatch[1] : toolName; + + logs.push({ + timestamp: Date.now(), + toolRequest: { + name: actualToolName, + args: args, + success: success, + duration_ms: duration, + }, + }); + } + + // If no matches found with the simple pattern, try the JSON parsing approach + // in case the format changes + if (logs.length === 0) { + const lines = stdout.split('\n'); + let currentObject = ''; + let inObject = false; + let braceDepth = 0; + + for (const line of lines) { + if (!inObject && line.trim() === '{') { + inObject = true; + braceDepth = 1; + currentObject = line + '\n'; + } else if (inObject) { + currentObject += line + '\n'; + + // Count braces + for (const char of line) { + if (char === '{') braceDepth++; + else if (char === '}') braceDepth--; + } + + // If we've closed all braces, try to parse the object + if (braceDepth === 0) { + inObject = false; + try { + const obj = JSON.parse(currentObject); + + // Check for tool call in different formats + if ( + obj.body && + obj.body.includes('Tool call:') && + obj.attributes + ) { + const bodyMatch = obj.body.match(/Tool call: (\w+)\./); + if (bodyMatch) { + logs.push({ + timestamp: obj.timestamp || Date.now(), + toolRequest: { + name: bodyMatch[1], + args: obj.attributes.function_args || '{}', + success: obj.attributes.success !== false, + duration_ms: obj.attributes.duration_ms || 0, + }, + }); + } + } else if ( + obj.attributes && + obj.attributes['event.name'] === 'gemini_cli.tool_call' + ) { + logs.push({ + timestamp: obj.attributes['event.timestamp'], + toolRequest: { + name: obj.attributes.function_name, + args: obj.attributes.function_args, + success: obj.attributes.success, + duration_ms: obj.attributes.duration_ms, + }, + }); + } + } catch { + // Not valid JSON + } + currentObject = ''; + } + } + } + } + + return logs; + } + + readToolLogs() { + // For Podman, first check if telemetry file exists and has content + // If not, fall back to parsing from stdout + if (env.GEMINI_SANDBOX === 'podman') { + // Try reading from file first + const logFilePath = join(this.testDir!, 'telemetry.log'); + + if (fileExists(logFilePath)) { + try { + const content = readFileSync(logFilePath, 'utf-8'); + if (content && content.includes('"event.name"')) { + // File has content, use normal file parsing + // Continue to the normal file parsing logic below + } else if (this._lastRunStdout) { + // File exists but is empty or doesn't have events, parse from stdout + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } catch { + // Error reading file, fall back to stdout + if (this._lastRunStdout) { + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } + } else if (this._lastRunStdout) { + // No file exists, parse from stdout + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } + + // In sandbox mode, telemetry is written to a relative path in the test directory + const logFilePath = + env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' + ? join(this.testDir!, 'telemetry.log') + : env.TELEMETRY_LOG_FILE; + + if (!logFilePath) { + console.warn(`TELEMETRY_LOG_FILE environment variable not set`); + return []; + } + + // Check if file exists, if not return empty array (file might not be created yet) + if (!fileExists(logFilePath)) { + return []; + } + + const content = readFileSync(logFilePath, 'utf-8'); + + // Split the content into individual JSON objects + // They are separated by "}\n{" + const jsonObjects = content + .split(/}\s*\n\s*{/) + .map((obj, index, array) => { + // Add back the braces we removed during split + if (index > 0) obj = '{' + obj; + if (index < array.length - 1) obj = obj + '}'; + return obj.trim(); + }) + .filter((obj) => obj); + + const logs: { + toolRequest: { + name: string; + args: string; + success: boolean; + duration_ms: number; + }; + }[] = []; + + for (const jsonStr of jsonObjects) { + try { + const logData = JSON.parse(jsonStr); + // Look for tool call logs + if ( + logData.attributes && + logData.attributes['event.name'] === 'gemini_cli.tool_call' + ) { + const toolName = logData.attributes.function_name; + logs.push({ + toolRequest: { + name: toolName, + args: logData.attributes.function_args, + success: logData.attributes.success, + duration_ms: logData.attributes.duration_ms, + }, + }); + } + } catch (e) { + // Skip objects that aren't valid JSON + if (env.VERBOSE === 'true') { + console.error( + 'Failed to parse telemetry object:', + (e as Error).message, + ); + } + } + } + + return logs; + } +} diff --git a/integration-tests/tsconfig.json b/integration-tests/tsconfig.json new file mode 100644 index 00000000..3e053d04 --- /dev/null +++ b/integration-tests/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "allowJs": true + }, + "include": ["**/*.ts"] +} diff --git a/integration-tests/write_file.test.js b/integration-tests/write_file.test.js deleted file mode 100644 index 7809161e..00000000 --- a/integration-tests/write_file.test.js +++ /dev/null @@ -1,68 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { test } from 'node:test'; -import { strict as assert } from 'assert'; -import { - TestRig, - createToolCallErrorMessage, - printDebugInfo, - validateModelOutput, -} from './test-helper.js'; - -test('should be able to write a file', async () => { - const rig = new TestRig(); - await rig.setup('should be able to write a file'); - const prompt = `show me an example of using the write tool. put a dad joke in dad.txt`; - - const result = await rig.run(prompt); - - const foundToolCall = await rig.waitForToolCall('write_file'); - - // Add debugging information - if (!foundToolCall) { - printDebugInfo(rig, result); - } - - const allTools = rig.readToolLogs(); - assert.ok( - foundToolCall, - createToolCallErrorMessage( - 'write_file', - allTools.map((t) => t.toolRequest.name), - result, - ), - ); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, 'dad.txt', 'Write file test'); - - const newFilePath = 'dad.txt'; - - const newFileContent = rig.readFile(newFilePath); - - // Add debugging for file content - if (newFileContent === '') { - console.error('File was created but is empty'); - console.error( - 'Tool calls:', - rig.readToolLogs().map((t) => ({ - name: t.toolRequest.name, - args: t.toolRequest.args, - })), - ); - } - - assert.notEqual(newFileContent, '', 'Expected file to have content'); - - // Log success info if verbose - if (process.env.VERBOSE === 'true') { - console.log( - 'File created successfully with content:', - newFileContent.substring(0, 100) + '...', - ); - } -}); diff --git a/integration-tests/write_file.test.ts b/integration-tests/write_file.test.ts new file mode 100644 index 00000000..7809161e --- /dev/null +++ b/integration-tests/write_file.test.ts @@ -0,0 +1,68 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { test } from 'node:test'; +import { strict as assert } from 'assert'; +import { + TestRig, + createToolCallErrorMessage, + printDebugInfo, + validateModelOutput, +} from './test-helper.js'; + +test('should be able to write a file', async () => { + const rig = new TestRig(); + await rig.setup('should be able to write a file'); + const prompt = `show me an example of using the write tool. put a dad joke in dad.txt`; + + const result = await rig.run(prompt); + + const foundToolCall = await rig.waitForToolCall('write_file'); + + // Add debugging information + if (!foundToolCall) { + printDebugInfo(rig, result); + } + + const allTools = rig.readToolLogs(); + assert.ok( + foundToolCall, + createToolCallErrorMessage( + 'write_file', + allTools.map((t) => t.toolRequest.name), + result, + ), + ); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, 'dad.txt', 'Write file test'); + + const newFilePath = 'dad.txt'; + + const newFileContent = rig.readFile(newFilePath); + + // Add debugging for file content + if (newFileContent === '') { + console.error('File was created but is empty'); + console.error( + 'Tool calls:', + rig.readToolLogs().map((t) => ({ + name: t.toolRequest.name, + args: t.toolRequest.args, + })), + ); + } + + assert.notEqual(newFileContent, '', 'Expected file to have content'); + + // Log success info if verbose + if (process.env.VERBOSE === 'true') { + console.log( + 'File created successfully with content:', + newFileContent.substring(0, 100) + '...', + ); + } +}); -- cgit v1.2.3