diff options
Diffstat (limited to 'integration-tests/test-helper.js')
| -rw-r--r-- | integration-tests/test-helper.js | 517 |
1 files changed, 504 insertions, 13 deletions
diff --git a/integration-tests/test-helper.js b/integration-tests/test-helper.js index 7ee3db87..9526ea5f 100644 --- a/integration-tests/test-helper.js +++ b/integration-tests/test-helper.js @@ -4,11 +4,13 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { execSync } from 'child_process'; +import { execSync, spawn } from 'child_process'; +import { parse } from 'shell-quote'; import { mkdirSync, writeFileSync, readFileSync } from 'fs'; import { join, dirname } from 'path'; import { fileURLToPath } from 'url'; import { env } from 'process'; +import { fileExists } from '../scripts/telemetry_utils.js'; const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -19,17 +21,129 @@ function sanitizeTestName(name) { .replace(/-+/g, '-'); } +// Helper to create detailed error messages +export function createToolCallErrorMessage(expectedTools, foundTools, result) { + const expectedStr = Array.isArray(expectedTools) + ? expectedTools.join(' or ') + : expectedTools; + return ( + `Expected to find ${expectedStr} tool call(s). ` + + `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` + + `Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}` + ); +} + +// Helper to print debug information when tests fail +export function printDebugInfo(rig, result, context = {}) { + console.error('Test failed - Debug info:'); + console.error('Result length:', result.length); + console.error('Result (first 500 chars):', result.substring(0, 500)); + console.error( + 'Result (last 500 chars):', + result.substring(result.length - 500), + ); + + // Print any additional context provided + Object.entries(context).forEach(([key, value]) => { + console.error(`${key}:`, value); + }); + + // Check what tools were actually called + const allTools = rig.readToolLogs(); + console.error( + 'All tool calls found:', + allTools.map((t) => t.toolRequest.name), + ); + + return allTools; +} + +// Helper to validate model output and warn about unexpected content +export function validateModelOutput( + result, + expectedContent = null, + testName = '', +) { + // First, check if there's any output at all (this should fail the test if missing) + if (!result || result.trim().length === 0) { + throw new Error('Expected LLM to return some output'); + } + + // If expectedContent is provided, check for it and warn if missing + if (expectedContent) { + const contents = Array.isArray(expectedContent) + ? expectedContent + : [expectedContent]; + const missingContent = contents.filter((content) => { + if (typeof content === 'string') { + return !result.toLowerCase().includes(content.toLowerCase()); + } else if (content instanceof RegExp) { + return !content.test(result); + } + return false; + }); + + if (missingContent.length > 0) { + console.warn( + `Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`, + 'This is not ideal but not a test failure.', + ); + console.warn( + 'The tool was called successfully, which is the main requirement.', + ); + return false; + } else if (process.env.VERBOSE === 'true') { + console.log(`${testName}: Model output validated successfully.`); + } + return true; + } + + return true; +} + export class TestRig { constructor() { this.bundlePath = join(__dirname, '..', 'bundle/gemini.js'); this.testDir = null; } - setup(testName) { + // Get timeout based on environment + getDefaultTimeout() { + if (env.CI) return 60000; // 1 minute in CI + if (env.GEMINI_SANDBOX) return 30000; // 30s in containers + return 15000; // 15s locally + } + + setup(testName, options = {}) { this.testName = testName; const sanitizedName = sanitizeTestName(testName); this.testDir = join(env.INTEGRATION_TEST_FILE_DIR, sanitizedName); mkdirSync(this.testDir, { recursive: true }); + + // Create a settings file to point the CLI to the local collector + const geminiDir = join(this.testDir, '.gemini'); + mkdirSync(geminiDir, { recursive: true }); + // In sandbox mode, use an absolute path for telemetry inside the container + // The container mounts the test directory at the same path as the host + const telemetryPath = + env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' + ? join(this.testDir, 'telemetry.log') // Absolute path in test directory + : env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox + + const settings = { + telemetry: { + enabled: true, + target: 'local', + otlpEndpoint: '', + outfile: telemetryPath, + }, + sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false, + ...options.settings, // Allow tests to override/add settings + }; + writeFileSync( + join(geminiDir, 'settings.json'), + JSON.stringify(settings, null, 2), + ); } createFile(fileName, content) { @@ -39,7 +153,7 @@ export class TestRig { } mkdir(dir) { - mkdirSync(join(this.testDir, dir)); + mkdirSync(join(this.testDir, dir), { recursive: true }); } sync() { @@ -70,19 +184,88 @@ export class TestRig { command += ` ${args.join(' ')}`; - const output = execSync(command, execOptions); + const commandArgs = parse(command); + const node = commandArgs.shift(); - if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { - const testId = `${env.TEST_FILE_NAME.replace( - '.test.js', - '', - )}:${this.testName.replace(/ /g, '-')}`; - console.log(`--- TEST: ${testId} ---`); - console.log(output); - console.log(`--- END TEST: ${testId} ---`); + const child = spawn(node, commandArgs, { + cwd: this.testDir, + stdio: 'pipe', + }); + + let stdout = ''; + let stderr = ''; + + // Handle stdin if provided + if (execOptions.input) { + child.stdin.write(execOptions.input); + child.stdin.end(); } - return output; + child.stdout.on('data', (data) => { + stdout += data; + if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { + process.stdout.write(data); + } + }); + + child.stderr.on('data', (data) => { + stderr += data; + if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { + process.stderr.write(data); + } + }); + + const promise = new Promise((resolve, reject) => { + child.on('close', (code) => { + if (code === 0) { + // Store the raw stdout for Podman telemetry parsing + this._lastRunStdout = stdout; + + // Filter out telemetry output when running with Podman + // Podman seems to output telemetry to stdout even when writing to file + let result = stdout; + if (env.GEMINI_SANDBOX === 'podman') { + // Remove telemetry JSON objects from output + // They are multi-line JSON objects that start with { and contain telemetry fields + const lines = result.split('\n'); + const filteredLines = []; + let inTelemetryObject = false; + let braceDepth = 0; + + for (const line of lines) { + if (!inTelemetryObject && line.trim() === '{') { + // Check if this might be start of telemetry object + inTelemetryObject = true; + braceDepth = 1; + } else if (inTelemetryObject) { + // Count braces to track nesting + for (const char of line) { + if (char === '{') braceDepth++; + else if (char === '}') braceDepth--; + } + + // Check if we've closed all braces + if (braceDepth === 0) { + inTelemetryObject = false; + // Skip this line (the closing brace) + continue; + } + } else { + // Not in telemetry object, keep the line + filteredLines.push(line); + } + } + + result = filteredLines.join('\n'); + } + resolve(result); + } else { + reject(new Error(`Process exited with code ${code}:\n${stderr}`)); + } + }); + }); + + return promise; } readFile(fileName) { @@ -98,4 +281,312 @@ export class TestRig { } return content; } + + async cleanup() { + // Clean up test directory + if (this.testDir && !env.KEEP_OUTPUT) { + try { + execSync(`rm -rf ${this.testDir}`); + } catch (error) { + // Ignore cleanup errors + if (env.VERBOSE === 'true') { + console.warn('Cleanup warning:', error.message); + } + } + } + } + + async waitForTelemetryReady() { + // In sandbox mode, telemetry is written to a relative path in the test directory + const logFilePath = + env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' + ? join(this.testDir, 'telemetry.log') + : env.TELEMETRY_LOG_FILE; + + if (!logFilePath) return; + + // Wait for telemetry file to exist and have content + await this.poll( + () => { + if (!fileExists(logFilePath)) return false; + try { + const content = readFileSync(logFilePath, 'utf-8'); + // Check if file has meaningful content (at least one complete JSON object) + return content.includes('"event.name"'); + } catch (_e) { + return false; + } + }, + 2000, // 2 seconds max - reduced since telemetry should flush on exit now + 100, // check every 100ms + ); + } + + async waitForToolCall(toolName, timeout) { + // Use environment-specific timeout + if (!timeout) { + timeout = this.getDefaultTimeout(); + } + + // Wait for telemetry to be ready before polling for tool calls + await this.waitForTelemetryReady(); + + return this.poll( + () => { + const toolLogs = this.readToolLogs(); + return toolLogs.some((log) => log.toolRequest.name === toolName); + }, + timeout, + 100, + ); + } + + async waitForAnyToolCall(toolNames, timeout) { + // Use environment-specific timeout + if (!timeout) { + timeout = this.getDefaultTimeout(); + } + + // Wait for telemetry to be ready before polling for tool calls + await this.waitForTelemetryReady(); + + return this.poll( + () => { + const toolLogs = this.readToolLogs(); + return toolNames.some((name) => + toolLogs.some((log) => log.toolRequest.name === name), + ); + }, + timeout, + 100, + ); + } + + async poll(predicate, timeout, interval) { + const startTime = Date.now(); + let attempts = 0; + while (Date.now() - startTime < timeout) { + attempts++; + const result = predicate(); + if (env.VERBOSE === 'true' && attempts % 5 === 0) { + console.log( + `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`, + ); + } + if (result) { + return true; + } + await new Promise((resolve) => setTimeout(resolve, interval)); + } + if (env.VERBOSE === 'true') { + console.log(`Poll timed out after ${attempts} attempts`); + } + return false; + } + + _parseToolLogsFromStdout(stdout) { + const logs = []; + + // The console output from Podman is JavaScript object notation, not JSON + // Look for tool call events in the output + // Updated regex to handle tool names with hyphens and underscores + const toolCallPattern = + /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g; + const matches = [...stdout.matchAll(toolCallPattern)]; + + for (const match of matches) { + const toolName = match[1]; + const success = match[2] === 'true'; + const duration = parseInt(match[3], 10); + + // Try to find function_args nearby + const matchIndex = match.index || 0; + const contextStart = Math.max(0, matchIndex - 500); + const contextEnd = Math.min(stdout.length, matchIndex + 500); + const context = stdout.substring(contextStart, contextEnd); + + // Look for function_args in the context + let args = '{}'; + const argsMatch = context.match(/function_args:\s*'([^']+)'/); + if (argsMatch) { + args = argsMatch[1]; + } + + // Also try to find function_name to double-check + // Updated regex to handle tool names with hyphens and underscores + const nameMatch = context.match(/function_name:\s*'([\w-]+)'/); + const actualToolName = nameMatch ? nameMatch[1] : toolName; + + logs.push({ + timestamp: Date.now(), + toolRequest: { + name: actualToolName, + args: args, + success: success, + duration_ms: duration, + }, + }); + } + + // If no matches found with the simple pattern, try the JSON parsing approach + // in case the format changes + if (logs.length === 0) { + const lines = stdout.split('\n'); + let currentObject = ''; + let inObject = false; + let braceDepth = 0; + + for (const line of lines) { + if (!inObject && line.trim() === '{') { + inObject = true; + braceDepth = 1; + currentObject = line + '\n'; + } else if (inObject) { + currentObject += line + '\n'; + + // Count braces + for (const char of line) { + if (char === '{') braceDepth++; + else if (char === '}') braceDepth--; + } + + // If we've closed all braces, try to parse the object + if (braceDepth === 0) { + inObject = false; + try { + const obj = JSON.parse(currentObject); + + // Check for tool call in different formats + if ( + obj.body && + obj.body.includes('Tool call:') && + obj.attributes + ) { + const bodyMatch = obj.body.match(/Tool call: (\w+)\./); + if (bodyMatch) { + logs.push({ + timestamp: obj.timestamp || Date.now(), + toolRequest: { + name: bodyMatch[1], + args: obj.attributes.function_args || '{}', + success: obj.attributes.success !== false, + duration_ms: obj.attributes.duration_ms || 0, + }, + }); + } + } else if ( + obj.attributes && + obj.attributes['event.name'] === 'gemini_cli.tool_call' + ) { + logs.push({ + timestamp: obj.attributes['event.timestamp'], + toolRequest: { + name: obj.attributes.function_name, + args: obj.attributes.function_args, + success: obj.attributes.success, + duration_ms: obj.attributes.duration_ms, + }, + }); + } + } catch (_e) { + // Not valid JSON + } + currentObject = ''; + } + } + } + } + + return logs; + } + + readToolLogs() { + // For Podman, first check if telemetry file exists and has content + // If not, fall back to parsing from stdout + if (env.GEMINI_SANDBOX === 'podman') { + // Try reading from file first + const logFilePath = join(this.testDir, 'telemetry.log'); + + if (fileExists(logFilePath)) { + try { + const content = readFileSync(logFilePath, 'utf-8'); + if (content && content.includes('"event.name"')) { + // File has content, use normal file parsing + // Continue to the normal file parsing logic below + } else if (this._lastRunStdout) { + // File exists but is empty or doesn't have events, parse from stdout + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } catch (_e) { + // Error reading file, fall back to stdout + if (this._lastRunStdout) { + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } + } else if (this._lastRunStdout) { + // No file exists, parse from stdout + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } + + // In sandbox mode, telemetry is written to a relative path in the test directory + const logFilePath = + env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' + ? join(this.testDir, 'telemetry.log') + : env.TELEMETRY_LOG_FILE; + + if (!logFilePath) { + console.warn(`TELEMETRY_LOG_FILE environment variable not set`); + return []; + } + + // Check if file exists, if not return empty array (file might not be created yet) + if (!fileExists(logFilePath)) { + return []; + } + + const content = readFileSync(logFilePath, 'utf-8'); + + // Split the content into individual JSON objects + // They are separated by "}\n{" pattern + const jsonObjects = content + .split(/}\s*\n\s*{/) + .map((obj, index, array) => { + // Add back the braces we removed during split + if (index > 0) obj = '{' + obj; + if (index < array.length - 1) obj = obj + '}'; + return obj.trim(); + }) + .filter((obj) => obj); + + const logs = []; + + for (const jsonStr of jsonObjects) { + try { + const logData = JSON.parse(jsonStr); + // Look for tool call logs + if ( + logData.attributes && + logData.attributes['event.name'] === 'gemini_cli.tool_call' + ) { + const toolName = logData.attributes.function_name; + logs.push({ + toolRequest: { + name: toolName, + args: logData.attributes.function_args, + success: logData.attributes.success, + duration_ms: logData.attributes.duration_ms, + }, + }); + } + } catch (_e) { + // Skip objects that aren't valid JSON + if (env.VERBOSE === 'true') { + console.error('Failed to parse telemetry object:', _e.message); + } + } + } + + return logs; + } } |
