diff options
| author | Jacob Richman <[email protected]> | 2025-08-12 09:19:09 -0700 |
|---|---|---|
| committer | GitHub <[email protected]> | 2025-08-12 16:19:09 +0000 |
| commit | 804c181ac4a3dc1c4971a5b8a643421bbe697f3d (patch) | |
| tree | dc0ae93448453081954da307a91d90dfec9c361a /integration-tests/test-helper.js | |
| parent | 2d1a6af890da1e9437cd1a1774e2c7fc7ad32957 (diff) | |
chore(integration-tests): refactor to typescript (#5645)
Diffstat (limited to 'integration-tests/test-helper.js')
| -rw-r--r-- | integration-tests/test-helper.js | 597 |
1 files changed, 0 insertions, 597 deletions
diff --git a/integration-tests/test-helper.js b/integration-tests/test-helper.js deleted file mode 100644 index d1125a78..00000000 --- a/integration-tests/test-helper.js +++ /dev/null @@ -1,597 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { execSync, spawn } from 'child_process'; -import { parse } from 'shell-quote'; -import { mkdirSync, writeFileSync, readFileSync } from 'fs'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; -import { env } from 'process'; -import { fileExists } from '../scripts/telemetry_utils.js'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); - -function sanitizeTestName(name) { - return name - .toLowerCase() - .replace(/[^a-z0-9]/g, '-') - .replace(/-+/g, '-'); -} - -// Helper to create detailed error messages -export function createToolCallErrorMessage(expectedTools, foundTools, result) { - const expectedStr = Array.isArray(expectedTools) - ? expectedTools.join(' or ') - : expectedTools; - return ( - `Expected to find ${expectedStr} tool call(s). ` + - `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` + - `Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}` - ); -} - -// Helper to print debug information when tests fail -export function printDebugInfo(rig, result, context = {}) { - console.error('Test failed - Debug info:'); - console.error('Result length:', result.length); - console.error('Result (first 500 chars):', result.substring(0, 500)); - console.error( - 'Result (last 500 chars):', - result.substring(result.length - 500), - ); - - // Print any additional context provided - Object.entries(context).forEach(([key, value]) => { - console.error(`${key}:`, value); - }); - - // Check what tools were actually called - const allTools = rig.readToolLogs(); - console.error( - 'All tool calls found:', - allTools.map((t) => t.toolRequest.name), - ); - - return allTools; -} - -// Helper to validate model output and warn about unexpected content -export function validateModelOutput( - result, - expectedContent = null, - testName = '', -) { - // First, check if there's any output at all (this should fail the test if missing) - if (!result || result.trim().length === 0) { - throw new Error('Expected LLM to return some output'); - } - - // If expectedContent is provided, check for it and warn if missing - if (expectedContent) { - const contents = Array.isArray(expectedContent) - ? expectedContent - : [expectedContent]; - const missingContent = contents.filter((content) => { - if (typeof content === 'string') { - return !result.toLowerCase().includes(content.toLowerCase()); - } else if (content instanceof RegExp) { - return !content.test(result); - } - return false; - }); - - if (missingContent.length > 0) { - console.warn( - `Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`, - 'This is not ideal but not a test failure.', - ); - console.warn( - 'The tool was called successfully, which is the main requirement.', - ); - return false; - } else if (process.env.VERBOSE === 'true') { - console.log(`${testName}: Model output validated successfully.`); - } - return true; - } - - return true; -} - -export class TestRig { - constructor() { - this.bundlePath = join(__dirname, '..', 'bundle/gemini.js'); - this.testDir = null; - } - - // Get timeout based on environment - getDefaultTimeout() { - if (env.CI) return 60000; // 1 minute in CI - if (env.GEMINI_SANDBOX) return 30000; // 30s in containers - return 15000; // 15s locally - } - - setup(testName, options = {}) { - this.testName = testName; - const sanitizedName = sanitizeTestName(testName); - this.testDir = join(env.INTEGRATION_TEST_FILE_DIR, sanitizedName); - mkdirSync(this.testDir, { recursive: true }); - - // Create a settings file to point the CLI to the local collector - const geminiDir = join(this.testDir, '.gemini'); - mkdirSync(geminiDir, { recursive: true }); - // In sandbox mode, use an absolute path for telemetry inside the container - // The container mounts the test directory at the same path as the host - const telemetryPath = - env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' - ? join(this.testDir, 'telemetry.log') // Absolute path in test directory - : env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox - - const settings = { - telemetry: { - enabled: true, - target: 'local', - otlpEndpoint: '', - outfile: telemetryPath, - }, - sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false, - ...options.settings, // Allow tests to override/add settings - }; - writeFileSync( - join(geminiDir, 'settings.json'), - JSON.stringify(settings, null, 2), - ); - } - - createFile(fileName, content) { - const filePath = join(this.testDir, fileName); - writeFileSync(filePath, content); - return filePath; - } - - mkdir(dir) { - mkdirSync(join(this.testDir, dir), { recursive: true }); - } - - sync() { - // ensure file system is done before spawning - execSync('sync', { cwd: this.testDir }); - } - - run(promptOrOptions, ...args) { - let command = `node ${this.bundlePath} --yolo`; - const execOptions = { - cwd: this.testDir, - encoding: 'utf-8', - }; - - if (typeof promptOrOptions === 'string') { - command += ` --prompt ${JSON.stringify(promptOrOptions)}`; - } else if ( - typeof promptOrOptions === 'object' && - promptOrOptions !== null - ) { - if (promptOrOptions.prompt) { - command += ` --prompt ${JSON.stringify(promptOrOptions.prompt)}`; - } - if (promptOrOptions.stdin) { - execOptions.input = promptOrOptions.stdin; - } - } - - command += ` ${args.join(' ')}`; - - const commandArgs = parse(command); - const node = commandArgs.shift(); - - const child = spawn(node, commandArgs, { - cwd: this.testDir, - stdio: 'pipe', - }); - - let stdout = ''; - let stderr = ''; - - // Handle stdin if provided - if (execOptions.input) { - child.stdin.write(execOptions.input); - child.stdin.end(); - } - - child.stdout.on('data', (data) => { - stdout += data; - if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { - process.stdout.write(data); - } - }); - - child.stderr.on('data', (data) => { - stderr += data; - if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { - process.stderr.write(data); - } - }); - - const promise = new Promise((resolve, reject) => { - child.on('close', (code) => { - if (code === 0) { - // Store the raw stdout for Podman telemetry parsing - this._lastRunStdout = stdout; - - // Filter out telemetry output when running with Podman - // Podman seems to output telemetry to stdout even when writing to file - let result = stdout; - if (env.GEMINI_SANDBOX === 'podman') { - // Remove telemetry JSON objects from output - // They are multi-line JSON objects that start with { and contain telemetry fields - const lines = result.split('\n'); - const filteredLines = []; - let inTelemetryObject = false; - let braceDepth = 0; - - for (const line of lines) { - if (!inTelemetryObject && line.trim() === '{') { - // Check if this might be start of telemetry object - inTelemetryObject = true; - braceDepth = 1; - } else if (inTelemetryObject) { - // Count braces to track nesting - for (const char of line) { - if (char === '{') braceDepth++; - else if (char === '}') braceDepth--; - } - - // Check if we've closed all braces - if (braceDepth === 0) { - inTelemetryObject = false; - // Skip this line (the closing brace) - continue; - } - } else { - // Not in telemetry object, keep the line - filteredLines.push(line); - } - } - - result = filteredLines.join('\n'); - } - // If we have stderr output, include that also - if (stderr) { - result += `\n\nStdErr:\n${stderr}`; - } - - resolve(result); - } else { - reject(new Error(`Process exited with code ${code}:\n${stderr}`)); - } - }); - }); - - return promise; - } - - readFile(fileName) { - const content = readFileSync(join(this.testDir, fileName), 'utf-8'); - if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { - const testId = `${env.TEST_FILE_NAME.replace( - '.test.js', - '', - )}:${this.testName.replace(/ /g, '-')}`; - console.log(`--- FILE: ${testId}/${fileName} ---`); - console.log(content); - console.log(`--- END FILE: ${testId}/${fileName} ---`); - } - return content; - } - - async cleanup() { - // Clean up test directory - if (this.testDir && !env.KEEP_OUTPUT) { - try { - execSync(`rm -rf ${this.testDir}`); - } catch (error) { - // Ignore cleanup errors - if (env.VERBOSE === 'true') { - console.warn('Cleanup warning:', error.message); - } - } - } - } - - async waitForTelemetryReady() { - // In sandbox mode, telemetry is written to a relative path in the test directory - const logFilePath = - env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' - ? join(this.testDir, 'telemetry.log') - : env.TELEMETRY_LOG_FILE; - - if (!logFilePath) return; - - // Wait for telemetry file to exist and have content - await this.poll( - () => { - if (!fileExists(logFilePath)) return false; - try { - const content = readFileSync(logFilePath, 'utf-8'); - // Check if file has meaningful content (at least one complete JSON object) - return content.includes('"event.name"'); - } catch (_e) { - return false; - } - }, - 2000, // 2 seconds max - reduced since telemetry should flush on exit now - 100, // check every 100ms - ); - } - - async waitForToolCall(toolName, timeout) { - // Use environment-specific timeout - if (!timeout) { - timeout = this.getDefaultTimeout(); - } - - // Wait for telemetry to be ready before polling for tool calls - await this.waitForTelemetryReady(); - - return this.poll( - () => { - const toolLogs = this.readToolLogs(); - return toolLogs.some((log) => log.toolRequest.name === toolName); - }, - timeout, - 100, - ); - } - - async waitForAnyToolCall(toolNames, timeout) { - // Use environment-specific timeout - if (!timeout) { - timeout = this.getDefaultTimeout(); - } - - // Wait for telemetry to be ready before polling for tool calls - await this.waitForTelemetryReady(); - - return this.poll( - () => { - const toolLogs = this.readToolLogs(); - return toolNames.some((name) => - toolLogs.some((log) => log.toolRequest.name === name), - ); - }, - timeout, - 100, - ); - } - - async poll(predicate, timeout, interval) { - const startTime = Date.now(); - let attempts = 0; - while (Date.now() - startTime < timeout) { - attempts++; - const result = predicate(); - if (env.VERBOSE === 'true' && attempts % 5 === 0) { - console.log( - `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`, - ); - } - if (result) { - return true; - } - await new Promise((resolve) => setTimeout(resolve, interval)); - } - if (env.VERBOSE === 'true') { - console.log(`Poll timed out after ${attempts} attempts`); - } - return false; - } - - _parseToolLogsFromStdout(stdout) { - const logs = []; - - // The console output from Podman is JavaScript object notation, not JSON - // Look for tool call events in the output - // Updated regex to handle tool names with hyphens and underscores - const toolCallPattern = - /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g; - const matches = [...stdout.matchAll(toolCallPattern)]; - - for (const match of matches) { - const toolName = match[1]; - const success = match[2] === 'true'; - const duration = parseInt(match[3], 10); - - // Try to find function_args nearby - const matchIndex = match.index || 0; - const contextStart = Math.max(0, matchIndex - 500); - const contextEnd = Math.min(stdout.length, matchIndex + 500); - const context = stdout.substring(contextStart, contextEnd); - - // Look for function_args in the context - let args = '{}'; - const argsMatch = context.match(/function_args:\s*'([^']+)'/); - if (argsMatch) { - args = argsMatch[1]; - } - - // Also try to find function_name to double-check - // Updated regex to handle tool names with hyphens and underscores - const nameMatch = context.match(/function_name:\s*'([\w-]+)'/); - const actualToolName = nameMatch ? nameMatch[1] : toolName; - - logs.push({ - timestamp: Date.now(), - toolRequest: { - name: actualToolName, - args: args, - success: success, - duration_ms: duration, - }, - }); - } - - // If no matches found with the simple pattern, try the JSON parsing approach - // in case the format changes - if (logs.length === 0) { - const lines = stdout.split('\n'); - let currentObject = ''; - let inObject = false; - let braceDepth = 0; - - for (const line of lines) { - if (!inObject && line.trim() === '{') { - inObject = true; - braceDepth = 1; - currentObject = line + '\n'; - } else if (inObject) { - currentObject += line + '\n'; - - // Count braces - for (const char of line) { - if (char === '{') braceDepth++; - else if (char === '}') braceDepth--; - } - - // If we've closed all braces, try to parse the object - if (braceDepth === 0) { - inObject = false; - try { - const obj = JSON.parse(currentObject); - - // Check for tool call in different formats - if ( - obj.body && - obj.body.includes('Tool call:') && - obj.attributes - ) { - const bodyMatch = obj.body.match(/Tool call: (\w+)\./); - if (bodyMatch) { - logs.push({ - timestamp: obj.timestamp || Date.now(), - toolRequest: { - name: bodyMatch[1], - args: obj.attributes.function_args || '{}', - success: obj.attributes.success !== false, - duration_ms: obj.attributes.duration_ms || 0, - }, - }); - } - } else if ( - obj.attributes && - obj.attributes['event.name'] === 'gemini_cli.tool_call' - ) { - logs.push({ - timestamp: obj.attributes['event.timestamp'], - toolRequest: { - name: obj.attributes.function_name, - args: obj.attributes.function_args, - success: obj.attributes.success, - duration_ms: obj.attributes.duration_ms, - }, - }); - } - } catch (_e) { - // Not valid JSON - } - currentObject = ''; - } - } - } - } - - return logs; - } - - readToolLogs() { - // For Podman, first check if telemetry file exists and has content - // If not, fall back to parsing from stdout - if (env.GEMINI_SANDBOX === 'podman') { - // Try reading from file first - const logFilePath = join(this.testDir, 'telemetry.log'); - - if (fileExists(logFilePath)) { - try { - const content = readFileSync(logFilePath, 'utf-8'); - if (content && content.includes('"event.name"')) { - // File has content, use normal file parsing - // Continue to the normal file parsing logic below - } else if (this._lastRunStdout) { - // File exists but is empty or doesn't have events, parse from stdout - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } catch (_e) { - // Error reading file, fall back to stdout - if (this._lastRunStdout) { - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } - } else if (this._lastRunStdout) { - // No file exists, parse from stdout - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } - - // In sandbox mode, telemetry is written to a relative path in the test directory - const logFilePath = - env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' - ? join(this.testDir, 'telemetry.log') - : env.TELEMETRY_LOG_FILE; - - if (!logFilePath) { - console.warn(`TELEMETRY_LOG_FILE environment variable not set`); - return []; - } - - // Check if file exists, if not return empty array (file might not be created yet) - if (!fileExists(logFilePath)) { - return []; - } - - const content = readFileSync(logFilePath, 'utf-8'); - - // Split the content into individual JSON objects - // They are separated by "}\n{" pattern - const jsonObjects = content - .split(/}\s*\n\s*{/) - .map((obj, index, array) => { - // Add back the braces we removed during split - if (index > 0) obj = '{' + obj; - if (index < array.length - 1) obj = obj + '}'; - return obj.trim(); - }) - .filter((obj) => obj); - - const logs = []; - - for (const jsonStr of jsonObjects) { - try { - const logData = JSON.parse(jsonStr); - // Look for tool call logs - if ( - logData.attributes && - logData.attributes['event.name'] === 'gemini_cli.tool_call' - ) { - const toolName = logData.attributes.function_name; - logs.push({ - toolRequest: { - name: toolName, - args: logData.attributes.function_args, - success: logData.attributes.success, - duration_ms: logData.attributes.duration_ms, - }, - }); - } - } catch (_e) { - // Skip objects that aren't valid JSON - if (env.VERBOSE === 'true') { - console.error('Failed to parse telemetry object:', _e.message); - } - } - } - - return logs; - } -} |
