summary | refs | log | tree | commit | diff
path: root/integration-tests/test-helper.js
diff options
context:
space:
mode:
Diffstat (limited to 'integration-tests/test-helper.js')
-rw-r--r-- integration-tests/test-helper.js 597
1 files changed, 0 insertions, 597 deletions
diff --git a/integration-tests/test-helper.js b/integration-tests/test-helper.js
deleted file mode 100644
index d1125a78..00000000
--- a/integration-tests/test-helper.js
+++ /dev/null
@@ -1,597 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { execSync, spawn } from 'child_process';
-import { parse } from 'shell-quote';
-import { mkdirSync, writeFileSync, readFileSync } from 'fs';
-import { join, dirname } from 'path';
-import { fileURLToPath } from 'url';
-import { env } from 'process';
-import { fileExists } from '../scripts/telemetry_utils.js';
-
-const __dirname = dirname(fileURLToPath(import.meta.url));
-
-function sanitizeTestName(name) {
- return name
- .toLowerCase()
- .replace(/[^a-z0-9]/g, '-')
- .replace(/-+/g, '-');
-}
-
-// Helper to create detailed error messages
-export function createToolCallErrorMessage(expectedTools, foundTools, result) {
- const expectedStr = Array.isArray(expectedTools)
- ? expectedTools.join(' or ')
- : expectedTools;
- return (
- `Expected to find ${expectedStr} tool call(s). ` +
- `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` +
- `Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}`
- );
-}
-
-// Helper to print debug information when tests fail
-export function printDebugInfo(rig, result, context = {}) {
- console.error('Test failed - Debug info:');
- console.error('Result length:', result.length);
- console.error('Result (first 500 chars):', result.substring(0, 500));
- console.error(
- 'Result (last 500 chars):',
- result.substring(result.length - 500),
- );
-
- // Print any additional context provided
- Object.entries(context).forEach(([key, value]) => {
- console.error(`${key}:`, value);
- });
-
- // Check what tools were actually called
- const allTools = rig.readToolLogs();
- console.error(
- 'All tool calls found:',
- allTools.map((t) => t.toolRequest.name),
- );
-
- return allTools;
-}
-
-// Helper to validate model output and warn about unexpected content
-export function validateModelOutput(
- result,
- expectedContent = null,
- testName = '',
-) {
- // First, check if there's any output at all (this should fail the test if missing)
- if (!result || result.trim().length === 0) {
- throw new Error('Expected LLM to return some output');
- }
-
- // If expectedContent is provided, check for it and warn if missing
- if (expectedContent) {
- const contents = Array.isArray(expectedContent)
- ? expectedContent
- : [expectedContent];
- const missingContent = contents.filter((content) => {
- if (typeof content === 'string') {
- return !result.toLowerCase().includes(content.toLowerCase());
- } else if (content instanceof RegExp) {
- return !content.test(result);
- }
- return false;
- });
-
- if (missingContent.length > 0) {
- console.warn(
- `Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`,
- 'This is not ideal but not a test failure.',
- );
- console.warn(
- 'The tool was called successfully, which is the main requirement.',
- );
- return false;
- } else if (process.env.VERBOSE === 'true') {
- console.log(`${testName}: Model output validated successfully.`);
- }
- return true;
- }
-
- return true;
-}
-
-export class TestRig {
- constructor() {
- this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
- this.testDir = null;
- }
-
- // Get timeout based on environment
- getDefaultTimeout() {
- if (env.CI) return 60000; // 1 minute in CI
- if (env.GEMINI_SANDBOX) return 30000; // 30s in containers
- return 15000; // 15s locally
- }
-
- setup(testName, options = {}) {
- this.testName = testName;
- const sanitizedName = sanitizeTestName(testName);
- this.testDir = join(env.INTEGRATION_TEST_FILE_DIR, sanitizedName);
- mkdirSync(this.testDir, { recursive: true });
-
- // Create a settings file to point the CLI to the local collector
- const geminiDir = join(this.testDir, '.gemini');
- mkdirSync(geminiDir, { recursive: true });
- // In sandbox mode, use an absolute path for telemetry inside the container
- // The container mounts the test directory at the same path as the host
- const telemetryPath =
- env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
- ? join(this.testDir, 'telemetry.log') // Absolute path in test directory
- : env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox
-
- const settings = {
- telemetry: {
- enabled: true,
- target: 'local',
- otlpEndpoint: '',
- outfile: telemetryPath,
- },
- sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false,
- ...options.settings, // Allow tests to override/add settings
- };
- writeFileSync(
- join(geminiDir, 'settings.json'),
- JSON.stringify(settings, null, 2),
- );
- }
-
- createFile(fileName, content) {
- const filePath = join(this.testDir, fileName);
- writeFileSync(filePath, content);
- return filePath;
- }
-
- mkdir(dir) {
- mkdirSync(join(this.testDir, dir), { recursive: true });
- }
-
- sync() {
- // ensure file system is done before spawning
- execSync('sync', { cwd: this.testDir });
- }
-
- run(promptOrOptions, ...args) {
- let command = `node ${this.bundlePath} --yolo`;
- const execOptions = {
- cwd: this.testDir,
- encoding: 'utf-8',
- };
-
- if (typeof promptOrOptions === 'string') {
- command += ` --prompt ${JSON.stringify(promptOrOptions)}`;
- } else if (
- typeof promptOrOptions === 'object' &&
- promptOrOptions !== null
- ) {
- if (promptOrOptions.prompt) {
- command += ` --prompt ${JSON.stringify(promptOrOptions.prompt)}`;
- }
- if (promptOrOptions.stdin) {
- execOptions.input = promptOrOptions.stdin;
- }
- }
-
- command += ` ${args.join(' ')}`;
-
- const commandArgs = parse(command);
- const node = commandArgs.shift();
-
- const child = spawn(node, commandArgs, {
- cwd: this.testDir,
- stdio: 'pipe',
- });
-
- let stdout = '';
- let stderr = '';
-
- // Handle stdin if provided
- if (execOptions.input) {
- child.stdin.write(execOptions.input);
- child.stdin.end();
- }
-
- child.stdout.on('data', (data) => {
- stdout += data;
- if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
- process.stdout.write(data);
- }
- });
-
- child.stderr.on('data', (data) => {
- stderr += data;
- if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
- process.stderr.write(data);
- }
- });
-
- const promise = new Promise((resolve, reject) => {
- child.on('close', (code) => {
- if (code === 0) {
- // Store the raw stdout for Podman telemetry parsing
- this._lastRunStdout = stdout;
-
- // Filter out telemetry output when running with Podman
- // Podman seems to output telemetry to stdout even when writing to file
- let result = stdout;
- if (env.GEMINI_SANDBOX === 'podman') {
- // Remove telemetry JSON objects from output
- // They are multi-line JSON objects that start with { and contain telemetry fields
- const lines = result.split('\n');
- const filteredLines = [];
- let inTelemetryObject = false;
- let braceDepth = 0;
-
- for (const line of lines) {
- if (!inTelemetryObject && line.trim() === '{') {
- // Check if this might be start of telemetry object
- inTelemetryObject = true;
- braceDepth = 1;
- } else if (inTelemetryObject) {
- // Count braces to track nesting
- for (const char of line) {
- if (char === '{') braceDepth++;
- else if (char === '}') braceDepth--;
- }
-
- // Check if we've closed all braces
- if (braceDepth === 0) {
- inTelemetryObject = false;
- // Skip this line (the closing brace)
- continue;
- }
- } else {
- // Not in telemetry object, keep the line
- filteredLines.push(line);
- }
- }
-
- result = filteredLines.join('\n');
- }
- // If we have stderr output, include that also
- if (stderr) {
- result += `\n\nStdErr:\n${stderr}`;
- }
-
- resolve(result);
- } else {
- reject(new Error(`Process exited with code ${code}:\n${stderr}`));
- }
- });
- });
-
- return promise;
- }
-
- readFile(fileName) {
- const content = readFileSync(join(this.testDir, fileName), 'utf-8');
- if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
- const testId = `${env.TEST_FILE_NAME.replace(
- '.test.js',
- '',
- )}:${this.testName.replace(/ /g, '-')}`;
- console.log(`--- FILE: ${testId}/${fileName} ---`);
- console.log(content);
- console.log(`--- END FILE: ${testId}/${fileName} ---`);
- }
- return content;
- }
-
- async cleanup() {
- // Clean up test directory
- if (this.testDir && !env.KEEP_OUTPUT) {
- try {
- execSync(`rm -rf ${this.testDir}`);
- } catch (error) {
- // Ignore cleanup errors
- if (env.VERBOSE === 'true') {
- console.warn('Cleanup warning:', error.message);
- }
- }
- }
- }
-
- async waitForTelemetryReady() {
- // In sandbox mode, telemetry is written to a relative path in the test directory
- const logFilePath =
- env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
- ? join(this.testDir, 'telemetry.log')
- : env.TELEMETRY_LOG_FILE;
-
- if (!logFilePath) return;
-
- // Wait for telemetry file to exist and have content
- await this.poll(
- () => {
- if (!fileExists(logFilePath)) return false;
- try {
- const content = readFileSync(logFilePath, 'utf-8');
- // Check if file has meaningful content (at least one complete JSON object)
- return content.includes('"event.name"');
- } catch (_e) {
- return false;
- }
- },
- 2000, // 2 seconds max - reduced since telemetry should flush on exit now
- 100, // check every 100ms
- );
- }
-
- async waitForToolCall(toolName, timeout) {
- // Use environment-specific timeout
- if (!timeout) {
- timeout = this.getDefaultTimeout();
- }
-
- // Wait for telemetry to be ready before polling for tool calls
- await this.waitForTelemetryReady();
-
- return this.poll(
- () => {
- const toolLogs = this.readToolLogs();
- return toolLogs.some((log) => log.toolRequest.name === toolName);
- },
- timeout,
- 100,
- );
- }
-
- async waitForAnyToolCall(toolNames, timeout) {
- // Use environment-specific timeout
- if (!timeout) {
- timeout = this.getDefaultTimeout();
- }
-
- // Wait for telemetry to be ready before polling for tool calls
- await this.waitForTelemetryReady();
-
- return this.poll(
- () => {
- const toolLogs = this.readToolLogs();
- return toolNames.some((name) =>
- toolLogs.some((log) => log.toolRequest.name === name),
- );
- },
- timeout,
- 100,
- );
- }
-
- async poll(predicate, timeout, interval) {
- const startTime = Date.now();
- let attempts = 0;
- while (Date.now() - startTime < timeout) {
- attempts++;
- const result = predicate();
- if (env.VERBOSE === 'true' && attempts % 5 === 0) {
- console.log(
- `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`,
- );
- }
- if (result) {
- return true;
- }
- await new Promise((resolve) => setTimeout(resolve, interval));
- }
- if (env.VERBOSE === 'true') {
- console.log(`Poll timed out after ${attempts} attempts`);
- }
- return false;
- }
-
- _parseToolLogsFromStdout(stdout) {
- const logs = [];
-
- // The console output from Podman is JavaScript object notation, not JSON
- // Look for tool call events in the output
- // Updated regex to handle tool names with hyphens and underscores
- const toolCallPattern =
- /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g;
- const matches = [...stdout.matchAll(toolCallPattern)];
-
- for (const match of matches) {
- const toolName = match[1];
- const success = match[2] === 'true';
- const duration = parseInt(match[3], 10);
-
- // Try to find function_args nearby
- const matchIndex = match.index || 0;
- const contextStart = Math.max(0, matchIndex - 500);
- const contextEnd = Math.min(stdout.length, matchIndex + 500);
- const context = stdout.substring(contextStart, contextEnd);
-
- // Look for function_args in the context
- let args = '{}';
- const argsMatch = context.match(/function_args:\s*'([^']+)'/);
- if (argsMatch) {
- args = argsMatch[1];
- }
-
- // Also try to find function_name to double-check
- // Updated regex to handle tool names with hyphens and underscores
- const nameMatch = context.match(/function_name:\s*'([\w-]+)'/);
- const actualToolName = nameMatch ? nameMatch[1] : toolName;
-
- logs.push({
- timestamp: Date.now(),
- toolRequest: {
- name: actualToolName,
- args: args,
- success: success,
- duration_ms: duration,
- },
- });
- }
-
- // If no matches found with the simple pattern, try the JSON parsing approach
- // in case the format changes
- if (logs.length === 0) {
- const lines = stdout.split('\n');
- let currentObject = '';
- let inObject = false;
- let braceDepth = 0;
-
- for (const line of lines) {
- if (!inObject && line.trim() === '{') {
- inObject = true;
- braceDepth = 1;
- currentObject = line + '\n';
- } else if (inObject) {
- currentObject += line + '\n';
-
- // Count braces
- for (const char of line) {
- if (char === '{') braceDepth++;
- else if (char === '}') braceDepth--;
- }
-
- // If we've closed all braces, try to parse the object
- if (braceDepth === 0) {
- inObject = false;
- try {
- const obj = JSON.parse(currentObject);
-
- // Check for tool call in different formats
- if (
- obj.body &&
- obj.body.includes('Tool call:') &&
- obj.attributes
- ) {
- const bodyMatch = obj.body.match(/Tool call: (\w+)\./);
- if (bodyMatch) {
- logs.push({
- timestamp: obj.timestamp || Date.now(),
- toolRequest: {
- name: bodyMatch[1],
- args: obj.attributes.function_args || '{}',
- success: obj.attributes.success !== false,
- duration_ms: obj.attributes.duration_ms || 0,
- },
- });
- }
- } else if (
- obj.attributes &&
- obj.attributes['event.name'] === 'gemini_cli.tool_call'
- ) {
- logs.push({
- timestamp: obj.attributes['event.timestamp'],
- toolRequest: {
- name: obj.attributes.function_name,
- args: obj.attributes.function_args,
- success: obj.attributes.success,
- duration_ms: obj.attributes.duration_ms,
- },
- });
- }
- } catch (_e) {
- // Not valid JSON
- }
- currentObject = '';
- }
- }
- }
- }
-
- return logs;
- }
-
- readToolLogs() {
- // For Podman, first check if telemetry file exists and has content
- // If not, fall back to parsing from stdout
- if (env.GEMINI_SANDBOX === 'podman') {
- // Try reading from file first
- const logFilePath = join(this.testDir, 'telemetry.log');
-
- if (fileExists(logFilePath)) {
- try {
- const content = readFileSync(logFilePath, 'utf-8');
- if (content && content.includes('"event.name"')) {
- // File has content, use normal file parsing
- // Continue to the normal file parsing logic below
- } else if (this._lastRunStdout) {
- // File exists but is empty or doesn't have events, parse from stdout
- return this._parseToolLogsFromStdout(this._lastRunStdout);
- }
- } catch (_e) {
- // Error reading file, fall back to stdout
- if (this._lastRunStdout) {
- return this._parseToolLogsFromStdout(this._lastRunStdout);
- }
- }
- } else if (this._lastRunStdout) {
- // No file exists, parse from stdout
- return this._parseToolLogsFromStdout(this._lastRunStdout);
- }
- }
-
- // In sandbox mode, telemetry is written to a relative path in the test directory
- const logFilePath =
- env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
- ? join(this.testDir, 'telemetry.log')
- : env.TELEMETRY_LOG_FILE;
-
- if (!logFilePath) {
- console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
- return [];
- }
-
- // Check if file exists, if not return empty array (file might not be created yet)
- if (!fileExists(logFilePath)) {
- return [];
- }
-
- const content = readFileSync(logFilePath, 'utf-8');
-
- // Split the content into individual JSON objects
- // They are separated by "}\n{" pattern
- const jsonObjects = content
- .split(/}\s*\n\s*{/)
- .map((obj, index, array) => {
- // Add back the braces we removed during split
- if (index > 0) obj = '{' + obj;
- if (index < array.length - 1) obj = obj + '}';
- return obj.trim();
- })
- .filter((obj) => obj);
-
- const logs = [];
-
- for (const jsonStr of jsonObjects) {
- try {
- const logData = JSON.parse(jsonStr);
- // Look for tool call logs
- if (
- logData.attributes &&
- logData.attributes['event.name'] === 'gemini_cli.tool_call'
- ) {
- const toolName = logData.attributes.function_name;
- logs.push({
- toolRequest: {
- name: toolName,
- args: logData.attributes.function_args,
- success: logData.attributes.success,
- duration_ms: logData.attributes.duration_ms,
- },
- });
- }
- } catch (_e) {
- // Skip objects that aren't valid JSON
- if (env.VERBOSE === 'true') {
- console.error('Failed to parse telemetry object:', _e.message);
- }
- }
- }
-
- return logs;
- }
-}