diff options
| author | Allen Hutchison <[email protected]> | 2025-08-01 14:33:33 -0700 |
|---|---|---|
| committer | GitHub <[email protected]> | 2025-08-01 21:33:33 +0000 |
| commit | 387706607dfa372f4f0c6fee14286bf4a290b258 (patch) | |
| tree | 353e559b91a6a03809ada72800b1f36d402d4c7c /integration-tests/file-system.test.js | |
| parent | dccca91fc944424b032b09d29afb85d225a71dcc (diff) | |
fix(tests): refactor integration tests to be less flaky (#4890)
Co-authored-by: matt korwel <[email protected]>
Diffstat (limited to 'integration-tests/file-system.test.js')
| -rw-r--r-- | integration-tests/file-system.test.js | 77 |
1 file changed, 68 insertions, 9 deletions
```diff
diff --git a/integration-tests/file-system.test.js b/integration-tests/file-system.test.js
index 87e9efe2..d43f047f 100644
--- a/integration-tests/file-system.test.js
+++ b/integration-tests/file-system.test.js
@@ -6,25 +6,84 @@
 
 import { strict as assert } from 'assert';
 import { test } from 'node:test';
-import { TestRig } from './test-helper.js';
+import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
 
-test('reads a file', (t) => {
+test('should be able to read a file', async () => {
   const rig = new TestRig();
-  rig.setup(t.name);
+  await rig.setup('should be able to read a file');
   rig.createFile('test.txt', 'hello world');
 
-  const output = rig.run(`read the file name test.txt`);
+  const result = await rig.run(
+    `read the file test.txt and show me its contents`,
+  );
 
-  assert.ok(output.toLowerCase().includes('hello'));
+  const foundToolCall = await rig.waitForToolCall('read_file');
+
+  // Add debugging information
+  if (!foundToolCall || !result.includes('hello world')) {
+    printDebugInfo(rig, result, {
+      'Found tool call': foundToolCall,
+      'Contains hello world': result.includes('hello world'),
+    });
+  }
+
+  assert.ok(foundToolCall, 'Expected to find a read_file tool call');
+
+  // Validate model output - will throw if no output, warn if missing expected content
+  validateModelOutput(result, 'hello world', 'File read test');
 });
 
-test('writes a file', (t) => {
+test('should be able to write a file', async () => {
   const rig = new TestRig();
-  rig.setup(t.name);
+  await rig.setup('should be able to write a file');
   rig.createFile('test.txt', '');
 
-  rig.run(`edit test.txt to have a hello world message`);
+  const result = await rig.run(`edit test.txt to have a hello world message`);
+
+  // Accept multiple valid tools for editing files
+  const foundToolCall = await rig.waitForAnyToolCall([
+    'write_file',
+    'edit',
+    'replace',
+  ]);
+
+  // Add debugging information
+  if (!foundToolCall) {
+    printDebugInfo(rig, result);
+  }
+
+  assert.ok(
+    foundToolCall,
+    'Expected to find a write_file, edit, or replace tool call',
+  );
+
+  // Validate model output - will throw if no output
+  validateModelOutput(result, null, 'File write test');
 
   const fileContent = rig.readFile('test.txt');
-  assert.ok(fileContent.toLowerCase().includes('hello'));
+
+  // Add debugging for file content
+  if (!fileContent.toLowerCase().includes('hello')) {
+    const writeCalls = rig
+      .readToolLogs()
+      .filter((t) => t.toolRequest.name === 'write_file')
+      .map((t) => t.toolRequest.args);
+
+    printDebugInfo(rig, result, {
+      'File content mismatch': true,
+      'Expected to contain': 'hello',
+      'Actual content': fileContent,
+      'Write tool calls': JSON.stringify(writeCalls),
+    });
+  }
+
+  assert.ok(
+    fileContent.toLowerCase().includes('hello'),
+    'Expected file to contain hello',
+  );
+
+  // Log success info if verbose
+  if (process.env.VERBOSE === 'true') {
+    console.log('File written successfully with hello message.');
+  }
 });
```
