fix(tests): refactor integration tests to be less flaky (#4890)

Co-authored-by: matt korwel <[email protected]>
author: Allen Hutchison <[email protected]> 2025-08-01 14:33:33 -0700
committer: GitHub <[email protected]> 2025-08-01 21:33:33 +0000
commit: 387706607dfa372f4f0c6fee14286bf4a290b258 (patch)
tree: 353e559b91a6a03809ada72800b1f36d402d4c7c /integration-tests/file-system.test.js
parent: dccca91fc944424b032b09d29afb85d225a71dcc (diff)
1 file changed, 68 insertions, 9 deletions
diff --git a/integration-tests/file-system.test.js b/integration-tests/file-system.test.js
index 87e9efe2..d43f047f 100644
--- a/integration-tests/file-system.test.js
+++ b/integration-tests/file-system.test.js
@@ -6,25 +6,84 @@
 
 import { strict as assert } from 'assert';
 import { test } from 'node:test';
-import { TestRig } from './test-helper.js';
+import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
 
-test('reads a file', (t) => {
+test('should be able to read a file', async () => {
   const rig = new TestRig();
-  rig.setup(t.name);
+  await rig.setup('should be able to read a file');
   rig.createFile('test.txt', 'hello world');
 
-  const output = rig.run(`read the file name test.txt`);
+  const result = await rig.run(
+    `read the file test.txt and show me its contents`,
+  );
 
-  assert.ok(output.toLowerCase().includes('hello'));
+  const foundToolCall = await rig.waitForToolCall('read_file');
+
+  // Add debugging information
+  if (!foundToolCall || !result.includes('hello world')) {
+    printDebugInfo(rig, result, {
+      'Found tool call': foundToolCall,
+      'Contains hello world': result.includes('hello world'),
+    });
+  }
+
+  assert.ok(foundToolCall, 'Expected to find a read_file tool call');
+
+  // Validate model output - will throw if no output, warn if missing expected content
+  validateModelOutput(result, 'hello world', 'File read test');
 });
 
-test('writes a file', (t) => {
+test('should be able to write a file', async () => {
   const rig = new TestRig();
-  rig.setup(t.name);
+  await rig.setup('should be able to write a file');
   rig.createFile('test.txt', '');
 
-  rig.run(`edit test.txt to have a hello world message`);
+  const result = await rig.run(`edit test.txt to have a hello world message`);
+
+  // Accept multiple valid tools for editing files
+  const foundToolCall = await rig.waitForAnyToolCall([
+    'write_file',
+    'edit',
+    'replace',
+  ]);
+
+  // Add debugging information
+  if (!foundToolCall) {
+    printDebugInfo(rig, result);
+  }
+
+  assert.ok(
+    foundToolCall,
+    'Expected to find a write_file, edit, or replace tool call',
+  );
+
+  // Validate model output - will throw if no output
+  validateModelOutput(result, null, 'File write test');
 
   const fileContent = rig.readFile('test.txt');
-  assert.ok(fileContent.toLowerCase().includes('hello'));
+
+  // Add debugging for file content
+  if (!fileContent.toLowerCase().includes('hello')) {
+    const writeCalls = rig
+      .readToolLogs()
+      .filter((t) => t.toolRequest.name === 'write_file')
+      .map((t) => t.toolRequest.args);
+
+    printDebugInfo(rig, result, {
+      'File content mismatch': true,
+      'Expected to contain': 'hello',
+      'Actual content': fileContent,
+      'Write tool calls': JSON.stringify(writeCalls),
+    });
+  }
+
+  assert.ok(
+    fileContent.toLowerCase().includes('hello'),
+    'Expected file to contain hello',
+  );
+
+  // Log success info if verbose
+  if (process.env.VERBOSE === 'true') {
+    console.log('File written successfully with hello message.');
+  }
 });
author	Allen Hutchison <[email protected]>	2025-08-01 14:33:33 -0700
committer	GitHub <[email protected]>	2025-08-01 21:33:33 +0000
commit	387706607dfa372f4f0c6fee14286bf4a290b258 (patch)
tree	353e559b91a6a03809ada72800b1f36d402d4c7c /integration-tests/file-system.test.js
parent	dccca91fc944424b032b09d29afb85d225a71dcc (diff)