fix(tests): refactor integration tests to be less flaky (#4890)

Co-authored-by: matt korwel <[email protected]>
author: Allen Hutchison <[email protected]> 2025-08-01 14:33:33 -0700
committer: GitHub <[email protected]> 2025-08-01 21:33:33 +0000
commit: 387706607dfa372f4f0c6fee14286bf4a290b258 (patch)
tree: 353e559b91a6a03809ada72800b1f36d402d4c7c /integration-tests/run_shell_command.test.js
parent: dccca91fc944424b032b09d29afb85d225a71dcc (diff)
1 files changed, 45 insertions, 13 deletions
diff --git a/integration-tests/run_shell_command.test.js b/integration-tests/run_shell_command.test.js
index 52aee194..2a5f9ed4 100644
--- a/integration-tests/run_shell_command.test.js
+++ b/integration-tests/run_shell_command.test.js
@@ -6,26 +6,58 @@
 
 import { test } from 'node:test';
 import { strict as assert } from 'assert';
-import { TestRig } from './test-helper.js';
+import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
 
-test('should be able to run a shell command', async (t) => {
+test('should be able to run a shell command', async () => {
   const rig = new TestRig();
-  rig.setup(t.name);
-  rig.createFile('blah.txt', 'some content');
+  await rig.setup('should be able to run a shell command');
 
-  const prompt = `Can you use ls to list the contexts of the current folder`;
-  const result = rig.run(prompt);
+  const prompt = `Please run the command "echo hello-world" and show me the output`;
 
-  assert.ok(result.includes('blah.txt'));
+  const result = await rig.run(prompt);
+
+  const foundToolCall = await rig.waitForToolCall('run_shell_command');
+
+  // Add debugging information
+  if (!foundToolCall || !result.includes('hello-world')) {
+    printDebugInfo(rig, result, {
+      'Found tool call': foundToolCall,
+      'Contains hello-world': result.includes('hello-world'),
+    });
+  }
+
+  assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call');
+
+  // Validate model output - will throw if no output, warn if missing expected content
+  // Model often reports exit code instead of showing output
+  validateModelOutput(
+    result,
+    ['hello-world', 'exit code 0'],
+    'Shell command test',
+  );
 });
 
-test('should be able to run a shell command via stdin', async (t) => {
+test('should be able to run a shell command via stdin', async () => {
   const rig = new TestRig();
-  rig.setup(t.name);
-  rig.createFile('blah.txt', 'some content');
+  await rig.setup('should be able to run a shell command via stdin');
+
+  const prompt = `Please run the command "echo test-stdin" and show me what it outputs`;
+
+  const result = await rig.run({ stdin: prompt });
+
+  const foundToolCall = await rig.waitForToolCall('run_shell_command');
+
+  // Add debugging information
+  if (!foundToolCall || !result.includes('test-stdin')) {
+    printDebugInfo(rig, result, {
+      'Test type': 'Stdin test',
+      'Found tool call': foundToolCall,
+      'Contains test-stdin': result.includes('test-stdin'),
+    });
+  }
 
-  const prompt = `Can you use ls to list the contexts of the current folder`;
-  const result = rig.run({ stdin: prompt });
+  assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call');
 
-  assert.ok(result.includes('blah.txt'));
+  // Validate model output - will throw if no output, warn if missing expected content
+  validateModelOutput(result, 'test-stdin', 'Shell command stdin test');
 });
author	Allen Hutchison <[email protected]>	2025-08-01 14:33:33 -0700
committer	GitHub <[email protected]>	2025-08-01 21:33:33 +0000
commit	387706607dfa372f4f0c6fee14286bf4a290b258 (patch)
tree	353e559b91a6a03809ada72800b1f36d402d4c7c /integration-tests/run_shell_command.test.js
parent	dccca91fc944424b032b09d29afb85d225a71dcc (diff)