1 files changed, 348 insertions, 128 deletions
diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts
index 460c871a..55364197 100644
--- a/packages/core/src/tools/shell.test.ts
+++ b/packages/core/src/tools/shell.test.ts
@@ -4,164 +4,384 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { expect, describe, it, vi, beforeEach } from 'vitest';
+import {
+  vi,
+  describe,
+  it,
+  expect,
+  beforeEach,
+  afterEach,
+  type Mock,
+} from 'vitest';
+
+const mockShellExecutionService = vi.hoisted(() => vi.fn());
+vi.mock('../services/shellExecutionService.js', () => ({
+  ShellExecutionService: { execute: mockShellExecutionService },
+}));
+vi.mock('fs');
+vi.mock('os');
+vi.mock('crypto');
+vi.mock('../utils/summarizer.js');
+
+import { isCommandAllowed } from '../utils/shell-utils.js';
 import { ShellTool } from './shell.js';
-import { Config } from '../config/config.js';
+import { type Config } from '../config/config.js';
+import {
+  type ShellExecutionResult,
+  type ShellOutputEvent,
+} from '../services/shellExecutionService.js';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import * as crypto from 'crypto';
 import * as summarizer from '../utils/summarizer.js';
-import { GeminiClient } from '../core/client.js';
-import { ToolExecuteConfirmationDetails } from './tools.js';
-import os from 'os';
+import { ToolConfirmationOutcome } from './tools.js';
+import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js';
 
-describe('ShellTool Bug Reproduction', () => {
+describe('ShellTool', () => {
   let shellTool: ShellTool;
-  let config: Config;
+  let mockConfig: Config;
+  let mockShellOutputCallback: (event: ShellOutputEvent) => void;
+  let resolveExecutionPromise: (result: ShellExecutionResult) => void;
 
   beforeEach(() => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
-        [shellTool.name]: {},
-      }),
+    vi.clearAllMocks();
+
+    mockConfig = {
+      getCoreTools: vi.fn().mockReturnValue([]),
+      getExcludeTools: vi.fn().mockReturnValue([]),
+      getDebugMode: vi.fn().mockReturnValue(false),
+      getTargetDir: vi.fn().mockReturnValue('/test/dir'),
+      getSummarizeToolOutputConfig: vi.fn().mockReturnValue(undefined),
+      getGeminiClient: vi.fn(),
     } as unknown as Config;
-    shellTool = new ShellTool(config);
-  });
 
-  it('should not let the summarizer override the return display', async () => {
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+    shellTool = new ShellTool(mockConfig);
 
-    const abortSignal = new AbortController().signal;
-    const result = await shellTool.execute(
-      { command: 'echo hello' },
-      abortSignal,
-      () => {},
+    vi.mocked(os.platform).mockReturnValue('linux');
+    vi.mocked(os.tmpdir).mockReturnValue('/tmp');
+    (vi.mocked(crypto.randomBytes) as Mock).mockReturnValue(
+      Buffer.from('abcdef', 'hex'),
     );
 
-    expect(result.returnDisplay).toBe('hello' + os.EOL);
-    expect(result.llmContent).toBe('summarized output');
-    expect(summarizeSpy).toHaveBeenCalled();
+    // Capture the output callback to simulate streaming events from the service
+    mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => {
+      mockShellOutputCallback = callback;
+      return {
+        pid: 12345,
+        result: new Promise((resolve) => {
+          resolveExecutionPromise = resolve;
+        }),
+      };
+    });
   });
 
-  it('should not call summarizer if disabled in config', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({}),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+  describe('isCommandAllowed', () => {
+    it('should allow a command if no restrictions are provided', () => {
+      (mockConfig.getCoreTools as Mock).mockReturnValue(undefined);
+      (mockConfig.getExcludeTools as Mock).mockReturnValue(undefined);
+      expect(isCommandAllowed('ls -l', mockConfig).allowed).toBe(true);
+    });
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+    it('should block a command with command substitution using $()', () => {
+      expect(isCommandAllowed('echo $(rm -rf /)', mockConfig).allowed).toBe(
+        false,
+      );
+    });
+  });
 
-    const abortSignal = new AbortController().signal;
-    const result = await shellTool.execute(
-      { command: 'echo hello' },
-      abortSignal,
-      () => {},
-    );
+  describe('validateToolParams', () => {
+    it('should return null for a valid command', () => {
+      expect(shellTool.validateToolParams({ command: 'ls -l' })).toBeNull();
+    });
+
+    it('should return an error for an empty command', () => {
+      expect(shellTool.validateToolParams({ command: ' ' })).toBe(
+        'Command cannot be empty.',
+      );
+    });
 
-    expect(result.returnDisplay).toBe('hello' + os.EOL);
-    expect(result.llmContent).not.toBe('summarized output');
-    expect(summarizeSpy).not.toHaveBeenCalled();
+    it('should return an error for a non-existent directory', () => {
+      vi.mocked(fs.existsSync).mockReturnValue(false);
+      expect(
+        shellTool.validateToolParams({ command: 'ls', directory: 'rel/path' }),
+      ).toBe('Directory must exist.');
+    });
   });
 
-  it('should pass token budget to summarizer', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
+  describe('execute', () => {
+    const mockAbortSignal = new AbortController().signal;
+
+    const resolveShellExecution = (
+      result: Partial<ShellExecutionResult> = {},
+    ) => {
+      const fullResult: ShellExecutionResult = {
+        rawOutput: Buffer.from(result.output || ''),
+        output: 'Success',
+        stdout: 'Success',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+        ...result,
+      };
+      resolveExecutionPromise(fullResult);
+    };
+
+    it('should wrap command on linux and parse pgrep output', async () => {
+      const promise = shellTool.execute(
+        { command: 'my-command &' },
+        mockAbortSignal,
+      );
+      resolveShellExecution({ pid: 54321 });
+
+      vi.mocked(fs.existsSync).mockReturnValue(true);
+      vi.mocked(fs.readFileSync).mockReturnValue('54321\n54322\n'); // Service PID and background PID
+
+      const result = await promise;
+
+      const tmpFile = path.join(os.tmpdir(), 'shell_pgrep_abcdef.tmp');
+      const wrappedCommand = `{ my-command & }; __code=$?; pgrep -g 0 >${tmpFile} 2>&1; exit $__code;`;
+      expect(mockShellExecutionService).toHaveBeenCalledWith(
+        wrappedCommand,
+        expect.any(String),
+        expect.any(Function),
+        mockAbortSignal,
+      );
+      expect(result.llmContent).toContain('Background PIDs: 54322');
+      expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+    });
+
+    it('should not wrap command on windows', async () => {
+      vi.mocked(os.platform).mockReturnValue('win32');
+      const promise = shellTool.execute({ command: 'dir' }, mockAbortSignal);
+      resolveExecutionPromise({
+        rawOutput: Buffer.from(''),
+        output: '',
+        stdout: '',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+      });
+      await promise;
+      expect(mockShellExecutionService).toHaveBeenCalledWith(
+        'dir',
+        expect.any(String),
+        expect.any(Function),
+        mockAbortSignal,
+      );
+    });
+
+    it('should format error messages correctly', async () => {
+      const error = new Error('wrapped command failed');
+      const promise = shellTool.execute(
+        { command: 'user-command' },
+        mockAbortSignal,
+      );
+      resolveShellExecution({
+        error,
+        exitCode: 1,
+        output: 'err',
+        stderr: 'err',
+        rawOutput: Buffer.from('err'),
+        stdout: '',
+        signal: null,
+        aborted: false,
+        pid: 12345,
+      });
+
+      const result = await promise;
+      // The final llmContent should contain the user's command, not the wrapper
+      expect(result.llmContent).toContain('Error: wrapped command failed');
+      expect(result.llmContent).not.toContain('pgrep');
+    });
+
+    it('should summarize output when configured', async () => {
+      (mockConfig.getSummarizeToolOutputConfig as Mock).mockReturnValue({
         [shellTool.name]: { tokenBudget: 1000 },
-      }),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+      });
+      vi.mocked(summarizer.summarizeToolOutput).mockResolvedValue(
+        'summarized output',
+      );
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+      const promise = shellTool.execute({ command: 'ls' }, mockAbortSignal);
+      resolveExecutionPromise({
+        output: 'long output',
+        rawOutput: Buffer.from('long output'),
+        stdout: 'long output',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+      });
 
-    const abortSignal = new AbortController().signal;
-    await shellTool.execute({ command: 'echo "hello"' }, abortSignal, () => {});
+      const result = await promise;
 
-    expect(summarizeSpy).toHaveBeenCalledWith(
-      expect.any(String),
-      expect.any(Object),
-      expect.any(Object),
-      1000,
-    );
-  });
+      expect(summarizer.summarizeToolOutput).toHaveBeenCalledWith(
+        expect.any(String),
+        mockConfig.getGeminiClient(),
+        mockAbortSignal,
+        1000,
+      );
+      expect(result.llmContent).toBe('summarized output');
+      expect(result.returnDisplay).toBe('long output');
+    });
 
-  it('should use default token budget if not specified', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
-        [shellTool.name]: {},
-      }),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+    it('should clean up the temp file on synchronous execution error', async () => {
+      const error = new Error('sync spawn error');
+      mockShellExecutionService.mockImplementation(() => {
+        throw error;
+      });
+      vi.mocked(fs.existsSync).mockReturnValue(true); // Pretend the file exists
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+      await expect(
+        shellTool.execute({ command: 'a-command' }, mockAbortSignal),
+      ).rejects.toThrow(error);
 
-    const abortSignal = new AbortController().signal;
-    await shellTool.execute({ command: 'echo "hello"' }, abortSignal, () => {});
+      const tmpFile = path.join(os.tmpdir(), 'shell_pgrep_abcdef.tmp');
+      expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+    });
 
-    expect(summarizeSpy).toHaveBeenCalledWith(
-      expect.any(String),
-      expect.any(Object),
-      expect.any(Object),
-      undefined,
-    );
-  });
+    describe('Streaming to `updateOutput`', () => {
+      let updateOutputMock: Mock;
+      beforeEach(() => {
+        vi.useFakeTimers({ toFake: ['Date'] });
+        updateOutputMock = vi.fn();
+      });
+      afterEach(() => {
+        vi.useRealTimers();
+      });
 
-  it('should pass GEMINI_CLI environment variable to executed commands', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({}),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+      it('should throttle text output updates', async () => {
+        const promise = shellTool.execute(
+          { command: 'stream' },
+          mockAbortSignal,
+          updateOutputMock,
+        );
+
+        // First chunk, should be throttled.
+        mockShellOutputCallback({
+          type: 'data',
+          stream: 'stdout',
+          chunk: 'hello ',
+        });
+        expect(updateOutputMock).not.toHaveBeenCalled();
+
+        // Advance time past the throttle interval.
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+
+        // Send a second chunk. THIS event triggers the update with the CUMULATIVE content.
+        mockShellOutputCallback({
+          type: 'data',
+          stream: 'stderr',
+          chunk: 'world',
+        });
+
+        // It should have been called once now with the combined output.
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+        expect(updateOutputMock).toHaveBeenCalledWith('hello \nworld');
+
+        resolveExecutionPromise({
+          rawOutput: Buffer.from(''),
+          output: '',
+          stdout: '',
+          stderr: '',
+          exitCode: 0,
+          signal: null,
+          error: null,
+          aborted: false,
+          pid: 12345,
+        });
+        await promise;
+      });
+
+      it('should immediately show binary detection message and throttle progress', async () => {
+        const promise = shellTool.execute(
+          { command: 'cat img' },
+          mockAbortSignal,
+          updateOutputMock,
+        );
 
-    const abortSignal = new AbortController().signal;
-    const command =
-      os.platform() === 'win32' ? 'echo %GEMINI_CLI%' : 'echo "$GEMINI_CLI"';
-    const result = await shellTool.execute({ command }, abortSignal, () => {});
+        mockShellOutputCallback({ type: 'binary_detected' });
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+        expect(updateOutputMock).toHaveBeenCalledWith(
+          '[Binary output detected. Halting stream...]',
+        );
 
-    expect(result.returnDisplay).toBe('1' + os.EOL);
+        mockShellOutputCallback({
+          type: 'binary_progress',
+          bytesReceived: 1024,
+        });
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+
+        // Advance time past the throttle interval.
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+
+        // Send a SECOND progress event. This one will trigger the flush.
+        mockShellOutputCallback({
+          type: 'binary_progress',
+          bytesReceived: 2048,
+        });
+
+        // Now it should be called a second time with the latest progress.
+        expect(updateOutputMock).toHaveBeenCalledTimes(2);
+        expect(updateOutputMock).toHaveBeenLastCalledWith(
+          '[Receiving binary output... 2.0 KB received]',
+        );
+
+        resolveExecutionPromise({
+          rawOutput: Buffer.from(''),
+          output: '',
+          stdout: '',
+          stderr: '',
+          exitCode: 0,
+          signal: null,
+          error: null,
+          aborted: false,
+          pid: 12345,
+        });
+        await promise;
+      });
+    });
   });
-});
 
-describe('shouldConfirmExecute', () => {
-  it('should de-duplicate command roots before asking for confirmation', async () => {
-    const shellTool = new ShellTool({
-      getCoreTools: () => ['run_shell_command'],
-      getExcludeTools: () => [],
-    } as unknown as Config);
-    const result = (await shellTool.shouldConfirmExecute(
-      {
-        command: 'git status && git log',
-      },
-      new AbortController().signal,
-    )) as ToolExecuteConfirmationDetails;
-    expect(result.rootCommand).toEqual('git');
+  describe('shouldConfirmExecute', () => {
+    it('should request confirmation for a new command and whitelist it on "Always"', async () => {
+      const params = { command: 'npm install' };
+      const confirmation = await shellTool.shouldConfirmExecute(
+        params,
+        new AbortController().signal,
+      );
+
+      expect(confirmation).not.toBe(false);
+      expect(confirmation && confirmation.type).toBe('exec');
+
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      await (confirmation as any).onConfirm(
+        ToolConfirmationOutcome.ProceedAlways,
+      );
+
+      // Should now be whitelisted
+      const secondConfirmation = await shellTool.shouldConfirmExecute(
+        { command: 'npm test' },
+        new AbortController().signal,
+      );
+      expect(secondConfirmation).toBe(false);
+    });
+
+    it('should skip confirmation if validation fails', async () => {
+      const confirmation = await shellTool.shouldConfirmExecute(
+        { command: '' },
+        new AbortController().signal,
+      );
+      expect(confirmation).toBe(false);
+    });
   });
 });