refactor(core): Centralize shell logic into ShellExecutionService (#4823)

author: Abhi <[email protected]> 2025-07-25 21:56:49 -0400
committer: GitHub <[email protected]> 2025-07-26 01:56:49 +0000
commit: ca5dd28ab60d78f42150460cbe9d4ed58d40afe4 (patch)
tree: 6f3b3b373133a6c0bf8bb9cee8a10cc06ac83c9a
parent: ad2ef080aae2e21bf04ad8e922719ceaa81f1e5f (diff)
11 files changed, 1708 insertions, 769 deletions
diff --git a/packages/cli/src/ui/hooks/shellCommandProcessor.test.ts b/packages/cli/src/ui/hooks/shellCommandProcessor.test.ts
index 1b268502..d5270aba 100644
--- a/packages/cli/src/ui/hooks/shellCommandProcessor.test.ts
+++ b/packages/cli/src/ui/hooks/shellCommandProcessor.test.ts
@@ -5,70 +5,86 @@
  */
 
 import { act, renderHook } from '@testing-library/react';
-import { vi } from 'vitest';
-import { spawn } from 'child_process';
-import type { ChildProcessWithoutNullStreams } from 'child_process';
-import { useShellCommandProcessor } from './shellCommandProcessor';
-import { Config, GeminiClient } from '@google/gemini-cli-core';
-import * as fs from 'fs';
-import EventEmitter from 'events';
-import { ToolCallStatus } from '../types';
+import {
+  vi,
+  describe,
+  it,
+  expect,
+  beforeEach,
+  afterEach,
+  type Mock,
+} from 'vitest';
 
-// Mock dependencies
-vi.mock('child_process');
+const mockIsBinary = vi.hoisted(() => vi.fn());
+const mockShellExecutionService = vi.hoisted(() => vi.fn());
+vi.mock('@google/gemini-cli-core', async (importOriginal) => {
+  const original =
+    await importOriginal<typeof import('@google/gemini-cli-core')>();
+  return {
+    ...original,
+    ShellExecutionService: { execute: mockShellExecutionService },
+    isBinary: mockIsBinary,
+  };
+});
 vi.mock('fs');
-vi.mock('os', () => ({
-  default: {
-    platform: () => 'linux',
-    tmpdir: () => '/tmp',
-    homedir: () => '/home/user',
-  },
-  platform: () => 'linux',
-  tmpdir: () => '/tmp',
-  homedir: () => '/home/user',
-}));
-vi.mock('@google/gemini-cli-core');
-vi.mock('../utils/textUtils.js', () => ({
-  isBinary: vi.fn(),
-}));
+vi.mock('os');
+vi.mock('crypto');
+vi.mock('../utils/textUtils.js');
+
+import {
+  useShellCommandProcessor,
+  OUTPUT_UPDATE_INTERVAL_MS,
+} from './shellCommandProcessor.js';
+import {
+  type Config,
+  type GeminiClient,
+  type ShellExecutionResult,
+  type ShellOutputEvent,
+} from '@google/gemini-cli-core';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import * as crypto from 'crypto';
+import { ToolCallStatus } from '../types.js';
 
 describe('useShellCommandProcessor', () => {
-  let spawnEmitter: EventEmitter;
-  let addItemToHistoryMock: vi.Mock;
-  let setPendingHistoryItemMock: vi.Mock;
-  let onExecMock: vi.Mock;
-  let onDebugMessageMock: vi.Mock;
-  let configMock: Config;
-  let geminiClientMock: GeminiClient;
+  let addItemToHistoryMock: Mock;
+  let setPendingHistoryItemMock: Mock;
+  let onExecMock: Mock;
+  let onDebugMessageMock: Mock;
+  let mockConfig: Config;
+  let mockGeminiClient: GeminiClient;
 
-  beforeEach(() => {
-    spawnEmitter = new EventEmitter();
-    spawnEmitter.stdout = new EventEmitter();
-    spawnEmitter.stderr = new EventEmitter();
-    vi.mocked(spawn).mockReturnValue(
-      spawnEmitter as ChildProcessWithoutNullStreams,
-    );
+  let mockShellOutputCallback: (event: ShellOutputEvent) => void;
+  let resolveExecutionPromise: (result: ShellExecutionResult) => void;
 
-    vi.spyOn(fs, 'existsSync').mockReturnValue(false);
-    vi.spyOn(fs, 'readFileSync').mockReturnValue('');
-    vi.spyOn(fs, 'unlinkSync').mockReturnValue(undefined);
+  beforeEach(() => {
+    vi.clearAllMocks();
 
     addItemToHistoryMock = vi.fn();
     setPendingHistoryItemMock = vi.fn();
     onExecMock = vi.fn();
     onDebugMessageMock = vi.fn();
+    mockConfig = { getTargetDir: () => '/test/dir' } as Config;
+    mockGeminiClient = { addHistory: vi.fn() } as unknown as GeminiClient;
 
-    configMock = {
-      getTargetDir: () => '/test/dir',
-    } as unknown as Config;
-
-    geminiClientMock = {
-      addHistory: vi.fn(),
-    } as unknown as GeminiClient;
-  });
+    vi.mocked(os.platform).mockReturnValue('linux');
+    vi.mocked(os.tmpdir).mockReturnValue('/tmp');
+    (vi.mocked(crypto.randomBytes) as Mock).mockReturnValue(
+      Buffer.from('abcdef', 'hex'),
+    );
+    mockIsBinary.mockReturnValue(false);
+    vi.mocked(fs.existsSync).mockReturnValue(false);
 
-  afterEach(() => {
-    vi.restoreAllMocks();
+    mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => {
+      mockShellOutputCallback = callback;
+      return {
+        pid: 12345,
+        result: new Promise((resolve) => {
+          resolveExecutionPromise = resolve;
+        }),
+      };
+    });
   });
 
   const renderProcessorHook = () =>
@@ -78,140 +94,386 @@ describe('useShellCommandProcessor', () => {
         setPendingHistoryItemMock,
         onExecMock,
         onDebugMessageMock,
-        configMock,
-        geminiClientMock,
+        mockConfig,
+        mockGeminiClient,
       ),
     );
 
-  it('should execute a command and update history on success', async () => {
+  const createMockServiceResult = (
+    overrides: Partial<ShellExecutionResult> = {},
+  ): ShellExecutionResult => ({
+    rawOutput: Buffer.from(overrides.output || ''),
+    output: 'Success',
+    stdout: 'Success',
+    stderr: '',
+    exitCode: 0,
+    signal: null,
+    error: null,
+    aborted: false,
+    pid: 12345,
+    ...overrides,
+  });
+
+  it('should initiate command execution and set pending state', async () => {
     const { result } = renderProcessorHook();
-    const abortController = new AbortController();
 
     act(() => {
-      result.current.handleShellCommand('ls -l', abortController.signal);
+      result.current.handleShellCommand('ls -l', new AbortController().signal);
     });
 
-    expect(spawn).toHaveBeenCalledWith(
-      'bash',
-      ['-c', expect.any(String)],
-      expect.objectContaining({
-        env: expect.objectContaining({
-          GEMINI_CLI: '1',
+    expect(addItemToHistoryMock).toHaveBeenCalledWith(
+      { type: 'user_shell', text: 'ls -l' },
+      expect.any(Number),
+    );
+    expect(setPendingHistoryItemMock).toHaveBeenCalledWith({
+      type: 'tool_group',
+      tools: [
+        expect.objectContaining({
+          name: 'Shell Command',
+          status: ToolCallStatus.Executing,
         }),
-      }),
+      ],
+    });
+    const tmpFile = path.join(os.tmpdir(), 'shell_pwd_abcdef.tmp');
+    const wrappedCommand = `{ ls -l; }; __code=$?; pwd > "${tmpFile}"; exit $__code`;
+    expect(mockShellExecutionService).toHaveBeenCalledWith(
+      wrappedCommand,
+      '/test/dir',
+      expect.any(Function),
+      expect.any(Object),
     );
+    expect(onExecMock).toHaveBeenCalledWith(expect.any(Promise));
+  });
 
-    expect(onExecMock).toHaveBeenCalledTimes(1);
+  it('should handle successful execution and update history correctly', async () => {
+    const { result } = renderProcessorHook();
+
+    act(() => {
+      result.current.handleShellCommand(
+        'echo "ok"',
+        new AbortController().signal,
+      );
+    });
     const execPromise = onExecMock.mock.calls[0][0];
 
-    // Simulate stdout
     act(() => {
-      spawnEmitter.stdout.emit('data', Buffer.from('file1.txt\nfile2.txt'));
+      resolveExecutionPromise(createMockServiceResult({ output: 'ok' }));
+    });
+    await act(async () => await execPromise);
+
+    expect(setPendingHistoryItemMock).toHaveBeenCalledWith(null);
+    expect(addItemToHistoryMock).toHaveBeenCalledTimes(2); // Initial + final
+    expect(addItemToHistoryMock.mock.calls[1][0]).toEqual(
+      expect.objectContaining({
+        tools: [
+          expect.objectContaining({
+            status: ToolCallStatus.Success,
+            resultDisplay: 'ok',
+          }),
+        ],
+      }),
+    );
+    expect(mockGeminiClient.addHistory).toHaveBeenCalled();
+  });
+
+  it('should handle command failure and display error status', async () => {
+    const { result } = renderProcessorHook();
+
+    act(() => {
+      result.current.handleShellCommand(
+        'bad-cmd',
+        new AbortController().signal,
+      );
     });
+    const execPromise = onExecMock.mock.calls[0][0];
 
-    // Simulate process exit
     act(() => {
-      spawnEmitter.emit('exit', 0, null);
+      resolveExecutionPromise(
+        createMockServiceResult({ exitCode: 127, output: 'not found' }),
+      );
     });
+    await act(async () => await execPromise);
+
+    const finalHistoryItem = addItemToHistoryMock.mock.calls[1][0];
+    expect(finalHistoryItem.tools[0].status).toBe(ToolCallStatus.Error);
+    expect(finalHistoryItem.tools[0].resultDisplay).toContain(
+      'Command exited with code 127',
+    );
+    expect(finalHistoryItem.tools[0].resultDisplay).toContain('not found');
+  });
 
-    await act(async () => {
-      await execPromise;
+  describe('UI Streaming and Throttling', () => {
+    beforeEach(() => {
+      vi.useFakeTimers({ toFake: ['Date'] });
+    });
+    afterEach(() => {
+      vi.useRealTimers();
     });
 
-    expect(addItemToHistoryMock).toHaveBeenCalledTimes(2);
-    expect(addItemToHistoryMock.mock.calls[1][0]).toEqual({
-      type: 'tool_group',
-      tools: [
+    it('should throttle pending UI updates for text streams', async () => {
+      const { result } = renderProcessorHook();
+      act(() => {
+        result.current.handleShellCommand(
+          'stream',
+          new AbortController().signal,
+        );
+      });
+
+      // Simulate rapid output
+      act(() => {
+        mockShellOutputCallback({
+          type: 'data',
+          stream: 'stdout',
+          chunk: 'hello',
+        });
+      });
+
+      // Should not have updated the UI yet
+      expect(setPendingHistoryItemMock).toHaveBeenCalledTimes(1); // Only the initial call
+
+      // Advance time and send another event to trigger the throttled update
+      await act(async () => {
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+      });
+      act(() => {
+        mockShellOutputCallback({
+          type: 'data',
+          stream: 'stdout',
+          chunk: ' world',
+        });
+      });
+
+      // Should now have been called with the cumulative output
+      expect(setPendingHistoryItemMock).toHaveBeenCalledTimes(2);
+      expect(setPendingHistoryItemMock).toHaveBeenLastCalledWith(
         expect.objectContaining({
-          name: 'Shell Command',
-          description: 'ls -l',
-          status: ToolCallStatus.Success,
-          resultDisplay: 'file1.txt\nfile2.txt',
+          tools: [expect.objectContaining({ resultDisplay: 'hello world' })],
         }),
-      ],
+      );
+    });
+
+    it('should show binary progress messages correctly', async () => {
+      const { result } = renderProcessorHook();
+      act(() => {
+        result.current.handleShellCommand(
+          'cat img',
+          new AbortController().signal,
+        );
+      });
+
+      // Should immediately show the detection message
+      act(() => {
+        mockShellOutputCallback({ type: 'binary_detected' });
+      });
+      await act(async () => {
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+      });
+      // Send another event to trigger the update
+      act(() => {
+        mockShellOutputCallback({ type: 'binary_progress', bytesReceived: 0 });
+      });
+
+      expect(setPendingHistoryItemMock).toHaveBeenLastCalledWith(
+        expect.objectContaining({
+          tools: [
+            expect.objectContaining({
+              resultDisplay: '[Binary output detected. Halting stream...]',
+            }),
+          ],
+        }),
+      );
+
+      // Now test progress updates
+      await act(async () => {
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+      });
+      act(() => {
+        mockShellOutputCallback({
+          type: 'binary_progress',
+          bytesReceived: 2048,
+        });
+      });
+
+      expect(setPendingHistoryItemMock).toHaveBeenLastCalledWith(
+        expect.objectContaining({
+          tools: [
+            expect.objectContaining({
+              resultDisplay: '[Receiving binary output... 2.0 KB received]',
+            }),
+          ],
+        }),
+      );
     });
-    expect(geminiClientMock.addHistory).toHaveBeenCalledTimes(1);
   });
 
-  it('should handle binary output', async () => {
+  it('should not wrap the command on Windows', async () => {
+    vi.mocked(os.platform).mockReturnValue('win32');
+    const { result } = renderProcessorHook();
+
+    act(() => {
+      result.current.handleShellCommand('dir', new AbortController().signal);
+    });
+
+    expect(mockShellExecutionService).toHaveBeenCalledWith(
+      'dir',
+      '/test/dir',
+      expect.any(Function),
+      expect.any(Object),
+    );
+  });
+
+  it('should handle command abort and display cancelled status', async () => {
     const { result } = renderProcessorHook();
     const abortController = new AbortController();
-    const { isBinary } = await import('../utils/textUtils.js');
-    (isBinary as vi.Mock).mockReturnValue(true);
 
     act(() => {
-      result.current.handleShellCommand(
-        'cat myimage.png',
-        abortController.signal,
+      result.current.handleShellCommand('sleep 5', abortController.signal);
+    });
+    const execPromise = onExecMock.mock.calls[0][0];
+
+    act(() => {
+      abortController.abort();
+      resolveExecutionPromise(
+        createMockServiceResult({ aborted: true, output: 'Canceled' }),
       );
     });
+    await act(async () => await execPromise);
 
-    expect(onExecMock).toHaveBeenCalledTimes(1);
-    const execPromise = onExecMock.mock.calls[0][0];
+    const finalHistoryItem = addItemToHistoryMock.mock.calls[1][0];
+    expect(finalHistoryItem.tools[0].status).toBe(ToolCallStatus.Canceled);
+    expect(finalHistoryItem.tools[0].resultDisplay).toContain(
+      'Command was cancelled.',
+    );
+  });
+
+  it('should handle binary output result correctly', async () => {
+    const { result } = renderProcessorHook();
+    const binaryBuffer = Buffer.from([0x89, 0x50, 0x4e, 0x47]);
+    mockIsBinary.mockReturnValue(true);
 
     act(() => {
-      spawnEmitter.stdout.emit('data', Buffer.from([0x89, 0x50, 0x4e, 0x47]));
+      result.current.handleShellCommand(
+        'cat image.png',
+        new AbortController().signal,
+      );
     });
+    const execPromise = onExecMock.mock.calls[0][0];
 
     act(() => {
-      spawnEmitter.emit('exit', 0, null);
+      resolveExecutionPromise(
+        createMockServiceResult({ rawOutput: binaryBuffer }),
+      );
     });
+    await act(async () => await execPromise);
 
-    await act(async () => {
-      await execPromise;
+    const finalHistoryItem = addItemToHistoryMock.mock.calls[1][0];
+    expect(finalHistoryItem.tools[0].status).toBe(ToolCallStatus.Success);
+    expect(finalHistoryItem.tools[0].resultDisplay).toBe(
+      '[Command produced binary output, which is not shown.]',
+    );
+  });
+
+  it('should handle promise rejection and show an error', async () => {
+    const { result } = renderProcessorHook();
+    const testError = new Error('Unexpected failure');
+    mockShellExecutionService.mockImplementation(() => ({
+      pid: 12345,
+      result: Promise.reject(testError),
+    }));
+
+    act(() => {
+      result.current.handleShellCommand(
+        'a-command',
+        new AbortController().signal,
+      );
     });
+    const execPromise = onExecMock.mock.calls[0][0];
+
+    await act(async () => await execPromise);
 
+    expect(setPendingHistoryItemMock).toHaveBeenCalledWith(null);
     expect(addItemToHistoryMock).toHaveBeenCalledTimes(2);
     expect(addItemToHistoryMock.mock.calls[1][0]).toEqual({
-      type: 'tool_group',
-      tools: [
-        expect.objectContaining({
-          name: 'Shell Command',
-          description: 'cat myimage.png',
-          status: ToolCallStatus.Success,
-          resultDisplay:
-            '[Command produced binary output, which is not shown.]',
-        }),
-      ],
+      type: 'error',
+      text: 'An unexpected error occurred: Unexpected failure',
     });
   });
 
-  it('should handle command failure', async () => {
+  it('should handle synchronous errors during execution and clean up resources', async () => {
+    const testError = new Error('Synchronous spawn error');
+    mockShellExecutionService.mockImplementation(() => {
+      throw testError;
+    });
+    // Mock that the temp file was created before the error was thrown
+    vi.mocked(fs.existsSync).mockReturnValue(true);
+
     const { result } = renderProcessorHook();
-    const abortController = new AbortController();
 
     act(() => {
       result.current.handleShellCommand(
-        'a-bad-command',
-        abortController.signal,
+        'a-command',
+        new AbortController().signal,
       );
     });
-
     const execPromise = onExecMock.mock.calls[0][0];
 
-    act(() => {
-      spawnEmitter.stderr.emit('data', Buffer.from('command not found'));
-    });
+    await act(async () => await execPromise);
 
-    act(() => {
-      spawnEmitter.emit('exit', 127, null);
+    expect(setPendingHistoryItemMock).toHaveBeenCalledWith(null);
+    expect(addItemToHistoryMock).toHaveBeenCalledTimes(2);
+    expect(addItemToHistoryMock.mock.calls[1][0]).toEqual({
+      type: 'error',
+      text: 'An unexpected error occurred: Synchronous spawn error',
     });
+    const tmpFile = path.join(os.tmpdir(), 'shell_pwd_abcdef.tmp');
+    // Verify that the temporary file was cleaned up
+    expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+  });
+
+  describe('Directory Change Warning', () => {
+    it('should show a warning if the working directory changes', async () => {
+      const tmpFile = path.join(os.tmpdir(), 'shell_pwd_abcdef.tmp');
+      vi.mocked(fs.existsSync).mockReturnValue(true);
+      vi.mocked(fs.readFileSync).mockReturnValue('/test/dir/new'); // A different directory
+
+      const { result } = renderProcessorHook();
+      act(() => {
+        result.current.handleShellCommand(
+          'cd new',
+          new AbortController().signal,
+        );
+      });
+      const execPromise = onExecMock.mock.calls[0][0];
+
+      act(() => {
+        resolveExecutionPromise(createMockServiceResult());
+      });
+      await act(async () => await execPromise);
 
-    await act(async () => {
-      await execPromise;
+      const finalHistoryItem = addItemToHistoryMock.mock.calls[1][0];
+      expect(finalHistoryItem.tools[0].resultDisplay).toContain(
+        "WARNING: shell mode is stateless; the directory change to '/test/dir/new' will not persist.",
+      );
+      expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
     });
 
-    expect(addItemToHistoryMock).toHaveBeenCalledTimes(2);
-    expect(addItemToHistoryMock.mock.calls[1][0]).toEqual({
-      type: 'tool_group',
-      tools: [
-        expect.objectContaining({
-          name: 'Shell Command',
-          description: 'a-bad-command',
-          status: ToolCallStatus.Error,
-          resultDisplay: 'Command exited with code 127.\ncommand not found',
-        }),
-      ],
+    it('should NOT show a warning if the directory does not change', async () => {
+      vi.mocked(fs.existsSync).mockReturnValue(true);
+      vi.mocked(fs.readFileSync).mockReturnValue('/test/dir'); // The same directory
+
+      const { result } = renderProcessorHook();
+      act(() => {
+        result.current.handleShellCommand('ls', new AbortController().signal);
+      });
+      const execPromise = onExecMock.mock.calls[0][0];
+
+      act(() => {
+        resolveExecutionPromise(createMockServiceResult());
+      });
+      await act(async () => await execPromise);
+
+      const finalHistoryItem = addItemToHistoryMock.mock.calls[1][0];
+      expect(finalHistoryItem.tools[0].resultDisplay).not.toContain('WARNING');
     });
   });
 });
diff --git a/packages/cli/src/ui/hooks/shellCommandProcessor.ts b/packages/cli/src/ui/hooks/shellCommandProcessor.ts
index 9e343f90..08df0a74 100644
--- a/packages/cli/src/ui/hooks/shellCommandProcessor.ts
+++ b/packages/cli/src/ui/hooks/shellCommandProcessor.ts
@@ -4,8 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { spawn } from 'child_process';
-import { TextDecoder } from 'util';
 import {
   HistoryItemWithoutId,
   IndividualToolCallDisplay,
@@ -15,186 +13,22 @@ import { useCallback } from 'react';
 import {
   Config,
   GeminiClient,
-  getCachedEncodingForBuffer,
+  isBinary,
+  ShellExecutionResult,
+  ShellExecutionService,
 } from '@google/gemini-cli-core';
 import { type PartListUnion } from '@google/genai';
-import { formatMemoryUsage } from '../utils/formatters.js';
-import { isBinary } from '../utils/textUtils.js';
 import { UseHistoryManagerReturn } from './useHistoryManager.js';
 import { SHELL_COMMAND_NAME } from '../constants.js';
+import { formatMemoryUsage } from '../utils/formatters.js';
 import crypto from 'crypto';
 import path from 'path';
 import os from 'os';
 import fs from 'fs';
-import stripAnsi from 'strip-ansi';
 
-const OUTPUT_UPDATE_INTERVAL_MS = 1000;
+export const OUTPUT_UPDATE_INTERVAL_MS = 1000;
 const MAX_OUTPUT_LENGTH = 10000;
 
-/**
- * A structured result from a shell command execution.
- */
-interface ShellExecutionResult {
-  rawOutput: Buffer;
-  output: string;
-  exitCode: number | null;
-  signal: NodeJS.Signals | null;
-  error: Error | null;
-  aborted: boolean;
-}
-
-/**
- * Executes a shell command using `spawn`, capturing all output and lifecycle events.
- * This is the single, unified implementation for shell execution.
- *
- * @param commandToExecute The exact command string to run.
- * @param cwd The working directory to execute the command in.
- * @param abortSignal An AbortSignal to terminate the process.
- * @param onOutputChunk A callback for streaming real-time output.
- * @param onDebugMessage A callback for logging debug information.
- * @returns A promise that resolves with the complete execution result.
- */
-function executeShellCommand(
-  commandToExecute: string,
-  cwd: string,
-  abortSignal: AbortSignal,
-  onOutputChunk: (chunk: string) => void,
-  onDebugMessage: (message: string) => void,
-): Promise<ShellExecutionResult> {
-  return new Promise((resolve) => {
-    const isWindows = os.platform() === 'win32';
-    const shell = isWindows ? 'cmd.exe' : 'bash';
-    const shellArgs = isWindows
-      ? ['/c', commandToExecute]
-      : ['-c', commandToExecute];
-
-    const child = spawn(shell, shellArgs, {
-      cwd,
-      stdio: ['ignore', 'pipe', 'pipe'],
-      detached: !isWindows, // Use process groups on non-Windows for robust killing
-      env: {
-        ...process.env,
-        GEMINI_CLI: '1',
-      },
-    });
-
-    // Use decoders to handle multi-byte characters safely (for streaming output).
-    let stdoutDecoder: TextDecoder | null = null;
-    let stderrDecoder: TextDecoder | null = null;
-
-    let stdout = '';
-    let stderr = '';
-    const outputChunks: Buffer[] = [];
-    let error: Error | null = null;
-    let exited = false;
-
-    let streamToUi = true;
-    const MAX_SNIFF_SIZE = 4096;
-    let sniffedBytes = 0;
-
-    const handleOutput = (data: Buffer, stream: 'stdout' | 'stderr') => {
-      if (!stdoutDecoder || !stderrDecoder) {
-        const encoding = getCachedEncodingForBuffer(data);
-        stdoutDecoder = new TextDecoder(encoding);
-        stderrDecoder = new TextDecoder(encoding);
-      }
-
-      outputChunks.push(data);
-
-      if (streamToUi && sniffedBytes < MAX_SNIFF_SIZE) {
-        // Use a limited-size buffer for the check to avoid performance issues.
-        const sniffBuffer = Buffer.concat(outputChunks.slice(0, 20));
-        sniffedBytes = sniffBuffer.length;
-
-        if (isBinary(sniffBuffer)) {
-          streamToUi = false;
-          // Overwrite any garbled text that may have streamed with a clear message.
-          onOutputChunk('[Binary output detected. Halting stream...]');
-        }
-      }
-
-      const decodedChunk =
-        stream === 'stdout'
-          ? stdoutDecoder.decode(data, { stream: true })
-          : stderrDecoder.decode(data, { stream: true });
-      if (stream === 'stdout') {
-        stdout += stripAnsi(decodedChunk);
-      } else {
-        stderr += stripAnsi(decodedChunk);
-      }
-
-      if (!exited && streamToUi) {
-        // Send only the new chunk to avoid re-rendering the whole output.
-        const combinedOutput = stdout + (stderr ? `\n${stderr}` : '');
-        onOutputChunk(combinedOutput);
-      } else if (!exited && !streamToUi) {
-        // Send progress updates for the binary stream
-        const totalBytes = outputChunks.reduce(
-          (sum, chunk) => sum + chunk.length,
-          0,
-        );
-        onOutputChunk(
-          `[Receiving binary output... ${formatMemoryUsage(totalBytes)} received]`,
-        );
-      }
-    };
-
-    child.stdout.on('data', (data) => handleOutput(data, 'stdout'));
-    child.stderr.on('data', (data) => handleOutput(data, 'stderr'));
-    child.on('error', (err) => {
-      error = err;
-    });
-
-    const abortHandler = async () => {
-      if (child.pid && !exited) {
-        onDebugMessage(`Aborting shell command (PID: ${child.pid})`);
-        if (isWindows) {
-          spawn('taskkill', ['/pid', child.pid.toString(), '/f', '/t']);
-        } else {
-          try {
-            // Kill the entire process group (negative PID).
-            // SIGTERM first, then SIGKILL if it doesn't die.
-            process.kill(-child.pid, 'SIGTERM');
-            await new Promise((res) => setTimeout(res, 200));
-            if (!exited) {
-              process.kill(-child.pid, 'SIGKILL');
-            }
-          } catch (_e) {
-            // Fall back to killing just the main process if group kill fails.
-            if (!exited) child.kill('SIGKILL');
-          }
-        }
-      }
-    };
-
-    abortSignal.addEventListener('abort', abortHandler, { once: true });
-
-    child.on('exit', (code, signal) => {
-      exited = true;
-      abortSignal.removeEventListener('abort', abortHandler);
-
-      // Handle any final bytes lingering in the decoders
-      if (stdoutDecoder) {
-        stdout += stdoutDecoder.decode();
-      }
-      if (stderrDecoder) {
-        stderr += stderrDecoder.decode();
-      }
-
-      const finalBuffer = Buffer.concat(outputChunks);
-
-      resolve({
-        rawOutput: finalBuffer,
-        output: stdout + (stderr ? `\n${stderr}` : ''),
-        exitCode: code,
-        signal,
-        error,
-        aborted: abortSignal.aborted,
-      });
-    });
-  });
-}
-
 function addShellCommandToGeminiHistory(
   geminiClient: GeminiClient,
   rawQuery: string,
@@ -227,7 +61,6 @@ ${modelContent}
  * Hook to process shell commands.
  * Orchestrates command execution and updates history and agent context.
  */
-
 export const useShellCommandProcessor = (
   addItemToHistory: UseHistoryManagerReturn['addItem'],
   setPendingHistoryItem: React.Dispatch<
@@ -269,7 +102,11 @@ export const useShellCommandProcessor = (
       }
 
       const execPromise = new Promise<void>((resolve) => {
-        let lastUpdateTime = 0;
+        let lastUpdateTime = Date.now();
+        let cumulativeStdout = '';
+        let cumulativeStderr = '';
+        let isBinaryStream = false;
+        let binaryBytesReceived = 0;
 
         const initialToolDisplay: IndividualToolCallDisplay = {
           callId,
@@ -285,103 +122,183 @@ export const useShellCommandProcessor = (
           tools: [initialToolDisplay],
         });
 
+        let executionPid: number | undefined;
+
+        const abortHandler = () => {
+          onDebugMessage(
+            `Aborting shell command (PID: ${executionPid ?? 'unknown'})`,
+          );
+        };
+        abortSignal.addEventListener('abort', abortHandler, { once: true });
+
         onDebugMessage(`Executing in ${targetDir}: ${commandToExecute}`);
-        executeShellCommand(
-          commandToExecute,
-          targetDir,
-          abortSignal,
-          (streamedOutput) => {
-            // Throttle pending UI updates to avoid excessive re-renders.
-            if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) {
-              setPendingHistoryItem({
-                type: 'tool_group',
-                tools: [
-                  { ...initialToolDisplay, resultDisplay: streamedOutput },
-                ],
-              });
-              lastUpdateTime = Date.now();
-            }
-          },
-          onDebugMessage,
-        )
-          .then((result) => {
-            setPendingHistoryItem(null);
 
-            let mainContent: string;
+        try {
+          const { pid, result } = ShellExecutionService.execute(
+            commandToExecute,
+            targetDir,
+            (event) => {
+              switch (event.type) {
+                case 'data':
+                  // Do not process text data if we've already switched to binary mode.
+                  if (isBinaryStream) break;
+                  if (event.stream === 'stdout') {
+                    cumulativeStdout += event.chunk;
+                  } else {
+                    cumulativeStderr += event.chunk;
+                  }
+                  break;
+                case 'binary_detected':
+                  isBinaryStream = true;
+                  break;
+                case 'binary_progress':
+                  isBinaryStream = true;
+                  binaryBytesReceived = event.bytesReceived;
+                  break;
+                default: {
+                  throw new Error('An unhandled ShellOutputEvent was found.');
+                }
+              }
+
+              // Compute the display string based on the *current* state.
+              let currentDisplayOutput: string;
+              if (isBinaryStream) {
+                if (binaryBytesReceived > 0) {
+                  currentDisplayOutput = `[Receiving binary output... ${formatMemoryUsage(
+                    binaryBytesReceived,
+                  )} received]`;
+                } else {
+                  currentDisplayOutput =
+                    '[Binary output detected. Halting stream...]';
+                }
+              } else {
+                currentDisplayOutput =
+                  cumulativeStdout +
+                  (cumulativeStderr ? `\n${cumulativeStderr}` : '');
+              }
+
+              // Throttle pending UI updates to avoid excessive re-renders.
+              if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) {
+                setPendingHistoryItem({
+                  type: 'tool_group',
+                  tools: [
+                    {
+                      ...initialToolDisplay,
+                      resultDisplay: currentDisplayOutput,
+                    },
+                  ],
+                });
+                lastUpdateTime = Date.now();
+              }
+            },
+            abortSignal,
+          );
 
-            if (isBinary(result.rawOutput)) {
-              mainContent =
-                '[Command produced binary output, which is not shown.]';
-            } else {
-              mainContent =
-                result.output.trim() || '(Command produced no output)';
-            }
+          executionPid = pid;
 
-            let finalOutput = mainContent;
-            let finalStatus = ToolCallStatus.Success;
+          result
+            .then((result: ShellExecutionResult) => {
+              setPendingHistoryItem(null);
 
-            if (result.error) {
-              finalStatus = ToolCallStatus.Error;
-              finalOutput = `${result.error.message}\n${finalOutput}`;
-            } else if (result.aborted) {
-              finalStatus = ToolCallStatus.Canceled;
-              finalOutput = `Command was cancelled.\n${finalOutput}`;
-            } else if (result.signal) {
-              finalStatus = ToolCallStatus.Error;
-              finalOutput = `Command terminated by signal: ${result.signal}.\n${finalOutput}`;
-            } else if (result.exitCode !== 0) {
-              finalStatus = ToolCallStatus.Error;
-              finalOutput = `Command exited with code ${result.exitCode}.\n${finalOutput}`;
-            }
+              let mainContent: string;
 
-            if (pwdFilePath && fs.existsSync(pwdFilePath)) {
-              const finalPwd = fs.readFileSync(pwdFilePath, 'utf8').trim();
-              if (finalPwd && finalPwd !== targetDir) {
-                const warning = `WARNING: shell mode is stateless; the directory change to '${finalPwd}' will not persist.`;
-                finalOutput = `${warning}\n\n${finalOutput}`;
+              if (isBinary(result.rawOutput)) {
+                mainContent =
+                  '[Command produced binary output, which is not shown.]';
+              } else {
+                mainContent =
+                  result.output.trim() || '(Command produced no output)';
               }
-            }
 
-            const finalToolDisplay: IndividualToolCallDisplay = {
-              ...initialToolDisplay,
-              status: finalStatus,
-              resultDisplay: finalOutput,
-            };
+              let finalOutput = mainContent;
+              let finalStatus = ToolCallStatus.Success;
+
+              if (result.error) {
+                finalStatus = ToolCallStatus.Error;
+                finalOutput = `${result.error.message}\n${finalOutput}`;
+              } else if (result.aborted) {
+                finalStatus = ToolCallStatus.Canceled;
+                finalOutput = `Command was cancelled.\n${finalOutput}`;
+              } else if (result.signal) {
+                finalStatus = ToolCallStatus.Error;
+                finalOutput = `Command terminated by signal: ${result.signal}.\n${finalOutput}`;
+              } else if (result.exitCode !== 0) {
+                finalStatus = ToolCallStatus.Error;
+                finalOutput = `Command exited with code ${result.exitCode}.\n${finalOutput}`;
+              }
 
-            // Add the complete, contextual result to the local UI history.
-            addItemToHistory(
-              {
-                type: 'tool_group',
-                tools: [finalToolDisplay],
-              } as HistoryItemWithoutId,
-              userMessageTimestamp,
-            );
+              if (pwdFilePath && fs.existsSync(pwdFilePath)) {
+                const finalPwd = fs.readFileSync(pwdFilePath, 'utf8').trim();
+                if (finalPwd && finalPwd !== targetDir) {
+                  const warning = `WARNING: shell mode is stateless; the directory change to '${finalPwd}' will not persist.`;
+                  finalOutput = `${warning}\n\n${finalOutput}`;
+                }
+              }
 
-            // Add the same complete, contextual result to the LLM's history.
-            addShellCommandToGeminiHistory(geminiClient, rawQuery, finalOutput);
-          })
-          .catch((err) => {
-            setPendingHistoryItem(null);
-            const errorMessage =
-              err instanceof Error ? err.message : String(err);
-            addItemToHistory(
-              {
-                type: 'error',
-                text: `An unexpected error occurred: ${errorMessage}`,
-              },
-              userMessageTimestamp,
-            );
-          })
-          .finally(() => {
-            if (pwdFilePath && fs.existsSync(pwdFilePath)) {
-              fs.unlinkSync(pwdFilePath);
-            }
-            resolve();
-          });
+              const finalToolDisplay: IndividualToolCallDisplay = {
+                ...initialToolDisplay,
+                status: finalStatus,
+                resultDisplay: finalOutput,
+              };
+
+              // Add the complete, contextual result to the local UI history.
+              addItemToHistory(
+                {
+                  type: 'tool_group',
+                  tools: [finalToolDisplay],
+                } as HistoryItemWithoutId,
+                userMessageTimestamp,
+              );
+
+              // Add the same complete, contextual result to the LLM's history.
+              addShellCommandToGeminiHistory(
+                geminiClient,
+                rawQuery,
+                finalOutput,
+              );
+            })
+            .catch((err) => {
+              setPendingHistoryItem(null);
+              const errorMessage =
+                err instanceof Error ? err.message : String(err);
+              addItemToHistory(
+                {
+                  type: 'error',
+                  text: `An unexpected error occurred: ${errorMessage}`,
+                },
+                userMessageTimestamp,
+              );
+            })
+            .finally(() => {
+              abortSignal.removeEventListener('abort', abortHandler);
+              if (pwdFilePath && fs.existsSync(pwdFilePath)) {
+                fs.unlinkSync(pwdFilePath);
+              }
+              resolve();
+            });
+        } catch (err) {
+          // This block handles synchronous errors from `execute`
+          setPendingHistoryItem(null);
+          const errorMessage = err instanceof Error ? err.message : String(err);
+          addItemToHistory(
+            {
+              type: 'error',
+              text: `An unexpected error occurred: ${errorMessage}`,
+            },
+            userMessageTimestamp,
+          );
+
+          // Perform cleanup here as well
+          if (pwdFilePath && fs.existsSync(pwdFilePath)) {
+            fs.unlinkSync(pwdFilePath);
+          }
+
+          resolve(); // Resolve the promise to unblock `onExec`
+        }
       });
 
       onExec(execPromise);
-      return true; // Command was initiated
+      return true;
     },
     [
       config,
diff --git a/packages/cli/src/ui/utils/textUtils.test.ts b/packages/cli/src/ui/utils/textUtils.test.ts
deleted file mode 100644
index 5dd08875..00000000
--- a/packages/cli/src/ui/utils/textUtils.test.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { isBinary } from './textUtils';
-
-describe('textUtils', () => {
-  describe('isBinary', () => {
-    it('should return true for a buffer containing a null byte', () => {
-      const buffer = Buffer.from([
-        0x89, 0x50, 0x4e, 0x47, 0x0d, 0x1a, 0x0a, 0x00,
-      ]);
-      expect(isBinary(buffer)).toBe(true);
-    });
-
-    it('should return false for a buffer containing only text', () => {
-      const buffer = Buffer.from('This is a test string.');
-      expect(isBinary(buffer)).toBe(false);
-    });
-
-    it('should return false for an empty buffer', () => {
-      const buffer = Buffer.from([]);
-      expect(isBinary(buffer)).toBe(false);
-    });
-
-    it('should return false for a null or undefined buffer', () => {
-      expect(isBinary(null)).toBe(false);
-      expect(isBinary(undefined)).toBe(false);
-    });
-
-    it('should only check the sample size', () => {
-      const longBufferWithNullByteAtEnd = Buffer.concat([
-        Buffer.from('a'.repeat(1024)),
-        Buffer.from([0x00]),
-      ]);
-      expect(isBinary(longBufferWithNullByteAtEnd, 512)).toBe(false);
-    });
-  });
-});
diff --git a/packages/cli/src/ui/utils/textUtils.ts b/packages/cli/src/ui/utils/textUtils.ts
index fa0abe9a..e4d8ea58 100644
--- a/packages/cli/src/ui/utils/textUtils.ts
+++ b/packages/cli/src/ui/utils/textUtils.ts
@@ -17,35 +17,6 @@ export const getAsciiArtWidth = (asciiArt: string): number => {
   return Math.max(...lines.map((line) => line.length));
 };
 
-/**
- * Checks if a Buffer is likely binary by testing for the presence of a NULL byte.
- * The presence of a NULL byte is a strong indicator that the data is not plain text.
- * @param data The Buffer to check.
- * @param sampleSize The number of bytes from the start of the buffer to test.
- * @returns True if a NULL byte is found, false otherwise.
- */
-export function isBinary(
-  data: Buffer | null | undefined,
-  sampleSize = 512,
-): boolean {
-  if (!data) {
-    return false;
-  }
-
-  const sample = data.length > sampleSize ? data.subarray(0, sampleSize) : data;
-
-  for (const byte of sample) {
-    // The presence of a NULL byte (0x00) is one of the most reliable
-    // indicators of a binary file. Text files should not contain them.
-    if (byte === 0) {
-      return true;
-    }
-  }
-
-  // If no NULL bytes were found in the sample, we assume it's text.
-  return false;
-}
-
 /*
  * -------------------------------------------------------------------------
  *  Unicode‑aware helpers (work at the code‑point level rather than UTF‑16
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 829de544..b4ce7e85 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -36,6 +36,8 @@ export * from './utils/quotaErrorDetection.js';
 export * from './utils/fileUtils.js';
 export * from './utils/retry.js';
 export * from './utils/systemEncoding.js';
+export * from './utils/textUtils.js';
+export * from './utils/formatters.js';
 
 // Export services
 export * from './services/fileDiscoveryService.js';
@@ -45,6 +47,9 @@ export * from './services/gitService.js';
 export * from './ide/ide-client.js';
 export * from './ide/ideContext.js';
 
+// Export Shell Execution Service
+export * from './services/shellExecutionService.js';
+
 // Export base tool definitions
 export * from './tools/tools.js';
 export * from './tools/tool-registry.js';
diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts
new file mode 100644
index 00000000..4d1655a2
--- /dev/null
+++ b/packages/core/src/services/shellExecutionService.test.ts
@@ -0,0 +1,357 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { vi, describe, it, expect, beforeEach, type Mock } from 'vitest';
+const mockSpawn = vi.hoisted(() => vi.fn());
+vi.mock('child_process', () => ({
+  spawn: mockSpawn,
+}));
+
+import EventEmitter from 'events';
+import { Readable } from 'stream';
+import { type ChildProcess } from 'child_process';
+import {
+  ShellExecutionService,
+  ShellOutputEvent,
+} from './shellExecutionService.js';
+
+const mockIsBinary = vi.hoisted(() => vi.fn());
+vi.mock('../utils/textUtils.js', () => ({
+  isBinary: mockIsBinary,
+}));
+
+const mockPlatform = vi.hoisted(() => vi.fn());
+vi.mock('os', () => ({
+  default: {
+    platform: mockPlatform,
+  },
+  platform: mockPlatform,
+}));
+
+const mockProcessKill = vi
+  .spyOn(process, 'kill')
+  .mockImplementation(() => true);
+
+describe('ShellExecutionService', () => {
+  let mockChildProcess: EventEmitter & Partial<ChildProcess>;
+  let onOutputEventMock: Mock<(event: ShellOutputEvent) => void>;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+
+    mockIsBinary.mockReturnValue(false);
+    mockPlatform.mockReturnValue('linux');
+
+    onOutputEventMock = vi.fn();
+
+    mockChildProcess = new EventEmitter() as EventEmitter &
+      Partial<ChildProcess>;
+    // FIX: Cast simple EventEmitters to the expected stream type.
+    mockChildProcess.stdout = new EventEmitter() as Readable;
+    mockChildProcess.stderr = new EventEmitter() as Readable;
+    mockChildProcess.kill = vi.fn();
+
+    // FIX: Use Object.defineProperty to set the readonly 'pid' property.
+    Object.defineProperty(mockChildProcess, 'pid', {
+      value: 12345,
+      configurable: true,
+    });
+
+    mockSpawn.mockReturnValue(mockChildProcess);
+  });
+
+  // Helper function to run a standard execution simulation
+  const simulateExecution = async (
+    command: string,
+    simulation: (cp: typeof mockChildProcess, ac: AbortController) => void,
+  ) => {
+    const abortController = new AbortController();
+    const handle = ShellExecutionService.execute(
+      command,
+      '/test/dir',
+      onOutputEventMock,
+      abortController.signal,
+    );
+
+    await new Promise((resolve) => setImmediate(resolve));
+    simulation(mockChildProcess, abortController);
+    const result = await handle.result;
+    return { result, handle, abortController };
+  };
+
+  describe('Successful Execution', () => {
+    it('should execute a command and capture stdout and stderr', async () => {
+      const { result, handle } = await simulateExecution('ls -l', (cp) => {
+        cp.stdout?.emit('data', Buffer.from('file1.txt\n'));
+        cp.stderr?.emit('data', Buffer.from('a warning'));
+        cp.emit('exit', 0, null);
+      });
+
+      expect(mockSpawn).toHaveBeenCalledWith(
+        'bash',
+        ['-c', 'ls -l'],
+        expect.any(Object),
+      );
+      expect(result.exitCode).toBe(0);
+      expect(result.signal).toBeNull();
+      expect(result.error).toBeNull();
+      expect(result.aborted).toBe(false);
+      expect(result.stdout).toBe('file1.txt\n');
+      expect(result.stderr).toBe('a warning');
+      expect(result.output).toBe('file1.txt\n\na warning');
+      expect(handle.pid).toBe(12345);
+
+      expect(onOutputEventMock).toHaveBeenCalledWith({
+        type: 'data',
+        stream: 'stdout',
+        chunk: 'file1.txt\n',
+      });
+      expect(onOutputEventMock).toHaveBeenCalledWith({
+        type: 'data',
+        stream: 'stderr',
+        chunk: 'a warning',
+      });
+    });
+
+    it('should strip ANSI codes from output', async () => {
+      const { result } = await simulateExecution('ls --color=auto', (cp) => {
+        cp.stdout?.emit('data', Buffer.from('a\u001b[31mred\u001b[0mword'));
+        cp.emit('exit', 0, null);
+      });
+
+      expect(result.stdout).toBe('aredword');
+      expect(onOutputEventMock).toHaveBeenCalledWith({
+        type: 'data',
+        stream: 'stdout',
+        chunk: 'aredword',
+      });
+    });
+
+    it('should correctly decode multi-byte characters split across chunks', async () => {
+      const { result } = await simulateExecution('echo "你好"', (cp) => {
+        const multiByteChar = Buffer.from('你好', 'utf-8');
+        cp.stdout?.emit('data', multiByteChar.slice(0, 2));
+        cp.stdout?.emit('data', multiByteChar.slice(2));
+        cp.emit('exit', 0, null);
+      });
+      expect(result.stdout).toBe('你好');
+    });
+
+    it('should handle commands with no output', async () => {
+      const { result } = await simulateExecution('touch file', (cp) => {
+        cp.emit('exit', 0, null);
+      });
+
+      expect(result.stdout).toBe('');
+      expect(result.stderr).toBe('');
+      expect(result.output).toBe('');
+      expect(onOutputEventMock).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('Failed Execution', () => {
+    it('should capture a non-zero exit code and format output correctly', async () => {
+      const { result } = await simulateExecution('a-bad-command', (cp) => {
+        cp.stderr?.emit('data', Buffer.from('command not found'));
+        cp.emit('exit', 127, null);
+      });
+
+      expect(result.exitCode).toBe(127);
+      expect(result.stderr).toBe('command not found');
+      expect(result.stdout).toBe('');
+      expect(result.output).toBe('\ncommand not found');
+      expect(result.error).toBeNull();
+    });
+
+    it('should capture a termination signal', async () => {
+      const { result } = await simulateExecution('long-process', (cp) => {
+        cp.emit('exit', null, 'SIGTERM');
+      });
+
+      expect(result.exitCode).toBeNull();
+      expect(result.signal).toBe('SIGTERM');
+    });
+
+    it('should handle a spawn error', async () => {
+      const spawnError = new Error('spawn EACCES');
+      const { result } = await simulateExecution('protected-cmd', (cp) => {
+        cp.emit('error', spawnError);
+        cp.emit('exit', 1, null);
+      });
+
+      expect(result.error).toBe(spawnError);
+      expect(result.exitCode).toBe(1);
+    });
+  });
+
+  describe('Aborting Commands', () => {
+    describe.each([
+      {
+        platform: 'linux',
+        expectedSignal: 'SIGTERM',
+        expectedExit: { signal: 'SIGKILL' as const },
+      },
+      {
+        platform: 'win32',
+        expectedCommand: 'taskkill',
+        expectedExit: { code: 1 },
+      },
+    ])(
+      'on $platform',
+      ({ platform, expectedSignal, expectedCommand, expectedExit }) => {
+        it('should abort a running process and set the aborted flag', async () => {
+          mockPlatform.mockReturnValue(platform);
+
+          const { result } = await simulateExecution(
+            'sleep 10',
+            (cp, abortController) => {
+              abortController.abort();
+              if (expectedExit.signal)
+                cp.emit('exit', null, expectedExit.signal);
+              if (typeof expectedExit.code === 'number')
+                cp.emit('exit', expectedExit.code, null);
+            },
+          );
+
+          expect(result.aborted).toBe(true);
+
+          if (platform === 'linux') {
+            expect(mockProcessKill).toHaveBeenCalledWith(
+              -mockChildProcess.pid!,
+              expectedSignal,
+            );
+          } else {
+            expect(mockSpawn).toHaveBeenCalledWith(expectedCommand, [
+              '/pid',
+              String(mockChildProcess.pid),
+              '/f',
+              '/t',
+            ]);
+          }
+        });
+      },
+    );
+
+    it('should gracefully attempt SIGKILL on linux if SIGTERM fails', async () => {
+      mockPlatform.mockReturnValue('linux');
+      vi.useFakeTimers();
+
+      // Don't await the result inside the simulation block for this specific test.
+      // We need to control the timeline manually.
+      const abortController = new AbortController();
+      const handle = ShellExecutionService.execute(
+        'unresponsive_process',
+        '/test/dir',
+        onOutputEventMock,
+        abortController.signal,
+      );
+
+      abortController.abort();
+
+      // Check the first kill signal
+      expect(mockProcessKill).toHaveBeenCalledWith(
+        -mockChildProcess.pid!,
+        'SIGTERM',
+      );
+
+      // Now, advance time past the timeout
+      await vi.advanceTimersByTimeAsync(250);
+
+      // Check the second kill signal
+      expect(mockProcessKill).toHaveBeenCalledWith(
+        -mockChildProcess.pid!,
+        'SIGKILL',
+      );
+
+      // Finally, simulate the process exiting and await the result
+      mockChildProcess.emit('exit', null, 'SIGKILL');
+      const result = await handle.result;
+
+      vi.useRealTimers();
+
+      expect(result.aborted).toBe(true);
+      expect(result.signal).toBe('SIGKILL');
+      // The individual kill calls were already asserted above.
+      expect(mockProcessKill).toHaveBeenCalledTimes(2);
+    });
+  });
+
+  describe('Binary Output', () => {
+    it('should detect binary output and switch to progress events', async () => {
+      mockIsBinary.mockReturnValueOnce(true);
+      const binaryChunk1 = Buffer.from([0x89, 0x50, 0x4e, 0x47]);
+      const binaryChunk2 = Buffer.from([0x0d, 0x0a, 0x1a, 0x0a]);
+
+      const { result } = await simulateExecution('cat image.png', (cp) => {
+        cp.stdout?.emit('data', binaryChunk1);
+        cp.stdout?.emit('data', binaryChunk2);
+        cp.emit('exit', 0, null);
+      });
+
+      expect(result.rawOutput).toEqual(
+        Buffer.concat([binaryChunk1, binaryChunk2]),
+      );
+      expect(onOutputEventMock).toHaveBeenCalledTimes(3);
+      expect(onOutputEventMock.mock.calls[0][0]).toEqual({
+        type: 'binary_detected',
+      });
+      expect(onOutputEventMock.mock.calls[1][0]).toEqual({
+        type: 'binary_progress',
+        bytesReceived: 4,
+      });
+      expect(onOutputEventMock.mock.calls[2][0]).toEqual({
+        type: 'binary_progress',
+        bytesReceived: 8,
+      });
+    });
+
+    it('should not emit data events after binary is detected', async () => {
+      mockIsBinary.mockImplementation((buffer) => buffer.includes(0x00));
+
+      await simulateExecution('cat mixed_file', (cp) => {
+        cp.stdout?.emit('data', Buffer.from('some text'));
+        cp.stdout?.emit('data', Buffer.from([0x00, 0x01, 0x02]));
+        cp.stdout?.emit('data', Buffer.from('more text'));
+        cp.emit('exit', 0, null);
+      });
+
+      // FIX: Provide explicit type for the 'call' parameter in the map function.
+      const eventTypes = onOutputEventMock.mock.calls.map(
+        (call: [ShellOutputEvent]) => call[0].type,
+      );
+      expect(eventTypes).toEqual([
+        'data',
+        'binary_detected',
+        'binary_progress',
+        'binary_progress',
+      ]);
+    });
+  });
+
+  describe('Platform-Specific Behavior', () => {
+    it('should use cmd.exe on Windows', async () => {
+      mockPlatform.mockReturnValue('win32');
+      await simulateExecution('dir', (cp) => cp.emit('exit', 0, null));
+
+      expect(mockSpawn).toHaveBeenCalledWith(
+        'cmd.exe',
+        ['/c', 'dir'],
+        expect.objectContaining({ detached: false }),
+      );
+    });
+
+    it('should use bash and detached process group on Linux', async () => {
+      mockPlatform.mockReturnValue('linux');
+      await simulateExecution('ls', (cp) => cp.emit('exit', 0, null));
+
+      expect(mockSpawn).toHaveBeenCalledWith(
+        'bash',
+        ['-c', 'ls'],
+        expect.objectContaining({ detached: true }),
+      );
+    });
+  });
+});
diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts
new file mode 100644
index 00000000..0f0002cd
--- /dev/null
+++ b/packages/core/src/services/shellExecutionService.ts
@@ -0,0 +1,229 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { spawn } from 'child_process';
+import { TextDecoder } from 'util';
+import os from 'os';
+import stripAnsi from 'strip-ansi';
+import { getCachedEncodingForBuffer } from '../utils/systemEncoding.js';
+import { isBinary } from '../utils/textUtils.js';
+
+const SIGKILL_TIMEOUT_MS = 200;
+
+/** A structured result from a shell command execution. */
+export interface ShellExecutionResult {
+  /** The raw, unprocessed output buffer. */
+  rawOutput: Buffer;
+  /** The combined, decoded stdout and stderr as a string. */
+  output: string;
+  /** The decoded stdout as a string. */
+  stdout: string;
+  /** The decoded stderr as a string. */
+  stderr: string;
+  /** The process exit code, or null if terminated by a signal. */
+  exitCode: number | null;
+  /** The signal that terminated the process, if any. */
+  signal: NodeJS.Signals | null;
+  /** An error object if the process failed to spawn. */
+  error: Error | null;
+  /** A boolean indicating if the command was aborted by the user. */
+  aborted: boolean;
+  /** The process ID of the spawned shell. */
+  pid: number | undefined;
+}
+
+/** A handle for an ongoing shell execution. */
+export interface ShellExecutionHandle {
+  /** The process ID of the spawned shell. */
+  pid: number | undefined;
+  /** A promise that resolves with the complete execution result. */
+  result: Promise<ShellExecutionResult>;
+}
+
+/**
+ * Describes a structured event emitted during shell command execution.
+ */
+export type ShellOutputEvent =
+  | {
+      /** The event contains a chunk of output data. */
+      type: 'data';
+      /** The stream from which the data originated. */
+      stream: 'stdout' | 'stderr';
+      /** The decoded string chunk. */
+      chunk: string;
+    }
+  | {
+      /** Signals that the output stream has been identified as binary. */
+      type: 'binary_detected';
+    }
+  | {
+      /** Provides progress updates for a binary stream. */
+      type: 'binary_progress';
+      /** The total number of bytes received so far. */
+      bytesReceived: number;
+    };
+
+/**
+ * A centralized service for executing shell commands with robust process
+ * management, cross-platform compatibility, and streaming output capabilities.
+ *
+ */
+export class ShellExecutionService {
+  /**
+   * Executes a shell command using `spawn`, capturing all output and lifecycle events.
+   *
+   * @param commandToExecute The exact command string to run.
+   * @param cwd The working directory to execute the command in.
+   * @param onOutputEvent A callback for streaming structured events about the execution, including data chunks and status updates.
+   * @param abortSignal An AbortSignal to terminate the process and its children.
+   * @returns An object containing the process ID (pid) and a promise that
+   *          resolves with the complete execution result.
+   */
+  static execute(
+    commandToExecute: string,
+    cwd: string,
+    onOutputEvent: (event: ShellOutputEvent) => void,
+    abortSignal: AbortSignal,
+  ): ShellExecutionHandle {
+    const isWindows = os.platform() === 'win32';
+    const shell = isWindows ? 'cmd.exe' : 'bash';
+    const shellArgs = [isWindows ? '/c' : '-c', commandToExecute];
+
+    const child = spawn(shell, shellArgs, {
+      cwd,
+      stdio: ['ignore', 'pipe', 'pipe'],
+      detached: !isWindows, // Use process groups on non-Windows for robust killing
+      env: {
+        ...process.env,
+        GEMINI_CLI: '1',
+      },
+    });
+
+    const result = new Promise<ShellExecutionResult>((resolve) => {
+      // Use decoders to handle multi-byte characters safely (for streaming output).
+      let stdoutDecoder: TextDecoder | null = null;
+      let stderrDecoder: TextDecoder | null = null;
+
+      let stdout = '';
+      let stderr = '';
+      const outputChunks: Buffer[] = [];
+      let error: Error | null = null;
+      let exited = false;
+
+      let isStreamingRawContent = true;
+      const MAX_SNIFF_SIZE = 4096;
+      let sniffedBytes = 0;
+
+      const handleOutput = (data: Buffer, stream: 'stdout' | 'stderr') => {
+        if (!stdoutDecoder || !stderrDecoder) {
+          const encoding = getCachedEncodingForBuffer(data);
+          try {
+            stdoutDecoder = new TextDecoder(encoding);
+            stderrDecoder = new TextDecoder(encoding);
+          } catch {
+            // If the encoding is not supported, fall back to utf-8.
+            // This can happen on some platforms for certain encodings like 'utf-32le'.
+            stdoutDecoder = new TextDecoder('utf-8');
+            stderrDecoder = new TextDecoder('utf-8');
+          }
+        }
+
+        outputChunks.push(data);
+
+        // Binary detection logic. This only runs until we've made a determination.
+        if (isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE) {
+          const sniffBuffer = Buffer.concat(outputChunks.slice(0, 20));
+          sniffedBytes = sniffBuffer.length;
+
+          if (isBinary(sniffBuffer)) {
+            // Change state to stop streaming raw content.
+            isStreamingRawContent = false;
+            onOutputEvent({ type: 'binary_detected' });
+          }
+        }
+
+        const decodedChunk =
+          stream === 'stdout'
+            ? stdoutDecoder.decode(data, { stream: true })
+            : stderrDecoder.decode(data, { stream: true });
+        const strippedChunk = stripAnsi(decodedChunk);
+
+        if (stream === 'stdout') {
+          stdout += strippedChunk;
+        } else {
+          stderr += strippedChunk;
+        }
+
+        if (isStreamingRawContent) {
+          onOutputEvent({ type: 'data', stream, chunk: strippedChunk });
+        } else {
+          const totalBytes = outputChunks.reduce(
+            (sum, chunk) => sum + chunk.length,
+            0,
+          );
+          onOutputEvent({ type: 'binary_progress', bytesReceived: totalBytes });
+        }
+      };
+
+      child.stdout.on('data', (data) => handleOutput(data, 'stdout'));
+      child.stderr.on('data', (data) => handleOutput(data, 'stderr'));
+      child.on('error', (err) => {
+        error = err;
+      });
+
+      const abortHandler = async () => {
+        if (child.pid && !exited) {
+          if (isWindows) {
+            spawn('taskkill', ['/pid', child.pid.toString(), '/f', '/t']);
+          } else {
+            try {
+              // Kill the entire process group (negative PID).
+              // SIGTERM first, then SIGKILL if it doesn't die.
+              process.kill(-child.pid, 'SIGTERM');
+              await new Promise((res) => setTimeout(res, SIGKILL_TIMEOUT_MS));
+              if (!exited) {
+                process.kill(-child.pid, 'SIGKILL');
+              }
+            } catch (_e) {
+              // Fall back to killing just the main process if group kill fails.
+              if (!exited) child.kill('SIGKILL');
+            }
+          }
+        }
+      };
+
+      abortSignal.addEventListener('abort', abortHandler, { once: true });
+
+      child.on('exit', (code, signal) => {
+        exited = true;
+        abortSignal.removeEventListener('abort', abortHandler);
+
+        if (stdoutDecoder) {
+          stdout += stripAnsi(stdoutDecoder.decode());
+        }
+        if (stderrDecoder) {
+          stderr += stripAnsi(stderrDecoder.decode());
+        }
+
+        const finalBuffer = Buffer.concat(outputChunks);
+
+        resolve({
+          rawOutput: finalBuffer,
+          output: stdout + (stderr ? `\n${stderr}` : ''),
+          stdout,
+          stderr,
+          exitCode: code,
+          signal,
+          error,
+          aborted: abortSignal.aborted,
+          pid: child.pid,
+        });
+      });
+    });
+
+    return { pid: child.pid, result };
+  }
+}
diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts
index 460c871a..55364197 100644
--- a/packages/core/src/tools/shell.test.ts
+++ b/packages/core/src/tools/shell.test.ts
@@ -4,164 +4,384 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { expect, describe, it, vi, beforeEach } from 'vitest';
+import {
+  vi,
+  describe,
+  it,
+  expect,
+  beforeEach,
+  afterEach,
+  type Mock,
+} from 'vitest';
+
+const mockShellExecutionService = vi.hoisted(() => vi.fn());
+vi.mock('../services/shellExecutionService.js', () => ({
+  ShellExecutionService: { execute: mockShellExecutionService },
+}));
+vi.mock('fs');
+vi.mock('os');
+vi.mock('crypto');
+vi.mock('../utils/summarizer.js');
+
+import { isCommandAllowed } from '../utils/shell-utils.js';
 import { ShellTool } from './shell.js';
-import { Config } from '../config/config.js';
+import { type Config } from '../config/config.js';
+import {
+  type ShellExecutionResult,
+  type ShellOutputEvent,
+} from '../services/shellExecutionService.js';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import * as crypto from 'crypto';
 import * as summarizer from '../utils/summarizer.js';
-import { GeminiClient } from '../core/client.js';
-import { ToolExecuteConfirmationDetails } from './tools.js';
-import os from 'os';
+import { ToolConfirmationOutcome } from './tools.js';
+import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js';
 
-describe('ShellTool Bug Reproduction', () => {
+describe('ShellTool', () => {
   let shellTool: ShellTool;
-  let config: Config;
+  let mockConfig: Config;
+  let mockShellOutputCallback: (event: ShellOutputEvent) => void;
+  let resolveExecutionPromise: (result: ShellExecutionResult) => void;
 
   beforeEach(() => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
-        [shellTool.name]: {},
-      }),
+    vi.clearAllMocks();
+
+    mockConfig = {
+      getCoreTools: vi.fn().mockReturnValue([]),
+      getExcludeTools: vi.fn().mockReturnValue([]),
+      getDebugMode: vi.fn().mockReturnValue(false),
+      getTargetDir: vi.fn().mockReturnValue('/test/dir'),
+      getSummarizeToolOutputConfig: vi.fn().mockReturnValue(undefined),
+      getGeminiClient: vi.fn(),
     } as unknown as Config;
-    shellTool = new ShellTool(config);
-  });
 
-  it('should not let the summarizer override the return display', async () => {
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+    shellTool = new ShellTool(mockConfig);
 
-    const abortSignal = new AbortController().signal;
-    const result = await shellTool.execute(
-      { command: 'echo hello' },
-      abortSignal,
-      () => {},
+    vi.mocked(os.platform).mockReturnValue('linux');
+    vi.mocked(os.tmpdir).mockReturnValue('/tmp');
+    (vi.mocked(crypto.randomBytes) as Mock).mockReturnValue(
+      Buffer.from('abcdef', 'hex'),
     );
 
-    expect(result.returnDisplay).toBe('hello' + os.EOL);
-    expect(result.llmContent).toBe('summarized output');
-    expect(summarizeSpy).toHaveBeenCalled();
+    // Capture the output callback to simulate streaming events from the service
+    mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => {
+      mockShellOutputCallback = callback;
+      return {
+        pid: 12345,
+        result: new Promise((resolve) => {
+          resolveExecutionPromise = resolve;
+        }),
+      };
+    });
   });
 
-  it('should not call summarizer if disabled in config', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({}),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+  describe('isCommandAllowed', () => {
+    it('should allow a command if no restrictions are provided', () => {
+      (mockConfig.getCoreTools as Mock).mockReturnValue(undefined);
+      (mockConfig.getExcludeTools as Mock).mockReturnValue(undefined);
+      expect(isCommandAllowed('ls -l', mockConfig).allowed).toBe(true);
+    });
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+    it('should block a command with command substitution using $()', () => {
+      expect(isCommandAllowed('echo $(rm -rf /)', mockConfig).allowed).toBe(
+        false,
+      );
+    });
+  });
 
-    const abortSignal = new AbortController().signal;
-    const result = await shellTool.execute(
-      { command: 'echo hello' },
-      abortSignal,
-      () => {},
-    );
+  describe('validateToolParams', () => {
+    it('should return null for a valid command', () => {
+      expect(shellTool.validateToolParams({ command: 'ls -l' })).toBeNull();
+    });
+
+    it('should return an error for an empty command', () => {
+      expect(shellTool.validateToolParams({ command: ' ' })).toBe(
+        'Command cannot be empty.',
+      );
+    });
 
-    expect(result.returnDisplay).toBe('hello' + os.EOL);
-    expect(result.llmContent).not.toBe('summarized output');
-    expect(summarizeSpy).not.toHaveBeenCalled();
+    it('should return an error for a non-existent directory', () => {
+      vi.mocked(fs.existsSync).mockReturnValue(false);
+      expect(
+        shellTool.validateToolParams({ command: 'ls', directory: 'rel/path' }),
+      ).toBe('Directory must exist.');
+    });
   });
 
-  it('should pass token budget to summarizer', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
+  describe('execute', () => {
+    const mockAbortSignal = new AbortController().signal;
+
+    const resolveShellExecution = (
+      result: Partial<ShellExecutionResult> = {},
+    ) => {
+      const fullResult: ShellExecutionResult = {
+        rawOutput: Buffer.from(result.output || ''),
+        output: 'Success',
+        stdout: 'Success',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+        ...result,
+      };
+      resolveExecutionPromise(fullResult);
+    };
+
+    it('should wrap command on linux and parse pgrep output', async () => {
+      const promise = shellTool.execute(
+        { command: 'my-command &' },
+        mockAbortSignal,
+      );
+      resolveShellExecution({ pid: 54321 });
+
+      vi.mocked(fs.existsSync).mockReturnValue(true);
+      vi.mocked(fs.readFileSync).mockReturnValue('54321\n54322\n'); // Service PID and background PID
+
+      const result = await promise;
+
+      const tmpFile = path.join(os.tmpdir(), 'shell_pgrep_abcdef.tmp');
+      const wrappedCommand = `{ my-command & }; __code=$?; pgrep -g 0 >${tmpFile} 2>&1; exit $__code;`;
+      expect(mockShellExecutionService).toHaveBeenCalledWith(
+        wrappedCommand,
+        expect.any(String),
+        expect.any(Function),
+        mockAbortSignal,
+      );
+      expect(result.llmContent).toContain('Background PIDs: 54322');
+      expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+    });
+
+    it('should not wrap command on windows', async () => {
+      vi.mocked(os.platform).mockReturnValue('win32');
+      const promise = shellTool.execute({ command: 'dir' }, mockAbortSignal);
+      resolveExecutionPromise({
+        rawOutput: Buffer.from(''),
+        output: '',
+        stdout: '',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+      });
+      await promise;
+      expect(mockShellExecutionService).toHaveBeenCalledWith(
+        'dir',
+        expect.any(String),
+        expect.any(Function),
+        mockAbortSignal,
+      );
+    });
+
+    it('should format error messages correctly', async () => {
+      const error = new Error('wrapped command failed');
+      const promise = shellTool.execute(
+        { command: 'user-command' },
+        mockAbortSignal,
+      );
+      resolveShellExecution({
+        error,
+        exitCode: 1,
+        output: 'err',
+        stderr: 'err',
+        rawOutput: Buffer.from('err'),
+        stdout: '',
+        signal: null,
+        aborted: false,
+        pid: 12345,
+      });
+
+      const result = await promise;
+      // The final llmContent should contain the user's command, not the wrapper
+      expect(result.llmContent).toContain('Error: wrapped command failed');
+      expect(result.llmContent).not.toContain('pgrep');
+    });
+
+    it('should summarize output when configured', async () => {
+      (mockConfig.getSummarizeToolOutputConfig as Mock).mockReturnValue({
         [shellTool.name]: { tokenBudget: 1000 },
-      }),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+      });
+      vi.mocked(summarizer.summarizeToolOutput).mockResolvedValue(
+        'summarized output',
+      );
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+      const promise = shellTool.execute({ command: 'ls' }, mockAbortSignal);
+      resolveExecutionPromise({
+        output: 'long output',
+        rawOutput: Buffer.from('long output'),
+        stdout: 'long output',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+      });
 
-    const abortSignal = new AbortController().signal;
-    await shellTool.execute({ command: 'echo "hello"' }, abortSignal, () => {});
+      const result = await promise;
 
-    expect(summarizeSpy).toHaveBeenCalledWith(
-      expect.any(String),
-      expect.any(Object),
-      expect.any(Object),
-      1000,
-    );
-  });
+      expect(summarizer.summarizeToolOutput).toHaveBeenCalledWith(
+        expect.any(String),
+        mockConfig.getGeminiClient(),
+        mockAbortSignal,
+        1000,
+      );
+      expect(result.llmContent).toBe('summarized output');
+      expect(result.returnDisplay).toBe('long output');
+    });
 
-  it('should use default token budget if not specified', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
-        [shellTool.name]: {},
-      }),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+    it('should clean up the temp file on synchronous execution error', async () => {
+      const error = new Error('sync spawn error');
+      mockShellExecutionService.mockImplementation(() => {
+        throw error;
+      });
+      vi.mocked(fs.existsSync).mockReturnValue(true); // Pretend the file exists
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+      await expect(
+        shellTool.execute({ command: 'a-command' }, mockAbortSignal),
+      ).rejects.toThrow(error);
 
-    const abortSignal = new AbortController().signal;
-    await shellTool.execute({ command: 'echo "hello"' }, abortSignal, () => {});
+      const tmpFile = path.join(os.tmpdir(), 'shell_pgrep_abcdef.tmp');
+      expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+    });
 
-    expect(summarizeSpy).toHaveBeenCalledWith(
-      expect.any(String),
-      expect.any(Object),
-      expect.any(Object),
-      undefined,
-    );
-  });
+    describe('Streaming to `updateOutput`', () => {
+      let updateOutputMock: Mock;
+      beforeEach(() => {
+        vi.useFakeTimers({ toFake: ['Date'] });
+        updateOutputMock = vi.fn();
+      });
+      afterEach(() => {
+        vi.useRealTimers();
+      });
 
-  it('should pass GEMINI_CLI environment variable to executed commands', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({}),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+      it('should throttle text output updates', async () => {
+        const promise = shellTool.execute(
+          { command: 'stream' },
+          mockAbortSignal,
+          updateOutputMock,
+        );
+
+        // First chunk, should be throttled.
+        mockShellOutputCallback({
+          type: 'data',
+          stream: 'stdout',
+          chunk: 'hello ',
+        });
+        expect(updateOutputMock).not.toHaveBeenCalled();
+
+        // Advance time past the throttle interval.
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+
+        // Send a second chunk. THIS event triggers the update with the CUMULATIVE content.
+        mockShellOutputCallback({
+          type: 'data',
+          stream: 'stderr',
+          chunk: 'world',
+        });
+
+        // It should have been called once now with the combined output.
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+        expect(updateOutputMock).toHaveBeenCalledWith('hello \nworld');
+
+        resolveExecutionPromise({
+          rawOutput: Buffer.from(''),
+          output: '',
+          stdout: '',
+          stderr: '',
+          exitCode: 0,
+          signal: null,
+          error: null,
+          aborted: false,
+          pid: 12345,
+        });
+        await promise;
+      });
+
+      it('should immediately show binary detection message and throttle progress', async () => {
+        const promise = shellTool.execute(
+          { command: 'cat img' },
+          mockAbortSignal,
+          updateOutputMock,
+        );
 
-    const abortSignal = new AbortController().signal;
-    const command =
-      os.platform() === 'win32' ? 'echo %GEMINI_CLI%' : 'echo "$GEMINI_CLI"';
-    const result = await shellTool.execute({ command }, abortSignal, () => {});
+        mockShellOutputCallback({ type: 'binary_detected' });
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+        expect(updateOutputMock).toHaveBeenCalledWith(
+          '[Binary output detected. Halting stream...]',
+        );
 
-    expect(result.returnDisplay).toBe('1' + os.EOL);
+        mockShellOutputCallback({
+          type: 'binary_progress',
+          bytesReceived: 1024,
+        });
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+
+        // Advance time past the throttle interval.
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+
+        // Send a SECOND progress event. This one will trigger the flush.
+        mockShellOutputCallback({
+          type: 'binary_progress',
+          bytesReceived: 2048,
+        });
+
+        // Now it should be called a second time with the latest progress.
+        expect(updateOutputMock).toHaveBeenCalledTimes(2);
+        expect(updateOutputMock).toHaveBeenLastCalledWith(
+          '[Receiving binary output... 2.0 KB received]',
+        );
+
+        resolveExecutionPromise({
+          rawOutput: Buffer.from(''),
+          output: '',
+          stdout: '',
+          stderr: '',
+          exitCode: 0,
+          signal: null,
+          error: null,
+          aborted: false,
+          pid: 12345,
+        });
+        await promise;
+      });
+    });
   });
-});
 
-describe('shouldConfirmExecute', () => {
-  it('should de-duplicate command roots before asking for confirmation', async () => {
-    const shellTool = new ShellTool({
-      getCoreTools: () => ['run_shell_command'],
-      getExcludeTools: () => [],
-    } as unknown as Config);
-    const result = (await shellTool.shouldConfirmExecute(
-      {
-        command: 'git status && git log',
-      },
-      new AbortController().signal,
-    )) as ToolExecuteConfirmationDetails;
-    expect(result.rootCommand).toEqual('git');
+  describe('shouldConfirmExecute', () => {
+    it('should request confirmation for a new command and whitelist it on "Always"', async () => {
+      const params = { command: 'npm install' };
+      const confirmation = await shellTool.shouldConfirmExecute(
+        params,
+        new AbortController().signal,
+      );
+
+      expect(confirmation).not.toBe(false);
+      expect(confirmation && confirmation.type).toBe('exec');
+
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      await (confirmation as any).onConfirm(
+        ToolConfirmationOutcome.ProceedAlways,
+      );
+
+      // Should now be whitelisted
+      const secondConfirmation = await shellTool.shouldConfirmExecute(
+        { command: 'npm test' },
+        new AbortController().signal,
+      );
+      expect(secondConfirmation).toBe(false);
+    });
+
+    it('should skip confirmation if validation fails', async () => {
+      const confirmation = await shellTool.shouldConfirmExecute(
+        { command: '' },
+        new AbortController().signal,
+      );
+      expect(confirmation).toBe(false);
+    });
   });
 });
diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts
index d90ae678..02fcbb7f 100644
--- a/packages/core/src/tools/shell.ts
+++ b/packages/core/src/tools/shell.ts
@@ -20,22 +20,25 @@ import {
 import { Type } from '@google/genai';
 import { SchemaValidator } from '../utils/schemaValidator.js';
 import { getErrorMessage } from '../utils/errors.js';
-import stripAnsi from 'strip-ansi';
+import { summarizeToolOutput } from '../utils/summarizer.js';
+import {
+  ShellExecutionService,
+  ShellOutputEvent,
+} from '../services/shellExecutionService.js';
+import { formatMemoryUsage } from '../utils/formatters.js';
 import {
   getCommandRoots,
   isCommandAllowed,
   stripShellWrapper,
 } from '../utils/shell-utils.js';
 
+export const OUTPUT_UPDATE_INTERVAL_MS = 1000;
+
 export interface ShellToolParams {
   command: string;
   description?: string;
   directory?: string;
 }
-import { spawn } from 'child_process';
-import { summarizeToolOutput } from '../utils/summarizer.js';
-
-const OUTPUT_UPDATE_INTERVAL_MS = 1000;
 
 export class ShellTool extends BaseTool<ShellToolParams, ToolResult> {
   static Name: string = 'run_shell_command';
@@ -196,216 +199,182 @@ export class ShellTool extends BaseTool<ShellToolParams, ToolResult> {
       .toString('hex')}.tmp`;
     const tempFilePath = path.join(os.tmpdir(), tempFileName);
 
-    // pgrep is not available on Windows, so we can't get background PIDs
-    const commandToExecute = isWindows
-      ? strippedCommand
-      : (() => {
-          // wrap command to append subprocess pids (via pgrep) to temporary file
-          let command = strippedCommand.trim();
-          if (!command.endsWith('&')) command += ';';
-          return `{ ${command} }; __code=$?; pgrep -g 0 >${tempFilePath} 2>&1; exit $__code;`;
-        })();
-
-    // spawn command in specified directory (or project root if not specified)
-    const shell = isWindows
-      ? spawn('cmd.exe', ['/c', commandToExecute], {
-          stdio: ['ignore', 'pipe', 'pipe'],
-          // detached: true, // ensure subprocess starts its own process group (esp. in Linux)
-          cwd: path.resolve(this.config.getTargetDir(), params.directory || ''),
-          env: {
-            ...process.env,
-            GEMINI_CLI: '1',
-          },
-        })
-      : spawn('bash', ['-c', commandToExecute], {
-          stdio: ['ignore', 'pipe', 'pipe'],
-          detached: true, // ensure subprocess starts its own process group (esp. in Linux)
-          cwd: path.resolve(this.config.getTargetDir(), params.directory || ''),
-          env: {
-            ...process.env,
-            GEMINI_CLI: '1',
-          },
-        });
+    try {
+      // pgrep is not available on Windows, so we can't get background PIDs
+      const commandToExecute = isWindows
+        ? strippedCommand
+        : (() => {
+            // wrap command to append subprocess pids (via pgrep) to temporary file
+            let command = strippedCommand.trim();
+            if (!command.endsWith('&')) command += ';';
+            return `{ ${command} }; __code=$?; pgrep -g 0 >${tempFilePath} 2>&1; exit $__code;`;
+          })();
 
-    let exited = false;
-    let stdout = '';
-    let output = '';
-    let lastUpdateTime = Date.now();
+      const cwd = path.resolve(
+        this.config.getTargetDir(),
+        params.directory || '',
+      );
 
-    const appendOutput = (str: string) => {
-      output += str;
-      if (
-        updateOutput &&
-        Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS
-      ) {
-        updateOutput(output);
-        lastUpdateTime = Date.now();
-      }
-    };
+      let cumulativeStdout = '';
+      let cumulativeStderr = '';
 
-    shell.stdout.on('data', (data: Buffer) => {
-      // continue to consume post-exit for background processes
-      // removing listeners can overflow OS buffer and block subprocesses
-      // destroying (e.g. shell.stdout.destroy()) can terminate subprocesses via SIGPIPE
-      if (!exited) {
-        const str = stripAnsi(data.toString());
-        stdout += str;
-        appendOutput(str);
-      }
-    });
+      let lastUpdateTime = Date.now();
+      let isBinaryStream = false;
 
-    let stderr = '';
-    shell.stderr.on('data', (data: Buffer) => {
-      if (!exited) {
-        const str = stripAnsi(data.toString());
-        stderr += str;
-        appendOutput(str);
-      }
-    });
-
-    let error: Error | null = null;
-    shell.on('error', (err: Error) => {
-      error = err;
-      // remove wrapper from user's command in error message
-      error.message = error.message.replace(commandToExecute, params.command);
-    });
+      const { result: resultPromise } = ShellExecutionService.execute(
+        commandToExecute,
+        cwd,
+        (event: ShellOutputEvent) => {
+          if (!updateOutput) {
+            return;
+          }
 
-    let code: number | null = null;
-    let processSignal: NodeJS.Signals | null = null;
-    const exitHandler = (
-      _code: number | null,
-      _signal: NodeJS.Signals | null,
-    ) => {
-      exited = true;
-      code = _code;
-      processSignal = _signal;
-    };
-    shell.on('exit', exitHandler);
+          let currentDisplayOutput = '';
+          let shouldUpdate = false;
 
-    const abortHandler = async () => {
-      if (shell.pid && !exited) {
-        if (os.platform() === 'win32') {
-          // For Windows, use taskkill to kill the process tree
-          spawn('taskkill', ['/pid', shell.pid.toString(), '/f', '/t']);
-        } else {
-          try {
-            // attempt to SIGTERM process group (negative PID)
-            // fall back to SIGKILL (to group) after 200ms
-            process.kill(-shell.pid, 'SIGTERM');
-            await new Promise((resolve) => setTimeout(resolve, 200));
-            if (shell.pid && !exited) {
-              process.kill(-shell.pid, 'SIGKILL');
-            }
-          } catch (_e) {
-            // if group kill fails, fall back to killing just the main process
-            try {
-              if (shell.pid) {
-                shell.kill('SIGKILL');
+          switch (event.type) {
+            case 'data':
+              if (isBinaryStream) break; // Don't process text if we are in binary mode
+              if (event.stream === 'stdout') {
+                cumulativeStdout += event.chunk;
+              } else {
+                cumulativeStderr += event.chunk;
+              }
+              currentDisplayOutput =
+                cumulativeStdout +
+                (cumulativeStderr ? `\n${cumulativeStderr}` : '');
+              if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) {
+                shouldUpdate = true;
+              }
+              break;
+            case 'binary_detected':
+              isBinaryStream = true;
+              currentDisplayOutput =
+                '[Binary output detected. Halting stream...]';
+              shouldUpdate = true;
+              break;
+            case 'binary_progress':
+              isBinaryStream = true;
+              currentDisplayOutput = `[Receiving binary output... ${formatMemoryUsage(
+                event.bytesReceived,
+              )} received]`;
+              if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) {
+                shouldUpdate = true;
               }
-            } catch (_e) {
-              console.error(`failed to kill shell process ${shell.pid}: ${_e}`);
+              break;
+            default: {
+              throw new Error('An unhandled ShellOutputEvent was found.');
             }
           }
-        }
-      }
-    };
-    signal.addEventListener('abort', abortHandler);
 
-    // wait for the shell to exit
-    try {
-      await new Promise((resolve) => shell.on('exit', resolve));
-    } finally {
-      signal.removeEventListener('abort', abortHandler);
-    }
+          if (shouldUpdate) {
+            updateOutput(currentDisplayOutput);
+            lastUpdateTime = Date.now();
+          }
+        },
+        signal,
+      );
 
-    // parse pids (pgrep output) from temporary file and remove it
-    const backgroundPIDs: number[] = [];
-    if (os.platform() !== 'win32') {
-      if (fs.existsSync(tempFilePath)) {
-        const pgrepLines = fs
-          .readFileSync(tempFilePath, 'utf8')
-          .split('\n')
-          .filter(Boolean);
-        for (const line of pgrepLines) {
-          if (!/^\d+$/.test(line)) {
-            console.error(`pgrep: ${line}`);
+      const result = await resultPromise;
+
+      const backgroundPIDs: number[] = [];
+      if (os.platform() !== 'win32') {
+        if (fs.existsSync(tempFilePath)) {
+          const pgrepLines = fs
+            .readFileSync(tempFilePath, 'utf8')
+            .split('\n')
+            .filter(Boolean);
+          for (const line of pgrepLines) {
+            if (!/^\d+$/.test(line)) {
+              console.error(`pgrep: ${line}`);
+            }
+            const pid = Number(line);
+            if (pid !== result.pid) {
+              backgroundPIDs.push(pid);
+            }
           }
-          const pid = Number(line);
-          // exclude the shell subprocess pid
-          if (pid !== shell.pid) {
-            backgroundPIDs.push(pid);
+        } else {
+          if (!signal.aborted) {
+            console.error('missing pgrep output');
           }
         }
-        fs.unlinkSync(tempFilePath);
-      } else {
-        if (!signal.aborted) {
-          console.error('missing pgrep output');
-        }
       }
-    }
 
-    let llmContent = '';
-    if (signal.aborted) {
-      llmContent = 'Command was cancelled by user before it could complete.';
-      if (output.trim()) {
-        llmContent += ` Below is the output (on stdout and stderr) before it was cancelled:\n${output}`;
+      let llmContent = '';
+      if (result.aborted) {
+        llmContent = 'Command was cancelled by user before it could complete.';
+        if (result.output.trim()) {
+          llmContent += ` Below is the output (on stdout and stderr) before it was cancelled:\n${result.output}`;
+        } else {
+          llmContent += ' There was no output before it was cancelled.';
+        }
       } else {
-        llmContent += ' There was no output before it was cancelled.';
+        // Create a formatted error string for display, replacing the wrapper command
+        // with the user-facing command.
+        const finalError = result.error
+          ? result.error.message.replace(commandToExecute, params.command)
+          : '(none)';
+
+        llmContent = [
+          `Command: ${params.command}`,
+          `Directory: ${params.directory || '(root)'}`,
+          `Stdout: ${result.stdout || '(empty)'}`,
+          `Stderr: ${result.stderr || '(empty)'}`,
+          `Error: ${finalError}`, // Use the cleaned error string.
+          `Exit Code: ${result.exitCode ?? '(none)'}`,
+          `Signal: ${result.signal ?? '(none)'}`,
+          `Background PIDs: ${
+            backgroundPIDs.length ? backgroundPIDs.join(', ') : '(none)'
+          }`,
+          `Process Group PGID: ${result.pid ?? '(none)'}`,
+        ].join('\n');
       }
-    } else {
-      llmContent = [
-        `Command: ${params.command}`,
-        `Directory: ${params.directory || '(root)'}`,
-        `Stdout: ${stdout || '(empty)'}`,
-        `Stderr: ${stderr || '(empty)'}`,
-        `Error: ${error ?? '(none)'}`,
-        `Exit Code: ${code ?? '(none)'}`,
-        `Signal: ${processSignal ?? '(none)'}`,
-        `Background PIDs: ${backgroundPIDs.length ? backgroundPIDs.join(', ') : '(none)'}`,
-        `Process Group PGID: ${shell.pid ?? '(none)'}`,
-      ].join('\n');
-    }
 
-    let returnDisplayMessage = '';
-    if (this.config.getDebugMode()) {
-      returnDisplayMessage = llmContent;
-    } else {
-      if (output.trim()) {
-        returnDisplayMessage = output;
+      let returnDisplayMessage = '';
+      if (this.config.getDebugMode()) {
+        returnDisplayMessage = llmContent;
       } else {
-        // Output is empty, let's provide a reason if the command failed or was cancelled
-        if (signal.aborted) {
-          returnDisplayMessage = 'Command cancelled by user.';
-        } else if (processSignal) {
-          returnDisplayMessage = `Command terminated by signal: ${processSignal}`;
-        } else if (error) {
-          // If error is not null, it's an Error object (or other truthy value)
-          returnDisplayMessage = `Command failed: ${getErrorMessage(error)}`;
-        } else if (code !== null && code !== 0) {
-          returnDisplayMessage = `Command exited with code: ${code}`;
+        if (result.output.trim()) {
+          returnDisplayMessage = result.output;
+        } else {
+          if (result.aborted) {
+            returnDisplayMessage = 'Command cancelled by user.';
+          } else if (result.signal) {
+            returnDisplayMessage = `Command terminated by signal: ${result.signal}`;
+          } else if (result.error) {
+            returnDisplayMessage = `Command failed: ${getErrorMessage(
+              result.error,
+            )}`;
+          } else if (result.exitCode !== null && result.exitCode !== 0) {
+            returnDisplayMessage = `Command exited with code: ${result.exitCode}`;
+          }
+          // If output is empty and command succeeded (code 0, no error/signal/abort),
+          // returnDisplayMessage will remain empty, which is fine.
         }
-        // If output is empty and command succeeded (code 0, no error/signal/abort),
-        // returnDisplayMessage will remain empty, which is fine.
       }
-    }
 
-    const summarizeConfig = this.config.getSummarizeToolOutputConfig();
-    if (summarizeConfig && summarizeConfig[this.name]) {
-      const summary = await summarizeToolOutput(
-        llmContent,
-        this.config.getGeminiClient(),
-        signal,
-        summarizeConfig[this.name].tokenBudget,
-      );
+      const summarizeConfig = this.config.getSummarizeToolOutputConfig();
+      if (summarizeConfig && summarizeConfig[this.name]) {
+        const summary = await summarizeToolOutput(
+          llmContent,
+          this.config.getGeminiClient(),
+          signal,
+          summarizeConfig[this.name].tokenBudget,
+        );
+        return {
+          llmContent: summary,
+          returnDisplay: returnDisplayMessage,
+        };
+      }
+
       return {
-        llmContent: summary,
+        llmContent,
         returnDisplay: returnDisplayMessage,
       };
+    } finally {
+      if (fs.existsSync(tempFilePath)) {
+        fs.unlinkSync(tempFilePath);
+      }
     }
-
-    return {
-      llmContent,
-      returnDisplay: returnDisplayMessage,
-    };
   }
 }
diff --git a/packages/core/src/utils/formatters.ts b/packages/core/src/utils/formatters.ts
new file mode 100644
index 00000000..ab02160e
--- /dev/null
+++ b/packages/core/src/utils/formatters.ts
@@ -0,0 +1,16 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export const formatMemoryUsage = (bytes: number): string => {
+  const gb = bytes / (1024 * 1024 * 1024);
+  if (bytes < 1024 * 1024) {
+    return `${(bytes / 1024).toFixed(1)} KB`;
+  }
+  if (bytes < 1024 * 1024 * 1024) {
+    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+  }
+  return `${gb.toFixed(2)} GB`;
+};
diff --git a/packages/core/src/utils/textUtils.ts b/packages/core/src/utils/textUtils.ts
new file mode 100644
index 00000000..3bcc4759
--- /dev/null
+++ b/packages/core/src/utils/textUtils.ts
@@ -0,0 +1,34 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Checks if a Buffer is likely binary by testing for the presence of a NULL byte.
+ * The presence of a NULL byte is a strong indicator that the data is not plain text.
+ * @param data The Buffer to check.
+ * @param sampleSize The number of bytes from the start of the buffer to test.
+ * @returns True if a NULL byte is found, false otherwise.
+ */
+export function isBinary(
+  data: Buffer | null | undefined,
+  sampleSize = 512,
+): boolean {
+  if (!data) {
+    return false;
+  }
+
+  const sample = data.length > sampleSize ? data.subarray(0, sampleSize) : data;
+
+  for (const byte of sample) {
+    // The presence of a NULL byte (0x00) is one of the most reliable
+    // indicators of a binary file. Text files should not contain them.
+    if (byte === 0) {
+      return true;
+    }
+  }
+
+  // If no NULL bytes were found in the sample, we assume it's text.
+  return false;
+}
author	Abhi <[email protected]>	2025-07-25 21:56:49 -0400
committer	GitHub <[email protected]>	2025-07-26 01:56:49 +0000
commit	ca5dd28ab60d78f42150460cbe9d4ed58d40afe4 (patch)
tree	6f3b3b373133a6c0bf8bb9cee8a10cc06ac83c9a
parent	ad2ef080aae2e21bf04ad8e922719ceaa81f1e5f (diff)