7 files changed, 1147 insertions, 317 deletions
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 829de544..b4ce7e85 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -36,6 +36,8 @@ export * from './utils/quotaErrorDetection.js';
 export * from './utils/fileUtils.js';
 export * from './utils/retry.js';
 export * from './utils/systemEncoding.js';
+export * from './utils/textUtils.js';
+export * from './utils/formatters.js';
 
 // Export services
 export * from './services/fileDiscoveryService.js';
@@ -45,6 +47,9 @@ export * from './services/gitService.js';
 export * from './ide/ide-client.js';
 export * from './ide/ideContext.js';
 
+// Export Shell Execution Service
+export * from './services/shellExecutionService.js';
+
 // Export base tool definitions
 export * from './tools/tools.js';
 export * from './tools/tool-registry.js';
diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts
new file mode 100644
index 00000000..4d1655a2
--- /dev/null
+++ b/packages/core/src/services/shellExecutionService.test.ts
@@ -0,0 +1,357 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { vi, describe, it, expect, beforeEach, type Mock } from 'vitest';
+const mockSpawn = vi.hoisted(() => vi.fn());
+vi.mock('child_process', () => ({
+  spawn: mockSpawn,
+}));
+
+import EventEmitter from 'events';
+import { Readable } from 'stream';
+import { type ChildProcess } from 'child_process';
+import {
+  ShellExecutionService,
+  ShellOutputEvent,
+} from './shellExecutionService.js';
+
+const mockIsBinary = vi.hoisted(() => vi.fn());
+vi.mock('../utils/textUtils.js', () => ({
+  isBinary: mockIsBinary,
+}));
+
+const mockPlatform = vi.hoisted(() => vi.fn());
+vi.mock('os', () => ({
+  default: {
+    platform: mockPlatform,
+  },
+  platform: mockPlatform,
+}));
+
+const mockProcessKill = vi
+  .spyOn(process, 'kill')
+  .mockImplementation(() => true);
+
+describe('ShellExecutionService', () => {
+  let mockChildProcess: EventEmitter & Partial<ChildProcess>;
+  let onOutputEventMock: Mock<(event: ShellOutputEvent) => void>;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+
+    mockIsBinary.mockReturnValue(false);
+    mockPlatform.mockReturnValue('linux');
+
+    onOutputEventMock = vi.fn();
+
+    mockChildProcess = new EventEmitter() as EventEmitter &
+      Partial<ChildProcess>;
+    // FIX: Cast simple EventEmitters to the expected stream type.
+    mockChildProcess.stdout = new EventEmitter() as Readable;
+    mockChildProcess.stderr = new EventEmitter() as Readable;
+    mockChildProcess.kill = vi.fn();
+
+    // FIX: Use Object.defineProperty to set the readonly 'pid' property.
+    Object.defineProperty(mockChildProcess, 'pid', {
+      value: 12345,
+      configurable: true,
+    });
+
+    mockSpawn.mockReturnValue(mockChildProcess);
+  });
+
+  // Helper function to run a standard execution simulation
+  const simulateExecution = async (
+    command: string,
+    simulation: (cp: typeof mockChildProcess, ac: AbortController) => void,
+  ) => {
+    const abortController = new AbortController();
+    const handle = ShellExecutionService.execute(
+      command,
+      '/test/dir',
+      onOutputEventMock,
+      abortController.signal,
+    );
+
+    await new Promise((resolve) => setImmediate(resolve));
+    simulation(mockChildProcess, abortController);
+    const result = await handle.result;
+    return { result, handle, abortController };
+  };
+
+  describe('Successful Execution', () => {
+    it('should execute a command and capture stdout and stderr', async () => {
+      const { result, handle } = await simulateExecution('ls -l', (cp) => {
+        cp.stdout?.emit('data', Buffer.from('file1.txt\n'));
+        cp.stderr?.emit('data', Buffer.from('a warning'));
+        cp.emit('exit', 0, null);
+      });
+
+      expect(mockSpawn).toHaveBeenCalledWith(
+        'bash',
+        ['-c', 'ls -l'],
+        expect.any(Object),
+      );
+      expect(result.exitCode).toBe(0);
+      expect(result.signal).toBeNull();
+      expect(result.error).toBeNull();
+      expect(result.aborted).toBe(false);
+      expect(result.stdout).toBe('file1.txt\n');
+      expect(result.stderr).toBe('a warning');
+      expect(result.output).toBe('file1.txt\n\na warning');
+      expect(handle.pid).toBe(12345);
+
+      expect(onOutputEventMock).toHaveBeenCalledWith({
+        type: 'data',
+        stream: 'stdout',
+        chunk: 'file1.txt\n',
+      });
+      expect(onOutputEventMock).toHaveBeenCalledWith({
+        type: 'data',
+        stream: 'stderr',
+        chunk: 'a warning',
+      });
+    });
+
+    it('should strip ANSI codes from output', async () => {
+      const { result } = await simulateExecution('ls --color=auto', (cp) => {
+        cp.stdout?.emit('data', Buffer.from('a\u001b[31mred\u001b[0mword'));
+        cp.emit('exit', 0, null);
+      });
+
+      expect(result.stdout).toBe('aredword');
+      expect(onOutputEventMock).toHaveBeenCalledWith({
+        type: 'data',
+        stream: 'stdout',
+        chunk: 'aredword',
+      });
+    });
+
+    it('should correctly decode multi-byte characters split across chunks', async () => {
+      const { result } = await simulateExecution('echo "你好"', (cp) => {
+        const multiByteChar = Buffer.from('你好', 'utf-8');
+        cp.stdout?.emit('data', multiByteChar.slice(0, 2));
+        cp.stdout?.emit('data', multiByteChar.slice(2));
+        cp.emit('exit', 0, null);
+      });
+      expect(result.stdout).toBe('你好');
+    });
+
+    it('should handle commands with no output', async () => {
+      const { result } = await simulateExecution('touch file', (cp) => {
+        cp.emit('exit', 0, null);
+      });
+
+      expect(result.stdout).toBe('');
+      expect(result.stderr).toBe('');
+      expect(result.output).toBe('');
+      expect(onOutputEventMock).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('Failed Execution', () => {
+    it('should capture a non-zero exit code and format output correctly', async () => {
+      const { result } = await simulateExecution('a-bad-command', (cp) => {
+        cp.stderr?.emit('data', Buffer.from('command not found'));
+        cp.emit('exit', 127, null);
+      });
+
+      expect(result.exitCode).toBe(127);
+      expect(result.stderr).toBe('command not found');
+      expect(result.stdout).toBe('');
+      expect(result.output).toBe('\ncommand not found');
+      expect(result.error).toBeNull();
+    });
+
+    it('should capture a termination signal', async () => {
+      const { result } = await simulateExecution('long-process', (cp) => {
+        cp.emit('exit', null, 'SIGTERM');
+      });
+
+      expect(result.exitCode).toBeNull();
+      expect(result.signal).toBe('SIGTERM');
+    });
+
+    it('should handle a spawn error', async () => {
+      const spawnError = new Error('spawn EACCES');
+      const { result } = await simulateExecution('protected-cmd', (cp) => {
+        cp.emit('error', spawnError);
+        cp.emit('exit', 1, null);
+      });
+
+      expect(result.error).toBe(spawnError);
+      expect(result.exitCode).toBe(1);
+    });
+  });
+
+  describe('Aborting Commands', () => {
+    describe.each([
+      {
+        platform: 'linux',
+        expectedSignal: 'SIGTERM',
+        expectedExit: { signal: 'SIGKILL' as const },
+      },
+      {
+        platform: 'win32',
+        expectedCommand: 'taskkill',
+        expectedExit: { code: 1 },
+      },
+    ])(
+      'on $platform',
+      ({ platform, expectedSignal, expectedCommand, expectedExit }) => {
+        it('should abort a running process and set the aborted flag', async () => {
+          mockPlatform.mockReturnValue(platform);
+
+          const { result } = await simulateExecution(
+            'sleep 10',
+            (cp, abortController) => {
+              abortController.abort();
+              if (expectedExit.signal)
+                cp.emit('exit', null, expectedExit.signal);
+              if (typeof expectedExit.code === 'number')
+                cp.emit('exit', expectedExit.code, null);
+            },
+          );
+
+          expect(result.aborted).toBe(true);
+
+          if (platform === 'linux') {
+            expect(mockProcessKill).toHaveBeenCalledWith(
+              -mockChildProcess.pid!,
+              expectedSignal,
+            );
+          } else {
+            expect(mockSpawn).toHaveBeenCalledWith(expectedCommand, [
+              '/pid',
+              String(mockChildProcess.pid),
+              '/f',
+              '/t',
+            ]);
+          }
+        });
+      },
+    );
+
+    it('should gracefully attempt SIGKILL on linux if SIGTERM fails', async () => {
+      mockPlatform.mockReturnValue('linux');
+      vi.useFakeTimers();
+
+      // Don't await the result inside the simulation block for this specific test.
+      // We need to control the timeline manually.
+      const abortController = new AbortController();
+      const handle = ShellExecutionService.execute(
+        'unresponsive_process',
+        '/test/dir',
+        onOutputEventMock,
+        abortController.signal,
+      );
+
+      abortController.abort();
+
+      // Check the first kill signal
+      expect(mockProcessKill).toHaveBeenCalledWith(
+        -mockChildProcess.pid!,
+        'SIGTERM',
+      );
+
+      // Now, advance time past the timeout
+      await vi.advanceTimersByTimeAsync(250);
+
+      // Check the second kill signal
+      expect(mockProcessKill).toHaveBeenCalledWith(
+        -mockChildProcess.pid!,
+        'SIGKILL',
+      );
+
+      // Finally, simulate the process exiting and await the result
+      mockChildProcess.emit('exit', null, 'SIGKILL');
+      const result = await handle.result;
+
+      vi.useRealTimers();
+
+      expect(result.aborted).toBe(true);
+      expect(result.signal).toBe('SIGKILL');
+      // The individual kill calls were already asserted above.
+      expect(mockProcessKill).toHaveBeenCalledTimes(2);
+    });
+  });
+
+  describe('Binary Output', () => {
+    it('should detect binary output and switch to progress events', async () => {
+      mockIsBinary.mockReturnValueOnce(true);
+      const binaryChunk1 = Buffer.from([0x89, 0x50, 0x4e, 0x47]);
+      const binaryChunk2 = Buffer.from([0x0d, 0x0a, 0x1a, 0x0a]);
+
+      const { result } = await simulateExecution('cat image.png', (cp) => {
+        cp.stdout?.emit('data', binaryChunk1);
+        cp.stdout?.emit('data', binaryChunk2);
+        cp.emit('exit', 0, null);
+      });
+
+      expect(result.rawOutput).toEqual(
+        Buffer.concat([binaryChunk1, binaryChunk2]),
+      );
+      expect(onOutputEventMock).toHaveBeenCalledTimes(3);
+      expect(onOutputEventMock.mock.calls[0][0]).toEqual({
+        type: 'binary_detected',
+      });
+      expect(onOutputEventMock.mock.calls[1][0]).toEqual({
+        type: 'binary_progress',
+        bytesReceived: 4,
+      });
+      expect(onOutputEventMock.mock.calls[2][0]).toEqual({
+        type: 'binary_progress',
+        bytesReceived: 8,
+      });
+    });
+
+    it('should not emit data events after binary is detected', async () => {
+      mockIsBinary.mockImplementation((buffer) => buffer.includes(0x00));
+
+      await simulateExecution('cat mixed_file', (cp) => {
+        cp.stdout?.emit('data', Buffer.from('some text'));
+        cp.stdout?.emit('data', Buffer.from([0x00, 0x01, 0x02]));
+        cp.stdout?.emit('data', Buffer.from('more text'));
+        cp.emit('exit', 0, null);
+      });
+
+      // FIX: Provide explicit type for the 'call' parameter in the map function.
+      const eventTypes = onOutputEventMock.mock.calls.map(
+        (call: [ShellOutputEvent]) => call[0].type,
+      );
+      expect(eventTypes).toEqual([
+        'data',
+        'binary_detected',
+        'binary_progress',
+        'binary_progress',
+      ]);
+    });
+  });
+
+  describe('Platform-Specific Behavior', () => {
+    it('should use cmd.exe on Windows', async () => {
+      mockPlatform.mockReturnValue('win32');
+      await simulateExecution('dir', (cp) => cp.emit('exit', 0, null));
+
+      expect(mockSpawn).toHaveBeenCalledWith(
+        'cmd.exe',
+        ['/c', 'dir'],
+        expect.objectContaining({ detached: false }),
+      );
+    });
+
+    it('should use bash and detached process group on Linux', async () => {
+      mockPlatform.mockReturnValue('linux');
+      await simulateExecution('ls', (cp) => cp.emit('exit', 0, null));
+
+      expect(mockSpawn).toHaveBeenCalledWith(
+        'bash',
+        ['-c', 'ls'],
+        expect.objectContaining({ detached: true }),
+      );
+    });
+  });
+});
diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts
new file mode 100644
index 00000000..0f0002cd
--- /dev/null
+++ b/packages/core/src/services/shellExecutionService.ts
@@ -0,0 +1,229 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { spawn } from 'child_process';
+import { TextDecoder } from 'util';
+import os from 'os';
+import stripAnsi from 'strip-ansi';
+import { getCachedEncodingForBuffer } from '../utils/systemEncoding.js';
+import { isBinary } from '../utils/textUtils.js';
+
+const SIGKILL_TIMEOUT_MS = 200;
+
+/** A structured result from a shell command execution. */
+export interface ShellExecutionResult {
+  /** The raw, unprocessed output buffer. */
+  rawOutput: Buffer;
+  /** The combined, decoded stdout and stderr as a string. */
+  output: string;
+  /** The decoded stdout as a string. */
+  stdout: string;
+  /** The decoded stderr as a string. */
+  stderr: string;
+  /** The process exit code, or null if terminated by a signal. */
+  exitCode: number | null;
+  /** The signal that terminated the process, if any. */
+  signal: NodeJS.Signals | null;
+  /** An error object if the process failed to spawn. */
+  error: Error | null;
+  /** A boolean indicating if the command was aborted by the user. */
+  aborted: boolean;
+  /** The process ID of the spawned shell. */
+  pid: number | undefined;
+}
+
+/** A handle for an ongoing shell execution. */
+export interface ShellExecutionHandle {
+  /** The process ID of the spawned shell. */
+  pid: number | undefined;
+  /** A promise that resolves with the complete execution result. */
+  result: Promise<ShellExecutionResult>;
+}
+
+/**
+ * Describes a structured event emitted during shell command execution.
+ */
+export type ShellOutputEvent =
+  | {
+      /** The event contains a chunk of output data. */
+      type: 'data';
+      /** The stream from which the data originated. */
+      stream: 'stdout' | 'stderr';
+      /** The decoded string chunk. */
+      chunk: string;
+    }
+  | {
+      /** Signals that the output stream has been identified as binary. */
+      type: 'binary_detected';
+    }
+  | {
+      /** Provides progress updates for a binary stream. */
+      type: 'binary_progress';
+      /** The total number of bytes received so far. */
+      bytesReceived: number;
+    };
+
+/**
+ * A centralized service for executing shell commands with robust process
+ * management, cross-platform compatibility, and streaming output capabilities.
+ *
+ */
+export class ShellExecutionService {
+  /**
+   * Executes a shell command using `spawn`, capturing all output and lifecycle events.
+   *
+   * @param commandToExecute The exact command string to run.
+   * @param cwd The working directory to execute the command in.
+   * @param onOutputEvent A callback for streaming structured events about the execution, including data chunks and status updates.
+   * @param abortSignal An AbortSignal to terminate the process and its children.
+   * @returns An object containing the process ID (pid) and a promise that
+   *          resolves with the complete execution result.
+   */
+  static execute(
+    commandToExecute: string,
+    cwd: string,
+    onOutputEvent: (event: ShellOutputEvent) => void,
+    abortSignal: AbortSignal,
+  ): ShellExecutionHandle {
+    const isWindows = os.platform() === 'win32';
+    const shell = isWindows ? 'cmd.exe' : 'bash';
+    const shellArgs = [isWindows ? '/c' : '-c', commandToExecute];
+
+    const child = spawn(shell, shellArgs, {
+      cwd,
+      stdio: ['ignore', 'pipe', 'pipe'],
+      detached: !isWindows, // Use process groups on non-Windows for robust killing
+      env: {
+        ...process.env,
+        GEMINI_CLI: '1',
+      },
+    });
+
+    const result = new Promise<ShellExecutionResult>((resolve) => {
+      // Use decoders to handle multi-byte characters safely (for streaming output).
+      let stdoutDecoder: TextDecoder | null = null;
+      let stderrDecoder: TextDecoder | null = null;
+
+      let stdout = '';
+      let stderr = '';
+      const outputChunks: Buffer[] = [];
+      let error: Error | null = null;
+      let exited = false;
+
+      let isStreamingRawContent = true;
+      const MAX_SNIFF_SIZE = 4096;
+      let sniffedBytes = 0;
+
+      const handleOutput = (data: Buffer, stream: 'stdout' | 'stderr') => {
+        if (!stdoutDecoder || !stderrDecoder) {
+          const encoding = getCachedEncodingForBuffer(data);
+          try {
+            stdoutDecoder = new TextDecoder(encoding);
+            stderrDecoder = new TextDecoder(encoding);
+          } catch {
+            // If the encoding is not supported, fall back to utf-8.
+            // This can happen on some platforms for certain encodings like 'utf-32le'.
+            stdoutDecoder = new TextDecoder('utf-8');
+            stderrDecoder = new TextDecoder('utf-8');
+          }
+        }
+
+        outputChunks.push(data);
+
+        // Binary detection logic. This only runs until we've made a determination.
+        if (isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE) {
+          const sniffBuffer = Buffer.concat(outputChunks.slice(0, 20));
+          sniffedBytes = sniffBuffer.length;
+
+          if (isBinary(sniffBuffer)) {
+            // Change state to stop streaming raw content.
+            isStreamingRawContent = false;
+            onOutputEvent({ type: 'binary_detected' });
+          }
+        }
+
+        const decodedChunk =
+          stream === 'stdout'
+            ? stdoutDecoder.decode(data, { stream: true })
+            : stderrDecoder.decode(data, { stream: true });
+        const strippedChunk = stripAnsi(decodedChunk);
+
+        if (stream === 'stdout') {
+          stdout += strippedChunk;
+        } else {
+          stderr += strippedChunk;
+        }
+
+        if (isStreamingRawContent) {
+          onOutputEvent({ type: 'data', stream, chunk: strippedChunk });
+        } else {
+          const totalBytes = outputChunks.reduce(
+            (sum, chunk) => sum + chunk.length,
+            0,
+          );
+          onOutputEvent({ type: 'binary_progress', bytesReceived: totalBytes });
+        }
+      };
+
+      child.stdout.on('data', (data) => handleOutput(data, 'stdout'));
+      child.stderr.on('data', (data) => handleOutput(data, 'stderr'));
+      child.on('error', (err) => {
+        error = err;
+      });
+
+      const abortHandler = async () => {
+        if (child.pid && !exited) {
+          if (isWindows) {
+            spawn('taskkill', ['/pid', child.pid.toString(), '/f', '/t']);
+          } else {
+            try {
+              // Kill the entire process group (negative PID).
+              // SIGTERM first, then SIGKILL if it doesn't die.
+              process.kill(-child.pid, 'SIGTERM');
+              await new Promise((res) => setTimeout(res, SIGKILL_TIMEOUT_MS));
+              if (!exited) {
+                process.kill(-child.pid, 'SIGKILL');
+              }
+            } catch (_e) {
+              // Fall back to killing just the main process if group kill fails.
+              if (!exited) child.kill('SIGKILL');
+            }
+          }
+        }
+      };
+
+      abortSignal.addEventListener('abort', abortHandler, { once: true });
+
+      child.on('exit', (code, signal) => {
+        exited = true;
+        abortSignal.removeEventListener('abort', abortHandler);
+
+        if (stdoutDecoder) {
+          stdout += stripAnsi(stdoutDecoder.decode());
+        }
+        if (stderrDecoder) {
+          stderr += stripAnsi(stderrDecoder.decode());
+        }
+
+        const finalBuffer = Buffer.concat(outputChunks);
+
+        resolve({
+          rawOutput: finalBuffer,
+          output: stdout + (stderr ? `\n${stderr}` : ''),
+          stdout,
+          stderr,
+          exitCode: code,
+          signal,
+          error,
+          aborted: abortSignal.aborted,
+          pid: child.pid,
+        });
+      });
+    });
+
+    return { pid: child.pid, result };
+  }
+}
diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts
index 460c871a..55364197 100644
--- a/packages/core/src/tools/shell.test.ts
+++ b/packages/core/src/tools/shell.test.ts
@@ -4,164 +4,384 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { expect, describe, it, vi, beforeEach } from 'vitest';
+import {
+  vi,
+  describe,
+  it,
+  expect,
+  beforeEach,
+  afterEach,
+  type Mock,
+} from 'vitest';
+
+const mockShellExecutionService = vi.hoisted(() => vi.fn());
+vi.mock('../services/shellExecutionService.js', () => ({
+  ShellExecutionService: { execute: mockShellExecutionService },
+}));
+vi.mock('fs');
+vi.mock('os');
+vi.mock('crypto');
+vi.mock('../utils/summarizer.js');
+
+import { isCommandAllowed } from '../utils/shell-utils.js';
 import { ShellTool } from './shell.js';
-import { Config } from '../config/config.js';
+import { type Config } from '../config/config.js';
+import {
+  type ShellExecutionResult,
+  type ShellOutputEvent,
+} from '../services/shellExecutionService.js';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import * as crypto from 'crypto';
 import * as summarizer from '../utils/summarizer.js';
-import { GeminiClient } from '../core/client.js';
-import { ToolExecuteConfirmationDetails } from './tools.js';
-import os from 'os';
+import { ToolConfirmationOutcome } from './tools.js';
+import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js';
 
-describe('ShellTool Bug Reproduction', () => {
+describe('ShellTool', () => {
   let shellTool: ShellTool;
-  let config: Config;
+  let mockConfig: Config;
+  let mockShellOutputCallback: (event: ShellOutputEvent) => void;
+  let resolveExecutionPromise: (result: ShellExecutionResult) => void;
 
   beforeEach(() => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
-        [shellTool.name]: {},
-      }),
+    vi.clearAllMocks();
+
+    mockConfig = {
+      getCoreTools: vi.fn().mockReturnValue([]),
+      getExcludeTools: vi.fn().mockReturnValue([]),
+      getDebugMode: vi.fn().mockReturnValue(false),
+      getTargetDir: vi.fn().mockReturnValue('/test/dir'),
+      getSummarizeToolOutputConfig: vi.fn().mockReturnValue(undefined),
+      getGeminiClient: vi.fn(),
     } as unknown as Config;
-    shellTool = new ShellTool(config);
-  });
 
-  it('should not let the summarizer override the return display', async () => {
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+    shellTool = new ShellTool(mockConfig);
 
-    const abortSignal = new AbortController().signal;
-    const result = await shellTool.execute(
-      { command: 'echo hello' },
-      abortSignal,
-      () => {},
+    vi.mocked(os.platform).mockReturnValue('linux');
+    vi.mocked(os.tmpdir).mockReturnValue('/tmp');
+    (vi.mocked(crypto.randomBytes) as Mock).mockReturnValue(
+      Buffer.from('abcdef', 'hex'),
     );
 
-    expect(result.returnDisplay).toBe('hello' + os.EOL);
-    expect(result.llmContent).toBe('summarized output');
-    expect(summarizeSpy).toHaveBeenCalled();
+    // Capture the output callback to simulate streaming events from the service
+    mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => {
+      mockShellOutputCallback = callback;
+      return {
+        pid: 12345,
+        result: new Promise((resolve) => {
+          resolveExecutionPromise = resolve;
+        }),
+      };
+    });
   });
 
-  it('should not call summarizer if disabled in config', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({}),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+  describe('isCommandAllowed', () => {
+    it('should allow a command if no restrictions are provided', () => {
+      (mockConfig.getCoreTools as Mock).mockReturnValue(undefined);
+      (mockConfig.getExcludeTools as Mock).mockReturnValue(undefined);
+      expect(isCommandAllowed('ls -l', mockConfig).allowed).toBe(true);
+    });
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+    it('should block a command with command substitution using $()', () => {
+      expect(isCommandAllowed('echo $(rm -rf /)', mockConfig).allowed).toBe(
+        false,
+      );
+    });
+  });
 
-    const abortSignal = new AbortController().signal;
-    const result = await shellTool.execute(
-      { command: 'echo hello' },
-      abortSignal,
-      () => {},
-    );
+  describe('validateToolParams', () => {
+    it('should return null for a valid command', () => {
+      expect(shellTool.validateToolParams({ command: 'ls -l' })).toBeNull();
+    });
+
+    it('should return an error for an empty command', () => {
+      expect(shellTool.validateToolParams({ command: ' ' })).toBe(
+        'Command cannot be empty.',
+      );
+    });
 
-    expect(result.returnDisplay).toBe('hello' + os.EOL);
-    expect(result.llmContent).not.toBe('summarized output');
-    expect(summarizeSpy).not.toHaveBeenCalled();
+    it('should return an error for a non-existent directory', () => {
+      vi.mocked(fs.existsSync).mockReturnValue(false);
+      expect(
+        shellTool.validateToolParams({ command: 'ls', directory: 'rel/path' }),
+      ).toBe('Directory must exist.');
+    });
   });
 
-  it('should pass token budget to summarizer', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
+  describe('execute', () => {
+    const mockAbortSignal = new AbortController().signal;
+
+    const resolveShellExecution = (
+      result: Partial<ShellExecutionResult> = {},
+    ) => {
+      const fullResult: ShellExecutionResult = {
+        rawOutput: Buffer.from(result.output || ''),
+        output: 'Success',
+        stdout: 'Success',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+        ...result,
+      };
+      resolveExecutionPromise(fullResult);
+    };
+
+    it('should wrap command on linux and parse pgrep output', async () => {
+      const promise = shellTool.execute(
+        { command: 'my-command &' },
+        mockAbortSignal,
+      );
+      resolveShellExecution({ pid: 54321 });
+
+      vi.mocked(fs.existsSync).mockReturnValue(true);
+      vi.mocked(fs.readFileSync).mockReturnValue('54321\n54322\n'); // Service PID and background PID
+
+      const result = await promise;
+
+      const tmpFile = path.join(os.tmpdir(), 'shell_pgrep_abcdef.tmp');
+      const wrappedCommand = `{ my-command & }; __code=$?; pgrep -g 0 >${tmpFile} 2>&1; exit $__code;`;
+      expect(mockShellExecutionService).toHaveBeenCalledWith(
+        wrappedCommand,
+        expect.any(String),
+        expect.any(Function),
+        mockAbortSignal,
+      );
+      expect(result.llmContent).toContain('Background PIDs: 54322');
+      expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+    });
+
+    it('should not wrap command on windows', async () => {
+      vi.mocked(os.platform).mockReturnValue('win32');
+      const promise = shellTool.execute({ command: 'dir' }, mockAbortSignal);
+      resolveExecutionPromise({
+        rawOutput: Buffer.from(''),
+        output: '',
+        stdout: '',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+      });
+      await promise;
+      expect(mockShellExecutionService).toHaveBeenCalledWith(
+        'dir',
+        expect.any(String),
+        expect.any(Function),
+        mockAbortSignal,
+      );
+    });
+
+    it('should format error messages correctly', async () => {
+      const error = new Error('wrapped command failed');
+      const promise = shellTool.execute(
+        { command: 'user-command' },
+        mockAbortSignal,
+      );
+      resolveShellExecution({
+        error,
+        exitCode: 1,
+        output: 'err',
+        stderr: 'err',
+        rawOutput: Buffer.from('err'),
+        stdout: '',
+        signal: null,
+        aborted: false,
+        pid: 12345,
+      });
+
+      const result = await promise;
+      // The final llmContent should contain the user's command, not the wrapper
+      expect(result.llmContent).toContain('Error: wrapped command failed');
+      expect(result.llmContent).not.toContain('pgrep');
+    });
+
+    it('should summarize output when configured', async () => {
+      (mockConfig.getSummarizeToolOutputConfig as Mock).mockReturnValue({
         [shellTool.name]: { tokenBudget: 1000 },
-      }),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+      });
+      vi.mocked(summarizer.summarizeToolOutput).mockResolvedValue(
+        'summarized output',
+      );
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+      const promise = shellTool.execute({ command: 'ls' }, mockAbortSignal);
+      resolveExecutionPromise({
+        output: 'long output',
+        rawOutput: Buffer.from('long output'),
+        stdout: 'long output',
+        stderr: '',
+        exitCode: 0,
+        signal: null,
+        error: null,
+        aborted: false,
+        pid: 12345,
+      });
 
-    const abortSignal = new AbortController().signal;
-    await shellTool.execute({ command: 'echo "hello"' }, abortSignal, () => {});
+      const result = await promise;
 
-    expect(summarizeSpy).toHaveBeenCalledWith(
-      expect.any(String),
-      expect.any(Object),
-      expect.any(Object),
-      1000,
-    );
-  });
+      expect(summarizer.summarizeToolOutput).toHaveBeenCalledWith(
+        expect.any(String),
+        mockConfig.getGeminiClient(),
+        mockAbortSignal,
+        1000,
+      );
+      expect(result.llmContent).toBe('summarized output');
+      expect(result.returnDisplay).toBe('long output');
+    });
 
-  it('should use default token budget if not specified', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({
-        [shellTool.name]: {},
-      }),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+    it('should clean up the temp file on synchronous execution error', async () => {
+      const error = new Error('sync spawn error');
+      mockShellExecutionService.mockImplementation(() => {
+        throw error;
+      });
+      vi.mocked(fs.existsSync).mockReturnValue(true); // Pretend the file exists
 
-    const summarizeSpy = vi
-      .spyOn(summarizer, 'summarizeToolOutput')
-      .mockResolvedValue('summarized output');
+      await expect(
+        shellTool.execute({ command: 'a-command' }, mockAbortSignal),
+      ).rejects.toThrow(error);
 
-    const abortSignal = new AbortController().signal;
-    await shellTool.execute({ command: 'echo "hello"' }, abortSignal, () => {});
+      const tmpFile = path.join(os.tmpdir(), 'shell_pgrep_abcdef.tmp');
+      expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+    });
 
-    expect(summarizeSpy).toHaveBeenCalledWith(
-      expect.any(String),
-      expect.any(Object),
-      expect.any(Object),
-      undefined,
-    );
-  });
+    describe('Streaming to `updateOutput`', () => {
+      let updateOutputMock: Mock;
+      beforeEach(() => {
+        vi.useFakeTimers({ toFake: ['Date'] });
+        updateOutputMock = vi.fn();
+      });
+      afterEach(() => {
+        vi.useRealTimers();
+      });
 
-  it('should pass GEMINI_CLI environment variable to executed commands', async () => {
-    config = {
-      getCoreTools: () => undefined,
-      getExcludeTools: () => undefined,
-      getDebugMode: () => false,
-      getGeminiClient: () => ({}) as GeminiClient,
-      getTargetDir: () => '.',
-      getSummarizeToolOutputConfig: () => ({}),
-    } as unknown as Config;
-    shellTool = new ShellTool(config);
+      it('should throttle text output updates', async () => {
+        const promise = shellTool.execute(
+          { command: 'stream' },
+          mockAbortSignal,
+          updateOutputMock,
+        );
+
+        // First chunk, should be throttled.
+        mockShellOutputCallback({
+          type: 'data',
+          stream: 'stdout',
+          chunk: 'hello ',
+        });
+        expect(updateOutputMock).not.toHaveBeenCalled();
+
+        // Advance time past the throttle interval.
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+
+        // Send a second chunk. THIS event triggers the update with the CUMULATIVE content.
+        mockShellOutputCallback({
+          type: 'data',
+          stream: 'stderr',
+          chunk: 'world',
+        });
+
+        // It should have been called once now with the combined output.
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+        expect(updateOutputMock).toHaveBeenCalledWith('hello \nworld');
+
+        resolveExecutionPromise({
+          rawOutput: Buffer.from(''),
+          output: '',
+          stdout: '',
+          stderr: '',
+          exitCode: 0,
+          signal: null,
+          error: null,
+          aborted: false,
+          pid: 12345,
+        });
+        await promise;
+      });
+
+      it('should immediately show binary detection message and throttle progress', async () => {
+        const promise = shellTool.execute(
+          { command: 'cat img' },
+          mockAbortSignal,
+          updateOutputMock,
+        );
 
-    const abortSignal = new AbortController().signal;
-    const command =
-      os.platform() === 'win32' ? 'echo %GEMINI_CLI%' : 'echo "$GEMINI_CLI"';
-    const result = await shellTool.execute({ command }, abortSignal, () => {});
+        mockShellOutputCallback({ type: 'binary_detected' });
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+        expect(updateOutputMock).toHaveBeenCalledWith(
+          '[Binary output detected. Halting stream...]',
+        );
 
-    expect(result.returnDisplay).toBe('1' + os.EOL);
+        mockShellOutputCallback({
+          type: 'binary_progress',
+          bytesReceived: 1024,
+        });
+        expect(updateOutputMock).toHaveBeenCalledOnce();
+
+        // Advance time past the throttle interval.
+        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+
+        // Send a SECOND progress event. This one will trigger the flush.
+        mockShellOutputCallback({
+          type: 'binary_progress',
+          bytesReceived: 2048,
+        });
+
+        // Now it should be called a second time with the latest progress.
+        expect(updateOutputMock).toHaveBeenCalledTimes(2);
+        expect(updateOutputMock).toHaveBeenLastCalledWith(
+          '[Receiving binary output... 2.0 KB received]',
+        );
+
+        resolveExecutionPromise({
+          rawOutput: Buffer.from(''),
+          output: '',
+          stdout: '',
+          stderr: '',
+          exitCode: 0,
+          signal: null,
+          error: null,
+          aborted: false,
+          pid: 12345,
+        });
+        await promise;
+      });
+    });
   });
-});
 
-describe('shouldConfirmExecute', () => {
-  it('should de-duplicate command roots before asking for confirmation', async () => {
-    const shellTool = new ShellTool({
-      getCoreTools: () => ['run_shell_command'],
-      getExcludeTools: () => [],
-    } as unknown as Config);
-    const result = (await shellTool.shouldConfirmExecute(
-      {
-        command: 'git status && git log',
-      },
-      new AbortController().signal,
-    )) as ToolExecuteConfirmationDetails;
-    expect(result.rootCommand).toEqual('git');
+  describe('shouldConfirmExecute', () => {
+    it('should request confirmation for a new command and whitelist it on "Always"', async () => {
+      const params = { command: 'npm install' };
+      const confirmation = await shellTool.shouldConfirmExecute(
+        params,
+        new AbortController().signal,
+      );
+
+      expect(confirmation).not.toBe(false);
+      expect(confirmation && confirmation.type).toBe('exec');
+
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      await (confirmation as any).onConfirm(
+        ToolConfirmationOutcome.ProceedAlways,
+      );
+
+      // Should now be whitelisted
+      const secondConfirmation = await shellTool.shouldConfirmExecute(
+        { command: 'npm test' },
+        new AbortController().signal,
+      );
+      expect(secondConfirmation).toBe(false);
+    });
+
+    it('should skip confirmation if validation fails', async () => {
+      const confirmation = await shellTool.shouldConfirmExecute(
+        { command: '' },
+        new AbortController().signal,
+      );
+      expect(confirmation).toBe(false);
+    });
   });
 });
diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts
index d90ae678..02fcbb7f 100644
--- a/packages/core/src/tools/shell.ts
+++ b/packages/core/src/tools/shell.ts
@@ -20,22 +20,25 @@ import {
 import { Type } from '@google/genai';
 import { SchemaValidator } from '../utils/schemaValidator.js';
 import { getErrorMessage } from '../utils/errors.js';
-import stripAnsi from 'strip-ansi';
+import { summarizeToolOutput } from '../utils/summarizer.js';
+import {
+  ShellExecutionService,
+  ShellOutputEvent,
+} from '../services/shellExecutionService.js';
+import { formatMemoryUsage } from '../utils/formatters.js';
 import {
   getCommandRoots,
   isCommandAllowed,
   stripShellWrapper,
 } from '../utils/shell-utils.js';
 
+export const OUTPUT_UPDATE_INTERVAL_MS = 1000;
+
 export interface ShellToolParams {
   command: string;
   description?: string;
   directory?: string;
 }
-import { spawn } from 'child_process';
-import { summarizeToolOutput } from '../utils/summarizer.js';
-
-const OUTPUT_UPDATE_INTERVAL_MS = 1000;
 
 export class ShellTool extends BaseTool<ShellToolParams, ToolResult> {
   static Name: string = 'run_shell_command';
@@ -196,216 +199,182 @@ export class ShellTool extends BaseTool<ShellToolParams, ToolResult> {
       .toString('hex')}.tmp`;
     const tempFilePath = path.join(os.tmpdir(), tempFileName);
 
-    // pgrep is not available on Windows, so we can't get background PIDs
-    const commandToExecute = isWindows
-      ? strippedCommand
-      : (() => {
-          // wrap command to append subprocess pids (via pgrep) to temporary file
-          let command = strippedCommand.trim();
-          if (!command.endsWith('&')) command += ';';
-          return `{ ${command} }; __code=$?; pgrep -g 0 >${tempFilePath} 2>&1; exit $__code;`;
-        })();
-
-    // spawn command in specified directory (or project root if not specified)
-    const shell = isWindows
-      ? spawn('cmd.exe', ['/c', commandToExecute], {
-          stdio: ['ignore', 'pipe', 'pipe'],
-          // detached: true, // ensure subprocess starts its own process group (esp. in Linux)
-          cwd: path.resolve(this.config.getTargetDir(), params.directory || ''),
-          env: {
-            ...process.env,
-            GEMINI_CLI: '1',
-          },
-        })
-      : spawn('bash', ['-c', commandToExecute], {
-          stdio: ['ignore', 'pipe', 'pipe'],
-          detached: true, // ensure subprocess starts its own process group (esp. in Linux)
-          cwd: path.resolve(this.config.getTargetDir(), params.directory || ''),
-          env: {
-            ...process.env,
-            GEMINI_CLI: '1',
-          },
-        });
+    try {
+      // pgrep is not available on Windows, so we can't get background PIDs
+      const commandToExecute = isWindows
+        ? strippedCommand
+        : (() => {
+            // wrap command to append subprocess pids (via pgrep) to temporary file
+            let command = strippedCommand.trim();
+            if (!command.endsWith('&')) command += ';';
+            return `{ ${command} }; __code=$?; pgrep -g 0 >${tempFilePath} 2>&1; exit $__code;`;
+          })();
 
-    let exited = false;
-    let stdout = '';
-    let output = '';
-    let lastUpdateTime = Date.now();
+      const cwd = path.resolve(
+        this.config.getTargetDir(),
+        params.directory || '',
+      );
 
-    const appendOutput = (str: string) => {
-      output += str;
-      if (
-        updateOutput &&
-        Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS
-      ) {
-        updateOutput(output);
-        lastUpdateTime = Date.now();
-      }
-    };
+      let cumulativeStdout = '';
+      let cumulativeStderr = '';
 
-    shell.stdout.on('data', (data: Buffer) => {
-      // continue to consume post-exit for background processes
-      // removing listeners can overflow OS buffer and block subprocesses
-      // destroying (e.g. shell.stdout.destroy()) can terminate subprocesses via SIGPIPE
-      if (!exited) {
-        const str = stripAnsi(data.toString());
-        stdout += str;
-        appendOutput(str);
-      }
-    });
+      let lastUpdateTime = Date.now();
+      let isBinaryStream = false;
 
-    let stderr = '';
-    shell.stderr.on('data', (data: Buffer) => {
-      if (!exited) {
-        const str = stripAnsi(data.toString());
-        stderr += str;
-        appendOutput(str);
-      }
-    });
-
-    let error: Error | null = null;
-    shell.on('error', (err: Error) => {
-      error = err;
-      // remove wrapper from user's command in error message
-      error.message = error.message.replace(commandToExecute, params.command);
-    });
+      const { result: resultPromise } = ShellExecutionService.execute(
+        commandToExecute,
+        cwd,
+        (event: ShellOutputEvent) => {
+          if (!updateOutput) {
+            return;
+          }
 
-    let code: number | null = null;
-    let processSignal: NodeJS.Signals | null = null;
-    const exitHandler = (
-      _code: number | null,
-      _signal: NodeJS.Signals | null,
-    ) => {
-      exited = true;
-      code = _code;
-      processSignal = _signal;
-    };
-    shell.on('exit', exitHandler);
+          let currentDisplayOutput = '';
+          let shouldUpdate = false;
 
-    const abortHandler = async () => {
-      if (shell.pid && !exited) {
-        if (os.platform() === 'win32') {
-          // For Windows, use taskkill to kill the process tree
-          spawn('taskkill', ['/pid', shell.pid.toString(), '/f', '/t']);
-        } else {
-          try {
-            // attempt to SIGTERM process group (negative PID)
-            // fall back to SIGKILL (to group) after 200ms
-            process.kill(-shell.pid, 'SIGTERM');
-            await new Promise((resolve) => setTimeout(resolve, 200));
-            if (shell.pid && !exited) {
-              process.kill(-shell.pid, 'SIGKILL');
-            }
-          } catch (_e) {
-            // if group kill fails, fall back to killing just the main process
-            try {
-              if (shell.pid) {
-                shell.kill('SIGKILL');
+          switch (event.type) {
+            case 'data':
+              if (isBinaryStream) break; // Don't process text if we are in binary mode
+              if (event.stream === 'stdout') {
+                cumulativeStdout += event.chunk;
+              } else {
+                cumulativeStderr += event.chunk;
+              }
+              currentDisplayOutput =
+                cumulativeStdout +
+                (cumulativeStderr ? `\n${cumulativeStderr}` : '');
+              if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) {
+                shouldUpdate = true;
+              }
+              break;
+            case 'binary_detected':
+              isBinaryStream = true;
+              currentDisplayOutput =
+                '[Binary output detected. Halting stream...]';
+              shouldUpdate = true;
+              break;
+            case 'binary_progress':
+              isBinaryStream = true;
+              currentDisplayOutput = `[Receiving binary output... ${formatMemoryUsage(
+                event.bytesReceived,
+              )} received]`;
+              if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) {
+                shouldUpdate = true;
               }
-            } catch (_e) {
-              console.error(`failed to kill shell process ${shell.pid}: ${_e}`);
+              break;
+            default: {
+              throw new Error('An unhandled ShellOutputEvent was found.');
             }
           }
-        }
-      }
-    };
-    signal.addEventListener('abort', abortHandler);
 
-    // wait for the shell to exit
-    try {
-      await new Promise((resolve) => shell.on('exit', resolve));
-    } finally {
-      signal.removeEventListener('abort', abortHandler);
-    }
+          if (shouldUpdate) {
+            updateOutput(currentDisplayOutput);
+            lastUpdateTime = Date.now();
+          }
+        },
+        signal,
+      );
 
-    // parse pids (pgrep output) from temporary file and remove it
-    const backgroundPIDs: number[] = [];
-    if (os.platform() !== 'win32') {
-      if (fs.existsSync(tempFilePath)) {
-        const pgrepLines = fs
-          .readFileSync(tempFilePath, 'utf8')
-          .split('\n')
-          .filter(Boolean);
-        for (const line of pgrepLines) {
-          if (!/^\d+$/.test(line)) {
-            console.error(`pgrep: ${line}`);
+      const result = await resultPromise;
+
+      const backgroundPIDs: number[] = [];
+      if (os.platform() !== 'win32') {
+        if (fs.existsSync(tempFilePath)) {
+          const pgrepLines = fs
+            .readFileSync(tempFilePath, 'utf8')
+            .split('\n')
+            .filter(Boolean);
+          for (const line of pgrepLines) {
+            if (!/^\d+$/.test(line)) {
+              console.error(`pgrep: ${line}`);
+            }
+            const pid = Number(line);
+            if (pid !== result.pid) {
+              backgroundPIDs.push(pid);
+            }
           }
-          const pid = Number(line);
-          // exclude the shell subprocess pid
-          if (pid !== shell.pid) {
-            backgroundPIDs.push(pid);
+        } else {
+          if (!signal.aborted) {
+            console.error('missing pgrep output');
           }
         }
-        fs.unlinkSync(tempFilePath);
-      } else {
-        if (!signal.aborted) {
-          console.error('missing pgrep output');
-        }
       }
-    }
 
-    let llmContent = '';
-    if (signal.aborted) {
-      llmContent = 'Command was cancelled by user before it could complete.';
-      if (output.trim()) {
-        llmContent += ` Below is the output (on stdout and stderr) before it was cancelled:\n${output}`;
+      let llmContent = '';
+      if (result.aborted) {
+        llmContent = 'Command was cancelled by user before it could complete.';
+        if (result.output.trim()) {
+          llmContent += ` Below is the output (on stdout and stderr) before it was cancelled:\n${result.output}`;
+        } else {
+          llmContent += ' There was no output before it was cancelled.';
+        }
       } else {
-        llmContent += ' There was no output before it was cancelled.';
+        // Create a formatted error string for display, replacing the wrapper command
+        // with the user-facing command.
+        const finalError = result.error
+          ? result.error.message.replace(commandToExecute, params.command)
+          : '(none)';
+
+        llmContent = [
+          `Command: ${params.command}`,
+          `Directory: ${params.directory || '(root)'}`,
+          `Stdout: ${result.stdout || '(empty)'}`,
+          `Stderr: ${result.stderr || '(empty)'}`,
+          `Error: ${finalError}`, // Use the cleaned error string.
+          `Exit Code: ${result.exitCode ?? '(none)'}`,
+          `Signal: ${result.signal ?? '(none)'}`,
+          `Background PIDs: ${
+            backgroundPIDs.length ? backgroundPIDs.join(', ') : '(none)'
+          }`,
+          `Process Group PGID: ${result.pid ?? '(none)'}`,
+        ].join('\n');
       }
-    } else {
-      llmContent = [
-        `Command: ${params.command}`,
-        `Directory: ${params.directory || '(root)'}`,
-        `Stdout: ${stdout || '(empty)'}`,
-        `Stderr: ${stderr || '(empty)'}`,
-        `Error: ${error ?? '(none)'}`,
-        `Exit Code: ${code ?? '(none)'}`,
-        `Signal: ${processSignal ?? '(none)'}`,
-        `Background PIDs: ${backgroundPIDs.length ? backgroundPIDs.join(', ') : '(none)'}`,
-        `Process Group PGID: ${shell.pid ?? '(none)'}`,
-      ].join('\n');
-    }
 
-    let returnDisplayMessage = '';
-    if (this.config.getDebugMode()) {
-      returnDisplayMessage = llmContent;
-    } else {
-      if (output.trim()) {
-        returnDisplayMessage = output;
+      let returnDisplayMessage = '';
+      if (this.config.getDebugMode()) {
+        returnDisplayMessage = llmContent;
       } else {
-        // Output is empty, let's provide a reason if the command failed or was cancelled
-        if (signal.aborted) {
-          returnDisplayMessage = 'Command cancelled by user.';
-        } else if (processSignal) {
-          returnDisplayMessage = `Command terminated by signal: ${processSignal}`;
-        } else if (error) {
-          // If error is not null, it's an Error object (or other truthy value)
-          returnDisplayMessage = `Command failed: ${getErrorMessage(error)}`;
-        } else if (code !== null && code !== 0) {
-          returnDisplayMessage = `Command exited with code: ${code}`;
+        if (result.output.trim()) {
+          returnDisplayMessage = result.output;
+        } else {
+          if (result.aborted) {
+            returnDisplayMessage = 'Command cancelled by user.';
+          } else if (result.signal) {
+            returnDisplayMessage = `Command terminated by signal: ${result.signal}`;
+          } else if (result.error) {
+            returnDisplayMessage = `Command failed: ${getErrorMessage(
+              result.error,
+            )}`;
+          } else if (result.exitCode !== null && result.exitCode !== 0) {
+            returnDisplayMessage = `Command exited with code: ${result.exitCode}`;
+          }
+          // If output is empty and command succeeded (code 0, no error/signal/abort),
+          // returnDisplayMessage will remain empty, which is fine.
         }
-        // If output is empty and command succeeded (code 0, no error/signal/abort),
-        // returnDisplayMessage will remain empty, which is fine.
       }
-    }
 
-    const summarizeConfig = this.config.getSummarizeToolOutputConfig();
-    if (summarizeConfig && summarizeConfig[this.name]) {
-      const summary = await summarizeToolOutput(
-        llmContent,
-        this.config.getGeminiClient(),
-        signal,
-        summarizeConfig[this.name].tokenBudget,
-      );
+      const summarizeConfig = this.config.getSummarizeToolOutputConfig();
+      if (summarizeConfig && summarizeConfig[this.name]) {
+        const summary = await summarizeToolOutput(
+          llmContent,
+          this.config.getGeminiClient(),
+          signal,
+          summarizeConfig[this.name].tokenBudget,
+        );
+        return {
+          llmContent: summary,
+          returnDisplay: returnDisplayMessage,
+        };
+      }
+
       return {
-        llmContent: summary,
+        llmContent,
         returnDisplay: returnDisplayMessage,
       };
+    } finally {
+      if (fs.existsSync(tempFilePath)) {
+        fs.unlinkSync(tempFilePath);
+      }
     }
-
-    return {
-      llmContent,
-      returnDisplay: returnDisplayMessage,
-    };
   }
 }
diff --git a/packages/core/src/utils/formatters.ts b/packages/core/src/utils/formatters.ts
new file mode 100644
index 00000000..ab02160e
--- /dev/null
+++ b/packages/core/src/utils/formatters.ts
@@ -0,0 +1,16 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export const formatMemoryUsage = (bytes: number): string => {
+  const gb = bytes / (1024 * 1024 * 1024);
+  if (bytes < 1024 * 1024) {
+    return `${(bytes / 1024).toFixed(1)} KB`;
+  }
+  if (bytes < 1024 * 1024 * 1024) {
+    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+  }
+  return `${gb.toFixed(2)} GB`;
+};
diff --git a/packages/core/src/utils/textUtils.ts b/packages/core/src/utils/textUtils.ts
new file mode 100644
index 00000000..3bcc4759
--- /dev/null
+++ b/packages/core/src/utils/textUtils.ts
@@ -0,0 +1,34 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Checks if a Buffer is likely binary by testing for the presence of a NULL byte.
+ * The presence of a NULL byte is a strong indicator that the data is not plain text.
+ * @param data The Buffer to check.
+ * @param sampleSize The number of bytes from the start of the buffer to test.
+ * @returns True if a NULL byte is found, false otherwise.
+ */
+export function isBinary(
+  data: Buffer | null | undefined,
+  sampleSize = 512,
+): boolean {
+  if (!data) {
+    return false;
+  }
+
+  const sample = data.length > sampleSize ? data.subarray(0, sampleSize) : data;
+
+  for (const byte of sample) {
+    // The presence of a NULL byte (0x00) is one of the most reliable
+    // indicators of a binary file. Text files should not contain them.
+    if (byte === 0) {
+      return true;
+    }
+  }
+
+  // If no NULL bytes were found in the sample, we assume it's text.
+  return false;
+}