summaryrefslogtreecommitdiff
path: root/packages/core/src/tools/shell.test.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/core/src/tools/shell.test.ts')
-rw-r--r--packages/core/src/tools/shell.test.ts476
1 files changed, 348 insertions, 128 deletions
diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts
index 460c871a..55364197 100644
--- a/packages/core/src/tools/shell.test.ts
+++ b/packages/core/src/tools/shell.test.ts
@@ -4,164 +4,384 @@
* SPDX-License-Identifier: Apache-2.0
*/
-import { expect, describe, it, vi, beforeEach } from 'vitest';
+import {
+ vi,
+ describe,
+ it,
+ expect,
+ beforeEach,
+ afterEach,
+ type Mock,
+} from 'vitest';
+
+const mockShellExecutionService = vi.hoisted(() => vi.fn());
+vi.mock('../services/shellExecutionService.js', () => ({
+ ShellExecutionService: { execute: mockShellExecutionService },
+}));
+vi.mock('fs');
+vi.mock('os');
+vi.mock('crypto');
+vi.mock('../utils/summarizer.js');
+
+import { isCommandAllowed } from '../utils/shell-utils.js';
import { ShellTool } from './shell.js';
-import { Config } from '../config/config.js';
+import { type Config } from '../config/config.js';
+import {
+ type ShellExecutionResult,
+ type ShellOutputEvent,
+} from '../services/shellExecutionService.js';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import * as crypto from 'crypto';
import * as summarizer from '../utils/summarizer.js';
-import { GeminiClient } from '../core/client.js';
-import { ToolExecuteConfirmationDetails } from './tools.js';
-import os from 'os';
+import { ToolConfirmationOutcome } from './tools.js';
+import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js';
-describe('ShellTool Bug Reproduction', () => {
+describe('ShellTool', () => {
let shellTool: ShellTool;
- let config: Config;
+ let mockConfig: Config;
+ let mockShellOutputCallback: (event: ShellOutputEvent) => void;
+ let resolveExecutionPromise: (result: ShellExecutionResult) => void;
beforeEach(() => {
- config = {
- getCoreTools: () => undefined,
- getExcludeTools: () => undefined,
- getDebugMode: () => false,
- getGeminiClient: () => ({}) as GeminiClient,
- getTargetDir: () => '.',
- getSummarizeToolOutputConfig: () => ({
- [shellTool.name]: {},
- }),
+ vi.clearAllMocks();
+
+ mockConfig = {
+ getCoreTools: vi.fn().mockReturnValue([]),
+ getExcludeTools: vi.fn().mockReturnValue([]),
+ getDebugMode: vi.fn().mockReturnValue(false),
+ getTargetDir: vi.fn().mockReturnValue('/test/dir'),
+ getSummarizeToolOutputConfig: vi.fn().mockReturnValue(undefined),
+ getGeminiClient: vi.fn(),
} as unknown as Config;
- shellTool = new ShellTool(config);
- });
- it('should not let the summarizer override the return display', async () => {
- const summarizeSpy = vi
- .spyOn(summarizer, 'summarizeToolOutput')
- .mockResolvedValue('summarized output');
+ shellTool = new ShellTool(mockConfig);
- const abortSignal = new AbortController().signal;
- const result = await shellTool.execute(
- { command: 'echo hello' },
- abortSignal,
- () => {},
+ vi.mocked(os.platform).mockReturnValue('linux');
+ vi.mocked(os.tmpdir).mockReturnValue('/tmp');
+ (vi.mocked(crypto.randomBytes) as Mock).mockReturnValue(
+ Buffer.from('abcdef', 'hex'),
);
- expect(result.returnDisplay).toBe('hello' + os.EOL);
- expect(result.llmContent).toBe('summarized output');
- expect(summarizeSpy).toHaveBeenCalled();
+ // Capture the output callback to simulate streaming events from the service
+ mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => {
+ mockShellOutputCallback = callback;
+ return {
+ pid: 12345,
+ result: new Promise((resolve) => {
+ resolveExecutionPromise = resolve;
+ }),
+ };
+ });
});
- it('should not call summarizer if disabled in config', async () => {
- config = {
- getCoreTools: () => undefined,
- getExcludeTools: () => undefined,
- getDebugMode: () => false,
- getGeminiClient: () => ({}) as GeminiClient,
- getTargetDir: () => '.',
- getSummarizeToolOutputConfig: () => ({}),
- } as unknown as Config;
- shellTool = new ShellTool(config);
+ describe('isCommandAllowed', () => {
+ it('should allow a command if no restrictions are provided', () => {
+ (mockConfig.getCoreTools as Mock).mockReturnValue(undefined);
+ (mockConfig.getExcludeTools as Mock).mockReturnValue(undefined);
+ expect(isCommandAllowed('ls -l', mockConfig).allowed).toBe(true);
+ });
- const summarizeSpy = vi
- .spyOn(summarizer, 'summarizeToolOutput')
- .mockResolvedValue('summarized output');
+ it('should block a command with command substitution using $()', () => {
+ expect(isCommandAllowed('echo $(rm -rf /)', mockConfig).allowed).toBe(
+ false,
+ );
+ });
+ });
- const abortSignal = new AbortController().signal;
- const result = await shellTool.execute(
- { command: 'echo hello' },
- abortSignal,
- () => {},
- );
+ describe('validateToolParams', () => {
+ it('should return null for a valid command', () => {
+ expect(shellTool.validateToolParams({ command: 'ls -l' })).toBeNull();
+ });
+
+ it('should return an error for an empty command', () => {
+ expect(shellTool.validateToolParams({ command: ' ' })).toBe(
+ 'Command cannot be empty.',
+ );
+ });
- expect(result.returnDisplay).toBe('hello' + os.EOL);
- expect(result.llmContent).not.toBe('summarized output');
- expect(summarizeSpy).not.toHaveBeenCalled();
+ it('should return an error for a non-existent directory', () => {
+ vi.mocked(fs.existsSync).mockReturnValue(false);
+ expect(
+ shellTool.validateToolParams({ command: 'ls', directory: 'rel/path' }),
+ ).toBe('Directory must exist.');
+ });
});
- it('should pass token budget to summarizer', async () => {
- config = {
- getCoreTools: () => undefined,
- getExcludeTools: () => undefined,
- getDebugMode: () => false,
- getGeminiClient: () => ({}) as GeminiClient,
- getTargetDir: () => '.',
- getSummarizeToolOutputConfig: () => ({
+ describe('execute', () => {
+ const mockAbortSignal = new AbortController().signal;
+
+ const resolveShellExecution = (
+ result: Partial<ShellExecutionResult> = {},
+ ) => {
+ const fullResult: ShellExecutionResult = {
+ rawOutput: Buffer.from(result.output || ''),
+ output: 'Success',
+ stdout: 'Success',
+ stderr: '',
+ exitCode: 0,
+ signal: null,
+ error: null,
+ aborted: false,
+ pid: 12345,
+ ...result,
+ };
+ resolveExecutionPromise(fullResult);
+ };
+
+ it('should wrap command on linux and parse pgrep output', async () => {
+ const promise = shellTool.execute(
+ { command: 'my-command &' },
+ mockAbortSignal,
+ );
+ resolveShellExecution({ pid: 54321 });
+
+ vi.mocked(fs.existsSync).mockReturnValue(true);
+ vi.mocked(fs.readFileSync).mockReturnValue('54321\n54322\n'); // Service PID and background PID
+
+ const result = await promise;
+
+ const tmpFile = path.join(os.tmpdir(), 'shell_pgrep_abcdef.tmp');
+ const wrappedCommand = `{ my-command & }; __code=$?; pgrep -g 0 >${tmpFile} 2>&1; exit $__code;`;
+ expect(mockShellExecutionService).toHaveBeenCalledWith(
+ wrappedCommand,
+ expect.any(String),
+ expect.any(Function),
+ mockAbortSignal,
+ );
+ expect(result.llmContent).toContain('Background PIDs: 54322');
+ expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+ });
+
+ it('should not wrap command on windows', async () => {
+ vi.mocked(os.platform).mockReturnValue('win32');
+ const promise = shellTool.execute({ command: 'dir' }, mockAbortSignal);
+ resolveExecutionPromise({
+ rawOutput: Buffer.from(''),
+ output: '',
+ stdout: '',
+ stderr: '',
+ exitCode: 0,
+ signal: null,
+ error: null,
+ aborted: false,
+ pid: 12345,
+ });
+ await promise;
+ expect(mockShellExecutionService).toHaveBeenCalledWith(
+ 'dir',
+ expect.any(String),
+ expect.any(Function),
+ mockAbortSignal,
+ );
+ });
+
+ it('should format error messages correctly', async () => {
+ const error = new Error('wrapped command failed');
+ const promise = shellTool.execute(
+ { command: 'user-command' },
+ mockAbortSignal,
+ );
+ resolveShellExecution({
+ error,
+ exitCode: 1,
+ output: 'err',
+ stderr: 'err',
+ rawOutput: Buffer.from('err'),
+ stdout: '',
+ signal: null,
+ aborted: false,
+ pid: 12345,
+ });
+
+ const result = await promise;
+ // The final llmContent should contain the user's command, not the wrapper
+ expect(result.llmContent).toContain('Error: wrapped command failed');
+ expect(result.llmContent).not.toContain('pgrep');
+ });
+
+ it('should summarize output when configured', async () => {
+ (mockConfig.getSummarizeToolOutputConfig as Mock).mockReturnValue({
[shellTool.name]: { tokenBudget: 1000 },
- }),
- } as unknown as Config;
- shellTool = new ShellTool(config);
+ });
+ vi.mocked(summarizer.summarizeToolOutput).mockResolvedValue(
+ 'summarized output',
+ );
- const summarizeSpy = vi
- .spyOn(summarizer, 'summarizeToolOutput')
- .mockResolvedValue('summarized output');
+ const promise = shellTool.execute({ command: 'ls' }, mockAbortSignal);
+ resolveExecutionPromise({
+ output: 'long output',
+ rawOutput: Buffer.from('long output'),
+ stdout: 'long output',
+ stderr: '',
+ exitCode: 0,
+ signal: null,
+ error: null,
+ aborted: false,
+ pid: 12345,
+ });
- const abortSignal = new AbortController().signal;
- await shellTool.execute({ command: 'echo "hello"' }, abortSignal, () => {});
+ const result = await promise;
- expect(summarizeSpy).toHaveBeenCalledWith(
- expect.any(String),
- expect.any(Object),
- expect.any(Object),
- 1000,
- );
- });
+ expect(summarizer.summarizeToolOutput).toHaveBeenCalledWith(
+ expect.any(String),
+ mockConfig.getGeminiClient(),
+ mockAbortSignal,
+ 1000,
+ );
+ expect(result.llmContent).toBe('summarized output');
+ expect(result.returnDisplay).toBe('long output');
+ });
- it('should use default token budget if not specified', async () => {
- config = {
- getCoreTools: () => undefined,
- getExcludeTools: () => undefined,
- getDebugMode: () => false,
- getGeminiClient: () => ({}) as GeminiClient,
- getTargetDir: () => '.',
- getSummarizeToolOutputConfig: () => ({
- [shellTool.name]: {},
- }),
- } as unknown as Config;
- shellTool = new ShellTool(config);
+ it('should clean up the temp file on synchronous execution error', async () => {
+ const error = new Error('sync spawn error');
+ mockShellExecutionService.mockImplementation(() => {
+ throw error;
+ });
+ vi.mocked(fs.existsSync).mockReturnValue(true); // Pretend the file exists
- const summarizeSpy = vi
- .spyOn(summarizer, 'summarizeToolOutput')
- .mockResolvedValue('summarized output');
+ await expect(
+ shellTool.execute({ command: 'a-command' }, mockAbortSignal),
+ ).rejects.toThrow(error);
- const abortSignal = new AbortController().signal;
- await shellTool.execute({ command: 'echo "hello"' }, abortSignal, () => {});
+ const tmpFile = path.join(os.tmpdir(), 'shell_pgrep_abcdef.tmp');
+ expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
+ });
- expect(summarizeSpy).toHaveBeenCalledWith(
- expect.any(String),
- expect.any(Object),
- expect.any(Object),
- undefined,
- );
- });
+ describe('Streaming to `updateOutput`', () => {
+ let updateOutputMock: Mock;
+ beforeEach(() => {
+ vi.useFakeTimers({ toFake: ['Date'] });
+ updateOutputMock = vi.fn();
+ });
+ afterEach(() => {
+ vi.useRealTimers();
+ });
- it('should pass GEMINI_CLI environment variable to executed commands', async () => {
- config = {
- getCoreTools: () => undefined,
- getExcludeTools: () => undefined,
- getDebugMode: () => false,
- getGeminiClient: () => ({}) as GeminiClient,
- getTargetDir: () => '.',
- getSummarizeToolOutputConfig: () => ({}),
- } as unknown as Config;
- shellTool = new ShellTool(config);
+ it('should throttle text output updates', async () => {
+ const promise = shellTool.execute(
+ { command: 'stream' },
+ mockAbortSignal,
+ updateOutputMock,
+ );
+
+ // First chunk, should be throttled.
+ mockShellOutputCallback({
+ type: 'data',
+ stream: 'stdout',
+ chunk: 'hello ',
+ });
+ expect(updateOutputMock).not.toHaveBeenCalled();
+
+ // Advance time past the throttle interval.
+ await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+
+ // Send a second chunk. THIS event triggers the update with the CUMULATIVE content.
+ mockShellOutputCallback({
+ type: 'data',
+ stream: 'stderr',
+ chunk: 'world',
+ });
+
+ // It should have been called once now with the combined output.
+ expect(updateOutputMock).toHaveBeenCalledOnce();
+ expect(updateOutputMock).toHaveBeenCalledWith('hello \nworld');
+
+ resolveExecutionPromise({
+ rawOutput: Buffer.from(''),
+ output: '',
+ stdout: '',
+ stderr: '',
+ exitCode: 0,
+ signal: null,
+ error: null,
+ aborted: false,
+ pid: 12345,
+ });
+ await promise;
+ });
+
+ it('should immediately show binary detection message and throttle progress', async () => {
+ const promise = shellTool.execute(
+ { command: 'cat img' },
+ mockAbortSignal,
+ updateOutputMock,
+ );
- const abortSignal = new AbortController().signal;
- const command =
- os.platform() === 'win32' ? 'echo %GEMINI_CLI%' : 'echo "$GEMINI_CLI"';
- const result = await shellTool.execute({ command }, abortSignal, () => {});
+ mockShellOutputCallback({ type: 'binary_detected' });
+ expect(updateOutputMock).toHaveBeenCalledOnce();
+ expect(updateOutputMock).toHaveBeenCalledWith(
+ '[Binary output detected. Halting stream...]',
+ );
- expect(result.returnDisplay).toBe('1' + os.EOL);
+ mockShellOutputCallback({
+ type: 'binary_progress',
+ bytesReceived: 1024,
+ });
+ expect(updateOutputMock).toHaveBeenCalledOnce();
+
+ // Advance time past the throttle interval.
+ await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
+
+ // Send a SECOND progress event. This one will trigger the flush.
+ mockShellOutputCallback({
+ type: 'binary_progress',
+ bytesReceived: 2048,
+ });
+
+ // Now it should be called a second time with the latest progress.
+ expect(updateOutputMock).toHaveBeenCalledTimes(2);
+ expect(updateOutputMock).toHaveBeenLastCalledWith(
+ '[Receiving binary output... 2.0 KB received]',
+ );
+
+ resolveExecutionPromise({
+ rawOutput: Buffer.from(''),
+ output: '',
+ stdout: '',
+ stderr: '',
+ exitCode: 0,
+ signal: null,
+ error: null,
+ aborted: false,
+ pid: 12345,
+ });
+ await promise;
+ });
+ });
});
-});
-describe('shouldConfirmExecute', () => {
- it('should de-duplicate command roots before asking for confirmation', async () => {
- const shellTool = new ShellTool({
- getCoreTools: () => ['run_shell_command'],
- getExcludeTools: () => [],
- } as unknown as Config);
- const result = (await shellTool.shouldConfirmExecute(
- {
- command: 'git status && git log',
- },
- new AbortController().signal,
- )) as ToolExecuteConfirmationDetails;
- expect(result.rootCommand).toEqual('git');
+ describe('shouldConfirmExecute', () => {
+ it('should request confirmation for a new command and whitelist it on "Always"', async () => {
+ const params = { command: 'npm install' };
+ const confirmation = await shellTool.shouldConfirmExecute(
+ params,
+ new AbortController().signal,
+ );
+
+ expect(confirmation).not.toBe(false);
+ expect(confirmation && confirmation.type).toBe('exec');
+
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ await (confirmation as any).onConfirm(
+ ToolConfirmationOutcome.ProceedAlways,
+ );
+
+ // Should now be whitelisted
+ const secondConfirmation = await shellTool.shouldConfirmExecute(
+ { command: 'npm test' },
+ new AbortController().signal,
+ );
+ expect(secondConfirmation).toBe(false);
+ });
+
+ it('should skip confirmation if validation fails', async () => {
+ const confirmation = await shellTool.shouldConfirmExecute(
+ { command: '' },
+ new AbortController().signal,
+ );
+ expect(confirmation).toBe(false);
+ });
});
});