summaryrefslogtreecommitdiff
path: root/packages/core/src
diff options
context:
space:
mode:
authorJerop Kipruto <[email protected]>2025-06-12 16:48:10 -0400
committerGitHub <[email protected]>2025-06-12 20:48:10 +0000
commit6723c72fa5468be713c05205c75be532729e8f92 (patch)
tree2392b344fb942f1c452e9fca5b5b6d131a827805 /packages/core/src
parentf8863f4d00f23a3e29496535be6cf0bb80ee43e9 (diff)
telemetry: include user decisions in tool call logs (#966)
Add the user's decision (accept, reject, modify) to tool call telemetry to better understand user intent. The decision provides crucial context to the `success` metric, as a user can reject a call that would have succeeded or accept one that fails. Also prettify the arguments json. Example: ![image](https://github.com/user-attachments/assets/251cb9fc-ceaa-4cdd-929c-8de47031aca8) #750
Diffstat (limited to 'packages/core/src')
-rw-r--r--packages/core/src/core/coreToolScheduler.test.ts6
-rw-r--r--packages/core/src/core/coreToolScheduler.ts52
-rw-r--r--packages/core/src/telemetry/loggers.test.ts238
-rw-r--r--packages/core/src/telemetry/loggers.ts36
-rw-r--r--packages/core/src/telemetry/metrics.ts2
-rw-r--r--packages/core/src/telemetry/types.ts3
6 files changed, 334 insertions, 3 deletions
diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts
index d9d30f94..8e09a2af 100644
--- a/packages/core/src/core/coreToolScheduler.test.ts
+++ b/packages/core/src/core/coreToolScheduler.test.ts
@@ -16,6 +16,7 @@ import {
ToolCallConfirmationDetails,
ToolConfirmationOutcome,
ToolResult,
+ Config,
} from '../index.js';
import { Part, PartListUnion } from '@google/genai';
import { convertToFunctionResponse } from './coreToolScheduler.js';
@@ -74,7 +75,12 @@ describe('CoreToolScheduler', () => {
const onAllToolCallsComplete = vi.fn();
const onToolCallsUpdate = vi.fn();
+ const mockConfig = {
+ getSessionId: () => 'test-session-id',
+ } as Config;
+
const scheduler = new CoreToolScheduler({
+ config: mockConfig,
toolRegistry: Promise.resolve(toolRegistry as any),
onAllToolCallsComplete,
onToolCallsUpdate,
diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts
index f8688aeb..d17e1511 100644
--- a/packages/core/src/core/coreToolScheduler.ts
+++ b/packages/core/src/core/coreToolScheduler.ts
@@ -16,6 +16,8 @@ import {
EditTool,
EditToolParams,
EditorType,
+ Config,
+ logToolCall,
} from '../index.js';
import { Part, PartListUnion } from '@google/genai';
import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js';
@@ -25,6 +27,7 @@ export type ValidatingToolCall = {
request: ToolCallRequestInfo;
tool: Tool;
startTime?: number;
+ outcome?: ToolConfirmationOutcome;
};
export type ScheduledToolCall = {
@@ -32,6 +35,7 @@ export type ScheduledToolCall = {
request: ToolCallRequestInfo;
tool: Tool;
startTime?: number;
+ outcome?: ToolConfirmationOutcome;
};
export type ErroredToolCall = {
@@ -39,6 +43,7 @@ export type ErroredToolCall = {
request: ToolCallRequestInfo;
response: ToolCallResponseInfo;
durationMs?: number;
+ outcome?: ToolConfirmationOutcome;
};
export type SuccessfulToolCall = {
@@ -47,6 +52,7 @@ export type SuccessfulToolCall = {
tool: Tool;
response: ToolCallResponseInfo;
durationMs?: number;
+ outcome?: ToolConfirmationOutcome;
};
export type ExecutingToolCall = {
@@ -55,6 +61,7 @@ export type ExecutingToolCall = {
tool: Tool;
liveOutput?: string;
startTime?: number;
+ outcome?: ToolConfirmationOutcome;
};
export type CancelledToolCall = {
@@ -63,6 +70,7 @@ export type CancelledToolCall = {
response: ToolCallResponseInfo;
tool: Tool;
durationMs?: number;
+ outcome?: ToolConfirmationOutcome;
};
export type WaitingToolCall = {
@@ -71,6 +79,7 @@ export type WaitingToolCall = {
tool: Tool;
confirmationDetails: ToolCallConfirmationDetails;
startTime?: number;
+ outcome?: ToolConfirmationOutcome;
};
export type Status = ToolCall['status'];
@@ -205,6 +214,7 @@ interface CoreToolSchedulerOptions {
onToolCallsUpdate?: ToolCallsUpdateHandler;
approvalMode?: ApprovalMode;
getPreferredEditor: () => EditorType | undefined;
+ config: Config;
}
export class CoreToolScheduler {
@@ -215,8 +225,10 @@ export class CoreToolScheduler {
private onToolCallsUpdate?: ToolCallsUpdateHandler;
private approvalMode: ApprovalMode;
private getPreferredEditor: () => EditorType | undefined;
+ private config: Config;
constructor(options: CoreToolSchedulerOptions) {
+ this.config = options.config;
this.toolRegistry = options.toolRegistry;
this.outputUpdateHandler = options.outputUpdateHandler;
this.onAllToolCallsComplete = options.onAllToolCallsComplete;
@@ -274,6 +286,14 @@ export class CoreToolScheduler {
| WaitingToolCall
).tool;
+ const outcome = (
+ currentCall as
+ | ValidatingToolCall
+ | ScheduledToolCall
+ | ExecutingToolCall
+ | WaitingToolCall
+ ).outcome;
+
switch (newStatus) {
case 'success': {
const durationMs = existingStartTime
@@ -285,6 +305,7 @@ export class CoreToolScheduler {
status: 'success',
response: auxiliaryData as ToolCallResponseInfo,
durationMs,
+ outcome,
} as SuccessfulToolCall;
}
case 'error': {
@@ -296,6 +317,7 @@ export class CoreToolScheduler {
status: 'error',
response: auxiliaryData as ToolCallResponseInfo,
durationMs,
+ outcome,
} as ErroredToolCall;
}
case 'awaiting_approval':
@@ -305,6 +327,7 @@ export class CoreToolScheduler {
status: 'awaiting_approval',
confirmationDetails: auxiliaryData as ToolCallConfirmationDetails,
startTime: existingStartTime,
+ outcome,
} as WaitingToolCall;
case 'scheduled':
return {
@@ -312,6 +335,7 @@ export class CoreToolScheduler {
tool: toolInstance,
status: 'scheduled',
startTime: existingStartTime,
+ outcome,
} as ScheduledToolCall;
case 'cancelled': {
const durationMs = existingStartTime
@@ -336,6 +360,7 @@ export class CoreToolScheduler {
error: undefined,
},
durationMs,
+ outcome,
} as CancelledToolCall;
}
case 'validating':
@@ -344,6 +369,7 @@ export class CoreToolScheduler {
tool: toolInstance,
status: 'validating',
startTime: existingStartTime,
+ outcome,
} as ValidatingToolCall;
case 'executing':
return {
@@ -351,6 +377,7 @@ export class CoreToolScheduler {
tool: toolInstance,
status: 'executing',
startTime: existingStartTime,
+ outcome,
} as ExecutingToolCall;
default: {
const exhaustiveCheck: never = newStatus;
@@ -482,6 +509,14 @@ export class CoreToolScheduler {
await originalOnConfirm(outcome);
}
+ this.toolCalls = this.toolCalls.map((call) => {
+ if (call.request.callId !== callId) return call;
+ return {
+ ...call,
+ outcome,
+ };
+ });
+
if (outcome === ToolConfirmationOutcome.Cancel || signal.aborted) {
this.setStatusInternal(
callId,
@@ -613,6 +648,23 @@ export class CoreToolScheduler {
const completedCalls = [...this.toolCalls] as CompletedToolCall[];
this.toolCalls = [];
+ for (const call of completedCalls) {
+ logToolCall(
+ this.config,
+ {
+ function_name: call.request.name,
+ function_args: call.request.args,
+ duration_ms: call.durationMs ?? 0,
+ success: call.status === 'success',
+ error:
+ call.status === 'error'
+ ? call.response.error?.message
+ : undefined,
+ },
+ call.outcome,
+ );
+ }
+
if (this.onAllToolCallsComplete) {
this.onAllToolCallsComplete(completedCalls);
}
diff --git a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts
index 2153ef48..a09f3eaf 100644
--- a/packages/core/src/telemetry/loggers.test.ts
+++ b/packages/core/src/telemetry/loggers.test.ts
@@ -4,6 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
+import { ToolConfirmationOutcome } from '../index.js';
import { logs } from '@opentelemetry/api-logs';
import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
import { Config } from '../config/config.js';
@@ -12,6 +13,8 @@ import {
logApiResponse,
logCliConfiguration,
logUserPrompt,
+ logToolCall,
+ ToolCallDecision,
} from './loggers.js';
import * as metrics from './metrics.js';
import * as sdk from './sdk.js';
@@ -236,4 +239,239 @@ describe('loggers', () => {
});
});
});
+
+ describe('logToolCall', () => {
+ const mockConfig = {
+ getSessionId: () => 'test-session-id',
+ } as Config;
+
+ const mockMetrics = {
+ recordToolCallMetrics: vi.fn(),
+ };
+
+ beforeEach(() => {
+ vi.spyOn(metrics, 'recordToolCallMetrics').mockImplementation(
+ mockMetrics.recordToolCallMetrics,
+ );
+ mockLogger.emit.mockReset();
+ });
+
+ it('should log a tool call with all fields', () => {
+ const event = {
+ function_name: 'test-function',
+ function_args: {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ duration_ms: 100,
+ success: true,
+ };
+
+ logToolCall(mockConfig, event, ToolConfirmationOutcome.ProceedOnce);
+
+ expect(mockLogger.emit).toHaveBeenCalledWith({
+ body: 'Tool call: test-function. Decision: accept. Success: true. Duration: 100ms.',
+ attributes: {
+ 'session.id': 'test-session-id',
+ 'event.name': 'gemini_cli.tool_call',
+ 'event.timestamp': '2025-01-01T00:00:00.000Z',
+ function_name: 'test-function',
+ function_args: JSON.stringify(
+ {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ null,
+ 2,
+ ),
+ duration_ms: 100,
+ success: true,
+ decision: ToolCallDecision.ACCEPT,
+ },
+ });
+
+ expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+ mockConfig,
+ 'test-function',
+ 100,
+ true,
+ ToolCallDecision.ACCEPT,
+ );
+ });
+ it('should log a tool call with a reject decision', () => {
+ const event = {
+ function_name: 'test-function',
+ function_args: {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ duration_ms: 100,
+ success: false,
+ };
+
+ logToolCall(mockConfig, event, ToolConfirmationOutcome.Cancel);
+
+ expect(mockLogger.emit).toHaveBeenCalledWith({
+ body: 'Tool call: test-function. Decision: reject. Success: false. Duration: 100ms.',
+ attributes: {
+ 'session.id': 'test-session-id',
+ 'event.name': 'gemini_cli.tool_call',
+ 'event.timestamp': '2025-01-01T00:00:00.000Z',
+ function_name: 'test-function',
+ function_args: JSON.stringify(
+ {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ null,
+ 2,
+ ),
+ duration_ms: 100,
+ success: false,
+ decision: ToolCallDecision.REJECT,
+ },
+ });
+
+ expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+ mockConfig,
+ 'test-function',
+ 100,
+ false,
+ ToolCallDecision.REJECT,
+ );
+ });
+
+ it('should log a tool call with a modify decision', () => {
+ const event = {
+ function_name: 'test-function',
+ function_args: {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ duration_ms: 100,
+ success: true,
+ };
+
+ logToolCall(mockConfig, event, ToolConfirmationOutcome.ModifyWithEditor);
+
+ expect(mockLogger.emit).toHaveBeenCalledWith({
+ body: 'Tool call: test-function. Decision: modify. Success: true. Duration: 100ms.',
+ attributes: {
+ 'session.id': 'test-session-id',
+ 'event.name': 'gemini_cli.tool_call',
+ 'event.timestamp': '2025-01-01T00:00:00.000Z',
+ function_name: 'test-function',
+ function_args: JSON.stringify(
+ {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ null,
+ 2,
+ ),
+ duration_ms: 100,
+ success: true,
+ decision: ToolCallDecision.MODIFY,
+ },
+ });
+
+ expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+ mockConfig,
+ 'test-function',
+ 100,
+ true,
+ ToolCallDecision.MODIFY,
+ );
+ });
+
+ it('should log a tool call without a decision', () => {
+ const event = {
+ function_name: 'test-function',
+ function_args: {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ duration_ms: 100,
+ success: true,
+ };
+
+ logToolCall(mockConfig, event);
+
+ expect(mockLogger.emit).toHaveBeenCalledWith({
+ body: 'Tool call: test-function. Success: true. Duration: 100ms.',
+ attributes: {
+ 'session.id': 'test-session-id',
+ 'event.name': 'gemini_cli.tool_call',
+ 'event.timestamp': '2025-01-01T00:00:00.000Z',
+ function_name: 'test-function',
+ function_args: JSON.stringify(
+ {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ null,
+ 2,
+ ),
+ duration_ms: 100,
+ success: true,
+ },
+ });
+
+ expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+ mockConfig,
+ 'test-function',
+ 100,
+ true,
+ undefined,
+ );
+ });
+
+ it('should log a failed tool call with an error', () => {
+ const event = {
+ function_name: 'test-function',
+ function_args: {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ duration_ms: 100,
+ success: false,
+ error: 'test-error',
+ error_type: 'test-error-type',
+ };
+
+ logToolCall(mockConfig, event);
+
+ expect(mockLogger.emit).toHaveBeenCalledWith({
+ body: 'Tool call: test-function. Success: false. Duration: 100ms.',
+ attributes: {
+ 'session.id': 'test-session-id',
+ 'event.name': 'gemini_cli.tool_call',
+ 'event.timestamp': '2025-01-01T00:00:00.000Z',
+ function_name: 'test-function',
+ function_args: JSON.stringify(
+ {
+ arg1: 'value1',
+ arg2: 2,
+ },
+ null,
+ 2,
+ ),
+ duration_ms: 100,
+ success: false,
+ error: 'test-error',
+ 'error.message': 'test-error',
+ error_type: 'test-error-type',
+ 'error.type': 'test-error-type',
+ },
+ });
+
+ expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+ mockConfig,
+ 'test-function',
+ 100,
+ false,
+ undefined,
+ );
+ });
+ });
});
diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts
index 49a7019a..66f584e7 100644
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@@ -30,6 +30,7 @@ import {
recordToolCallMetrics,
} from './metrics.js';
import { isTelemetrySdkInitialized } from './sdk.js';
+import { ToolConfirmationOutcome } from '../index.js';
const shouldLogUserPrompts = (config: Config): boolean =>
config.getTelemetryLogUserPromptsEnabled() ?? false;
@@ -40,6 +41,29 @@ function getCommonAttributes(config: Config): LogAttributes {
};
}
+export enum ToolCallDecision {
+ ACCEPT = 'accept',
+ REJECT = 'reject',
+ MODIFY = 'modify',
+}
+
+export function getDecisionFromOutcome(
+ outcome: ToolConfirmationOutcome,
+): ToolCallDecision {
+ switch (outcome) {
+ case ToolConfirmationOutcome.ProceedOnce:
+ case ToolConfirmationOutcome.ProceedAlways:
+ case ToolConfirmationOutcome.ProceedAlwaysServer:
+ case ToolConfirmationOutcome.ProceedAlwaysTool:
+ return ToolCallDecision.ACCEPT;
+ case ToolConfirmationOutcome.ModifyWithEditor:
+ return ToolCallDecision.MODIFY;
+ case ToolConfirmationOutcome.Cancel:
+ default:
+ return ToolCallDecision.REJECT;
+ }
+}
+
export function logCliConfiguration(config: Config): void {
if (!isTelemetrySdkInitialized()) return;
@@ -103,15 +127,20 @@ export function logUserPrompt(
export function logToolCall(
config: Config,
- event: Omit<ToolCallEvent, 'event.name' | 'event.timestamp'>,
+ event: Omit<ToolCallEvent, 'event.name' | 'event.timestamp' | 'decision'>,
+ outcome?: ToolConfirmationOutcome,
): void {
if (!isTelemetrySdkInitialized()) return;
+
+ const decision = outcome ? getDecisionFromOutcome(outcome) : undefined;
+
const attributes: LogAttributes = {
...getCommonAttributes(config),
...event,
'event.name': EVENT_TOOL_CALL,
'event.timestamp': new Date().toISOString(),
- function_args: JSON.stringify(event.function_args),
+ function_args: JSON.stringify(event.function_args, null, 2),
+ decision,
};
if (event.error) {
attributes['error.message'] = event.error;
@@ -121,7 +150,7 @@ export function logToolCall(
}
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
- body: `Tool call: ${event.function_name}. Success: ${event.success}. Duration: ${event.duration_ms}ms.`,
+ body: `Tool call: ${event.function_name}${decision ? `. Decision: ${decision}` : ''}. Success: ${event.success}. Duration: ${event.duration_ms}ms.`,
attributes,
};
logger.emit(logRecord);
@@ -130,6 +159,7 @@ export function logToolCall(
event.function_name,
event.duration_ms,
event.success,
+ decision,
);
}
diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts
index 93aa2189..59979ef3 100644
--- a/packages/core/src/telemetry/metrics.ts
+++ b/packages/core/src/telemetry/metrics.ts
@@ -89,6 +89,7 @@ export function recordToolCallMetrics(
functionName: string,
durationMs: number,
success: boolean,
+ decision?: 'accept' | 'reject' | 'modify',
): void {
if (!toolCallCounter || !toolCallLatencyHistogram || !isMetricsInitialized)
return;
@@ -97,6 +98,7 @@ export function recordToolCallMetrics(
...getCommonAttributes(config),
function_name: functionName,
success,
+ decision,
};
toolCallCounter.add(1, metricAttributes);
toolCallLatencyHistogram.record(durationMs, {
diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts
index da352862..926f1384 100644
--- a/packages/core/src/telemetry/types.ts
+++ b/packages/core/src/telemetry/types.ts
@@ -4,6 +4,8 @@
* SPDX-License-Identifier: Apache-2.0
*/
+import { ToolCallDecision } from './loggers.js';
+
export interface UserPromptEvent {
'event.name': 'user_prompt';
'event.timestamp': string; // ISO 8601
@@ -18,6 +20,7 @@ export interface ToolCallEvent {
function_args: Record<string, unknown>;
duration_ms: number;
success: boolean;
+ decision?: ToolCallDecision;
error?: string;
error_type?: string;
}