summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--packages/server/src/core/client.ts33
-rw-r--r--packages/server/src/utils/nextSpeakerChecker.test.ts186
-rw-r--r--packages/server/src/utils/nextSpeakerChecker.ts97
3 files changed, 303 insertions, 13 deletions
diff --git a/packages/server/src/core/client.ts b/packages/server/src/core/client.ts
index 46af465a..4c20c0ad 100644
--- a/packages/server/src/core/client.ts
+++ b/packages/server/src/core/client.ts
@@ -21,6 +21,7 @@ import { Config } from '../config/config.js';
import { getCoreSystemPrompt } from './prompts.js';
import { ReadManyFilesTool } from '../tools/read-many-files.js';
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
+import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
export class GeminiClient {
private client: GoogleGenAI;
@@ -103,6 +104,16 @@ export class GeminiClient {
.getToolRegistry()
.getFunctionDeclarations();
const tools: Tool[] = [{ functionDeclarations: toolDeclarations }];
+ const history: Content[] = [
+ {
+ role: 'user',
+ parts: envParts,
+ },
+ {
+ role: 'model',
+ parts: [{ text: 'Got it. Thanks for the context!' }],
+ },
+ ];
try {
return this.client.chats.create({
model: this.model,
@@ -111,16 +122,7 @@ export class GeminiClient {
...this.generateContentConfig,
tools,
},
- history: [
- {
- role: 'user',
- parts: envParts,
- },
- {
- role: 'model',
- parts: [{ text: 'Got it. Thanks for the context!' }],
- },
- ],
+ history,
});
} catch (error) {
console.error('Error initializing Gemini chat session:', error);
@@ -149,10 +151,15 @@ export class GeminiClient {
break;
}
- // What do we do when we have both function responses and confirmations?
const fnResponses = turn.getFunctionResponses();
if (fnResponses.length === 0) {
- break; // user's turn to respond
+ const nextSpeakerCheck = await checkNextSpeaker(chat, this);
+ if (nextSpeakerCheck?.next_speaker === 'model') {
+ request = [{ text: 'Please continue.' }];
+ continue;
+ } else {
+ break;
+ }
}
request = fnResponses;
}
@@ -167,7 +174,7 @@ export class GeminiClient {
): Promise<Record<string, unknown>> {
try {
const result = await this.client.models.generateContent({
- model: this.model,
+ model: 'gemini-2.0-flash',
config: {
...this.generateContentConfig,
systemInstruction: getCoreSystemPrompt(),
diff --git a/packages/server/src/utils/nextSpeakerChecker.test.ts b/packages/server/src/utils/nextSpeakerChecker.test.ts
new file mode 100644
index 00000000..b8d17875
--- /dev/null
+++ b/packages/server/src/utils/nextSpeakerChecker.test.ts
@@ -0,0 +1,186 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach, Mock, afterEach } from 'vitest';
+import { Chat, Content } from '@google/genai';
+import { GeminiClient } from '../core/client.js';
+import { Config } from '../config/config.js'; // Added Config import
+import { checkNextSpeaker, NextSpeakerResponse } from './nextSpeakerChecker.js';
+
+// Mock GeminiClient and Config constructor
+vi.mock('../core/client.js');
+vi.mock('../config/config.js');
+
+// Mock @google/genai
+const mockGetHistory = vi.fn();
+const mockCreateChat = vi.fn(() => ({
+ getHistory: mockGetHistory,
+}));
+
+vi.mock('@google/genai', async () => {
+ const actualGenAI =
+ await vi.importActual<typeof import('@google/genai')>('@google/genai');
+ return {
+ ...actualGenAI,
+ GoogleGenAI: vi.fn().mockImplementation(() => ({
+ chats: {
+ create: mockCreateChat,
+ },
+ })),
+ // Keep Chat constructor mock for type safety if direct instantiation is attempted,
+ // but primary path is via client.chats.create
+ Chat: vi.fn().mockImplementation(() => ({
+ getHistory: mockGetHistory,
+ })),
+ };
+});
+
+describe('checkNextSpeaker', () => {
+ let mockChat: Chat;
+ let mockGeminiClient: GeminiClient;
+ let MockConfig: Mock;
+
+ beforeEach(() => {
+ // Dynamically import and assign the mock
+ // Must be done within beforeEach or test to ensure mocks are reset
+ MockConfig = vi.mocked(Config);
+ // Create a mock instance of Config
+ const mockConfigInstance = new MockConfig(
+ 'test-api-key',
+ 'gemini-pro',
+ false,
+ '.',
+ false,
+ undefined,
+ false,
+ undefined,
+ undefined,
+ undefined,
+ );
+ // Mock any methods on mockConfigInstance if needed, e.g., mockConfigInstance.getToolRegistry = vi.fn()...
+
+ mockGeminiClient = new GeminiClient(mockConfigInstance);
+ // Simulate chat creation as done in GeminiClient
+ mockChat = { getHistory: mockGetHistory } as unknown as Chat;
+ });
+
+ afterEach(() => {
+ vi.clearAllMocks();
+ });
+
+ it('should return null if history is empty', async () => {
+ (mockChat.getHistory as Mock).mockResolvedValue([]);
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toBeNull();
+ expect(mockGeminiClient.generateJson).not.toHaveBeenCalled();
+ });
+
+ it('should return null if the last speaker was the user', async () => {
+ (mockChat.getHistory as Mock).mockResolvedValue([
+ { role: 'user', parts: [{ text: 'Hello' }] },
+ ] as Content[]);
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toBeNull();
+ expect(mockGeminiClient.generateJson).not.toHaveBeenCalled();
+ });
+
+ it("should return { next_speaker: 'model' } when model intends to continue", async () => {
+ (mockChat.getHistory as Mock).mockResolvedValue([
+ { role: 'model', parts: [{ text: 'I will now do something.' }] },
+ ] as Content[]);
+ const mockApiResponse: NextSpeakerResponse = {
+ reasoning: 'Model stated it will do something.',
+ next_speaker: 'model',
+ };
+ (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);
+
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toEqual(mockApiResponse);
+ expect(mockGeminiClient.generateJson).toHaveBeenCalledTimes(1);
+ });
+
+ it("should return { next_speaker: 'user' } when model asks a question", async () => {
+ (mockChat.getHistory as Mock).mockResolvedValue([
+ { role: 'model', parts: [{ text: 'What would you like to do?' }] },
+ ] as Content[]);
+ const mockApiResponse: NextSpeakerResponse = {
+ reasoning: 'Model asked a question.',
+ next_speaker: 'user',
+ };
+ (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);
+
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toEqual(mockApiResponse);
+ });
+
+ it("should return { next_speaker: 'user' } when model makes a statement", async () => {
+ (mockChat.getHistory as Mock).mockResolvedValue([
+ { role: 'model', parts: [{ text: 'This is a statement.' }] },
+ ] as Content[]);
+ const mockApiResponse: NextSpeakerResponse = {
+ reasoning: 'Model made a statement, awaiting user input.',
+ next_speaker: 'user',
+ };
+ (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);
+
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toEqual(mockApiResponse);
+ });
+
+ it('should return null if geminiClient.generateJson throws an error', async () => {
+ const consoleWarnSpy = vi
+ .spyOn(console, 'warn')
+ .mockImplementation(() => {});
+ (mockChat.getHistory as Mock).mockResolvedValue([
+ { role: 'model', parts: [{ text: 'Some model output.' }] },
+ ] as Content[]);
+ (mockGeminiClient.generateJson as Mock).mockRejectedValue(
+ new Error('API Error'),
+ );
+
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toBeNull();
+ consoleWarnSpy.mockRestore();
+ });
+
+ it('should return null if geminiClient.generateJson returns invalid JSON (missing next_speaker)', async () => {
+ (mockChat.getHistory as Mock).mockResolvedValue([
+ { role: 'model', parts: [{ text: 'Some model output.' }] },
+ ] as Content[]);
+ (mockGeminiClient.generateJson as Mock).mockResolvedValue({
+ reasoning: 'This is incomplete.',
+ } as unknown as NextSpeakerResponse); // Type assertion to simulate invalid response
+
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toBeNull();
+ });
+
+ it('should return null if geminiClient.generateJson returns a non-string next_speaker', async () => {
+ (mockChat.getHistory as Mock).mockResolvedValue([
+ { role: 'model', parts: [{ text: 'Some model output.' }] },
+ ] as Content[]);
+ (mockGeminiClient.generateJson as Mock).mockResolvedValue({
+ reasoning: 'Model made a statement, awaiting user input.',
+ next_speaker: 123, // Invalid type
+ } as unknown as NextSpeakerResponse);
+
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toBeNull();
+ });
+
+ it('should return null if geminiClient.generateJson returns an invalid next_speaker string value', async () => {
+ (mockChat.getHistory as Mock).mockResolvedValue([
+ { role: 'model', parts: [{ text: 'Some model output.' }] },
+ ] as Content[]);
+ (mockGeminiClient.generateJson as Mock).mockResolvedValue({
+ reasoning: 'Model made a statement, awaiting user input.',
+ next_speaker: 'neither', // Invalid enum value
+ } as unknown as NextSpeakerResponse);
+
+ const result = await checkNextSpeaker(mockChat, mockGeminiClient);
+ expect(result).toBeNull();
+ });
+});
diff --git a/packages/server/src/utils/nextSpeakerChecker.ts b/packages/server/src/utils/nextSpeakerChecker.ts
new file mode 100644
index 00000000..f852879f
--- /dev/null
+++ b/packages/server/src/utils/nextSpeakerChecker.ts
@@ -0,0 +1,97 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { Chat, Content, SchemaUnion, Type } from '@google/genai';
+import { GeminiClient } from '../core/client.js';
+
+const CHECK_PROMPT = `Analyze *only* the content and structure of your immediately preceding response (your last turn in the conversation history). Based *strictly* on that response, determine who should logically speak next: the 'user' or the 'model' (you).
+**Decision Rules (apply in order):**
+1. **Model Continues:** If your last response explicitly states an immediate next action *you* intend to take (e.g., "Next, I will...", "Now I'll process...", "Moving on to analyze...", indicates an intended tool call that didn't execute), OR if the response seems clearly incomplete (cut off mid-thought without a natural conclusion), then the **'model'** should speak next.
+2. **Question to User:** If your last response ends with a direct question specifically addressed *to the user*, then the **'user'** should speak next.
+3. **Waiting for User:** If your last response completed a thought, statement, or task *and* does not meet the criteria for Rule 1 (Model Continues) or Rule 2 (Question to User), it implies a pause expecting user input or reaction. In this case, the **'user'** should speak next.
+**Output Format:**
+Respond *only* in JSON format according to the following schema. Do not include any text outside the JSON structure.
+\`\`\`json
+{
+ "type": "object",
+ "properties": {
+ "reasoning": {
+ "type": "string",
+ "description": "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn."
+ },
+ "next_speaker": {
+ "type": "string",
+ "enum": ["user", "model"],
+ "description": "Who should speak next based *only* on the preceding turn and the decision rules."
+ }
+ },
+ "required": ["next_speaker", "reasoning"]
+}
+\`\`\`
+`;
+
+const RESPONSE_SCHEMA: SchemaUnion = {
+ type: Type.OBJECT,
+ properties: {
+ reasoning: {
+ type: Type.STRING,
+ description:
+ "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn.",
+ },
+ next_speaker: {
+ type: Type.STRING,
+ enum: ['user', 'model'],
+ description:
+ 'Who should speak next based *only* on the preceding turn and the decision rules',
+ },
+ },
+ required: ['reasoning', 'next_speaker'],
+};
+
+export interface NextSpeakerResponse {
+ reasoning: string;
+ next_speaker: 'user' | 'model';
+}
+
+export async function checkNextSpeaker(
+ chat: Chat,
+ geminiClient: GeminiClient,
+): Promise<NextSpeakerResponse | null> {
+ const history = await chat.getHistory();
+ // Ensure there's a model response to analyze
+ if (history.length === 0 || history[history.length - 1].role !== 'model') {
+ // Cannot determine next speaker if the last turn wasn't from the model
+ // or if history is empty.
+ return null;
+ }
+
+ const contents: Content[] = [
+ ...history,
+ { role: 'user', parts: [{ text: CHECK_PROMPT }] },
+ ];
+
+ try {
+ const parsedResponse = (await geminiClient.generateJson(
+ contents,
+ RESPONSE_SCHEMA,
+ )) as unknown as NextSpeakerResponse;
+
+ if (
+ parsedResponse &&
+ parsedResponse.next_speaker &&
+ ['user', 'model'].includes(parsedResponse.next_speaker)
+ ) {
+ return parsedResponse;
+ }
+ return null;
+ } catch (error) {
+ console.warn(
+      'Failed to talk to Gemini endpoint when checking if conversation should continue.',
+ error,
+ );
+ return null;
+ }
+}