Diffstat (limited to 'packages/server/src/utils/nextSpeakerChecker.ts')
| -rw-r--r-- | packages/server/src/utils/nextSpeakerChecker.ts | 151 |
1 file changed, 0 insertions, 151 deletions
diff --git a/packages/server/src/utils/nextSpeakerChecker.ts b/packages/server/src/utils/nextSpeakerChecker.ts
deleted file mode 100644
index 66fa4395..00000000
--- a/packages/server/src/utils/nextSpeakerChecker.ts
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { Content, SchemaUnion, Type } from '@google/genai';
-import { GeminiClient } from '../core/client.js';
-import { GeminiChat } from '../core/geminiChat.js';
-import { isFunctionResponse } from './messageInspectors.js';
-
-const CHECK_PROMPT = `Analyze *only* the content and structure of your immediately preceding response (your last turn in the conversation history). Based *strictly* on that response, determine who should logically speak next: the 'user' or the 'model' (you).
-**Decision Rules (apply in order):**
-1. **Model Continues:** If your last response explicitly states an immediate next action *you* intend to take (e.g., "Next, I will...", "Now I'll process...", "Moving on to analyze...", indicates an intended tool call that didn't execute), OR if the response seems clearly incomplete (cut off mid-thought without a natural conclusion), then the **'model'** should speak next.
-2. **Question to User:** If your last response ends with a direct question specifically addressed *to the user*, then the **'user'** should speak next.
-3. **Waiting for User:** If your last response completed a thought, statement, or task *and* does not meet the criteria for Rule 1 (Model Continues) or Rule 2 (Question to User), it implies a pause expecting user input or reaction. In this case, the **'user'** should speak next.
-**Output Format:**
-Respond *only* in JSON format according to the following schema. Do not include any text outside the JSON structure.
-\`\`\`json
-{
-  "type": "object",
-  "properties": {
-    "reasoning": {
-      "type": "string",
-      "description": "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn."
-    },
-    "next_speaker": {
-      "type": "string",
-      "enum": ["user", "model"],
-      "description": "Who should speak next based *only* on the preceding turn and the decision rules."
-    }
-  },
-  "required": ["next_speaker", "reasoning"]
-}
-\`\`\`
-`;
-
-const RESPONSE_SCHEMA: SchemaUnion = {
-  type: Type.OBJECT,
-  properties: {
-    reasoning: {
-      type: Type.STRING,
-      description:
-        "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn.",
-    },
-    next_speaker: {
-      type: Type.STRING,
-      enum: ['user', 'model'],
-      description:
-        'Who should speak next based *only* on the preceding turn and the decision rules',
-    },
-  },
-  required: ['reasoning', 'next_speaker'],
-};
-
-export interface NextSpeakerResponse {
-  reasoning: string;
-  next_speaker: 'user' | 'model';
-}
-
-export async function checkNextSpeaker(
-  chat: GeminiChat,
-  geminiClient: GeminiClient,
-  abortSignal: AbortSignal,
-): Promise<NextSpeakerResponse | null> {
-  // We need to capture the curated history because there are many moments when the model will return invalid turns
-  // that when passed back up to the endpoint will break subsequent calls. An example of this is when the model decides
-  // to respond with an empty part collection if you were to send that message back to the server it will respond with
-  // a 400 indicating that model part collections MUST have content.
-  const curatedHistory = chat.getHistory(/* curated */ true);
-
-  // Ensure there's a model response to analyze
-  if (curatedHistory.length === 0) {
-    // Cannot determine next speaker if history is empty.
-    return null;
-  }
-
-  const comprehensiveHistory = chat.getHistory();
-  // If comprehensiveHistory is empty, there is no last message to check.
-  // This case should ideally be caught by the curatedHistory.length check earlier,
-  // but as a safeguard:
-  if (comprehensiveHistory.length === 0) {
-    return null;
-  }
-  const lastComprehensiveMessage =
-    comprehensiveHistory[comprehensiveHistory.length - 1];
-
-  // If the last message is a user message containing only function_responses,
-  // then the model should speak next.
-  if (
-    lastComprehensiveMessage &&
-    isFunctionResponse(lastComprehensiveMessage)
-  ) {
-    return {
-      reasoning:
-        'The last message was a function response, so the model should speak next.',
-      next_speaker: 'model',
-    };
-  }
-
-  if (
-    lastComprehensiveMessage &&
-    lastComprehensiveMessage.role === 'model' &&
-    lastComprehensiveMessage.parts &&
-    lastComprehensiveMessage.parts.length === 0
-  ) {
-    lastComprehensiveMessage.parts.push({ text: '' });
-    return {
-      reasoning:
-        'The last message was a filler model message with no content (nothing for user to act on), model should speak next.',
-      next_speaker: 'model',
-    };
-  }
-
-  // Things checked out. Lets proceed to potentially making an LLM request.
-
-  const lastMessage = curatedHistory[curatedHistory.length - 1];
-  if (!lastMessage || lastMessage.role !== 'model') {
-    // Cannot determine next speaker if the last turn wasn't from the model
-    // or if history is empty.
-    return null;
-  }
-
-  const contents: Content[] = [
-    ...curatedHistory,
-    { role: 'user', parts: [{ text: CHECK_PROMPT }] },
-  ];
-
-  try {
-    const parsedResponse = (await geminiClient.generateJson(
-      contents,
-      RESPONSE_SCHEMA,
-      abortSignal,
-    )) as unknown as NextSpeakerResponse;
-
-    if (
-      parsedResponse &&
-      parsedResponse.next_speaker &&
-      ['user', 'model'].includes(parsedResponse.next_speaker)
-    ) {
-      return parsedResponse;
-    }
-    return null;
-  } catch (error) {
-    console.warn(
-      'Failed to talk to Gemini endpoint when seeing if conversation should continue.',
-      error,
-    );
-    return null;
-  }
-}
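
For context, the deleted helper was the piece that decides, after a model turn, whether to hand control back to the user or let the model keep going. Below is a minimal sketch of how a caller might have used it. It assumes an already-constructed `GeminiChat`/`GeminiClient` pair (both imported in the deleted file), and `sendToModel` is a hypothetical stand-in, since the project's actual send API is not part of this diff:

```ts
import { GeminiChat } from '../core/geminiChat.js';
import { GeminiClient } from '../core/client.js';
import { checkNextSpeaker } from './nextSpeakerChecker.js';

// Hypothetical stand-in for the project's actual send API (not in this diff).
declare function sendToModel(
  chat: GeminiChat,
  text: string,
  signal: AbortSignal,
): Promise<void>;

// Sketch only: after a model turn, decide whether to auto-continue.
async function maybeContinueTurn(
  chat: GeminiChat, // assumed: a live chat session
  client: GeminiClient, // assumed: the client the session was built with
  signal: AbortSignal,
): Promise<void> {
  const verdict = await checkNextSpeaker(chat, client, signal);

  // A null verdict means the check could not run (empty history, the last
  // turn was not the model's, or the endpoint call failed); treat that as
  // "wait for the user" rather than looping.
  if (verdict?.next_speaker === 'model') {
    // Nudge the model to finish the action it announced.
    await sendToModel(chat, 'Please continue.', signal);
  }
}
```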

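For reference, a well-formed classifier reply, i.e. what `generateJson` is expected to parse against `RESPONSE_SCHEMA` and return as a `NextSpeakerResponse`, would look like the following; the reasoning text is illustrative only:

```ts
import type { NextSpeakerResponse } from './nextSpeakerChecker.js';

// Illustrative only: the shape the classifier must emit. Here Rule 2
// applies because the model's last turn ended with a question to the user.
const example: NextSpeakerResponse = {
  reasoning:
    'The previous turn ends with a direct question addressed to the user (Rule 2).',
  next_speaker: 'user',
};
```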