Diffstat (limited to 'packages/server/src/utils/nextSpeakerChecker.ts')
| -rw-r--r-- | packages/server/src/utils/nextSpeakerChecker.ts | 52 |
1 file changed, 49 insertions, 3 deletions
diff --git a/packages/server/src/utils/nextSpeakerChecker.ts b/packages/server/src/utils/nextSpeakerChecker.ts
index 3fe813db..fb00b39c 100644
--- a/packages/server/src/utils/nextSpeakerChecker.ts
+++ b/packages/server/src/utils/nextSpeakerChecker.ts
@@ -7,6 +7,7 @@
 import { Content, SchemaUnion, Type } from '@google/genai';
 import { GeminiClient } from '../core/client.js';
 import { GeminiChat } from '../core/geminiChat.js';
+import { isFunctionResponse } from './messageInspectors.js';
 
 const CHECK_PROMPT = `Analyze *only* the content and structure of your immediately preceding response (your last turn in the conversation history). Based *strictly* on that response, determine who should logically speak next: the 'user' or the 'model' (you).
 **Decision Rules (apply in order):**
@@ -65,17 +66,62 @@ export async function checkNextSpeaker(
   // that when passed back up to the endpoint will break subsequent calls. An example of this is when the model decides
   // to respond with an empty part collection if you were to send that message back to the server it will respond with
   // a 400 indicating that model part collections MUST have content.
-  const history = await chat.getHistory(/* curated */ true);
+  const curatedHistory = chat.getHistory(/* curated */ true);
 
   // Ensure there's a model response to analyze
-  if (history.length === 0 || history[history.length - 1].role !== 'model') {
+  if (curatedHistory.length === 0) {
+    // Cannot determine next speaker if history is empty.
+    return null;
+  }
+
+  const comprehensiveHistory = chat.getHistory();
+  // If comprehensiveHistory is empty, there is no last message to check.
+  // This case should ideally be caught by the curatedHistory.length check earlier,
+  // but as a safeguard:
+  if (comprehensiveHistory.length === 0) {
+    return null;
+  }
+  const lastComprehensiveMessage =
+    comprehensiveHistory[comprehensiveHistory.length - 1];
+
+  // If the last message is a user message containing only function_responses,
+  // then the model should speak next.
+  if (
+    lastComprehensiveMessage &&
+    isFunctionResponse(lastComprehensiveMessage)
+  ) {
+    return {
+      reasoning:
+        'The last message was a function response, so the model should speak next.',
+      next_speaker: 'model',
+    };
+  }
+
+  if (
+    lastComprehensiveMessage &&
+    lastComprehensiveMessage.role === 'model' &&
+    lastComprehensiveMessage.parts &&
+    lastComprehensiveMessage.parts.length === 0
+  ) {
+    lastComprehensiveMessage.parts.push({ text: '' });
+    return {
+      reasoning:
+        'The last message was a filler model message with no content (nothing for user to act on), model should speak next.',
+      next_speaker: 'model',
+    };
+  }
+
+  // Things checked out. Lets proceed to potentially making an LLM request.
+
+  const lastMessage = curatedHistory[curatedHistory.length - 1];
+  if (!lastMessage || lastMessage.role !== 'model') {
     // Cannot determine next speaker if the last turn wasn't from the model
     // or if history is empty.
     return null;
   }
 
   const contents: Content[] = [
-    ...history,
+    ...curatedHistory,
     { role: 'user', parts: [{ text: CHECK_PROMPT }] },
   ];
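
The new early returns hinge on the isFunctionResponse helper imported at the top of the file. Its implementation is not part of this diff; the sketch below is an assumption based solely on the comment above its call site ("a user message containing only function_responses") and may differ from the real messageInspectors.ts:

import { Content } from '@google/genai';

// Hypothetical sketch of the helper imported from './messageInspectors.js'.
// Per the comment in the diff, it should identify a user message whose
// parts all carry functionResponse payloads (i.e. tool results).
export function isFunctionResponse(content: Content): boolean {
  return (
    content.role === 'user' &&
    !!content.parts &&
    content.parts.length > 0 &&
    content.parts.every((part) => !!part.functionResponse)
  );
}

Checking every part, not just the first, matters because a single user turn can carry several tool results at once; the "only" in the diff's comment implies a mixed turn (user text plus function responses) should not short-circuit to the model.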

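For readers following the control flow, the result shape below is inferred purely from the return statements added in this diff; the exported type itself is declared elsewhere in nextSpeakerChecker.ts:

// Inferred from the return statements in this diff: checkNextSpeaker
// resolves to such an object, or to null when there is nothing to analyze.
interface NextSpeakerResponse {
  reasoning: string;
  next_speaker: 'user' | 'model';
}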