Diffstat (limited to 'packages/server/src/utils/nextSpeakerChecker.ts')
-rw-r--r--  packages/server/src/utils/nextSpeakerChecker.ts  52
1 file changed, 49 insertions(+), 3 deletions(-)
diff --git a/packages/server/src/utils/nextSpeakerChecker.ts b/packages/server/src/utils/nextSpeakerChecker.ts
index 3fe813db..fb00b39c 100644
--- a/packages/server/src/utils/nextSpeakerChecker.ts
+++ b/packages/server/src/utils/nextSpeakerChecker.ts
@@ -7,6 +7,7 @@
import { Content, SchemaUnion, Type } from '@google/genai';
import { GeminiClient } from '../core/client.js';
import { GeminiChat } from '../core/geminiChat.js';
+import { isFunctionResponse } from './messageInspectors.js';
const CHECK_PROMPT = `Analyze *only* the content and structure of your immediately preceding response (your last turn in the conversation history). Based *strictly* on that response, determine who should logically speak next: the 'user' or the 'model' (you).
**Decision Rules (apply in order):**
@@ -65,17 +66,62 @@ export async function checkNextSpeaker(
// that when passed back up to the endpoint will break subsequent calls. An example of this is when the model decides
// to respond with an empty part collection; if you were to send that message back to the server, it would respond with
// a 400 indicating that model part collections MUST have content.
- const history = await chat.getHistory(/* curated */ true);
+ const curatedHistory = chat.getHistory(/* curated */ true);
// Ensure there's a model response to analyze
- if (history.length === 0 || history[history.length - 1].role !== 'model') {
+ if (curatedHistory.length === 0) {
+ // Cannot determine next speaker if history is empty.
+ return null;
+ }
+
+ const comprehensiveHistory = chat.getHistory();
+ // If comprehensiveHistory is empty, there is no last message to check.
+ // This case should ideally be caught by the curatedHistory.length check earlier,
+ // but as a safeguard:
+ if (comprehensiveHistory.length === 0) {
+ return null;
+ }
+ const lastComprehensiveMessage =
+ comprehensiveHistory[comprehensiveHistory.length - 1];
+
+ // If the last message is a user message containing only function_responses,
+ // then the model should speak next.
+ if (
+ lastComprehensiveMessage &&
+ isFunctionResponse(lastComprehensiveMessage)
+ ) {
+ return {
+ reasoning:
+ 'The last message was a function response, so the model should speak next.',
+ next_speaker: 'model',
+ };
+ }
+
+ if (
+ lastComprehensiveMessage &&
+ lastComprehensiveMessage.role === 'model' &&
+ lastComprehensiveMessage.parts &&
+ lastComprehensiveMessage.parts.length === 0
+ ) {
+ lastComprehensiveMessage.parts.push({ text: '' });
+ return {
+ reasoning:
+ 'The last message was a filler model message with no content (nothing for user to act on), model should speak next.',
+ next_speaker: 'model',
+ };
+ }
+
+ // Things checked out. Let's proceed to potentially making an LLM request.
+
+ const lastMessage = curatedHistory[curatedHistory.length - 1];
+ if (!lastMessage || lastMessage.role !== 'model') {
// Cannot determine next speaker if the last turn wasn't from the model
// or if history is empty.
return null;
}
const contents: Content[] = [
- ...history,
+ ...curatedHistory,
{ role: 'user', parts: [{ text: CHECK_PROMPT }] },
];