author     Jacob MacDonald <[email protected]>  2025-06-13 21:21:40 -0700
committer  GitHub <[email protected]>              2025-06-14 04:21:40 +0000
commit     d5c6bb9740a52d87b71d812e698d0e88abf10caa (patch)
tree       7cb990c0adbd1d76d60a655d3dfa15e22db033e3 /packages/core/src
parent     1452bb4ca4ffe3b5c13aab81baaf510d4c45f06f (diff)
Add `/compress` command to force a compression of the context (#986)
Related to https://b.corp.google.com/issues/423605555 - I figured this might be a simpler solution to start with, and it is still useful on its own even if we do implement that.
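
For context, a minimal sketch of how a CLI-side `/compress` handler might call the new API. The handler and its message-returning convention are assumptions; only `tryCompressChat(true)` and its `ChatCompressionInfo | null` return type come from this change:

import { GeminiClient } from './core/client.js';

// Hypothetical handler for a `/compress` slash command; the command
// wiring is an assumption, not part of this commit.
export async function handleCompressCommand(
  client: GeminiClient,
): Promise<string> {
  // `force: true` bypasses the 95%-of-token-limit check.
  const info = await client.tryCompressChat(true);
  if (info === null) {
    return 'Nothing to compress: the chat history is empty.';
  }
  // Either count may be undefined if countTokens could not resolve it.
  return `Compressed chat history from ${info.originalTokenCount ?? '?'} to ${
    info.newTokenCount ?? '?'
  } tokens.`;
}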
Diffstat (limited to 'packages/core/src')
-rw-r--r--  packages/core/src/core/client.ts  73
-rw-r--r--  packages/core/src/core/turn.ts     6
2 files changed, 54 insertions, 25 deletions
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 7cfec9d6..fd44bf03 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -15,7 +15,12 @@ import {
GenerateContentResponse,
} from '@google/genai';
import { getFolderStructure } from '../utils/getFolderStructure.js';
-import { Turn, ServerGeminiStreamEvent, GeminiEventType } from './turn.js';
+import {
+ Turn,
+ ServerGeminiStreamEvent,
+ GeminiEventType,
+ ChatCompressionInfo,
+} from './turn.js';
import { Config } from '../config/config.js';
import { getCoreSystemPrompt } from './prompts.js';
import { ReadManyFilesTool } from '../tools/read-many-files.js';
@@ -194,7 +199,7 @@ export class GeminiClient {
const compressed = await this.tryCompressChat();
if (compressed) {
- yield { type: GeminiEventType.ChatCompressed };
+ yield { type: GeminiEventType.ChatCompressed, value: compressed };
}
const chat = await this.chat;
const turn = new Turn(chat);
@@ -390,44 +395,55 @@ export class GeminiClient {
});
}
- private async tryCompressChat(): Promise<boolean> {
+ async tryCompressChat(
+ force: boolean = false,
+ ): Promise<ChatCompressionInfo | null> {
const chat = await this.chat;
const history = chat.getHistory(true); // Get curated history
+ // Regardless of `force`, don't do anything if the history is empty.
+ if (history.length === 0) {
+ return null;
+ }
+
const cg = await this.contentGenerator;
- const { totalTokens } = await cg.countTokens({
+ const { totalTokens: originalTokenCount } = await cg.countTokens({
model: this.model,
contents: history,
});
- if (totalTokens === undefined) {
- // If token count is undefined, we can't determine if we need to compress.
- console.warn(
- `Could not determine token count for model ${this.model}. Skipping compression check.`,
- );
- return false;
- }
- const tokenCount = totalTokens; // Now guaranteed to be a number
+ // If not forced, check if we should compress based on context size.
+ if (!force) {
+ if (originalTokenCount === undefined) {
+ // If token count is undefined, we can't determine if we need to compress.
+ console.warn(
+ `Could not determine token count for model ${this.model}. Skipping compression check.`,
+ );
+ return null;
+ }
+ const tokenCount = originalTokenCount; // Now guaranteed to be a number
- const limit = tokenLimit(this.model);
- if (!limit) {
- // If no limit is defined for the model, we can't compress.
- console.warn(
- `No token limit defined for model ${this.model}. Skipping compression check.`,
- );
- return false;
- }
+ const limit = tokenLimit(this.model);
+ if (!limit) {
+ // If no limit is defined for the model, we can't compress.
+ console.warn(
+ `No token limit defined for model ${this.model}. Skipping compression check.`,
+ );
+ return null;
+ }
- if (tokenCount < 0.95 * limit) {
- return false;
+ if (tokenCount < 0.95 * limit) {
+ return null;
+ }
}
+
const summarizationRequestMessage = {
text: 'Summarize our conversation up to this point. The summary should be a concise yet comprehensive overview of all key topics, questions, answers, and important details discussed. This summary will replace the current chat history to conserve tokens, so it must capture everything essential to understand the context and continue our conversation effectively as if no information was lost.',
};
const response = await chat.sendMessage({
message: summarizationRequestMessage,
});
- this.chat = this.startChat([
+ const newHistory = [
{
role: 'user',
parts: [summarizationRequestMessage],
@@ -436,8 +452,15 @@ export class GeminiClient {
role: 'model',
parts: [{ text: response.text }],
},
- ]);
+ ];
+ this.chat = this.startChat(newHistory);
+ const newTokenCount = (
+ await cg.countTokens({ model: this.model, contents: newHistory })
+ ).totalTokens;
- return true;
+ return {
+ originalTokenCount,
+ newTokenCount,
+ };
}
}
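
The decision above compresses only when the curated history reaches 95% of the model's token limit, unless forced. A standalone sketch of that check, with `tokenLimitStub` standing in for the repo's real `tokenLimit` helper (its limit values here are illustrative, not from this diff):

const COMPRESSION_THRESHOLD = 0.95;

// Stand-in for the repo's tokenLimit helper, which is outside this diff.
function tokenLimitStub(model: string): number | undefined {
  const limits: Record<string, number> = { 'gemini-1.5-pro': 2_097_152 };
  return limits[model];
}

function shouldCompress(
  force: boolean,
  tokenCount: number | undefined,
  model: string,
): boolean {
  if (force) return true; // `/compress` bypasses the threshold entirely
  if (tokenCount === undefined) return false; // cannot measure, skip
  const limit = tokenLimitStub(model);
  if (!limit) return false; // no known limit for this model, skip
  return tokenCount >= COMPRESSION_THRESHOLD * limit;
}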
diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts
index 71c02d83..87533c28 100644
--- a/packages/core/src/core/turn.ts
+++ b/packages/core/src/core/turn.ts
@@ -98,8 +98,14 @@ export type ServerGeminiErrorEvent = {
value: GeminiErrorEventValue;
};
+export interface ChatCompressionInfo {
+ originalTokenCount: number | undefined;
+ newTokenCount: number | undefined;
+}
+
export type ServerGeminiChatCompressedEvent = {
type: GeminiEventType.ChatCompressed;
+ value: ChatCompressionInfo;
};
export type ServerGeminiUsageMetadataEvent = {
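
On the consuming side, the compression event now carries both token counts. A sketch of a stream handler that reports them, using the event types from turn.ts (the logging is illustrative, not part of this change):

import {
  GeminiEventType,
  ServerGeminiStreamEvent,
} from './core/turn.js';

// Narrowing on `type` gives access to the new `value` payload.
function onStreamEvent(event: ServerGeminiStreamEvent): void {
  if (event.type === GeminiEventType.ChatCompressed) {
    const { originalTokenCount, newTokenCount } = event.value;
    // Either count may be undefined when countTokens cannot resolve it.
    if (originalTokenCount !== undefined && newTokenCount !== undefined) {
      console.info(
        `Chat compressed: ${originalTokenCount} -> ${newTokenCount} tokens`,
      );
    } else {
      console.info('Chat compressed (token counts unavailable).');
    }
  }
}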