author    Evan Senter <[email protected]>    2025-04-19 19:45:42 +0100
committer GitHub <[email protected]>          2025-04-19 19:45:42 +0100
commit    3fce6cea27d3e6129d6c06e528b62e1b11bf7094 (patch)
tree      244b8e9ab94f902d65d4bda8739a6538e377ed17 /packages/server/src/core
parent    0c9e1ef61be7db53e6e73b7208b649cd8cbed6c3 (diff)
Starting to modularize into separate cli / server packages. (#55)
* Starting to move a lot of code into packages/server
* More of the massive refactor, builds and runs, some issues though.
* Fixing outstanding issue with double messages.
* Fixing a minor UI issue.
* Fixing the build post-merge.
* Running formatting.
* Addressing comments.
Diffstat (limited to 'packages/server/src/core')
-rw-r--r--  packages/server/src/core/gemini-client.ts  171
-rw-r--r--  packages/server/src/core/prompts.ts         101
-rw-r--r--  packages/server/src/core/turn.ts            199
3 files changed, 471 insertions, 0 deletions
diff --git a/packages/server/src/core/gemini-client.ts b/packages/server/src/core/gemini-client.ts
new file mode 100644
index 00000000..c7415ed8
--- /dev/null
+++ b/packages/server/src/core/gemini-client.ts
@@ -0,0 +1,171 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import {
+ GenerateContentConfig,
+ GoogleGenAI,
+ Part,
+ Chat,
+ SchemaUnion,
+ PartListUnion,
+ Content,
+ FunctionDeclaration,
+ Tool,
+} from '@google/genai';
+import { CoreSystemPrompt } from './prompts.js';
+import process from 'node:process';
+import { getFolderStructure } from '../utils/getFolderStructure.js';
+import { Turn, ServerTool, GeminiEventType } from './turn.js';
+
+// Local declaration of the ServerGeminiStreamEvent type (also defined in turn.ts)
+type ServerGeminiStreamEvent =
+ | { type: GeminiEventType.Content; value: string }
+ | {
+ type: GeminiEventType.ToolCallRequest;
+ value: { callId: string; name: string; args: Record<string, unknown> };
+ };
+
+export class GeminiClient {
+ private ai: GoogleGenAI;
+ private model: string;
+ private generateContentConfig: GenerateContentConfig = {
+ temperature: 0,
+ topP: 1,
+ };
+ private readonly MAX_TURNS = 100;
+
+ constructor(apiKey: string, model: string) {
+ this.ai = new GoogleGenAI({ apiKey: apiKey });
+ this.model = model;
+ }
+
+ private async getEnvironment(): Promise<Part> {
+ const cwd = process.cwd();
+ const today = new Date().toLocaleDateString(undefined, {
+ weekday: 'long',
+ year: 'numeric',
+ month: 'long',
+ day: 'numeric',
+ });
+ const platform = process.platform;
+ const folderStructure = await getFolderStructure(cwd);
+ const context = `
+ Okay, just setting up the context for our chat.
+ Today is ${today}.
+ My operating system is: ${platform}
+ I'm currently working in the directory: ${cwd}
+ ${folderStructure}
+ `.trim();
+ return { text: context };
+ }
+
+ async startChat(toolDeclarations: FunctionDeclaration[]): Promise<Chat> {
+ const envPart = await this.getEnvironment();
+ const tools: Tool[] = toolDeclarations.map((declaration) => ({
+ functionDeclarations: [declaration],
+ }));
+ try {
+ const chat = this.ai.chats.create({
+ model: this.model,
+ config: {
+ systemInstruction: CoreSystemPrompt,
+ ...this.generateContentConfig,
+ tools: tools,
+ },
+ history: [
+ {
+ role: 'user',
+ parts: [envPart],
+ },
+ {
+ role: 'model',
+ parts: [{ text: 'Got it. Thanks for the context!' }],
+ },
+ ],
+ });
+ return chat;
+ } catch (error) {
+ console.error('Error initializing Gemini chat session:', error);
+ const message = error instanceof Error ? error.message : 'Unknown error.';
+ throw new Error(`Failed to initialize chat: ${message}`);
+ }
+ }
+
+ async *sendMessageStream(
+ chat: Chat,
+ request: PartListUnion,
+ availableTools: ServerTool[],
+ signal?: AbortSignal,
+ ): AsyncGenerator<ServerGeminiStreamEvent> {
+ let turns = 0;
+ try {
+ while (turns < this.MAX_TURNS) {
+ turns++;
+ const turn = new Turn(chat, availableTools);
+ const resultStream = turn.run(request, signal);
+ for await (const event of resultStream) {
+ yield event;
+ }
+ const fnResponses = turn.getFunctionResponses();
+ if (fnResponses.length > 0) {
+ request = fnResponses;
+ continue;
+ } else {
+ break;
+ }
+ }
+ if (turns >= this.MAX_TURNS) {
+ console.warn(
+ 'sendMessageStream: Reached maximum tool call turns limit.',
+ );
+ }
+ } catch (error: unknown) {
+ if (error instanceof Error && error.name === 'AbortError') {
+ console.log('Gemini stream request aborted by user.');
+ throw error;
+ } else {
+ console.error(`Error during Gemini stream or tool interaction:`, error);
+ throw error;
+ }
+ }
+ }
+
+ async generateJson(
+ contents: Content[],
+ schema: SchemaUnion,
+ ): Promise<Record<string, unknown>> {
+ try {
+ const result = await this.ai.models.generateContent({
+ model: this.model,
+ config: {
+ ...this.generateContentConfig,
+ systemInstruction: CoreSystemPrompt,
+ responseSchema: schema,
+ responseMimeType: 'application/json',
+ },
+ contents,
+ });
+ const responseText = result.text;
+ if (!responseText) {
+ throw new Error('API returned an empty response.');
+ }
+ try {
+ const parsedJson = JSON.parse(responseText);
+ return parsedJson;
+ } catch (parseError) {
+ console.error('Failed to parse JSON response:', responseText);
+ throw new Error(
+ `Failed to parse API response as JSON: ${parseError instanceof Error ? parseError.message : String(parseError)}`,
+ );
+ }
+ } catch (error) {
+ console.error('Error generating JSON content:', error);
+ const message =
+ error instanceof Error ? error.message : 'Unknown API error.';
+ throw new Error(`Failed to generate JSON content: ${message}`);
+ }
+ }
+}
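For orientation, the sketch below shows one way a caller (for example, the CLI package this commit splits off) might drive the new GeminiClient. It is not part of the commit: the tool name, model id, and API-key handling are assumptions, and ToolResult's full shape is not visible in this diff, so the tool's return value is cast.

import process from 'node:process';
import { GeminiClient } from './gemini-client.js';
import { ServerTool, GeminiEventType } from './turn.js';
import { ToolResult } from '../tools/tools.js';

// Hypothetical tool for illustration only; the real tools live outside this diff.
const echoTool: ServerTool = {
  name: 'echo',
  schema: { name: 'echo', description: 'Echoes the given text back to the model.' },
  async execute(params) {
    // ToolResult's exact fields are not shown here; only llmContent is known to be read.
    return { llmContent: String(params['text'] ?? '') } as unknown as ToolResult;
  },
};

async function demo() {
  // Model name and API-key source are placeholders, not defaults defined by this commit.
  const client = new GeminiClient(process.env.GEMINI_API_KEY ?? '', 'gemini-1.5-pro');
  const chat = await client.startChat([echoTool.schema]);
  const stream = client.sendMessageStream(
    chat,
    [{ text: 'Use the echo tool to repeat "hello".' }],
    [echoTool],
  );
  for await (const event of stream) {
    if (event.type === GeminiEventType.Content) {
      process.stdout.write(event.value);
    } else {
      // Tool execution happens inside Turn; this event only reports the request.
      console.log(`\n[tool requested] ${event.value.name}`, event.value.args);
    }
  }
}

demo().catch((err) => console.error(err));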
diff --git a/packages/server/src/core/prompts.ts b/packages/server/src/core/prompts.ts
new file mode 100644
index 00000000..60e1ff5c
--- /dev/null
+++ b/packages/server/src/core/prompts.ts
@@ -0,0 +1,101 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// Note: Tool names are referenced here. If they change in tool definitions, update this prompt.
+// import { ReadFileTool } from '../tools/read-file.tool.js';
+// import { TerminalTool } from '../tools/terminal.tool.js';
+
+const MEMORY_FILE_NAME = 'GEMINI.md';
+
+const contactEmail = '[email protected]';
+export const CoreSystemPrompt = `
+You are an interactive CLI tool assistant specializing in software engineering tasks. Your primary goal is to help users safely and efficiently, adhering strictly to the following instructions and utilizing your available tools.
+
+# Core Directives & Safety Rules
+1. **Explain Critical Commands:** Before executing any command (especially using \`execute_bash_command\`) that modifies the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety.
+2. **NEVER Commit Changes:** Unless explicitly instructed by the user to do so, you MUST NOT commit changes to version control (e.g., git commit). This is critical for user control over their repository.
+3. **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information.
+
+# Primary Workflow: Software Engineering Tasks
+When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
+1. **Understand:** Analyze the user's request and the relevant codebase context. Check for project-specific information in \`${MEMORY_FILE_NAME}\` if it exists. Use search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
+2. **Implement:** Use the available tools (e.g., file editing, \`execute_bash_command\`) to construct the solution, strictly adhering to the project's established conventions (see 'Following Conventions' below).
+ - If creating a new project, rely on scaffolding commands to lay out the initial project structure (e.g. npm init ...).
+3. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining \`README\` files, \`${MEMORY_FILE_NAME}\`, build/package configuration (e.g., \`package.json\`), or existing test execution patterns. NEVER assume standard test commands.
+4. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific linting and type-checking commands (e.g., \`npm run lint\`, \`ruff check .\`, \`tsc\`) that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, ask the user and propose adding them to \`${MEMORY_FILE_NAME}\` for future reference.
+
+# Key Operating Principles
+
+## Following Conventions
+Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code and configuration first.
+- **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like \`package.json\`, \`Cargo.toml\`, \`requirements.txt\`, \`build.gradle\`, etc., or observe neighboring files) before employing it.
+- **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
+- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
+- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add comments if necessary for clarity or if requested by the user.
+
+## Memory (${MEMORY_FILE_NAME})
+Utilize the \`${MEMORY_FILE_NAME}\` file in the current working directory for project-specific context:
+- Reference stored commands, style preferences, and codebase notes when performing tasks.
+- When you discover frequently used commands (build, test, lint, typecheck) or learn about specific project conventions or style preferences, proactively propose adding them to \`${MEMORY_FILE_NAME}\` for future sessions.
+
+## Tone and Style (CLI Interaction)
+- **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment.
+- **Minimal Output:** Aim for fewer than 4 lines of text output (excluding tool use/code generation) per response whenever practical. Focus strictly on the user's query.
+- **Clarity over Brevity (When Needed):** While conciseness is key, prioritize clarity for essential explanations (like pre-command warnings) or when seeking necessary clarification if a request is ambiguous.
+- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes..."). Get straight to the action or answer.
+- **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace.
+- **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls or code blocks unless specifically part of the required code/command itself.
+- **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate.
+
+## Proactiveness
+- **Act within Scope:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions.
+- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.
+- **Stop After Action:** After completing a code modification or file operation, simply stop. Do not provide summaries unless asked.
+
+# Tool Usage
+- **Search:** Prefer the Agent tool for file searching to optimize context usage.
+- **Parallelism:** Execute multiple independent tool calls in parallel when feasible.
+- **Command Execution:** Use the \`execute_bash_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first.
+
+# Interaction Details
+- **Help Command:** Use \`/help\` to display Gemini Code help. To get specific command/flag info, execute \`gemini -h\` via \`execute_bash_command\` and show the output.
+- **Synthetic Messages:** Ignore system messages like \`++Request Cancelled++\`. Do not generate them.
+- **Feedback:** Direct feedback to ${contactEmail}.
+
+# Examples (Illustrating Tone and Workflow)
+<example>
+user: 1 + 2
+assistant: 3
+</example>
+
+<example>
+user: is 13 a prime number?
+assistant: true
+</example>
+
+<example>
+user: List files here.
+assistant: [tool_call: execute_bash_command for 'ls -la']
+</example>
+
+<example>
+user: Refactor the auth logic in src/auth.py to use the 'requests' library.
+assistant: Okay, I see src/auth.py currently uses 'urllib'. Before changing it, I need to check if 'requests' is already a project dependency. [tool_call: execute_bash_command for grep 'requests', 'requirements.txt']
+(After confirming dependency or asking user to add it)
+Okay, 'requests' is available. I will now refactor src/auth.py.
+[tool_call: Uses read, edit tools following conventions]
+(After editing)
+[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., execute_bash_command for 'ruff', 'check', 'src/auth.py']
+</example>
+
+<example>
+user: Delete the temp directory.
+assistant: I can run \`rm -rf ./temp\`. This will permanently delete the directory and all its contents. Is it okay to proceed?
+</example>
+
+# Final Reminder
+Your core function is efficient and safe assistance. Balance extreme conciseness with the crucial need for clarity, especially regarding safety and potential system modifications. Always prioritize user control and project conventions. Never make assumptions about the contents of files; instead use the read_file tool to check them before acting on them.
+`;
diff --git a/packages/server/src/core/turn.ts b/packages/server/src/core/turn.ts
new file mode 100644
index 00000000..bf5c3e86
--- /dev/null
+++ b/packages/server/src/core/turn.ts
@@ -0,0 +1,199 @@
+import {
+ Part,
+ Chat,
+ PartListUnion,
+ GenerateContentResponse,
+ FunctionCall,
+ FunctionDeclaration,
+} from '@google/genai';
+// Removed UI type imports
+import { ToolResult } from '../tools/tools.js'; // Keep ToolResult for now
+// Removed gemini-stream import (types defined locally)
+
+// --- Types for Server Logic ---
+
+// Define a simpler structure for Tool execution results within the server
+interface ServerToolExecutionOutcome {
+ callId: string;
+ name: string;
+ args: Record<string, unknown>; // Use unknown for broader compatibility
+ result?: ToolResult;
+ error?: Error;
+ // Confirmation details are handled by CLI, not server logic
+}
+
+// Define a structure for tools passed to the server
+export interface ServerTool {
+ name: string;
+ schema: FunctionDeclaration; // Schema is needed
+ // The execute method signature might differ slightly or be wrapped
+ execute(params: Record<string, unknown>): Promise<ToolResult>;
+ // validation and description might be handled differently or passed
+}
+
+// Redefine necessary event types locally
+export enum GeminiEventType {
+ Content = 'content',
+ ToolCallRequest = 'tool_call_request',
+}
+
+interface ToolCallRequestInfo {
+ callId: string;
+ name: string;
+ args: Record<string, unknown>;
+}
+
+type ServerGeminiStreamEvent =
+ | { type: GeminiEventType.Content; value: string }
+ | { type: GeminiEventType.ToolCallRequest; value: ToolCallRequestInfo };
+
+// --- Turn Class (Refactored for Server) ---
+
+// A turn manages the agentic loop turn within the server context.
+export class Turn {
+ private readonly chat: Chat;
+ private readonly availableTools: Map<string, ServerTool>; // Use passed-in tools
+ private pendingToolCalls: Array<{
+ callId: string;
+ name: string;
+ args: Record<string, unknown>; // Use unknown
+ }>;
+ private fnResponses: Part[];
+ private debugResponses: GenerateContentResponse[];
+
+ constructor(chat: Chat, availableTools: ServerTool[]) {
+ this.chat = chat;
+ this.availableTools = new Map(availableTools.map((t) => [t.name, t]));
+ this.pendingToolCalls = [];
+ this.fnResponses = [];
+ this.debugResponses = [];
+ }
+
+ // The run method yields simpler events suitable for server logic
+ async *run(
+ req: PartListUnion,
+ signal?: AbortSignal,
+ ): AsyncGenerator<ServerGeminiStreamEvent> {
+ const responseStream = await this.chat.sendMessageStream({ message: req });
+
+ for await (const resp of responseStream) {
+ this.debugResponses.push(resp);
+ if (signal?.aborted) {
+ throw this.abortError();
+ }
+ if (resp.text) {
+ yield { type: GeminiEventType.Content, value: resp.text };
+ continue;
+ }
+ if (!resp.functionCalls) {
+ continue;
+ }
+
+ // Handle function calls (requesting tool execution)
+ for (const fnCall of resp.functionCalls) {
+ const event = this.handlePendingFunctionCall(fnCall);
+ if (event) {
+ yield event;
+ }
+ }
+
+ // Execute pending tool calls
+ const toolPromises = this.pendingToolCalls.map(
+ async (pendingToolCall): Promise<ServerToolExecutionOutcome> => {
+ const tool = this.availableTools.get(pendingToolCall.name);
+ if (!tool) {
+ return {
+ ...pendingToolCall,
+ error: new Error(
+ `Tool "${pendingToolCall.name}" not found or not provided to Turn.`,
+ ),
+ };
+ }
+ // No confirmation logic in the server Turn
+ try {
+ // TODO: Add validation step if needed (tool.validateParams?)
+ const result = await tool.execute(pendingToolCall.args);
+ return { ...pendingToolCall, result };
+ } catch (execError: unknown) {
+ return {
+ ...pendingToolCall,
+ error: new Error(
+ `Tool execution failed: ${execError instanceof Error ? execError.message : String(execError)}`,
+ ),
+ };
+ }
+ },
+ );
+ const outcomes = await Promise.all(toolPromises);
+
+ // Process outcomes and prepare function responses
+ this.fnResponses = this.buildFunctionResponses(outcomes);
+ this.pendingToolCalls = []; // Clear pending calls for this turn
+
+ // If there were function responses, the caller (GeminiService) will loop
+ // and call run() again with these responses.
+ // If no function responses, the turn ends here.
+ }
+ }
+
+ // Generates a ToolCallRequest event to signal the need for execution
+ private handlePendingFunctionCall(
+ fnCall: FunctionCall,
+ ): ServerGeminiStreamEvent | null {
+ const callId =
+ fnCall.id ??
+ `${fnCall.name}-${Date.now()}-${Math.random().toString(16).slice(2)}`;
+ const name = fnCall.name || 'undefined_tool_name';
+ const args = (fnCall.args || {}) as Record<string, unknown>;
+
+ this.pendingToolCalls.push({ callId, name, args });
+
+ // Yield a request for the tool call, not the pending/confirming status
+ const value: ToolCallRequestInfo = { callId, name, args };
+ return { type: GeminiEventType.ToolCallRequest, value };
+ }
+
+ // Builds the Part array expected by the Google GenAI API
+ private buildFunctionResponses(
+ outcomes: ServerToolExecutionOutcome[],
+ ): Part[] {
+ return outcomes.map((outcome): Part => {
+ const { name, result, error } = outcome;
+ let fnResponsePayload: Record<string, unknown>;
+
+ if (error) {
+ // Format error for the LLM
+ const errorMessage = error?.message || String(error);
+ fnResponsePayload = { error: `Tool execution failed: ${errorMessage}` };
+ console.error(`[Server Turn] Error executing tool ${name}:`, error);
+ } else {
+ // Pass successful tool result (content meant for LLM)
+ fnResponsePayload = { output: result?.llmContent ?? '' }; // Default to empty string if no content
+ }
+
+ return {
+ functionResponse: {
+ name,
+ id: outcome.callId,
+ response: fnResponsePayload,
+ },
+ };
+ });
+ }
+
+ private abortError(): Error {
+ const error = new Error('Request cancelled by user during stream.');
+ error.name = 'AbortError';
+ return error; // Return instead of throw, let caller handle
+ }
+
+ // Allows the service layer to get the responses needed for the next API call
+ getFunctionResponses(): Part[] {
+ return this.fnResponses;
+ }
+
+ // Debugging information (optional)
+ getDebugResponses(): GenerateContentResponse[] {
+ return this.debugResponses;
+ }
+}
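To make the comment about the caller looping concrete, here is a stripped-down sketch of the driving loop that Turn expects around it, essentially what GeminiClient.sendMessageStream above already does. It is illustrative only: chat, tools, and the turn limit are assumed inputs, and error/abort handling is omitted.

import { Chat, PartListUnion } from '@google/genai';
import { Turn, ServerTool } from './turn.js';

// Minimal driving loop: run a Turn, forward its events, and feed any function
// responses back in as the next request until the model stops calling tools.
async function* runAgentLoop(
  chat: Chat,
  tools: ServerTool[],
  initialRequest: PartListUnion,
  maxTurns = 10, // assumed limit for the sketch; GeminiClient uses MAX_TURNS = 100
) {
  let request = initialRequest;
  for (let i = 0; i < maxTurns; i++) {
    const turn = new Turn(chat, tools); // fresh Turn per pass, as in GeminiClient
    for await (const event of turn.run(request)) {
      yield event; // Content and ToolCallRequest events pass straight through
    }
    const fnResponses = turn.getFunctionResponses();
    if (fnResponses.length === 0) {
      break; // no tool calls this pass, so the conversation turn is complete
    }
    request = fnResponses; // send the tool outputs back to the model
  }
}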