2 files changed, 409 insertions, 0 deletions
diff --git a/packages/server/src/utils/editCorrector.test.ts b/packages/server/src/utils/editCorrector.test.ts
new file mode 100644
index 00000000..8b27bdf1
--- /dev/null
+++ b/packages/server/src/utils/editCorrector.test.ts
@@ -0,0 +1,117 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  countOccurrences,
+  unescapeStringForGeminiBug,
+} from './editCorrector.js';
+
+describe('editCorrector', () => {
+  // countOccurrences scans left to right and never counts overlapping matches.
+  describe('countOccurrences', () => {
+    it('should return 0 for empty string', () => {
+      expect(countOccurrences('', 'a')).toBe(0);
+    });
+
+    it('should return 0 for empty substring', () => {
+      expect(countOccurrences('abc', '')).toBe(0);
+    });
+
+    it('should return 0 if substring is not found', () => {
+      expect(countOccurrences('abc', 'd')).toBe(0);
+    });
+
+    it('should return 1 if substring is found once', () => {
+      expect(countOccurrences('abc', 'b')).toBe(1);
+    });
+
+    it('should return correct count for multiple occurrences', () => {
+      expect(countOccurrences('ababa', 'a')).toBe(3);
+      expect(countOccurrences('ababab', 'ab')).toBe(3);
+    });
+
+    it('should count non-overlapping occurrences', () => {
+      expect(countOccurrences('aaaaa', 'aa')).toBe(2); // Non-overlapping: aa_aa_
+      expect(countOccurrences('ababab', 'aba')).toBe(1); // Non-overlapping: aba_ab -> 1
+    });
+
+    it('should correctly count occurrences when substring is longer', () => {
+      expect(countOccurrences('abc', 'abcdef')).toBe(0);
+    });
+
+    it('should be case sensitive', () => {
+      expect(countOccurrences('abcABC', 'a')).toBe(1);
+      expect(countOccurrences('abcABC', 'A')).toBe(1);
+    });
+  });
+
+  // Reading aid: the inputs are JavaScript string literals, so '\\n' is the two
+  // characters backslash + n in memory, whereas '\n' is one real newline.
+  describe('unescapeStringForGeminiBug', () => {
+    it('should unescape common sequences', () => {
+      expect(unescapeStringForGeminiBug('\\n')).toBe('\n');
+      expect(unescapeStringForGeminiBug('\\t')).toBe('\t');
+      expect(unescapeStringForGeminiBug('\\r')).toBe('\r');
+      expect(unescapeStringForGeminiBug("\\'")).toBe("'");
+      expect(unescapeStringForGeminiBug('\\"')).toBe('"');
+      expect(unescapeStringForGeminiBug('\\`')).toBe('`');
+    });
+
+    it('should handle multiple escaped sequences', () => {
+      expect(unescapeStringForGeminiBug('Hello\\nWorld\\tTest')).toBe(
+        'Hello\nWorld\tTest',
+      );
+    });
+
+    it('should unescape carriage returns, alone and in CRLF pairs', () => {
+      expect(unescapeStringForGeminiBug('\\\\r')).toBe('\r'); // \\r -> \r
+      expect(unescapeStringForGeminiBug('Line1\\r\\nLine2')).toBe(
+        'Line1\r\nLine2',
+      );
+    });
+
+    it('should not alter already correct sequences', () => {
+      expect(unescapeStringForGeminiBug('\n')).toBe('\n');
+      expect(unescapeStringForGeminiBug('Correct string')).toBe(
+        'Correct string',
+      );
+    });
+
+    it('should handle mixed correct and incorrect sequences', () => {
+      expect(unescapeStringForGeminiBug('\\nCorrect\t\\`')).toBe(
+        '\nCorrect\t`',
+      );
+    });
+
+    it('should handle backslash followed by actual newline character', () => {
+      expect(unescapeStringForGeminiBug('\\\n')).toBe('\n');
+      expect(unescapeStringForGeminiBug('First line\\\nSecond line')).toBe(
+        'First line\nSecond line',
+      );
+    });
+
+    it('should handle multiple backslashes before an escapable character', () => {
+      expect(unescapeStringForGeminiBug('\\\\n')).toBe('\n'); // \\n -> \n
+      expect(unescapeStringForGeminiBug('\\\\\\t')).toBe('\t'); // \\\t -> \t
+      expect(unescapeStringForGeminiBug('\\\\\\\\`')).toBe('`'); // \\\\` -> `
+    });
+
+    it('should return empty string for empty input', () => {
+      expect(unescapeStringForGeminiBug('')).toBe('');
+    });
+
+    it('should not alter strings with no targeted escape sequences', () => {
+      expect(unescapeStringForGeminiBug('abc def')).toBe('abc def');
+      // \F is not a targeted escape, so both \Folder and \File are left untouched.
+      expect(unescapeStringForGeminiBug('C:\\Folder\\File')).toBe(
+        'C:\\Folder\\File',
+      );
+    });
+
+    it('should correctly process strings with some targeted escapes', () => {
+      // \U is not targeted, but \n is: the "\n" of "\name" becomes a real newline.
+      // This pins the current behaviour, including its effect on path-like text.
+      expect(unescapeStringForGeminiBug('C:\\Users\\name')).toBe(
+        'C:\\Users\name',
+      );
+    });
+
+    it('should handle complex cases with mixed slashes and characters', () => {
+      expect(
+        unescapeStringForGeminiBug('\\\\\\nLine1\\\nLine2\\tTab\\\\`Tick\\"'),
+      ).toBe('\nLine1\nLine2\tTab`Tick"');
+    });
+  });
+});
diff --git a/packages/server/src/utils/editCorrector.ts b/packages/server/src/utils/editCorrector.ts
new file mode 100644
index 00000000..c7095ece
--- /dev/null
+++ b/packages/server/src/utils/editCorrector.ts
@@ -0,0 +1,292 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import {
+  Content,
+  GenerateContentConfig,
+  SchemaUnion,
+  Type,
+} from '@google/genai';
+import { GeminiClient } from '../core/client.js';
+import { EditToolParams } from '../tools/edit.js';
+
+/** Model name passed to `generateJson` for every correction request made by this module. */
+const EditModel = 'gemini-2.5-flash-preview-04-17';
+
+/** Generation config sent with those requests; it sets the thinking budget to 0. */
+const EditConfig: GenerateContentConfig = { thinkingConfig: { thinkingBudget: 0 } };
+
+/**
+ * Counts how many times `substr` appears in `str`, scanning left to right
+ * without counting overlapping matches (e.g. 'aa' occurs twice in 'aaaaa').
+ *
+ * @param str The text to search in.
+ * @param substr The text to look for; an empty string always yields 0.
+ * @returns The number of non-overlapping matches.
+ */
+export function countOccurrences(str: string, substr: string): number {
+  const step = substr.length;
+  if (step === 0) {
+    return 0;
+  }
+
+  let matches = 0;
+  for (
+    let at = str.indexOf(substr);
+    at !== -1;
+    at = str.indexOf(substr, at + step) // resume right after the previous match
+  ) {
+    matches += 1;
+  }
+  return matches;
+}
+
+/**
+ * Attempts to repair edit parameters whose `old_string` cannot be found in the file.
+ *
+ * Steps, in order:
+ *  1. Keep `old_string` as given when it matches exactly once.
+ *  2. Return the parameters untouched when `old_string` is empty or already matches
+ *     more than once. Correction only targets the "not found" case; an ambiguous
+ *     literal match is reported to the caller instead of being silently retargeted.
+ *  3. Undo over-escaping with `unescapeStringForGeminiBug` (applied to `new_string`
+ *     as well when the unescaped `old_string` matches exactly once).
+ *  4. Ask the model for the intended snippet and, if that snippet is unique in the
+ *     file, ask it to adapt `new_string` accordingly.
+ *
+ * @param currentContent The current content of the file.
+ * @param originalParams The original EditToolParams; this object is never mutated.
+ * @param client The GeminiClient for LLM calls.
+ * @returns A promise resolving to the (potentially corrected) EditToolParams and an
+ *   occurrence count. The count is 1 whenever usable params are returned; otherwise
+ *   the original params come back with 0 (nothing found) or a count above 1.
+ */
+export async function ensureCorrectEdit(
+  currentContent: string,
+  originalParams: EditToolParams,
+  client: GeminiClient,
+): Promise<CorrectedEditResult> {
+  const literalOccurrences = countOccurrences(
+    currentContent,
+    originalParams.old_string,
+  );
+
+  // Exact, unique match: nothing to correct. A copy is returned, as before.
+  if (literalOccurrences === 1) {
+    return { params: { ...originalParams }, occurrences: literalOccurrences };
+  }
+
+  // Ambiguous literal match, or nothing to search for: report the real count and
+  // skip unescaping and the LLM round-trips, which could only pick another target.
+  if (literalOccurrences > 1 || originalParams.old_string === '') {
+    return { params: originalParams, occurrences: literalOccurrences };
+  }
+
+  const unescapedOldString = unescapeStringForGeminiBug(
+    originalParams.old_string,
+  );
+  const unescapedOccurrences = countOccurrences(
+    currentContent,
+    unescapedOldString,
+  );
+
+  if (unescapedOccurrences === 1) {
+    return {
+      params: {
+        ...originalParams,
+        old_string: unescapedOldString,
+        new_string: unescapeStringForGeminiBug(originalParams.new_string),
+      },
+      occurrences: unescapedOccurrences,
+    };
+  }
+
+  if (unescapedOccurrences > 1) {
+    // If unescaping resulted in >1 occurrences, return original params and that count.
+    return { params: originalParams, occurrences: unescapedOccurrences };
+  }
+
+  // Still not found: let the model propose the snippet that was meant.
+  const llmCorrectedOldString = await correctOldStringMismatch(
+    client,
+    currentContent,
+    unescapedOldString,
+  );
+
+  if (countOccurrences(currentContent, llmCorrectedOldString) !== 1) {
+    // The model's answer is missing from the file or ambiguous: return the original
+    // params with 0 occurrences so the caller handles the "still not found" case.
+    return { params: originalParams, occurrences: 0 };
+  }
+
+  const llmCorrectedNewString = await correctNewString(
+    client,
+    unescapedOldString,
+    llmCorrectedOldString,
+    originalParams.new_string,
+  );
+
+  return {
+    params: {
+      ...originalParams,
+      old_string: llmCorrectedOldString,
+      new_string: llmCorrectedNewString,
+    },
+    occurrences: 1,
+  };
+}
+
+/**
+ * Asks the model which segment of `fileContent` the non-matching `problematicSnippet`
+ * was most likely meant to target.
+ *
+ * The answer is not checked against the file here; the caller counts its occurrences.
+ *
+ * @param geminiClient Client used to issue the JSON generation request.
+ * @param fileContent Full text of the file being edited.
+ * @param problematicSnippet The snippet that failed to match.
+ * @returns The model's `corrected_target_snippet` when it is a non-empty string;
+ *   otherwise (empty or malformed response, or a thrown error) `problematicSnippet`.
+ */
+async function correctOldStringMismatch(
+  geminiClient: GeminiClient,
+  fileContent: string,
+  problematicSnippet: string,
+): Promise<string> {
+  const instructions = `
+Context: A process needs to find an exact literal, unique match for a specific text snippet within a file's content. The provided snippet failed to match exactly. This is most likely because it has been overly escaped.
+
+Task: Analyze the provided file content and the problematic target snippet. Identify the segment in the file content that the snippet was *most likely* intended to match. Output the *exact*, literal text of that segment from the file content. Focus *only* on removing extra escape characters and correcting formatting, whitespace, or minor differences to achieve a PERFECT literal match. The output must be the exact literal text as it appears in the file.
+
+Problematic target snippet:
+\`\`\`
+${problematicSnippet}
+\`\`\`
+
+File Content:
+\`\`\`
+${fileContent}
+\`\`\`
+
+For example, if the problematic target snippet was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and the file content had content that looked like "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", then corrected_target_snippet should likely be "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;" to fix the incorrect escaping to match the original file content.
+If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_target_snippet.
+
+Return ONLY the corrected target snippet in the specified JSON format with the key 'corrected_target_snippet'. If no clear, unique match can be found, return an empty string for 'corrected_target_snippet'.
+`.trim();
+
+  const request: Content[] = [
+    { role: 'user', parts: [{ text: instructions }] },
+  ];
+
+  try {
+    const response = await geminiClient.generateJson(
+      request,
+      OLD_STRING_CORRECTION_SCHEMA,
+      EditModel,
+      EditConfig,
+    );
+    const candidate: unknown = response?.corrected_target_snippet;
+    if (typeof candidate === 'string' && candidate.length > 0) {
+      return candidate;
+    }
+  } catch (error) {
+    // Best effort: a failed request is logged and treated like "no correction".
+    console.error(
+      'Error during LLM call for old string snippet correction:',
+      error,
+    );
+  }
+
+  // No usable answer: hand the snippet back unchanged.
+  return problematicSnippet;
+}
+
+/**
+ * Asks the model to rewrite `originalNewString` so that it fits `correctedOldString`
+ * in the way it was meant to fit `originalOldString`.
+ *
+ * @param geminiClient Client used to issue the JSON generation request.
+ * @param originalOldString The target text as originally supplied.
+ * @param correctedOldString The target text after correction.
+ * @param originalNewString The replacement text as originally supplied.
+ * @returns `originalNewString` unchanged when both old strings are identical (no
+ *   request is made), when the response is empty or malformed, or when the request
+ *   throws; otherwise the model's `corrected_new_string`.
+ */
+async function correctNewString(
+  geminiClient: GeminiClient,
+  originalOldString: string,
+  correctedOldString: string,
+  originalNewString: string,
+): Promise<string> {
+  // Nothing was corrected, so the replacement needs no adjustment either.
+  if (originalOldString === correctedOldString) {
+    return originalNewString;
+  }
+
+  const instructions = `
+Context: A text replacement operation was planned. The original text to be replaced (original_old_string) was slightly different from the actual text in the file (corrected_old_string). The original_old_string has now been corrected to match the file content.
+We now need to adjust the replacement text (original_new_string) so that it makes sense as a replacement for the corrected_old_string, while preserving the original intent of the change.
+
+original_old_string (what was initially intended to be found):
+\`\`\`
+${originalOldString}
+\`\`\`
+
+corrected_old_string (what was actually found in the file and will be replaced):
+\`\`\`
+${correctedOldString}
+\`\`\`
+
+original_new_string (what was intended to replace original_old_string):
+\`\`\`
+${originalNewString}
+\`\`\`
+
+Task: Based on the differences between original_old_string and corrected_old_string, and the content of original_new_string, generate a corrected_new_string. This corrected_new_string should be what original_new_string would have been if it was designed to replace corrected_old_string directly, while maintaining the spirit of the original transformation.
+
+For example, if original_old_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and corrected_old_string is "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", and original_new_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name} \${lastName}\\\\\`\`;", then corrected_new_string should likely be "\nconst greeting = \`Hello ${'\\`'}\${name} \${lastName}${'\\`'}\`;" to fix the incorrect escaping.
+If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_new_string.
+
+Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string'. If no adjustment is deemed necessary or possible, return the original_new_string.
+  `.trim();
+
+  const request: Content[] = [
+    { role: 'user', parts: [{ text: instructions }] },
+  ];
+
+  try {
+    const response = await geminiClient.generateJson(
+      request,
+      NEW_STRING_CORRECTION_SCHEMA,
+      EditModel,
+      EditConfig,
+    );
+    const candidate: unknown = response?.corrected_new_string;
+    if (typeof candidate === 'string' && candidate.length > 0) {
+      return candidate;
+    }
+  } catch (error) {
+    // Best effort: a failed request is logged and the original text is kept.
+    console.error('Error during LLM call for new_string correction:', error);
+  }
+
+  // No usable answer: keep the replacement text as it was supplied.
+  return originalNewString;
+}
+
+/**
+ * Result of `ensureCorrectEdit`: the edit parameters to use together with the
+ * match count that goes with them.
+ */
+export interface CorrectedEditResult {
+  /**
+   * Match count reported by `ensureCorrectEdit`. It is 1 when `params.old_string`
+   * was found exactly once; any other value means no unique match was established.
+   */
+  occurrences: number;
+  /** The edit parameters: corrected on success, otherwise the caller's originals. */
+  params: EditToolParams;
+}
+
+/**
+ * JSON response schema that `correctOldStringMismatch` passes to `generateJson`:
+ * an object with a single required string property, `corrected_target_snippet`.
+ */
+const OLD_STRING_CORRECTION_SCHEMA: SchemaUnion = {
+  type: Type.OBJECT,
+  properties: {
+    corrected_target_snippet: {
+      type: Type.STRING,
+      description:
+        'The corrected version of the target snippet that exactly and uniquely matches a segment within the provided file content.',
+    },
+  },
+  required: ['corrected_target_snippet'],
+};
+
+/**
+ * JSON response schema that `correctNewString` passes to `generateJson`:
+ * an object with a single required string property, `corrected_new_string`.
+ */
+const NEW_STRING_CORRECTION_SCHEMA: SchemaUnion = {
+  type: Type.OBJECT,
+  properties: {
+    corrected_new_string: {
+      type: Type.STRING,
+      description:
+        'The original_new_string adjusted to be a suitable replacement for the corrected_old_string, while maintaining the original intent of the change.',
+    },
+  },
+  required: ['corrected_new_string'],
+};
+
+/**
+ * Unescapes a string that might have been overly escaped by an LLM.
+ *
+ * Every run of one or more backslashes followed by `n`, `t`, `r`, a single quote,
+ * a double quote, a backtick or a real newline character is replaced by the one
+ * character that sequence stands for. Backslashes in front of any other character
+ * (for example `\F`) are left untouched.
+ *
+ * The replacement is purely textual, so a backslash + `n` inside ordinary text,
+ * such as `C:\Users\name`, is turned into a newline as well.
+ *
+ * @param inputString The possibly over-escaped text.
+ * @returns The text with the targeted escape sequences collapsed.
+ */
+export function unescapeStringForGeminiBug(inputString: string): string {
+  // Letters that denote a control character once the backslashes are removed.
+  // Quotes, backticks and real newlines are not listed: they stand for themselves.
+  const controlCharacters: Record<string, string> = {
+    n: '\n',
+    t: '\t',
+    r: '\r',
+  };
+
+  // \\+               one or more literal backslashes
+  // (n|t|r|'|"|`|\n)  the character that gives the sequence its meaning
+  // g                 replace every occurrence, not just the first
+  return inputString.replace(
+    /\\+(n|t|r|'|"|`|\n)/g,
+    (_sequence: string, meaningful: string) =>
+      controlCharacters[meaningful] ?? meaningful,
+  );
+}