Fix(write-file): Correct over-escaping and improve content generation

- Leveraged existing edit correction technology from `edit.ts` to address over-escaping issues in `write-file.ts`. - Introduced `ensureCorrectFileContent` for correcting content in new files, where a simple "replace" isn't applicable. This uses a new LLM prompt tailored for correcting potentially problematic string escaping. - Added caching for `ensureCorrectFileContent` to optimize performance. - Refactored `write-file.ts` to integrate these corrections, improving the reliability of file content generation and modification. Part of https://github.com/google-gemini/gemini-cli/issues/484
author: Taylor Mullen <[email protected]> 2025-05-25 14:41:37 -0700
committer: N. Taylor Mullen <[email protected]> 2025-05-25 14:45:47 -0700
commit: 5097b5a6563a4d514242580495c5974b7e665c3f (patch)
tree: c9d99b33a87ae4a90dabf13d044ce738f45127da /packages/server/src/utils/editCorrector.ts
parent: 1a5fe16b22dbb13861bd3aa97c8ae5f40566b6af (diff)
1 files changed, 85 insertions, 0 deletions
diff --git a/packages/server/src/utils/editCorrector.ts b/packages/server/src/utils/editCorrector.ts
index 91aa85a3..92551478 100644
--- a/packages/server/src/utils/editCorrector.ts
+++ b/packages/server/src/utils/editCorrector.ts
@@ -28,6 +28,9 @@ const editCorrectionCache = new LruCache<string, CorrectedEditResult>(
   MAX_CACHE_SIZE,
 );
 
+// Memoizes ensureCorrectFileContent results, keyed by the original content string
+const fileContentCorrectionCache = new LruCache<string, string>(MAX_CACHE_SIZE);
+
 /**
  * Defines the structure of the parameters within CorrectedEditResult
  */
@@ -174,6 +177,27 @@ export async function ensureCorrectEdit(
   return result;
 }
 
+/** Returns `content`, passed through correctStringEscaping when it looks over-escaped; memoized per input. */
+export async function ensureCorrectFileContent(
+  content: string,
+  client: GeminiClient,
+): Promise<string> {
+  // A truthy cache hit skips both the heuristic below and the LLM call.
+  const memoized = fileContentCorrectionCache.get(content);
+  if (memoized) {
+    return memoized;
+  }
+
+  // Only involve the LLM when local unescaping would change the text.
+  const looksOverEscaped = unescapeStringForGeminiBug(content) !== content;
+  const finalContent = looksOverEscaped
+    ? await correctStringEscaping(content, client)
+    : content;
+
+  fileContentCorrectionCache.set(content, finalContent);
+  return finalContent;
+}
+
 // Define the expected JSON schema for the LLM response for old_string correction
 const OLD_STRING_CORRECTION_SCHEMA: SchemaUnion = {
   type: Type.OBJECT,
@@ -385,6 +409,66 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr
   }
 }
 
+const CORRECT_STRING_ESCAPING_SCHEMA: SchemaUnion = { // response schema passed to generateJson by correctStringEscaping
+  type: Type.OBJECT,
+  properties: {
+    corrected_string_escaping: {
+      type: Type.STRING,
+      description:
+        'The string with corrected escaping, ensuring it is valid, specially considering potential over-escaping issues from previous LLM generations.',
+    },
+  },
+  required: ['corrected_string_escaping'], // the single key a reply must carry
+};
+
+/** Asks the LLM to undo over-escaping in a string; falls back to the input on error or an empty/missing reply. */
+export async function correctStringEscaping(
+  potentiallyProblematicString: string,
+  client: GeminiClient,
+): Promise<string> {
+  const prompt = `
+Context: An LLM has just generated potentially_problematic_string and the text might have been improperly escaped (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello").
+
+potentially_problematic_string (this text MIGHT have bad escaping, or might be entirely correct):
+\`\`\`
+${potentiallyProblematicString}
+\`\`\`
+
+Task: Analyze the potentially_problematic_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the text will be a valid and correctly interpreted.
+
+For example, if potentially_problematic_string is "bar\\nbaz", the corrected_string_escaping should be "bar\nbaz".
+If potentially_problematic_string is console.log(\\"Hello World\\"), it should be console.log("Hello World").
+
+Return ONLY the corrected string in the specified JSON format with the key 'corrected_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_string.
+  `.trim();
+
+  const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];
+  try {
+    const result = await client.generateJson(
+      contents,
+      CORRECT_STRING_ESCAPING_SCHEMA,
+      EditModel,
+      EditConfig,
+    );
+
+    // The key read here must match CORRECT_STRING_ESCAPING_SCHEMA, or every reply is discarded.
+    if (
+      result &&
+      typeof result.corrected_string_escaping === 'string' &&
+      result.corrected_string_escaping.length > 0
+    ) {
+      return result.corrected_string_escaping;
+    }
+    return potentiallyProblematicString;
+  } catch (error) {
+    console.error(
+      'Error during LLM call for string escaping correction:',
+      error,
+    );
+    return potentiallyProblematicString;
+  }
+}
+
 function trimPairIfPossible(
   target: string,
   trimIfTargetTrims: string,
@@ -470,4 +554,5 @@ export function countOccurrences(str: string, substr: string): number {
 
 export function resetEditCorrectorCaches_TEST_ONLY() {
   editCorrectionCache.clear();
+  fileContentCorrectionCache.clear(); // also drop memoized ensureCorrectFileContent results
 }
author	Taylor Mullen <[email protected]>	2025-05-25 14:41:37 -0700
committer	N. Taylor Mullen <[email protected]>	2025-05-25 14:45:47 -0700
commit	5097b5a6563a4d514242580495c5974b7e665c3f (patch)
tree	c9d99b33a87ae4a90dabf13d044ce738f45127da /packages/server/src/utils/editCorrector.ts
parent	1a5fe16b22dbb13861bd3aa97c8ae5f40566b6af (diff)