14 files changed, 611 insertions, 63 deletions
diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx
index feb132ae..833cc2b5 100644
--- a/packages/cli/src/ui/App.tsx
+++ b/packages/cli/src/ui/App.tsx
@@ -67,6 +67,10 @@ import { useBracketedPaste } from './hooks/useBracketedPaste.js';
 import { useTextBuffer } from './components/shared/text-buffer.js';
 import * as fs from 'fs';
 import { UpdateNotification } from './components/UpdateNotification.js';
+import {
+  isProQuotaExceededError,
+  isGenericQuotaExceededError,
+} from '@google/gemini-cli-core';
 import { checkForUpdates } from './utils/updateCheck.js';
 import ansiEscapes from 'ansi-escapes';
 import { OverflowProvider } from './contexts/OverflowContext.js';
@@ -243,15 +247,34 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
     const flashFallbackHandler = async (
       currentModel: string,
       fallbackModel: string,
+      error?: unknown,
     ): Promise<boolean> => {
+      let message: string;
+
+      // Pro daily quota hit: name the model and list upgrade/auth options
+      if (error && isProQuotaExceededError(error)) {
+        message = `⚡ You have reached your daily ${currentModel} quota limit.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
+⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ You can switch authentication methods by typing /auth`;
+      } else if (error && isGenericQuotaExceededError(error)) {
+        message = `⚡ You have reached your daily quota limit.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
+⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ You can switch authentication methods by typing /auth`;
+      } else {
+        // No error given, or no quota match (like consecutive 429s)
+        message = `⚡ Slow response times detected.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
+      }
+
       // Add message to UI history
       addItem(
         {
           type: MessageType.INFO,
-          text: `⚡ Slow response times detected. Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
-⚡ To avoid this you can either upgrade to Standard tier. See: https://goo.gle/set-up-gemini-code-assist
-⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
-⚡ You can switch authentication methods by typing /auth`,
+          text: message,
         },
         Date.now(),
       );
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index 3a002919..fc6f93c5 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -1097,6 +1097,7 @@ describe('useGeminiStream', () => {
         getContentGeneratorConfig: vi.fn(() => ({
           authType: mockAuthType,
         })),
+        getModel: vi.fn(() => 'gemini-2.5-pro'),
       } as unknown as Config;
 
       const { result } = renderHook(() =>
@@ -1125,6 +1126,9 @@ describe('useGeminiStream', () => {
         expect(mockParseAndFormatApiError).toHaveBeenCalledWith(
           'Rate limit exceeded',
           mockAuthType,
+          undefined,
+          'gemini-2.5-pro',
+          'gemini-2.5-flash',
         );
       });
     });
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index b4acdb9a..550cab86 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -24,6 +24,7 @@ import {
   ThoughtSummary,
   UnauthorizedError,
   UserPromptEvent,
+  DEFAULT_GEMINI_FLASH_MODEL,
 } from '@google/gemini-cli-core';
 import { type Part, type PartListUnion } from '@google/genai';
 import {
@@ -397,6 +398,9 @@ export const useGeminiStream = (
           text: parseAndFormatApiError(
             eventValue.error,
             config.getContentGeneratorConfig().authType,
+            undefined,
+            config.getModel(),
+            DEFAULT_GEMINI_FLASH_MODEL,
           ),
         },
         userMessageTimestamp,
@@ -533,6 +537,9 @@ export const useGeminiStream = (
               text: parseAndFormatApiError(
                 getErrorMessage(error) || 'Unknown error',
                 config.getContentGeneratorConfig().authType,
+                undefined,
+                config.getModel(),
+                DEFAULT_GEMINI_FLASH_MODEL,
               ),
             },
             userMessageTimestamp,
diff --git a/packages/cli/src/ui/utils/errorParsing.test.ts b/packages/cli/src/ui/utils/errorParsing.test.ts
index 4bbaabf1..3d228efb 100644
--- a/packages/cli/src/ui/utils/errorParsing.test.ts
+++ b/packages/cli/src/ui/utils/errorParsing.test.ts
@@ -6,10 +6,16 @@
 
 import { describe, it, expect } from 'vitest';
 import { parseAndFormatApiError } from './errorParsing.js';
-import { AuthType, StructuredError } from '@google/gemini-cli-core';
+import {
+  AuthType,
+  UserTierId,
+  DEFAULT_GEMINI_FLASH_MODEL,
+  isProQuotaExceededError,
+} from '@google/gemini-cli-core';
 
 describe('parseAndFormatApiError', () => {
-  const enterpriseMessage = 'upgrade to a plan with higher limits';
+  const _enterpriseMessage =
+    'upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits';
   const vertexMessage = 'request a quota increase through Vertex';
   const geminiMessage = 'request a quota increase through AI Studio';
 
@@ -24,9 +30,17 @@ describe('parseAndFormatApiError', () => {
   it('should format a 429 API error with the default message', () => {
     const errorMessage =
       'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(errorMessage);
+    const result = parseAndFormatApiError(
+      errorMessage,
+      undefined,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
     expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain('Your request has been rate limited');
+    expect(result).toContain(
+      'Slow response times detected. Switching to the gemini-2.5-flash model',
+    );
   });
 
   it('should format a 429 API error with the personal message', () => {
@@ -35,9 +49,14 @@ describe('parseAndFormatApiError', () => {
     const result = parseAndFormatApiError(
       errorMessage,
       AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
     );
     expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(enterpriseMessage);
+    expect(result).toContain(
+      'Slow response times detected. Switching to the gemini-2.5-flash model',
+    );
   });
 
   it('should format a 429 API error with the vertex message', () => {
@@ -116,4 +135,284 @@ describe('parseAndFormatApiError', () => {
     const expected = '[API Error: An unknown error occurred.]';
     expect(parseAndFormatApiError(error)).toBe(expected);
   });
+
+  it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+    );
+    expect(result).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(result).toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+  });
+
+  it('should format a regular 429 API error with standard message for Google auth', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain('[API Error: Rate limit exceeded');
+    expect(result).toContain(
+      'Slow response times detected. Switching to the gemini-2.5-flash model',
+    );
+    expect(result).not.toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+  });
+
+  it('should format a 429 API error with generic quota exceeded message for Google auth', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
+    );
+    expect(result).toContain('You have reached your daily quota limit');
+    // The Pro-quota wording interpolates the model id ('gemini-2.5-pro'), so
+    // negate that spelling; a display-name variant could never match.
+    expect(result).not.toContain('You have reached your daily gemini-2.5-pro');
+  });
+
+  it('should prioritize Pro quota message over generic quota message for Google auth', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+    );
+    expect(result).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(result).not.toContain('You have reached your daily quota limit');
+  });
+
+  it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      UserTierId.STANDARD,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+    );
+    expect(result).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(result).toContain(
+      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+    );
+    expect(result).not.toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+  });
+
+  it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      UserTierId.LEGACY,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+    );
+    expect(result).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(result).toContain(
+      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+    );
+    expect(result).not.toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+  });
+
+  it('should handle different Gemini version strings in Pro quota exceeded errors', () => {
+    const errorMessage15 =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const errorMessagePreview =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const errorMessageBeta =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const errorMessageExperimental =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+
+    const result15 = parseAndFormatApiError(
+      errorMessage15,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-1.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    const resultPreview = parseAndFormatApiError(
+      errorMessagePreview,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-preview-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    const resultBeta = parseAndFormatApiError(
+      errorMessageBeta,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-beta-3.0-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    const resultExperimental = parseAndFormatApiError(
+      errorMessageExperimental,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-experimental-v2-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+
+    expect(result15).toContain(
+      'You have reached your daily gemini-1.5-pro quota limit',
+    );
+    expect(resultPreview).toContain(
+      'You have reached your daily gemini-2.5-preview-pro quota limit',
+    );
+    expect(resultBeta).toContain(
+      'You have reached your daily gemini-beta-3.0-pro quota limit',
+    );
+    expect(resultExperimental).toContain(
+      'You have reached your daily gemini-experimental-v2-pro quota limit',
+    );
+    expect(result15).toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+    expect(resultPreview).toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+    expect(resultBeta).toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+    expect(resultExperimental).toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+  });
+
+  it('should not match non-Pro models with similar version strings', () => {
+    // Test that Flash models with similar version strings don't match
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit",
+      ),
+    ).toBe(false);
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
+      ),
+    ).toBe(false);
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit",
+      ),
+    ).toBe(false);
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit",
+      ),
+    ).toBe(false);
+
+    // Test other model types
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit",
+      ),
+    ).toBe(false);
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit",
+      ),
+    ).toBe(false);
+
+    // Test generic quota messages
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'GenerationRequests' and limit",
+      ),
+    ).toBe(false);
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'EmbeddingRequests' and limit",
+      ),
+    ).toBe(false);
+  });
+
+  it('should format a generic quota exceeded message for Google auth (Standard tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      UserTierId.STANDARD,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
+    );
+    expect(result).toContain('You have reached your daily quota limit');
+    expect(result).toContain(
+      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+    );
+    expect(result).not.toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+  });
+
+  it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      UserTierId.STANDARD,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain('[API Error: Rate limit exceeded');
+    expect(result).toContain(
+      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+    );
+    expect(result).not.toContain(
+      'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+    );
+  });
 });
diff --git a/packages/cli/src/ui/utils/errorParsing.ts b/packages/cli/src/ui/utils/errorParsing.ts
index 33014812..555d5e4e 100644
--- a/packages/cli/src/ui/utils/errorParsing.ts
+++ b/packages/cli/src/ui/utils/errorParsing.ts
@@ -4,66 +4,118 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { AuthType, StructuredError } from '@google/gemini-cli-core';
+import {
+  AuthType,
+  UserTierId,
+  DEFAULT_GEMINI_FLASH_MODEL,
+  DEFAULT_GEMINI_MODEL,
+  isProQuotaExceededError,
+  isGenericQuotaExceededError,
+  isApiError,
+  isStructuredError,
+} from '@google/gemini-cli-core';
 
-const RATE_LIMIT_ERROR_MESSAGE_GOOGLE =
-  '\nPlease wait and try again later. To increase your limits, upgrade to a plan with higher limits, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey';
+// Free-tier Google-login messages: rate limit, Pro quota, generic quota
+const getRateLimitErrorMessageGoogleFree = (
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+
+const getRateLimitErrorMessageGoogleProQuotaFree = (
+  currentModel: string = DEFAULT_GEMINI_MODEL,
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
+  `\nYou have reached your daily quota limit. To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+// Paid-tier (Legacy/Standard) counterparts; these omit the upgrade prompt
+const getRateLimitErrorMessageGooglePaid = (
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
+
+const getRateLimitErrorMessageGoogleProQuotaPaid = (
+  currentModel: string = DEFAULT_GEMINI_MODEL,
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+const getRateLimitErrorMessageGoogleGenericQuotaPaid = (
+  currentModel: string = DEFAULT_GEMINI_MODEL,
+) =>
+  `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
 const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI =
   '\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method';
 const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
   '\nPlease wait and try again later. To increase your limits, request a quota increase through Vertex, or switch to another /auth method';
-const RATE_LIMIT_ERROR_MESSAGE_DEFAULT =
-  'Your request has been rate limited. Please wait and try again later.';
-
-export interface ApiError {
-  error: {
-    code: number;
-    message: string;
-    status: string;
-    details: unknown[];
-  };
-}
-
-function isApiError(error: unknown): error is ApiError {
-  return (
-    typeof error === 'object' &&
-    error !== null &&
-    'error' in error &&
-    typeof (error as ApiError).error === 'object' &&
-    'message' in (error as ApiError).error
-  );
-}
-
-function isStructuredError(error: unknown): error is StructuredError {
-  return (
-    typeof error === 'object' &&
-    error !== null &&
-    'message' in error &&
-    typeof (error as StructuredError).message === 'string'
-  );
-}
+const getRateLimitErrorMessageDefault = (
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
 
-function getRateLimitMessage(authType?: AuthType): string {
+function getRateLimitMessage(
+  authType?: AuthType,
+  error?: unknown,
+  userTier?: UserTierId,
+  currentModel?: string,
+  fallbackModel?: string,
+): string {
   switch (authType) {
-    case AuthType.LOGIN_WITH_GOOGLE:
-      return RATE_LIMIT_ERROR_MESSAGE_GOOGLE;
+    case AuthType.LOGIN_WITH_GOOGLE: {
+      // Legacy and Standard are paid; undefined or any other tier gets free-tier text
+      const isPaidTier =
+        userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
+
+      if (isProQuotaExceededError(error)) {
+        return isPaidTier
+          ? getRateLimitErrorMessageGoogleProQuotaPaid(
+              currentModel || DEFAULT_GEMINI_MODEL,
+              fallbackModel,
+            )
+          : getRateLimitErrorMessageGoogleProQuotaFree(
+              currentModel || DEFAULT_GEMINI_MODEL,
+              fallbackModel,
+            );
+      } else if (isGenericQuotaExceededError(error)) {
+        return isPaidTier
+          ? getRateLimitErrorMessageGoogleGenericQuotaPaid(
+              currentModel || DEFAULT_GEMINI_MODEL,
+            )
+          : getRateLimitErrorMessageGoogleGenericQuotaFree();
+      } else {
+        return isPaidTier
+          ? getRateLimitErrorMessageGooglePaid(fallbackModel)
+          : getRateLimitErrorMessageGoogleFree(fallbackModel);
+      }
+    }
     case AuthType.USE_GEMINI:
       return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI;
     case AuthType.USE_VERTEX_AI:
       return RATE_LIMIT_ERROR_MESSAGE_VERTEX;
     default:
-      return RATE_LIMIT_ERROR_MESSAGE_DEFAULT;
+      return getRateLimitErrorMessageDefault(fallbackModel);
   }
 }
 
 export function parseAndFormatApiError(
   error: unknown,
   authType?: AuthType,
+  userTier?: UserTierId,
+  currentModel?: string,
+  fallbackModel?: string,
 ): string {
   if (isStructuredError(error)) {
     let text = `[API Error: ${error.message}]`;
     if (error.status === 429) {
-      text += getRateLimitMessage(authType);
+      text += getRateLimitMessage(
+        authType,
+        error,
+        userTier,
+        currentModel,
+        fallbackModel,
+      );
     }
     return text;
   }
@@ -92,7 +144,13 @@ export function parseAndFormatApiError(
         }
         let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`;
         if (parsedError.error.code === 429) {
-          text += getRateLimitMessage(authType);
+          text += getRateLimitMessage(
+            authType,
+            parsedError,
+            userTier,
+            currentModel,
+            fallbackModel,
+          );
         }
         return text;
       }
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 2cea70ca..b0659a9d 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -103,6 +103,7 @@ export interface SandboxConfig {
 export type FlashFallbackHandler = (
   currentModel: string,
   fallbackModel: string,
+  error?: unknown,
 ) => Promise<boolean>;
 
 export interface ConfigParameters {
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index 9d3791fd..80680aca 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -845,6 +845,7 @@ describe('Gemini Client (client.ts)', () => {
       expect(mockFallbackHandler).toHaveBeenCalledWith(
         currentModel,
         fallbackModel,
+        undefined,
       );
     });
   });
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 6cfcd407..b8996cbf 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -323,8 +323,8 @@ export class GeminiClient {
         });
 
       const result = await retryWithBackoff(apiCall, {
-        onPersistent429: async (authType?: string) =>
-          await this.handleFlashFallback(authType),
+        onPersistent429: async (authType?: string, error?: unknown) =>
+          await this.handleFlashFallback(authType, error),
         authType: this.config.getContentGeneratorConfig()?.authType,
       });
 
@@ -411,8 +411,8 @@ export class GeminiClient {
         });
 
       const result = await retryWithBackoff(apiCall, {
-        onPersistent429: async (authType?: string) =>
-          await this.handleFlashFallback(authType),
+        onPersistent429: async (authType?: string, error?: unknown) =>
+          await this.handleFlashFallback(authType, error),
         authType: this.config.getContentGeneratorConfig()?.authType,
       });
       return result;
@@ -559,7 +559,10 @@ export class GeminiClient {
    * Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
    * Uses a fallback handler if provided by the config, otherwise returns null.
    */
-  private async handleFlashFallback(authType?: string): Promise<string | null> {
+  private async handleFlashFallback(
+    authType?: string,
+    error?: unknown,
+  ): Promise<string | null> {
     // Only handle fallback for OAuth users
     if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
       return null;
@@ -577,7 +580,11 @@ export class GeminiClient {
     const fallbackHandler = this.config.flashFallbackHandler;
     if (typeof fallbackHandler === 'function') {
       try {
-        const accepted = await fallbackHandler(currentModel, fallbackModel);
+        const accepted = await fallbackHandler(
+          currentModel,
+          fallbackModel,
+          error,
+        );
         if (accepted) {
           this.config.setModel(fallbackModel);
           return fallbackModel;
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 537d55a0..1be84f2e 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -191,7 +191,10 @@ export class GeminiChat {
    * Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
    * Uses a fallback handler if provided by the config, otherwise returns null.
    */
-  private async handleFlashFallback(authType?: string): Promise<string | null> {
+  private async handleFlashFallback(
+    authType?: string,
+    error?: unknown,
+  ): Promise<string | null> {
     // Only handle fallback for OAuth users
     if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
       return null;
@@ -209,7 +212,11 @@ export class GeminiChat {
     const fallbackHandler = this.config.flashFallbackHandler;
     if (typeof fallbackHandler === 'function') {
       try {
-        const accepted = await fallbackHandler(currentModel, fallbackModel);
+        const accepted = await fallbackHandler(
+          currentModel,
+          fallbackModel,
+          error,
+        );
         if (accepted) {
           this.config.setModel(fallbackModel);
           return fallbackModel;
@@ -270,8 +277,8 @@ export class GeminiChat {
           }
           return false;
         },
-        onPersistent429: async (authType?: string) =>
-          await this.handleFlashFallback(authType),
+        onPersistent429: async (authType?: string, error?: unknown) =>
+          await this.handleFlashFallback(authType, error),
         authType: this.config.getContentGeneratorConfig()?.authType,
       });
       const durationMs = Date.now() - startTime;
@@ -367,8 +374,8 @@ export class GeminiChat {
           }
           return false; // Don't retry other errors by default
         },
-        onPersistent429: async (authType?: string) =>
-          await this.handleFlashFallback(authType),
+        onPersistent429: async (authType?: string, error?: unknown) =>
+          await this.handleFlashFallback(authType, error),
         authType: this.config.getContentGeneratorConfig()?.authType,
       });
 
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index aff37f50..df7db12c 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -32,6 +32,7 @@ export * from './utils/getFolderStructure.js';
 export * from './utils/memoryDiscovery.js';
 export * from './utils/gitIgnoreParser.js';
 export * from './utils/editor.js';
+export * from './utils/quotaErrorDetection.js';
 
 // Export services
 export * from './services/fileDiscoveryService.js';
diff --git a/packages/core/src/utils/flashFallback.integration.test.ts b/packages/core/src/utils/flashFallback.integration.test.ts
index 6554425f..f5e354a0 100644
--- a/packages/core/src/utils/flashFallback.integration.test.ts
+++ b/packages/core/src/utils/flashFallback.integration.test.ts
@@ -86,6 +86,7 @@ describe('Flash Fallback Integration', () => {
     expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
     expect(mockFallbackHandler).toHaveBeenCalledWith(
       AuthType.LOGIN_WITH_GOOGLE,
+      expect.any(Error),
     );
     expect(result).toBe('success after fallback');
     // Should have: 2 failures, then fallback triggered, then 1 success after retry reset
diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts
new file mode 100644
index 00000000..ec77f5ee
--- /dev/null
+++ b/packages/core/src/utils/quotaErrorDetection.ts
@@ -0,0 +1,82 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export interface ApiError {
+  error: {
+    code: number;
+    message: string; // text searched by the quota checks in quotaErrorDetection.ts
+    status: string;
+    details: unknown[];
+  };
+}
+
+interface StructuredError {
+  message: string; // text searched by the quota checks in quotaErrorDetection.ts
+  status?: number; // optional; isStructuredError does not inspect it
+}
+
+/** Guards `{ error: { message: string } }`; a null `error` or non-string message yields false. */
+export function isApiError(error: unknown): error is ApiError {
+  // `typeof null` is 'object'; `?.` turns a null level into undefined, not a throw.
+  const outer = typeof error === 'object' ? error : null;
+  const inner = (outer as { error?: unknown } | null)?.error;
+  const nested = typeof inner === 'object' ? inner : null;
+  const message = (nested as { message?: unknown } | null)?.message;
+  return typeof message === 'string';
+}
+
+/** True when `error` is a non-null object carrying a string `message`. */
+export function isStructuredError(error: unknown): error is StructuredError {
+  if (error === null || typeof error !== 'object') {
+    return false;
+  }
+  const candidate = error as { message?: unknown };
+  return 'message' in error && typeof candidate.message === 'string';
+}
+
+/**
+ * Reports whether `error` carries a Gemini Pro quota-exhaustion message.
+ *
+ * The message must contain both "Quota exceeded for quota metric 'Gemini"
+ * and "Pro Requests'", for example
+ * "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'".
+ * Plain substring checks are used instead of a regex to avoid ReDoS.
+ *
+ * Accepts a raw string, an object with a string `message`, or an
+ * `ApiError`-shaped object (its `error.message` is inspected).
+ */
+export function isProQuotaExceededError(error: unknown): boolean {
+  let text: string;
+  if (typeof error === 'string') {
+    text = error;
+  } else if (isStructuredError(error)) {
+    text = error.message;
+  } else if (isApiError(error)) {
+    text = error.error.message;
+  } else {
+    return false;
+  }
+  return (
+    text.includes("Quota exceeded for quota metric 'Gemini") &&
+    text.includes("Pro Requests'")
+  );
+}
+
+/** Reports whether `error` (a string, an object with a string `message`, or
+ * an `ApiError`) contains the text "Quota exceeded for quota metric". */
+export function isGenericQuotaExceededError(error: unknown): boolean {
+  let text: string;
+  if (typeof error === 'string') {
+    text = error;
+  } else if (isStructuredError(error)) {
+    text = error.message;
+  } else if (isApiError(error)) {
+    text = error.error.message;
+  } else {
+    return false;
+  }
+  return text.includes('Quota exceeded for quota metric');
+}
diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts
index a0294c31..f84d2004 100644
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -357,7 +357,10 @@ describe('retryWithBackoff', () => {
       // Should fail with original error when fallback is rejected
       expect(result).toBeInstanceOf(Error);
       expect(result.message).toBe('Rate limit exceeded');
-      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+      expect(fallbackCallback).toHaveBeenCalledWith(
+        'oauth-personal',
+        expect.any(Error),
+      );
     });
 
     it('should handle mixed error types (only count consecutive 429s)', async () => {
diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts
index f3f5f2d2..01651950 100644
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -5,13 +5,20 @@
  */
 
 import { AuthType } from '../core/contentGenerator.js';
+import {
+  isProQuotaExceededError,
+  isGenericQuotaExceededError,
+} from './quotaErrorDetection.js';
 
 export interface RetryOptions {
   maxAttempts: number;
   initialDelayMs: number;
   maxDelayMs: number;
   shouldRetry: (error: Error) => boolean;
-  onPersistent429?: (authType?: string) => Promise<string | null>;
+  onPersistent429?: (
+    authType?: string,
+    error?: unknown, // the error caught by retryWithBackoff that triggered the fallback
+  ) => Promise<string | null>;
   authType?: string;
 }
 
@@ -86,6 +93,53 @@ export async function retryWithBackoff<T>(
     } catch (error) {
       const errorStatus = getErrorStatus(error);
 
+      // Check for Pro quota exceeded error first - immediate fallback for OAuth users
+      if (
+        errorStatus === 429 &&
+        authType === AuthType.LOGIN_WITH_GOOGLE &&
+        isProQuotaExceededError(error) &&
+        onPersistent429
+      ) {
+        try {
+          const fallbackModel = await onPersistent429(authType, error);
+          if (fallbackModel) {
+            // Reset attempt counter and try with new model
+            attempt = 0;
+            consecutive429Count = 0;
+            currentDelay = initialDelayMs;
+            // With the model updated, we continue to the next attempt
+            continue;
+          }
+        } catch (fallbackError) {
+          // If fallback fails, continue with original error
+          console.warn('Fallback to Flash model failed:', fallbackError);
+        }
+      }
+
+      // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
+      if (
+        errorStatus === 429 &&
+        authType === AuthType.LOGIN_WITH_GOOGLE &&
+        !isProQuotaExceededError(error) &&
+        isGenericQuotaExceededError(error) &&
+        onPersistent429
+      ) {
+        try {
+          const fallbackModel = await onPersistent429(authType, error);
+          if (fallbackModel) {
+            // Reset attempt counter and try with new model
+            attempt = 0;
+            consecutive429Count = 0;
+            currentDelay = initialDelayMs;
+            // With the model updated, we continue to the next attempt
+            continue;
+          }
+        } catch (fallbackError) {
+          // If fallback fails, continue with original error
+          console.warn('Fallback to Flash model failed:', fallbackError);
+        }
+      }
+
       // Track consecutive 429 errors
       if (errorStatus === 429) {
         consecutive429Count++;
@@ -100,7 +154,7 @@ export async function retryWithBackoff<T>(
         authType === AuthType.LOGIN_WITH_GOOGLE
       ) {
         try {
-          const fallbackModel = await onPersistent429(authType);
+          const fallbackModel = await onPersistent429(authType, error);
           if (fallbackModel) {
             // Reset attempt counter and try with new model
             attempt = 0;