summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--packages/cli/src/ui/App.tsx31
-rw-r--r--packages/cli/src/ui/hooks/useGeminiStream.test.tsx4
-rw-r--r--packages/cli/src/ui/hooks/useGeminiStream.ts7
-rw-r--r--packages/cli/src/ui/utils/errorParsing.test.ts309
-rw-r--r--packages/cli/src/ui/utils/errorParsing.ts136
-rw-r--r--packages/core/src/config/config.ts1
-rw-r--r--packages/core/src/core/client.test.ts1
-rw-r--r--packages/core/src/core/client.ts19
-rw-r--r--packages/core/src/core/geminiChat.ts19
-rw-r--r--packages/core/src/index.ts1
-rw-r--r--packages/core/src/utils/flashFallback.integration.test.ts1
-rw-r--r--packages/core/src/utils/quotaErrorDetection.ts82
-rw-r--r--packages/core/src/utils/retry.test.ts5
-rw-r--r--packages/core/src/utils/retry.ts58
14 files changed, 611 insertions, 63 deletions
diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx
index feb132ae..833cc2b5 100644
--- a/packages/cli/src/ui/App.tsx
+++ b/packages/cli/src/ui/App.tsx
@@ -67,6 +67,10 @@ import { useBracketedPaste } from './hooks/useBracketedPaste.js';
import { useTextBuffer } from './components/shared/text-buffer.js';
import * as fs from 'fs';
import { UpdateNotification } from './components/UpdateNotification.js';
+import {
+ isProQuotaExceededError,
+ isGenericQuotaExceededError,
+} from '@google/gemini-cli-core';
import { checkForUpdates } from './utils/updateCheck.js';
import ansiEscapes from 'ansi-escapes';
import { OverflowProvider } from './contexts/OverflowContext.js';
@@ -243,15 +247,34 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
const flashFallbackHandler = async (
currentModel: string,
fallbackModel: string,
+ error?: unknown,
): Promise<boolean> => {
+ let message: string;
+
+ // Check if this is a Pro quota exceeded error
+ if (error && isProQuotaExceededError(error)) {
+ message = `⚡ You have reached your daily ${currentModel} quota limit.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
+⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ You can switch authentication methods by typing /auth`;
+ } else if (error && isGenericQuotaExceededError(error)) {
+ message = `⚡ You have reached your daily quota limit.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
+⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ You can switch authentication methods by typing /auth`;
+ } else {
+ // Default fallback message for other cases (like consecutive 429s)
+ message = `⚡ Slow response times detected.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
+ }
+
// Add message to UI history
addItem(
{
type: MessageType.INFO,
- text: `⚡ Slow response times detected. Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
-⚡ To avoid this you can either upgrade to Standard tier. See: https://goo.gle/set-up-gemini-code-assist
-⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
-⚡ You can switch authentication methods by typing /auth`,
+ text: message,
},
Date.now(),
);
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index 3a002919..fc6f93c5 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -1097,6 +1097,7 @@ describe('useGeminiStream', () => {
getContentGeneratorConfig: vi.fn(() => ({
authType: mockAuthType,
})),
+ getModel: vi.fn(() => 'gemini-2.5-pro'),
} as unknown as Config;
const { result } = renderHook(() =>
@@ -1125,6 +1126,9 @@ describe('useGeminiStream', () => {
expect(mockParseAndFormatApiError).toHaveBeenCalledWith(
'Rate limit exceeded',
mockAuthType,
+ undefined,
+ 'gemini-2.5-pro',
+ 'gemini-2.5-flash',
);
});
});
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index b4acdb9a..550cab86 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -24,6 +24,7 @@ import {
ThoughtSummary,
UnauthorizedError,
UserPromptEvent,
+ DEFAULT_GEMINI_FLASH_MODEL,
} from '@google/gemini-cli-core';
import { type Part, type PartListUnion } from '@google/genai';
import {
@@ -397,6 +398,9 @@ export const useGeminiStream = (
text: parseAndFormatApiError(
eventValue.error,
config.getContentGeneratorConfig().authType,
+ undefined,
+ config.getModel(),
+ DEFAULT_GEMINI_FLASH_MODEL,
),
},
userMessageTimestamp,
@@ -533,6 +537,9 @@ export const useGeminiStream = (
text: parseAndFormatApiError(
getErrorMessage(error) || 'Unknown error',
config.getContentGeneratorConfig().authType,
+ undefined,
+ config.getModel(),
+ DEFAULT_GEMINI_FLASH_MODEL,
),
},
userMessageTimestamp,
diff --git a/packages/cli/src/ui/utils/errorParsing.test.ts b/packages/cli/src/ui/utils/errorParsing.test.ts
index 4bbaabf1..3d228efb 100644
--- a/packages/cli/src/ui/utils/errorParsing.test.ts
+++ b/packages/cli/src/ui/utils/errorParsing.test.ts
@@ -6,10 +6,16 @@
import { describe, it, expect } from 'vitest';
import { parseAndFormatApiError } from './errorParsing.js';
-import { AuthType, StructuredError } from '@google/gemini-cli-core';
+import {
+ AuthType,
+ UserTierId,
+ DEFAULT_GEMINI_FLASH_MODEL,
+ isProQuotaExceededError,
+} from '@google/gemini-cli-core';
describe('parseAndFormatApiError', () => {
- const enterpriseMessage = 'upgrade to a plan with higher limits';
+ const _enterpriseMessage =
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits';
const vertexMessage = 'request a quota increase through Vertex';
const geminiMessage = 'request a quota increase through AI Studio';
@@ -24,9 +30,17 @@ describe('parseAndFormatApiError', () => {
it('should format a 429 API error with the default message', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
- const result = parseAndFormatApiError(errorMessage);
+ const result = parseAndFormatApiError(
+ errorMessage,
+ undefined,
+ undefined,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
expect(result).toContain('[API Error: Rate limit exceeded');
- expect(result).toContain('Your request has been rate limited');
+ expect(result).toContain(
+ 'Slow response times detected. Switching to the gemini-2.5-flash model',
+ );
});
it('should format a 429 API error with the personal message', () => {
@@ -35,9 +49,14 @@ describe('parseAndFormatApiError', () => {
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
+ undefined,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain('[API Error: Rate limit exceeded');
- expect(result).toContain(enterpriseMessage);
+ expect(result).toContain(
+ 'Slow response times detected. Switching to the gemini-2.5-flash model',
+ );
});
it('should format a 429 API error with the vertex message', () => {
@@ -116,4 +135,284 @@ describe('parseAndFormatApiError', () => {
const expected = '[API Error: An unknown error occurred.]';
expect(parseAndFormatApiError(error)).toBe(expected);
});
+
+ it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => {
+ const errorMessage =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ const result = parseAndFormatApiError(
+ errorMessage,
+ AuthType.LOGIN_WITH_GOOGLE,
+ undefined,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ expect(result).toContain(
+ "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+ );
+ expect(result).toContain(
+ 'You have reached your daily gemini-2.5-pro quota limit',
+ );
+ expect(result).toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ });
+
+ it('should format a regular 429 API error with standard message for Google auth', () => {
+ const errorMessage =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
+ const result = parseAndFormatApiError(
+ errorMessage,
+ AuthType.LOGIN_WITH_GOOGLE,
+ undefined,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ expect(result).toContain('[API Error: Rate limit exceeded');
+ expect(result).toContain(
+ 'Slow response times detected. Switching to the gemini-2.5-flash model',
+ );
+ expect(result).not.toContain(
+ 'You have reached your daily gemini-2.5-pro quota limit',
+ );
+ });
+
+  it('should format a 429 API error with generic quota exceeded message for Google auth', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
+    );
+    expect(result).toContain('You have reached your daily quota limit');
+    // Pro wording embeds the model id (gemini-2.5-pro); negate that exact text.
+    const proText = 'You have reached your daily gemini-2.5-pro quota limit';
+    expect(result).not.toContain(proText);
+  });
+
+ it('should prioritize Pro quota message over generic quota message for Google auth', () => {
+ const errorMessage =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ const result = parseAndFormatApiError(
+ errorMessage,
+ AuthType.LOGIN_WITH_GOOGLE,
+ undefined,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ expect(result).toContain(
+ "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+ );
+ expect(result).toContain(
+ 'You have reached your daily gemini-2.5-pro quota limit',
+ );
+ expect(result).not.toContain('You have reached your daily quota limit');
+ });
+
+ it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => {
+ const errorMessage =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ const result = parseAndFormatApiError(
+ errorMessage,
+ AuthType.LOGIN_WITH_GOOGLE,
+ UserTierId.STANDARD,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ expect(result).toContain(
+ "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+ );
+ expect(result).toContain(
+ 'You have reached your daily gemini-2.5-pro quota limit',
+ );
+ expect(result).toContain(
+ 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+ );
+ expect(result).not.toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ });
+
+ it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => {
+ const errorMessage =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ const result = parseAndFormatApiError(
+ errorMessage,
+ AuthType.LOGIN_WITH_GOOGLE,
+ UserTierId.LEGACY,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ expect(result).toContain(
+ "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+ );
+ expect(result).toContain(
+ 'You have reached your daily gemini-2.5-pro quota limit',
+ );
+ expect(result).toContain(
+ 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+ );
+ expect(result).not.toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ });
+
+ it('should handle different Gemini version strings in Pro quota exceeded errors', () => {
+ const errorMessage15 =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ const errorMessagePreview =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ const errorMessageBeta =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ const errorMessageExperimental =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+
+ const result15 = parseAndFormatApiError(
+ errorMessage15,
+ AuthType.LOGIN_WITH_GOOGLE,
+ undefined,
+ 'gemini-1.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ const resultPreview = parseAndFormatApiError(
+ errorMessagePreview,
+ AuthType.LOGIN_WITH_GOOGLE,
+ undefined,
+ 'gemini-2.5-preview-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ const resultBeta = parseAndFormatApiError(
+ errorMessageBeta,
+ AuthType.LOGIN_WITH_GOOGLE,
+ undefined,
+ 'gemini-beta-3.0-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ const resultExperimental = parseAndFormatApiError(
+ errorMessageExperimental,
+ AuthType.LOGIN_WITH_GOOGLE,
+ undefined,
+ 'gemini-experimental-v2-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+
+ expect(result15).toContain(
+ 'You have reached your daily gemini-1.5-pro quota limit',
+ );
+ expect(resultPreview).toContain(
+ 'You have reached your daily gemini-2.5-preview-pro quota limit',
+ );
+ expect(resultBeta).toContain(
+ 'You have reached your daily gemini-beta-3.0-pro quota limit',
+ );
+ expect(resultExperimental).toContain(
+ 'You have reached your daily gemini-experimental-v2-pro quota limit',
+ );
+ expect(result15).toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ expect(resultPreview).toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ expect(resultBeta).toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ expect(resultExperimental).toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ });
+
+ it('should not match non-Pro models with similar version strings', () => {
+ // Test that Flash models with similar version strings don't match
+ expect(
+ isProQuotaExceededError(
+ "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit",
+ ),
+ ).toBe(false);
+ expect(
+ isProQuotaExceededError(
+ "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
+ ),
+ ).toBe(false);
+ expect(
+ isProQuotaExceededError(
+ "Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit",
+ ),
+ ).toBe(false);
+ expect(
+ isProQuotaExceededError(
+ "Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit",
+ ),
+ ).toBe(false);
+
+ // Test other model types
+ expect(
+ isProQuotaExceededError(
+ "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit",
+ ),
+ ).toBe(false);
+ expect(
+ isProQuotaExceededError(
+ "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit",
+ ),
+ ).toBe(false);
+
+ // Test generic quota messages
+ expect(
+ isProQuotaExceededError(
+ "Quota exceeded for quota metric 'GenerationRequests' and limit",
+ ),
+ ).toBe(false);
+ expect(
+ isProQuotaExceededError(
+ "Quota exceeded for quota metric 'EmbeddingRequests' and limit",
+ ),
+ ).toBe(false);
+ });
+
+ it('should format a generic quota exceeded message for Google auth (Standard tier)', () => {
+ const errorMessage =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ const result = parseAndFormatApiError(
+ errorMessage,
+ AuthType.LOGIN_WITH_GOOGLE,
+ UserTierId.STANDARD,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ expect(result).toContain(
+ "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
+ );
+ expect(result).toContain('You have reached your daily quota limit');
+ expect(result).toContain(
+ 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+ );
+ expect(result).not.toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ });
+
+ it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => {
+ const errorMessage =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
+ const result = parseAndFormatApiError(
+ errorMessage,
+ AuthType.LOGIN_WITH_GOOGLE,
+ UserTierId.STANDARD,
+ 'gemini-2.5-pro',
+ DEFAULT_GEMINI_FLASH_MODEL,
+ );
+ expect(result).toContain('[API Error: Rate limit exceeded');
+ expect(result).toContain(
+ 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+ );
+ expect(result).not.toContain(
+ 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+ );
+ });
});
diff --git a/packages/cli/src/ui/utils/errorParsing.ts b/packages/cli/src/ui/utils/errorParsing.ts
index 33014812..555d5e4e 100644
--- a/packages/cli/src/ui/utils/errorParsing.ts
+++ b/packages/cli/src/ui/utils/errorParsing.ts
@@ -4,66 +4,118 @@
* SPDX-License-Identifier: Apache-2.0
*/
-import { AuthType, StructuredError } from '@google/gemini-cli-core';
+import {
+ AuthType,
+ UserTierId,
+ DEFAULT_GEMINI_FLASH_MODEL,
+ DEFAULT_GEMINI_MODEL,
+ isProQuotaExceededError,
+ isGenericQuotaExceededError,
+ isApiError,
+ isStructuredError,
+} from '@google/gemini-cli-core';
-const RATE_LIMIT_ERROR_MESSAGE_GOOGLE =
- '\nPlease wait and try again later. To increase your limits, upgrade to a plan with higher limits, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey';
+// Free Tier message functions
+const getRateLimitErrorMessageGoogleFree = (
+ fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+ `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+
+const getRateLimitErrorMessageGoogleProQuotaFree = (
+ currentModel: string = DEFAULT_GEMINI_MODEL,
+ fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+ `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
+ `\nYou have reached your daily quota limit. To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+// Legacy/Standard Tier message functions
+const getRateLimitErrorMessageGooglePaid = (
+ fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+ `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
+
+const getRateLimitErrorMessageGoogleProQuotaPaid = (
+ currentModel: string = DEFAULT_GEMINI_MODEL,
+ fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+ `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+const getRateLimitErrorMessageGoogleGenericQuotaPaid = (
+ currentModel: string = DEFAULT_GEMINI_MODEL,
+) =>
+ `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI =
'\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method';
const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
'\nPlease wait and try again later. To increase your limits, request a quota increase through Vertex, or switch to another /auth method';
-const RATE_LIMIT_ERROR_MESSAGE_DEFAULT =
- 'Your request has been rate limited. Please wait and try again later.';
-
-export interface ApiError {
- error: {
- code: number;
- message: string;
- status: string;
- details: unknown[];
- };
-}
-
-function isApiError(error: unknown): error is ApiError {
- return (
- typeof error === 'object' &&
- error !== null &&
- 'error' in error &&
- typeof (error as ApiError).error === 'object' &&
- 'message' in (error as ApiError).error
- );
-}
-
-function isStructuredError(error: unknown): error is StructuredError {
- return (
- typeof error === 'object' &&
- error !== null &&
- 'message' in error &&
- typeof (error as StructuredError).message === 'string'
- );
-}
+const getRateLimitErrorMessageDefault = (
+ fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+ `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
-function getRateLimitMessage(authType?: AuthType): string {
+function getRateLimitMessage(
+ authType?: AuthType,
+ error?: unknown,
+ userTier?: UserTierId,
+ currentModel?: string,
+ fallbackModel?: string,
+): string {
switch (authType) {
- case AuthType.LOGIN_WITH_GOOGLE:
- return RATE_LIMIT_ERROR_MESSAGE_GOOGLE;
+ case AuthType.LOGIN_WITH_GOOGLE: {
+ // Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified
+ const isPaidTier =
+ userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
+
+ if (isProQuotaExceededError(error)) {
+ return isPaidTier
+ ? getRateLimitErrorMessageGoogleProQuotaPaid(
+ currentModel || DEFAULT_GEMINI_MODEL,
+ fallbackModel,
+ )
+ : getRateLimitErrorMessageGoogleProQuotaFree(
+ currentModel || DEFAULT_GEMINI_MODEL,
+ fallbackModel,
+ );
+ } else if (isGenericQuotaExceededError(error)) {
+ return isPaidTier
+ ? getRateLimitErrorMessageGoogleGenericQuotaPaid(
+ currentModel || DEFAULT_GEMINI_MODEL,
+ )
+ : getRateLimitErrorMessageGoogleGenericQuotaFree();
+ } else {
+ return isPaidTier
+ ? getRateLimitErrorMessageGooglePaid(fallbackModel)
+ : getRateLimitErrorMessageGoogleFree(fallbackModel);
+ }
+ }
case AuthType.USE_GEMINI:
return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI;
case AuthType.USE_VERTEX_AI:
return RATE_LIMIT_ERROR_MESSAGE_VERTEX;
default:
- return RATE_LIMIT_ERROR_MESSAGE_DEFAULT;
+ return getRateLimitErrorMessageDefault(fallbackModel);
}
}
export function parseAndFormatApiError(
error: unknown,
authType?: AuthType,
+ userTier?: UserTierId,
+ currentModel?: string,
+ fallbackModel?: string,
): string {
if (isStructuredError(error)) {
let text = `[API Error: ${error.message}]`;
if (error.status === 429) {
- text += getRateLimitMessage(authType);
+ text += getRateLimitMessage(
+ authType,
+ error,
+ userTier,
+ currentModel,
+ fallbackModel,
+ );
}
return text;
}
@@ -92,7 +144,13 @@ export function parseAndFormatApiError(
}
let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`;
if (parsedError.error.code === 429) {
- text += getRateLimitMessage(authType);
+ text += getRateLimitMessage(
+ authType,
+ parsedError,
+ userTier,
+ currentModel,
+ fallbackModel,
+ );
}
return text;
}
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 2cea70ca..b0659a9d 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -103,6 +103,7 @@ export interface SandboxConfig {
export type FlashFallbackHandler = (
currentModel: string,
fallbackModel: string,
+ error?: unknown,
) => Promise<boolean>;
export interface ConfigParameters {
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index 9d3791fd..80680aca 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -845,6 +845,7 @@ describe('Gemini Client (client.ts)', () => {
expect(mockFallbackHandler).toHaveBeenCalledWith(
currentModel,
fallbackModel,
+ undefined,
);
});
});
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 6cfcd407..b8996cbf 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -323,8 +323,8 @@ export class GeminiClient {
});
const result = await retryWithBackoff(apiCall, {
- onPersistent429: async (authType?: string) =>
- await this.handleFlashFallback(authType),
+ onPersistent429: async (authType?: string, error?: unknown) =>
+ await this.handleFlashFallback(authType, error),
authType: this.config.getContentGeneratorConfig()?.authType,
});
@@ -411,8 +411,8 @@ export class GeminiClient {
});
const result = await retryWithBackoff(apiCall, {
- onPersistent429: async (authType?: string) =>
- await this.handleFlashFallback(authType),
+ onPersistent429: async (authType?: string, error?: unknown) =>
+ await this.handleFlashFallback(authType, error),
authType: this.config.getContentGeneratorConfig()?.authType,
});
return result;
@@ -559,7 +559,10 @@ export class GeminiClient {
* Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
* Uses a fallback handler if provided by the config, otherwise returns null.
*/
- private async handleFlashFallback(authType?: string): Promise<string | null> {
+ private async handleFlashFallback(
+ authType?: string,
+ error?: unknown,
+ ): Promise<string | null> {
// Only handle fallback for OAuth users
if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
return null;
@@ -577,7 +580,11 @@ export class GeminiClient {
const fallbackHandler = this.config.flashFallbackHandler;
if (typeof fallbackHandler === 'function') {
try {
- const accepted = await fallbackHandler(currentModel, fallbackModel);
+ const accepted = await fallbackHandler(
+ currentModel,
+ fallbackModel,
+ error,
+ );
if (accepted) {
this.config.setModel(fallbackModel);
return fallbackModel;
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 537d55a0..1be84f2e 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -191,7 +191,10 @@ export class GeminiChat {
* Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
* Uses a fallback handler if provided by the config, otherwise returns null.
*/
- private async handleFlashFallback(authType?: string): Promise<string | null> {
+ private async handleFlashFallback(
+ authType?: string,
+ error?: unknown,
+ ): Promise<string | null> {
// Only handle fallback for OAuth users
if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
return null;
@@ -209,7 +212,11 @@ export class GeminiChat {
const fallbackHandler = this.config.flashFallbackHandler;
if (typeof fallbackHandler === 'function') {
try {
- const accepted = await fallbackHandler(currentModel, fallbackModel);
+ const accepted = await fallbackHandler(
+ currentModel,
+ fallbackModel,
+ error,
+ );
if (accepted) {
this.config.setModel(fallbackModel);
return fallbackModel;
@@ -270,8 +277,8 @@ export class GeminiChat {
}
return false;
},
- onPersistent429: async (authType?: string) =>
- await this.handleFlashFallback(authType),
+ onPersistent429: async (authType?: string, error?: unknown) =>
+ await this.handleFlashFallback(authType, error),
authType: this.config.getContentGeneratorConfig()?.authType,
});
const durationMs = Date.now() - startTime;
@@ -367,8 +374,8 @@ export class GeminiChat {
}
return false; // Don't retry other errors by default
},
- onPersistent429: async (authType?: string) =>
- await this.handleFlashFallback(authType),
+ onPersistent429: async (authType?: string, error?: unknown) =>
+ await this.handleFlashFallback(authType, error),
authType: this.config.getContentGeneratorConfig()?.authType,
});
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index aff37f50..df7db12c 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -32,6 +32,7 @@ export * from './utils/getFolderStructure.js';
export * from './utils/memoryDiscovery.js';
export * from './utils/gitIgnoreParser.js';
export * from './utils/editor.js';
+export * from './utils/quotaErrorDetection.js';
// Export services
export * from './services/fileDiscoveryService.js';
diff --git a/packages/core/src/utils/flashFallback.integration.test.ts b/packages/core/src/utils/flashFallback.integration.test.ts
index 6554425f..f5e354a0 100644
--- a/packages/core/src/utils/flashFallback.integration.test.ts
+++ b/packages/core/src/utils/flashFallback.integration.test.ts
@@ -86,6 +86,7 @@ describe('Flash Fallback Integration', () => {
expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
expect(mockFallbackHandler).toHaveBeenCalledWith(
AuthType.LOGIN_WITH_GOOGLE,
+ expect.any(Error),
);
expect(result).toBe('success after fallback');
// Should have: 2 failures, then fallback triggered, then 1 success after retry reset
diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts
new file mode 100644
index 00000000..ec77f5ee
--- /dev/null
+++ b/packages/core/src/utils/quotaErrorDetection.ts
@@ -0,0 +1,82 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Error payload whose details are nested under an `error` key.
+ *
+ * `isApiError` narrows unknown values to this type, but it only inspects the
+ * nested `message` member; `code`, `status` and `details` are not verified at
+ * runtime.
+ */
+export interface ApiError {
+ error: {
+ code: number;
+ // Text searched by the quota helpers in this file for quota phrases.
+ message: string;
+ status: string;
+ details: unknown[];
+ };
+}
+
+/**
+ * Minimal error shape with a top-level string `message`.
+ * `isStructuredError` narrows unknown values to this type.
+ */
+interface StructuredError {
+ message: string;
+ // Not inspected by `isStructuredError`; presumably an HTTP status — TODO confirm.
+ status?: number;
+}
+
+/**
+ * Type guard for values that carry a nested `error.message` string.
+ *
+ * Only the `error.message` member is validated; `code`, `status` and
+ * `details` are not checked at runtime.
+ *
+ * @param error - Value of unknown shape to inspect.
+ * @returns `true` when `error` is a non-null object whose `error` property is
+ *   itself a non-null object holding a string `message`; `false` otherwise.
+ *   Never throws.
+ */
+export function isApiError(error: unknown): error is ApiError {
+  if (typeof error !== 'object' || error === null || !('error' in error)) {
+    return false;
+  }
+
+  const inner = (error as { error: unknown }).error;
+
+  // `typeof null` is 'object' and `'message' in null` throws a TypeError, so
+  // null must be rejected before the `in` check below.
+  if (typeof inner !== 'object' || inner === null) {
+    return false;
+  }
+
+  // Callers call string methods on the message right after narrowing, so a
+  // present-but-non-string message must not pass the guard.
+  return (
+    'message' in inner &&
+    typeof (inner as { message: unknown }).message === 'string'
+  );
+}
+
+/**
+ * Type guard for values exposing a top-level string `message`.
+ *
+ * @param error - Value of unknown shape to inspect.
+ * @returns `true` when `error` is a non-null object with a `message` property
+ *   whose value is a string; `false` otherwise.
+ */
+export function isStructuredError(error: unknown): error is StructuredError {
+  // Primitives and null can never carry a `message` property.
+  if (error === null || typeof error !== 'object') {
+    return false;
+  }
+  if (!('message' in error)) {
+    return false;
+  }
+  const candidate = (error as StructuredError).message;
+  return typeof candidate === 'string';
+}
+
+/**
+ * Reports whether an error describes an exhausted Gemini "Pro" quota.
+ *
+ * The message text is taken from the value itself when it is a string, from a
+ * top-level `message` when `isStructuredError` matches, or from
+ * `error.message` when `isApiError` matches. Example messages that match:
+ *   - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
+ *   - "Quota exceeded for quota metric 'Gemini 1.5-preview Pro Requests'"
+ *   - "Quota exceeded for quota metric 'Gemini beta-3.0 Pro Requests'"
+ *   - "Quota exceeded for quota metric 'Gemini experimental-v2 Pro Requests'"
+ *
+ * @param error - Value of unknown shape to inspect.
+ * @returns `true` when the extracted message contains both marker substrings;
+ *   `false` when no message can be extracted or a marker is missing.
+ */
+export function isProQuotaExceededError(error: unknown): boolean {
+  // Plain substring checks are used instead of a regex to avoid ReDoS.
+  const metricPrefix = "Quota exceeded for quota metric 'Gemini";
+  const metricSuffix = "Pro Requests'";
+
+  let text: string;
+  if (typeof error === 'string') {
+    text = error;
+  } else if (isStructuredError(error)) {
+    text = error.message;
+  } else if (isApiError(error)) {
+    text = error.error.message;
+  } else {
+    return false;
+  }
+
+  return text.includes(metricPrefix) && text.includes(metricSuffix);
+}
+
+/**
+ * Reports whether an error describes any exhausted quota metric.
+ *
+ * The message text is taken from the value itself when it is a string, from a
+ * top-level `message` when `isStructuredError` matches, or from
+ * `error.message` when `isApiError` matches.
+ *
+ * @param error - Value of unknown shape to inspect.
+ * @returns `true` when the extracted message contains
+ *   "Quota exceeded for quota metric"; `false` when it does not or when no
+ *   message can be extracted.
+ */
+export function isGenericQuotaExceededError(error: unknown): boolean {
+  const mentionsQuota = (text: string): boolean =>
+    text.includes('Quota exceeded for quota metric');
+
+  if (typeof error === 'string') {
+    return mentionsQuota(error);
+  }
+  if (isStructuredError(error)) {
+    return mentionsQuota(error.message);
+  }
+  return isApiError(error) ? mentionsQuota(error.error.message) : false;
+}
diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts
index a0294c31..f84d2004 100644
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -357,7 +357,10 @@ describe('retryWithBackoff', () => {
// Should fail with original error when fallback is rejected
expect(result).toBeInstanceOf(Error);
expect(result.message).toBe('Rate limit exceeded');
- expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+ expect(fallbackCallback).toHaveBeenCalledWith(
+ 'oauth-personal',
+ expect.any(Error),
+ );
});
it('should handle mixed error types (only count consecutive 429s)', async () => {
diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts
index f3f5f2d2..01651950 100644
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -5,13 +5,20 @@
*/
import { AuthType } from '../core/contentGenerator.js';
+import {
+ isProQuotaExceededError,
+ isGenericQuotaExceededError,
+} from './quotaErrorDetection.js';
export interface RetryOptions {
maxAttempts: number;
initialDelayMs: number;
maxDelayMs: number;
shouldRetry: (error: Error) => boolean;
- onPersistent429?: (authType?: string) => Promise<string | null>;
+ // Fallback hook: the retry loop passes the auth type and the error it
+ // caught. When the promise resolves to a truthy value the loop resets its
+ // attempt counter, 429 counter and delay, then tries again.
+ onPersistent429?: (
+ authType?: string,
+ error?: unknown,
+ ) => Promise<string | null>;
authType?: string;
}
@@ -86,6 +93,53 @@ export async function retryWithBackoff<T>(
} catch (error) {
const errorStatus = getErrorStatus(error);
+ // Check for Pro quota exceeded error first - immediate fallback for OAuth users
+ if (
+ errorStatus === 429 &&
+ authType === AuthType.LOGIN_WITH_GOOGLE &&
+ isProQuotaExceededError(error) &&
+ onPersistent429
+ ) {
+ try {
+ const fallbackModel = await onPersistent429(authType, error);
+ if (fallbackModel) {
+ // Reset attempt counter and try with new model
+ attempt = 0;
+ consecutive429Count = 0;
+ currentDelay = initialDelayMs;
+ // With the model updated, we continue to the next attempt
+ continue;
+ }
+ } catch (fallbackError) {
+ // If fallback fails, continue with original error
+ console.warn('Fallback to Flash model failed:', fallbackError);
+ }
+ }
+
+ // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
+ if (
+ errorStatus === 429 &&
+ authType === AuthType.LOGIN_WITH_GOOGLE &&
+ !isProQuotaExceededError(error) &&
+ isGenericQuotaExceededError(error) &&
+ onPersistent429
+ ) {
+ try {
+ const fallbackModel = await onPersistent429(authType, error);
+ if (fallbackModel) {
+ // Reset attempt counter and try with new model
+ attempt = 0;
+ consecutive429Count = 0;
+ currentDelay = initialDelayMs;
+ // With the model updated, we continue to the next attempt
+ continue;
+ }
+ } catch (fallbackError) {
+ // If fallback fails, continue with original error
+ console.warn('Fallback to Flash model failed:', fallbackError);
+ }
+ }
+
// Track consecutive 429 errors
if (errorStatus === 429) {
consecutive429Count++;
@@ -100,7 +154,7 @@ export async function retryWithBackoff<T>(
authType === AuthType.LOGIN_WITH_GOOGLE
) {
try {
- const fallbackModel = await onPersistent429(authType);
+ const fallbackModel = await onPersistent429(authType, error);
if (fallbackModel) {
// Reset attempt counter and try with new model
attempt = 0;