summaryrefslogtreecommitdiff
path: root/packages/cli/src
diff options
context:
space:
mode:
authorBryan Morgan <[email protected]>2025-07-09 13:55:56 -0400
committerGitHub <[email protected]>2025-07-09 17:55:56 +0000
commit8a6509ffeba271a8e7ccb83066a9a31a5d72a647 (patch)
treee67893a06d291f074e69f7f14d4f22ccbe3a6550 /packages/cli/src
parent01e756481f359a28aca0d5220f853daec8d25ed4 (diff)
Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)
Co-authored-by: Jenna Inouye <[email protected]>
Diffstat (limited to 'packages/cli/src')
-rw-r--r--packages/cli/src/ui/App.tsx52
-rw-r--r--packages/cli/src/ui/hooks/useGeminiStream.test.tsx16
-rw-r--r--packages/cli/src/ui/hooks/useGeminiStream.ts16
-rw-r--r--packages/cli/src/ui/utils/errorParsing.test.ts64
-rw-r--r--packages/cli/src/ui/utils/errorParsing.ts6
5 files changed, 93 insertions, 61 deletions
diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx
index 833cc2b5..e3a5eb55 100644
--- a/packages/cli/src/ui/App.tsx
+++ b/packages/cli/src/ui/App.tsx
@@ -70,6 +70,7 @@ import { UpdateNotification } from './components/UpdateNotification.js';
import {
isProQuotaExceededError,
isGenericQuotaExceededError,
+ UserTierId,
} from '@google/gemini-cli-core';
import { checkForUpdates } from './utils/updateCheck.js';
import ansiEscapes from 'ansi-escapes';
@@ -136,6 +137,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
const ctrlDTimerRef = useRef<NodeJS.Timeout | null>(null);
const [constrainHeight, setConstrainHeight] = useState<boolean>(true);
const [showPrivacyNotice, setShowPrivacyNotice] = useState<boolean>(false);
+ const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] =
+ useState<boolean>(false);
const openPrivacyNotice = useCallback(() => {
setShowPrivacyNotice(true);
@@ -251,23 +254,51 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
): Promise<boolean> => {
let message: string;
+ // For quota errors, assume FREE tier (safe default) - only show upgrade messaging to free tier users
+ // TODO: Get actual user tier from config when available
+ const userTier = undefined; // Defaults to FREE tier behavior
+ const isPaidTier =
+ userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
+
// Check if this is a Pro quota exceeded error
if (error && isProQuotaExceededError(error)) {
- message = `⚡ You have reached your daily ${currentModel} quota limit.
+ if (isPaidTier) {
+ message = `⚡ You have reached your daily ${currentModel} quota limit.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
+⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+ } else {
+ message = `⚡ You have reached your daily ${currentModel} quota limit.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
+ }
} else if (error && isGenericQuotaExceededError(error)) {
- message = `⚡ You have reached your daily quota limit.
+ if (isPaidTier) {
+ message = `⚡ You have reached your daily quota limit.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
+⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+ } else {
+ message = `⚡ You have reached your daily quota limit.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
+ }
} else {
- // Default fallback message for other cases (like consecutive 429s)
- message = `⚡ Slow response times detected.
-⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
+ if (isPaidTier) {
+ // Default fallback message for other cases (like consecutive 429s)
+ message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
+⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
+⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+ } else {
+ // Default fallback message for other cases (like consecutive 429s)
+ message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
+⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
+⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ You can switch authentication methods by typing /auth`;
+ }
}
// Add message to UI history
@@ -278,7 +309,14 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
},
Date.now(),
);
- return true; // Always accept the fallback
+
+ // Set the flag to prevent tool continuation
+ setModelSwitchedFromQuotaError(true);
+ // Set global quota error flag to prevent Flash model calls
+ config.setQuotaErrorOccurred(true);
+ // Switch model for future use but return false to stop current retry
+ config.setModel(fallbackModel);
+ return false; // Don't continue with current prompt
};
config.setFlashFallbackHandler(flashFallbackHandler);
@@ -445,6 +483,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
getPreferredEditor,
onAuthError,
performMemoryRefresh,
+ modelSwitchedFromQuotaError,
+ setModelSwitchedFromQuotaError,
);
pendingHistoryItems.push(...pendingGeminiHistoryItems);
const { elapsedTime, currentLoadingPhrase } =
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index fc6f93c5..62ade50f 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -301,6 +301,8 @@ describe('useGeminiStream', () => {
getUsageStatisticsEnabled: () => true,
getDebugMode: () => false,
addHistory: vi.fn(),
+ setQuotaErrorOccurred: vi.fn(),
+ getQuotaErrorOccurred: vi.fn(() => false),
} as unknown as Config;
mockOnDebugMessage = vi.fn();
mockHandleSlashCommand = vi.fn().mockResolvedValue(false);
@@ -386,6 +388,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
+ false,
+ () => {},
);
},
{
@@ -518,6 +522,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
+ false,
+ () => {},
),
);
@@ -582,6 +588,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
+ false,
+ () => {},
),
);
@@ -675,6 +683,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
+ false,
+ () => {},
),
);
@@ -775,6 +785,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
+ false,
+ () => {},
),
);
@@ -1063,6 +1075,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
mockPerformMemoryRefresh,
+ false,
+ () => {},
),
);
@@ -1113,6 +1127,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
+ false,
+ () => {},
),
);
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index 550cab86..d32c9ffa 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -90,6 +90,8 @@ export const useGeminiStream = (
getPreferredEditor: () => EditorType | undefined,
onAuthError: () => void,
performMemoryRefresh: () => Promise<void>,
+ modelSwitchedFromQuotaError: boolean,
+ setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
) => {
const [initError, setInitError] = useState<string | null>(null);
const abortControllerRef = useRef<AbortController | null>(null);
@@ -494,6 +496,12 @@ export const useGeminiStream = (
const userMessageTimestamp = Date.now();
setShowHelp(false);
+ // Reset quota error flag when starting a new query (not a continuation)
+ if (!options?.isContinuation) {
+ setModelSwitchedFromQuotaError(false);
+ config.setQuotaErrorOccurred(false);
+ }
+
abortControllerRef.current = new AbortController();
const abortSignal = abortControllerRef.current.signal;
turnCancelledRef.current = false;
@@ -552,6 +560,7 @@ export const useGeminiStream = (
[
streamingState,
setShowHelp,
+ setModelSwitchedFromQuotaError,
prepareQueryForGemini,
processGeminiStreamEvents,
pendingHistoryItemRef,
@@ -668,6 +677,12 @@ export const useGeminiStream = (
);
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
+
+ // Don't continue if model was switched due to quota error
+ if (modelSwitchedFromQuotaError) {
+ return;
+ }
+
submitQuery(mergePartListUnions(responsesToSend), {
isContinuation: true,
});
@@ -678,6 +693,7 @@ export const useGeminiStream = (
markToolsAsSubmitted,
geminiClient,
performMemoryRefresh,
+ modelSwitchedFromQuotaError,
],
);
diff --git a/packages/cli/src/ui/utils/errorParsing.test.ts b/packages/cli/src/ui/utils/errorParsing.test.ts
index 3d228efb..770dffad 100644
--- a/packages/cli/src/ui/utils/errorParsing.test.ts
+++ b/packages/cli/src/ui/utils/errorParsing.test.ts
@@ -39,7 +39,7 @@ describe('parseAndFormatApiError', () => {
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(
- 'Slow response times detected. Switching to the gemini-2.5-flash model',
+ 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
);
});
@@ -55,7 +55,7 @@ describe('parseAndFormatApiError', () => {
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(
- 'Slow response times detected. Switching to the gemini-2.5-flash model',
+ 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
);
});
@@ -169,7 +169,7 @@ describe('parseAndFormatApiError', () => {
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(
- 'Slow response times detected. Switching to the gemini-2.5-flash model',
+ 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
);
expect(result).not.toContain(
'You have reached your daily gemini-2.5-pro quota limit',
@@ -262,21 +262,17 @@ describe('parseAndFormatApiError', () => {
);
});
- it('should handle different Gemini version strings in Pro quota exceeded errors', () => {
- const errorMessage15 =
- 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+ it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
+ const errorMessage25 =
+ 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const errorMessagePreview =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
- const errorMessageBeta =
- 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
- const errorMessageExperimental =
- 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
- const result15 = parseAndFormatApiError(
- errorMessage15,
+ const result25 = parseAndFormatApiError(
+ errorMessage25,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
- 'gemini-1.5-pro',
+ 'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
const resultPreview = parseAndFormatApiError(
@@ -286,45 +282,19 @@ describe('parseAndFormatApiError', () => {
'gemini-2.5-preview-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
- const resultBeta = parseAndFormatApiError(
- errorMessageBeta,
- AuthType.LOGIN_WITH_GOOGLE,
- undefined,
- 'gemini-beta-3.0-pro',
- DEFAULT_GEMINI_FLASH_MODEL,
- );
- const resultExperimental = parseAndFormatApiError(
- errorMessageExperimental,
- AuthType.LOGIN_WITH_GOOGLE,
- undefined,
- 'gemini-experimental-v2-pro',
- DEFAULT_GEMINI_FLASH_MODEL,
- );
- expect(result15).toContain(
- 'You have reached your daily gemini-1.5-pro quota limit',
+ expect(result25).toContain(
+ 'You have reached your daily gemini-2.5-pro quota limit',
);
expect(resultPreview).toContain(
'You have reached your daily gemini-2.5-preview-pro quota limit',
);
- expect(resultBeta).toContain(
- 'You have reached your daily gemini-beta-3.0-pro quota limit',
- );
- expect(resultExperimental).toContain(
- 'You have reached your daily gemini-experimental-v2-pro quota limit',
- );
- expect(result15).toContain(
+ expect(result25).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
expect(resultPreview).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
- expect(resultBeta).toContain(
- 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
- );
- expect(resultExperimental).toContain(
- 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
- );
});
it('should not match non-Pro models with similar version strings', () => {
@@ -339,16 +309,6 @@ describe('parseAndFormatApiError', () => {
"Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
),
).toBe(false);
- expect(
- isProQuotaExceededError(
- "Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit",
- ),
- ).toBe(false);
- expect(
- isProQuotaExceededError(
- "Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit",
- ),
- ).toBe(false);
// Test other model types
expect(
diff --git a/packages/cli/src/ui/utils/errorParsing.ts b/packages/cli/src/ui/utils/errorParsing.ts
index 555d5e4e..5031bc0a 100644
--- a/packages/cli/src/ui/utils/errorParsing.ts
+++ b/packages/cli/src/ui/utils/errorParsing.ts
@@ -19,7 +19,7 @@ import {
const getRateLimitErrorMessageGoogleFree = (
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) =>
- `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+ `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
const getRateLimitErrorMessageGoogleProQuotaFree = (
currentModel: string = DEFAULT_GEMINI_MODEL,
@@ -34,7 +34,7 @@ const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
const getRateLimitErrorMessageGooglePaid = (
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) =>
- `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
+ `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
const getRateLimitErrorMessageGoogleProQuotaPaid = (
currentModel: string = DEFAULT_GEMINI_MODEL,
@@ -53,7 +53,7 @@ const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
const getRateLimitErrorMessageDefault = (
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) =>
- `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+ `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
function getRateLimitMessage(
authType?: AuthType,