diff options
Diffstat (limited to 'packages/cli/src')
| -rw-r--r-- | packages/cli/src/ui/App.tsx | 52 |
| -rw-r--r-- | packages/cli/src/ui/hooks/useGeminiStream.test.tsx | 16 |
| -rw-r--r-- | packages/cli/src/ui/hooks/useGeminiStream.ts | 16 |
| -rw-r--r-- | packages/cli/src/ui/utils/errorParsing.test.ts | 64 |
| -rw-r--r-- | packages/cli/src/ui/utils/errorParsing.ts | 6 |
5 files changed, 93 insertions, 61 deletions
diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx index 833cc2b5..e3a5eb55 100644 --- a/packages/cli/src/ui/App.tsx +++ b/packages/cli/src/ui/App.tsx @@ -70,6 +70,7 @@ import { UpdateNotification } from './components/UpdateNotification.js'; import { isProQuotaExceededError, isGenericQuotaExceededError, + UserTierId, } from '@google/gemini-cli-core'; import { checkForUpdates } from './utils/updateCheck.js'; import ansiEscapes from 'ansi-escapes'; @@ -136,6 +137,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { const ctrlDTimerRef = useRef<NodeJS.Timeout | null>(null); const [constrainHeight, setConstrainHeight] = useState<boolean>(true); const [showPrivacyNotice, setShowPrivacyNotice] = useState<boolean>(false); + const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] = + useState<boolean>(false); const openPrivacyNotice = useCallback(() => { setShowPrivacyNotice(true); @@ -251,23 +254,51 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { ): Promise<boolean> => { let message: string; + // For quota errors, assume FREE tier (safe default) - only show upgrade messaging to free tier users + // TODO: Get actual user tier from config when available + const userTier = undefined; // Defaults to FREE tier behavior + const isPaidTier = + userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD; + // Check if this is a Pro quota exceeded error if (error && isProQuotaExceededError(error)) { - message = `⚡ You have reached your daily ${currentModel} quota limit. + if (isPaidTier) { + message = `⚡ You have reached your daily ${currentModel} quota limit. +⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. 
+⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + } else { + message = `⚡ You have reached your daily ${currentModel} quota limit. ⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. ⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key ⚡ You can switch authentication methods by typing /auth`; + } } else if (error && isGenericQuotaExceededError(error)) { - message = `⚡ You have reached your daily quota limit. + if (isPaidTier) { + message = `⚡ You have reached your daily quota limit. +⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. +⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + } else { + message = `⚡ You have reached your daily quota limit. ⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. ⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key ⚡ You can switch authentication methods by typing /auth`; + } } else { - // Default fallback message for other cases (like consecutive 429s) - message = `⚡ Slow response times detected. 
-⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`; + if (isPaidTier) { + // Default fallback message for other cases (like consecutive 429s) + message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session. +⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit +⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + } else { + // Default fallback message for other cases (like consecutive 429s) + message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session. +⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit +⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist +⚡ Or you can utilize a Gemini API Key. 
See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key +⚡ You can switch authentication methods by typing /auth`; + } } // Add message to UI history @@ -278,7 +309,14 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { }, Date.now(), ); - return true; // Always accept the fallback + + // Set the flag to prevent tool continuation + setModelSwitchedFromQuotaError(true); + // Set global quota error flag to prevent Flash model calls + config.setQuotaErrorOccurred(true); + // Switch model for future use but return false to stop current retry + config.setModel(fallbackModel); + return false; // Don't continue with current prompt }; config.setFlashFallbackHandler(flashFallbackHandler); @@ -445,6 +483,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { getPreferredEditor, onAuthError, performMemoryRefresh, + modelSwitchedFromQuotaError, + setModelSwitchedFromQuotaError, ); pendingHistoryItems.push(...pendingGeminiHistoryItems); const { elapsedTime, currentLoadingPhrase } = diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index fc6f93c5..62ade50f 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -301,6 +301,8 @@ describe('useGeminiStream', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, addHistory: vi.fn(), + setQuotaErrorOccurred: vi.fn(), + getQuotaErrorOccurred: vi.fn(() => false), } as unknown as Config; mockOnDebugMessage = vi.fn(); mockHandleSlashCommand = vi.fn().mockResolvedValue(false); @@ -386,6 +388,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ); }, { @@ -518,6 +522,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); @@ -582,6 +588,8 @@ describe('useGeminiStream', () => { () => 'vscode' as 
EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); @@ -675,6 +683,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); @@ -775,6 +785,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); @@ -1063,6 +1075,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, mockPerformMemoryRefresh, + false, + () => {}, ), ); @@ -1113,6 +1127,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 550cab86..d32c9ffa 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -90,6 +90,8 @@ export const useGeminiStream = ( getPreferredEditor: () => EditorType | undefined, onAuthError: () => void, performMemoryRefresh: () => Promise<void>, + modelSwitchedFromQuotaError: boolean, + setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>, ) => { const [initError, setInitError] = useState<string | null>(null); const abortControllerRef = useRef<AbortController | null>(null); @@ -494,6 +496,12 @@ export const useGeminiStream = ( const userMessageTimestamp = Date.now(); setShowHelp(false); + // Reset quota error flag when starting a new query (not a continuation) + if (!options?.isContinuation) { + setModelSwitchedFromQuotaError(false); + config.setQuotaErrorOccurred(false); + } + abortControllerRef.current = new AbortController(); const abortSignal = abortControllerRef.current.signal; turnCancelledRef.current = false; @@ -552,6 +560,7 @@ export const useGeminiStream = ( [ streamingState, setShowHelp, + setModelSwitchedFromQuotaError, prepareQueryForGemini, processGeminiStreamEvents, pendingHistoryItemRef, @@ -668,6 +677,12 @@ 
export const useGeminiStream = ( ); markToolsAsSubmitted(callIdsToMarkAsSubmitted); + + // Don't continue if model was switched due to quota error + if (modelSwitchedFromQuotaError) { + return; + } + submitQuery(mergePartListUnions(responsesToSend), { isContinuation: true, }); @@ -678,6 +693,7 @@ export const useGeminiStream = ( markToolsAsSubmitted, geminiClient, performMemoryRefresh, + modelSwitchedFromQuotaError, ], ); diff --git a/packages/cli/src/ui/utils/errorParsing.test.ts b/packages/cli/src/ui/utils/errorParsing.test.ts index 3d228efb..770dffad 100644 --- a/packages/cli/src/ui/utils/errorParsing.test.ts +++ b/packages/cli/src/ui/utils/errorParsing.test.ts @@ -39,7 +39,7 @@ describe('parseAndFormatApiError', () => { ); expect(result).toContain('[API Error: Rate limit exceeded'); expect(result).toContain( - 'Slow response times detected. Switching to the gemini-2.5-flash model', + 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', ); }); @@ -55,7 +55,7 @@ describe('parseAndFormatApiError', () => { ); expect(result).toContain('[API Error: Rate limit exceeded'); expect(result).toContain( - 'Slow response times detected. Switching to the gemini-2.5-flash model', + 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', ); }); @@ -169,7 +169,7 @@ describe('parseAndFormatApiError', () => { ); expect(result).toContain('[API Error: Rate limit exceeded'); expect(result).toContain( - 'Slow response times detected. Switching to the gemini-2.5-flash model', + 'Possible quota limitations in place or slow response times detected. 
Switching to the gemini-2.5-flash model', ); expect(result).not.toContain( 'You have reached your daily gemini-2.5-pro quota limit', @@ -262,21 +262,17 @@ describe('parseAndFormatApiError', () => { ); }); - it('should handle different Gemini version strings in Pro quota exceeded errors', () => { - const errorMessage15 = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => { + const errorMessage25 = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; const errorMessagePreview = 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const errorMessageBeta = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const errorMessageExperimental = - 'got status: 429 Too Many Requests. 
{"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const result15 = parseAndFormatApiError( - errorMessage15, + const result25 = parseAndFormatApiError( + errorMessage25, AuthType.LOGIN_WITH_GOOGLE, undefined, - 'gemini-1.5-pro', + 'gemini-2.5-pro', DEFAULT_GEMINI_FLASH_MODEL, ); const resultPreview = parseAndFormatApiError( @@ -286,45 +282,19 @@ describe('parseAndFormatApiError', () => { 'gemini-2.5-preview-pro', DEFAULT_GEMINI_FLASH_MODEL, ); - const resultBeta = parseAndFormatApiError( - errorMessageBeta, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-beta-3.0-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - const resultExperimental = parseAndFormatApiError( - errorMessageExperimental, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-experimental-v2-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result15).toContain( - 'You have reached your daily gemini-1.5-pro quota limit', + expect(result25).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', ); expect(resultPreview).toContain( 'You have reached your daily gemini-2.5-preview-pro quota limit', ); - expect(resultBeta).toContain( - 'You have reached your daily gemini-beta-3.0-pro quota limit', - ); - expect(resultExperimental).toContain( - 'You have reached your daily gemini-experimental-v2-pro quota limit', - ); - expect(result15).toContain( + expect(result25).toContain( 'upgrade to a Gemini Code Assist Standard or Enterprise plan', ); expect(resultPreview).toContain( 'upgrade to a Gemini Code Assist Standard or Enterprise plan', ); - expect(resultBeta).toContain( - 'upgrade to a Gemini Code Assist Standard or Enterprise plan', - ); - expect(resultExperimental).toContain( - 'upgrade to a Gemini Code Assist Standard or Enterprise plan', - ); }); it('should not match non-Pro models 
with similar version strings', () => { @@ -339,16 +309,6 @@ describe('parseAndFormatApiError', () => { "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit", ), ).toBe(false); - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit", - ), - ).toBe(false); - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit", - ), - ).toBe(false); // Test other model types expect( diff --git a/packages/cli/src/ui/utils/errorParsing.ts b/packages/cli/src/ui/utils/errorParsing.ts index 555d5e4e..5031bc0a 100644 --- a/packages/cli/src/ui/utils/errorParsing.ts +++ b/packages/cli/src/ui/utils/errorParsing.ts @@ -19,7 +19,7 @@ import { const getRateLimitErrorMessageGoogleFree = ( fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, ) => - `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; + `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; const getRateLimitErrorMessageGoogleProQuotaFree = ( currentModel: string = DEFAULT_GEMINI_MODEL, @@ -34,7 +34,7 @@ const getRateLimitErrorMessageGoogleGenericQuotaFree = () => const getRateLimitErrorMessageGooglePaid = ( fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, ) => - `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`; + `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. 
We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`; const getRateLimitErrorMessageGoogleProQuotaPaid = ( currentModel: string = DEFAULT_GEMINI_MODEL, @@ -53,7 +53,7 @@ const RATE_LIMIT_ERROR_MESSAGE_VERTEX = const getRateLimitErrorMessageDefault = ( fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, ) => - `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; + `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; function getRateLimitMessage( authType?: AuthType, |
