diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js
index e440d6c01..b6cce98a0 100644
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -200,11 +200,12 @@ async function sendClaudeRequest(request, response) {
             betaHeaders.push('prompt-caching-2024-07-31');
         }
 
-        if (useThinking) {
+        const reasoningEffort = request.body.reasoning_effort;
+        const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
+
+        if (useThinking && Number.isInteger(budgetTokens)) {
             // No prefill when thinking
             voidPrefill = true;
-            const reasoningEffort = request.body.reasoning_effort;
-            const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
             const minThinkTokens = 1024;
             if (requestBody.max_tokens <= minThinkTokens) {
                 const newValue = requestBody.max_tokens + minThinkTokens;
diff --git a/src/prompt-converters.js b/src/prompt-converters.js
index c894a4736..ae4c0c0fb 100644
--- a/src/prompt-converters.js
+++ b/src/prompt-converters.js
@@ -917,19 +917,20 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
  * @param {number} maxTokens Maximum tokens
  * @param {string} reasoningEffort Reasoning effort
  * @param {boolean} stream If streaming is enabled
- * @returns {number} Budget tokens
+ * @returns {number?} Budget tokens
  */
 export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) {
     let budgetTokens = 0;
 
     switch (reasoningEffort) {
+        case REASONING_EFFORT.auto:
+            return null;
         case REASONING_EFFORT.min:
             budgetTokens = 1024;
             break;
         case REASONING_EFFORT.low:
             budgetTokens = Math.floor(maxTokens * 0.1);
             break;
-        case REASONING_EFFORT.auto:
         case REASONING_EFFORT.medium:
             budgetTokens = Math.floor(maxTokens * 0.25);
             break;
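
Not part of the patch: a minimal sketch of how the changed pieces fit together after this diff, under the assumption that REASONING_EFFORT.auto resolves to the plain string 'auto' (and 'low' likewise). The numeric comments come straight from the switch cases shown above; anything the function does after the switch (e.g. further clamping before the final return) is not visible in this diff, and the import path is illustrative.

// sketch.js — illustrative only, not shipped with the change
import { calculateClaudeBudgetTokens } from './src/prompt-converters.js';

// Hypothetical request values, for illustration.
const maxTokens = 4096;
const stream = true;

// With the new case, 'auto' returns null before the switch assigns anything,
// so Number.isInteger(budgetTokens) is false and the thinking setup block in
// sendClaudeRequest() is skipped entirely.
const autoBudget = calculateClaudeBudgetTokens(maxTokens, 'auto', stream);
console.log(Number.isInteger(autoBudget)); // false (autoBudget === null)

// Concrete efforts still produce an integer in the switch:
//   min    -> 1024
//   low    -> Math.floor(4096 * 0.10) === 409
//   medium -> Math.floor(4096 * 0.25) === 1024
const lowBudget = calculateClaudeBudgetTokens(maxTokens, 'low', stream);
if (Number.isInteger(lowBudget)) {
    // Mirrors the updated guard in sendClaudeRequest(): thinking is only
    // configured when a concrete budget exists.
    console.log(`budget_tokens=${lowBudget}`);
}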