Merge pull request #3971 from SillyTavern/ccllaauuddee

Assorted Claude adjustments
This commit is contained in:
Cohee
2025-05-09 00:02:30 +03:00
committed by GitHub
3 changed files with 21 additions and 14 deletions

View File

@@ -1963,7 +1963,7 @@
</span> </span>
</div> </div>
</div> </div>
<div class="range-block" data-source="makersuite,openrouter"> <div class="range-block" data-source="makersuite,openrouter,claude">
<label for="openai_enable_web_search" class="checkbox_label flexWrap widthFreeExpand"> <label for="openai_enable_web_search" class="checkbox_label flexWrap widthFreeExpand">
<input id="openai_enable_web_search" type="checkbox" /> <input id="openai_enable_web_search" type="checkbox" />
<span data-i18n="Enable web search">Enable web search</span> <span data-i18n="Enable web search">Enable web search</span>

View File

@@ -148,7 +148,8 @@ async function sendClaudeRequest(request, response) {
const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt; const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request)); const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
const useThinking = request.body.model.startsWith('claude-3-7') && Boolean(request.body.include_reasoning); const useThinking = request.body.model.startsWith('claude-3-7') && Boolean(request.body.include_reasoning);
let voidPrefill = false; const useWebSearch = /^claude-3-(5|7)/.test(request.body.model) && Boolean(request.body.enable_web_search);
let fixThinkingPrefill = false;
// Add custom stop sequences // Add custom stop sequences
const stopSequences = []; const stopSequences = [];
if (Array.isArray(request.body.stop)) { if (Array.isArray(request.body.stop)) {
@@ -183,15 +184,19 @@ async function sendClaudeRequest(request, response) {
.map(tool => tool.function) .map(tool => tool.function)
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters })); .map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
if (requestBody.tools.length) {
// No prefill when using tools
voidPrefill = true;
}
if (enableSystemPromptCache && requestBody.tools.length) { if (enableSystemPromptCache && requestBody.tools.length) {
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' }; requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
} }
} }
if (useWebSearch) {
const webSearchTool = [{
'type': 'web_search_20250305',
'name': 'web_search',
}];
requestBody.tools = [...(requestBody.tools || []), ...webSearchTool];
}
if (cachingAtDepth !== -1) { if (cachingAtDepth !== -1) {
cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth); cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth);
} }
@@ -200,11 +205,12 @@ async function sendClaudeRequest(request, response) {
betaHeaders.push('prompt-caching-2024-07-31'); betaHeaders.push('prompt-caching-2024-07-31');
} }
if (useThinking) { const reasoningEffort = request.body.reasoning_effort;
const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
if (useThinking && Number.isInteger(budgetTokens)) {
// No prefill when thinking // No prefill when thinking
voidPrefill = true; fixThinkingPrefill = true;
const reasoningEffort = request.body.reasoning_effort;
const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
const minThinkTokens = 1024; const minThinkTokens = 1024;
if (requestBody.max_tokens <= minThinkTokens) { if (requestBody.max_tokens <= minThinkTokens) {
const newValue = requestBody.max_tokens + minThinkTokens; const newValue = requestBody.max_tokens + minThinkTokens;
@@ -223,8 +229,8 @@ async function sendClaudeRequest(request, response) {
delete requestBody.top_k; delete requestBody.top_k;
} }
if (voidPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') { if (fixThinkingPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] }); convertedPrompt.messages[convertedPrompt.messages.length - 1].role = 'user';
} }
if (betaHeaders.length) { if (betaHeaders.length) {

View File

@@ -917,19 +917,20 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
* @param {number} maxTokens Maximum tokens * @param {number} maxTokens Maximum tokens
* @param {string} reasoningEffort Reasoning effort * @param {string} reasoningEffort Reasoning effort
* @param {boolean} stream If streaming is enabled * @param {boolean} stream If streaming is enabled
* @returns {number} Budget tokens * @returns {number?} Budget tokens
*/ */
export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) { export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) {
let budgetTokens = 0; let budgetTokens = 0;
switch (reasoningEffort) { switch (reasoningEffort) {
case REASONING_EFFORT.auto:
return null;
case REASONING_EFFORT.min: case REASONING_EFFORT.min:
budgetTokens = 1024; budgetTokens = 1024;
break; break;
case REASONING_EFFORT.low: case REASONING_EFFORT.low:
budgetTokens = Math.floor(maxTokens * 0.1); budgetTokens = Math.floor(maxTokens * 0.1);
break; break;
case REASONING_EFFORT.auto:
case REASONING_EFFORT.medium: case REASONING_EFFORT.medium:
budgetTokens = Math.floor(maxTokens * 0.25); budgetTokens = Math.floor(maxTokens * 0.25);
break; break;