Merge pull request #3971 from SillyTavern/ccllaauuddee

Assorted Claude adjustments
This commit is contained in:
Cohee
2025-05-09 00:02:30 +03:00
committed by GitHub
3 changed files with 21 additions and 14 deletions

View File

@ -1963,7 +1963,7 @@
</span>
</div>
</div>
<div class="range-block" data-source="makersuite,openrouter">
<div class="range-block" data-source="makersuite,openrouter,claude">
<label for="openai_enable_web_search" class="checkbox_label flexWrap widthFreeExpand">
<input id="openai_enable_web_search" type="checkbox" />
<span data-i18n="Enable web search">Enable web search</span>

View File

@ -148,7 +148,8 @@ async function sendClaudeRequest(request, response) {
const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
const useThinking = request.body.model.startsWith('claude-3-7') && Boolean(request.body.include_reasoning);
let voidPrefill = false;
const useWebSearch = /^claude-3-(5|7)/.test(request.body.model) && Boolean(request.body.enable_web_search);
let fixThinkingPrefill = false;
// Add custom stop sequences
const stopSequences = [];
if (Array.isArray(request.body.stop)) {
@ -183,15 +184,19 @@ async function sendClaudeRequest(request, response) {
.map(tool => tool.function)
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
if (requestBody.tools.length) {
// No prefill when using tools
voidPrefill = true;
}
if (enableSystemPromptCache && requestBody.tools.length) {
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
}
}
if (useWebSearch) {
const webSearchTool = [{
'type': 'web_search_20250305',
'name': 'web_search',
}];
requestBody.tools = [...(requestBody.tools || []), ...webSearchTool];
}
if (cachingAtDepth !== -1) {
cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth);
}
@ -200,11 +205,12 @@ async function sendClaudeRequest(request, response) {
betaHeaders.push('prompt-caching-2024-07-31');
}
if (useThinking) {
const reasoningEffort = request.body.reasoning_effort;
const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
if (useThinking && Number.isInteger(budgetTokens)) {
// No prefill when thinking
voidPrefill = true;
const reasoningEffort = request.body.reasoning_effort;
const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
fixThinkingPrefill = true;
const minThinkTokens = 1024;
if (requestBody.max_tokens <= minThinkTokens) {
const newValue = requestBody.max_tokens + minThinkTokens;
@ -223,8 +229,8 @@ async function sendClaudeRequest(request, response) {
delete requestBody.top_k;
}
if (voidPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
if (fixThinkingPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
convertedPrompt.messages[convertedPrompt.messages.length - 1].role = 'user';
}
if (betaHeaders.length) {

View File

@ -917,19 +917,20 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
* @param {number} maxTokens Maximum tokens
* @param {string} reasoningEffort Reasoning effort
* @param {boolean} stream If streaming is enabled
* @returns {number} Budget tokens
* @returns {number?} Budget tokens
*/
export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) {
let budgetTokens = 0;
switch (reasoningEffort) {
case REASONING_EFFORT.auto:
return null;
case REASONING_EFFORT.min:
budgetTokens = 1024;
break;
case REASONING_EFFORT.low:
budgetTokens = Math.floor(maxTokens * 0.1);
break;
case REASONING_EFFORT.auto:
case REASONING_EFFORT.medium:
budgetTokens = Math.floor(maxTokens * 0.25);
break;