OpenRouter: Support reasoning blocks

2025-02-18 21:20:39 +01:00 · 2025-01-24 00:56:44 +02:00 · 2025-01-24 00:56:44 +02:00 · 03c98fb55a
commit 03c98fb55a
parent 7f9b139ae0
7 changed files with 37 additions and 31 deletions
--- a/public/index.html
+++ b/public/index.html
@@ -1977,7 +1977,7 @@
                                            </span>
                                        </div>
                                    </div>
-                                    <div class="range-block" data-source="makersuite,deepseek">
+                                    <div class="range-block" data-source="makersuite,deepseek,openrouter">
                                        <label for="openai_show_thoughts" class="checkbox_label widthFreeExpand">
                                            <input id="openai_show_thoughts" type="checkbox" />
                                            <span>
--- a/public/script.js
+++ b/public/script.js
@@ -170,7 +170,7 @@ import {
    isElementInViewport,
    copyText,
 } from './scripts/utils.js';
-import { debounce_timeout, THINK_BREAK } from './scripts/constants.js';
+import { debounce_timeout } from './scripts/constants.js';

 import { doDailyExtensionUpdatesCheck, extension_settings, initExtensions, loadExtensionSettings, runGenerationInterceptors, saveMetadataDebounced } from './scripts/extensions.js';
 import { COMMENT_NAME_DEFAULT, executeSlashCommandsOnChatInput, getSlashCommandsHelp, initDefaultSlashCommands, isExecutingCommandsFromChatInput, pauseScriptExecution, processChatSlashCommands, stopScriptExecution } from './scripts/slash-commands.js';
@@ -5700,10 +5700,6 @@ function getTextContextFromData(data) {
 function extractMessageFromData(data){
    const content = String(getTextContextFromData(data) ?? '');

-    if (content.includes(THINK_BREAK)) {
-        return content.split(THINK_BREAK)[1];
-    }
-
    return content;
 }

@@ -5713,14 +5709,15 @@ function extractMessageFromData(data){
 * @returns {string} Extracted reasoning
 */
 function extractReasoningFromData(data) {
-    const content = String(getTextContextFromData(data) ?? '');
-
-    if (content.includes(THINK_BREAK)) {
-        return content.split(THINK_BREAK)[0];
-    }
-
-    if (main_api === 'openai' && oai_settings.chat_completion_source === chat_completion_sources.DEEPSEEK && oai_settings.show_thoughts) {
-        return data?.choices?.[0]?.message?.reasoning_content ?? '';
+    if (main_api === 'openai' && oai_settings.show_thoughts) {
+        switch (oai_settings.chat_completion_source) {
+            case chat_completion_sources.DEEPSEEK:
+                return data?.choices?.[0]?.message?.reasoning_content ?? '';
+            case chat_completion_sources.OPENROUTER:
+                return data?.choices?.[0]?.message?.reasoning ?? '';
+            case chat_completion_sources.MAKERSUITE:
+                return data?.responseContent?.parts?.filter(part => part.thought)?.map(part => part.text)?.join('\n\n') ?? '';
+        }
    }

    return '';
--- a/public/scripts/constants.js
+++ b/public/scripts/constants.js
@@ -14,8 +14,3 @@ export const debounce_timeout = {
    /** [5 sec] For delayed tasks, like auto-saving or completing batch operations that need a significant pause. */
    extended: 5000,
 };
-
-/**
- * Custom boundary for splitting the text between the model's reasoning and the actual response.
- */
-export const THINK_BREAK = '##<23>THINK_BREAK<41>##';
--- a/public/scripts/openai.js
+++ b/public/scripts/openai.js
@@ -2161,6 +2161,11 @@ function getStreamingReply(data, state) {
            state.reasoning += (data.choices?.filter(x => x?.delta?.reasoning_content)?.[0]?.delta?.reasoning_content || '');
        }
        return data.choices?.[0]?.delta?.content || '';
+    } else if (oai_settings.chat_completion_source === chat_completion_sources.OPENROUTER) {
+        if (oai_settings.show_thoughts) {
+            state.reasoning += (data.choices?.filter(x => x?.delta?.reasoning)?.[0]?.delta?.reasoning || '');
+        }
+        return data.choices?.[0]?.delta?.content ?? data.choices?.[0]?.message?.content ?? data.choices?.[0]?.text ?? '';
    } else  {
        return data.choices?.[0]?.delta?.content ?? data.choices?.[0]?.message?.content ?? data.choices?.[0]?.text ?? '';
    }
--- a/public/scripts/sse-stream.js
+++ b/public/scripts/sse-stream.js
@@ -235,6 +235,21 @@ async function* parseStreamData(json) {
                }
                return;
            }
+            else if (typeof json.choices[0].delta.reasoning === 'string' && json.choices[0].delta.reasoning.length > 0) {
+                for (let j = 0; j < json.choices[0].delta.reasoning.length; j++) {
+                    const str = json.choices[0].delta.reasoning[j];
+                    const isLastSymbol = j === json.choices[0].delta.reasoning.length - 1;
+                    const choiceClone = structuredClone(json.choices[0]);
+                    choiceClone.delta.reasoning = str;
+                    choiceClone.delta.content = isLastSymbol ? choiceClone.delta.content : '';
+                    const choices = [choiceClone];
+                    yield {
+                        data: { ...json, choices },
+                        chunk: str,
+                    };
+                }
+                return;
+            }
            else if (typeof json.choices[0].delta.content === 'string' && json.choices[0].delta.content.length > 0) {
                for (let j = 0; j < json.choices[0].delta.content.length; j++) {
                    const str = json.choices[0].delta.content[j];
--- a/src/constants.js
+++ b/src/constants.js
@@ -413,8 +413,3 @@ export const VLLM_KEYS = [
    'guided_decoding_backend',
    'guided_whitespace_pattern',
 ];
-
-/**
- * Custom boundary for splitting the text between the model's reasoning and the actual response.
- */
-export const THINK_BREAK = '##<23>THINK_BREAK<41>##';
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -7,7 +7,6 @@ import {
    CHAT_COMPLETION_SOURCES,
    GEMINI_SAFETY,
    OPENROUTER_HEADERS,
-    THINK_BREAK,
 } from '../../constants.js';
 import {
    forwardFetchResponse,
@@ -392,11 +391,7 @@ async function sendMakerSuiteRequest(request, response) {
            const responseContent = candidates[0].content ?? candidates[0].output;
            console.log('Google AI Studio response:', responseContent);

-            if (Array.isArray(responseContent?.parts) && isThinking && !showThoughts) {
-                responseContent.parts = responseContent.parts.filter(part => !part.thought);
-            }
-
-            const responseText = typeof responseContent === 'string' ? responseContent : responseContent?.parts?.map(part => part.text)?.join(THINK_BREAK);
+            const responseText = typeof responseContent === 'string' ? responseContent : responseContent?.parts?.filter(part => !part.thought)?.map(part => part.text)?.join('\n\n');
            if (!responseText) {
                let message = 'Google AI Studio Candidate text empty';
                console.log(message, generateResponseJson);
@@ -404,7 +399,7 @@ async function sendMakerSuiteRequest(request, response) {
            }

            // Wrap it back to OAI format
-            const reply = { choices: [{ 'message': { 'content': responseText } }] };
+            const reply = { choices: [{ 'message': { 'content': responseText } }], responseContent };
            return response.send(reply);
        }
    } catch (error) {
@@ -993,6 +988,10 @@ router.post('/generate', jsonParser, function (request, response) {
            bodyParams['route'] = 'fallback';
        }

+        if (request.body.show_thoughts) {
+            bodyParams['include_reasoning'] = true;
+        }
+
        let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1);
        if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model?.startsWith('anthropic/claude-3')) {
            cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth);