diff --git a/public/index.html b/public/index.html
index 3984afa5e..3fe2162fb 100644
--- a/public/index.html
+++ b/public/index.html
@@ -6229,6 +6229,7 @@
+                        <div class="mes_reasoning"></div>
diff --git a/public/script.js b/public/script.js
index 3ba3aacbc..b641e161c 100644
--- a/public/script.js
+++ b/public/script.js
@@ -170,7 +170,7 @@ import {
     isElementInViewport,
     copyText,
 } from './scripts/utils.js';
-import { debounce_timeout } from './scripts/constants.js';
+import { debounce_timeout, THINK_BREAK } from './scripts/constants.js';
 import { doDailyExtensionUpdatesCheck, extension_settings, initExtensions, loadExtensionSettings, runGenerationInterceptors, saveMetadataDebounced } from './scripts/extensions.js';
 import { COMMENT_NAME_DEFAULT, executeSlashCommandsOnChatInput, getSlashCommandsHelp, initDefaultSlashCommands, isExecutingCommandsFromChatInput, pauseScriptExecution, processChatSlashCommands, stopScriptExecution } from './scripts/slash-commands.js';
@@ -2199,6 +2199,7 @@ function getMessageFromTemplate({
     isUser,
     avatarImg,
     bias,
+    reasoning,
     isSystem,
     title,
     timerValue,
@@ -2223,6 +2224,7 @@ function getMessageFromTemplate({
     mes.find('.avatar img').attr('src', avatarImg);
     mes.find('.ch_name .name_text').text(characterName);
     mes.find('.mes_bias').html(bias);
+    mes.find('.mes_reasoning').html(reasoning);
     mes.find('.timestamp').text(timestamp).attr('title', `${extra?.api ? extra.api + ' - ' : ''}${extra?.model ?? ''}`);
     mes.find('.mesIDDisplay').text(`#${mesId}`);
     tokenCount && mes.find('.tokenCounterDisplay').text(`${tokenCount}t`);
@@ -2241,6 +2243,7 @@ export function updateMessageBlock(messageId, message) {
     const messageElement = $(`#chat [mesid="${messageId}"]`);
     const text = message?.extra?.display_text ?? message.mes;
     messageElement.find('.mes_text').html(messageFormatting(text, message.name, message.is_system, message.is_user, messageId));
+    messageElement.find('.mes_reasoning').html(messageFormatting(message.extra?.reasoning ?? '', '', false, false, -1));
     addCopyToCodeBlocks(messageElement);
     appendMediaToMessage(message, messageElement);
 }
@@ -2399,6 +2402,7 @@ export function addOneMessage(mes, { type = 'normal', insertAfter = null, scroll
         sanitizerOverrides,
     );
     const bias = messageFormatting(mes.extra?.bias ?? '', '', false, false, -1);
+    const reasoning = messageFormatting(mes.extra?.reasoning ?? '', '', false, false, -1);
     let bookmarkLink = mes?.extra?.bookmark_link ?? '';
 
     let params = {
@@ -2408,6 +2412,7 @@ export function addOneMessage(mes, { type = 'normal', insertAfter = null, scroll
         isUser: mes.is_user,
         avatarImg: avatarImg,
         bias: bias,
+        reasoning: reasoning,
         isSystem: isSystem,
         title: title,
         bookmarkLink: bookmarkLink,
@@ -2467,6 +2472,7 @@ export function addOneMessage(mes, { type = 'normal', insertAfter = null, scroll
         const swipeMessage = chatElement.find(`[mesid="${chat.length - 1}"]`);
         swipeMessage.attr('swipeid', params.swipeId);
         swipeMessage.find('.mes_text').html(messageText).attr('title', title);
+        swipeMessage.find('.mes_reasoning').html(reasoning);
         swipeMessage.find('.timestamp').text(timestamp).attr('title', `${params.extra.api} - ${params.extra.model}`);
         appendMediaToMessage(mes, swipeMessage);
         if (power_user.timestamp_model_icon && params.extra?.api) {
@@ -3077,6 +3083,7 @@ class StreamingProcessor {
         this.messageTextDom = null;
         this.messageTimerDom = null;
         this.messageTokenCounterDom = null;
+        this.messageReasoningDom = null;
         /** @type {HTMLTextAreaElement} */
         this.sendTextarea = document.querySelector('#send_textarea');
         this.type = type;
@@ -3092,6 +3099,7 @@ class StreamingProcessor {
         /** @type {import('./scripts/logprobs.js').TokenLogprobs[]} */
         this.messageLogprobs = [];
         this.toolCalls = [];
+        this.reasoning = '';
     }
 
     #checkDomElements(messageId) {
@@ -3100,6 +3108,7 @@ class StreamingProcessor {
             this.messageTextDom = this.messageDom?.querySelector('.mes_text');
             this.messageTimerDom = this.messageDom?.querySelector('.mes_timer');
             this.messageTokenCounterDom = this.messageDom?.querySelector('.tokenCounterDisplay');
+            this.messageReasoningDom = this.messageDom?.querySelector('.mes_reasoning');
         }
     }
 
@@ -3184,11 +3193,17 @@ class StreamingProcessor {
         chat[messageId]['gen_started'] = this.timeStarted;
         chat[messageId]['gen_finished'] = currentTime;
 
-        if (currentTokenCount) {
-            if (!chat[messageId]['extra']) {
-                chat[messageId]['extra'] = {};
-            }
+        if (!chat[messageId]['extra']) {
+            chat[messageId]['extra'] = {};
+        }
 
+        if (this.reasoning && this.messageReasoningDom instanceof HTMLElement) {
+            chat[messageId]['extra']['reasoning'] = this.reasoning;
+            const formattedReasoning = messageFormatting(this.reasoning, '', false, false, -1);
+            this.messageReasoningDom.innerHTML = formattedReasoning;
+        }
+
+        if (currentTokenCount) {
             chat[messageId]['extra']['token_count'] = currentTokenCount;
             if (this.messageTokenCounterDom instanceof HTMLElement) {
                 this.messageTokenCounterDom.textContent = `${currentTokenCount}t`;
@@ -3320,7 +3335,7 @@ class StreamingProcessor {
     }
 
     /**
-     * @returns {Generator<{ text: string, swipes: string[], logprobs: import('./scripts/logprobs.js').TokenLogprobs, toolCalls: any[] }, void, void>}
+     * @returns {Generator<{ text: string, swipes: string[], logprobs: import('./scripts/logprobs.js').TokenLogprobs, toolCalls: any[], state: any }, void, void>}
      */
     *nullStreamingGeneration() {
         throw new Error('Generation function for streaming is not hooked up');
@@ -3342,7 +3357,7 @@ class StreamingProcessor {
         try {
             const sw = new Stopwatch(1000 / power_user.streaming_fps);
             const timestamps = [];
-            for await (const { text, swipes, logprobs, toolCalls } of this.generator()) {
+            for await (const { text, swipes, logprobs, toolCalls, state } of this.generator()) {
                 timestamps.push(Date.now());
                 if (this.isStopped) {
                     return;
@@ -3354,6 +3369,7 @@ class StreamingProcessor {
                 if (logprobs) {
                     this.messageLogprobs.push(...(Array.isArray(logprobs) ? logprobs : [logprobs]));
                 }
+                this.reasoning = state?.reasoning ?? '';
                 await eventSource.emit(event_types.STREAM_TOKEN_RECEIVED, text);
                 await sw.tick(() => this.onProgressStreaming(this.messageId, this.continueMessage + text));
             }
@@ -4741,6 +4757,7 @@ export async function Generate(type, { automatic_trigger, force_name2, quiet_pro
             //const getData = await response.json();
             let getMessage = extractMessageFromData(data);
             let title = extractTitleFromData(data);
+            let reasoning = extractReasoningFromData(data);
             kobold_horde_model = title;
 
             const swipes = extractMultiSwipes(data, type);
@@ -4767,10 +4784,10 @@ export async function Generate(type, { automatic_trigger, force_name2, quiet_pro
         else {
             // Without streaming we'll be having a full message on continuation. Treat it as a last chunk.
             if (originalType !== 'continue') {
-                ({ type, getMessage } = await saveReply(type, getMessage, false, title, swipes));
+                ({ type, getMessage } = await saveReply(type, getMessage, false, title, swipes, reasoning));
             }
             else {
-                ({ type, getMessage } = await saveReply('appendFinal', getMessage, false, title, swipes));
+                ({ type, getMessage } = await saveReply('appendFinal', getMessage, false, title, swipes, reasoning));
             }
 
             // This relies on `saveReply` having been called to add the message to the chat, so it must be last.
@@ -5649,42 +5666,65 @@ function parseAndSaveLogprobs(data, continueFrom) {
 }
 
 /**
- * Extracts the message from the response data.
- * @param {object} data Response data
- * @returns {string} Extracted message
+ * Gets the text context from the response data.
+ * @param {object} data Response JSON data
+ * @returns {string} Extracted text
  */
-function extractMessageFromData(data) {
+function getTextContextFromData(data) {
     if (typeof data === 'string') {
         return data;
     }
 
-    function getTextContext() {
-        switch (main_api) {
-            case 'kobold':
-                return data.results[0].text;
-            case 'koboldhorde':
-                return data.text;
-            case 'textgenerationwebui':
-                return data.choices?.[0]?.text ?? data.content ?? data.response ?? '';
-            case 'novel':
-                return data.output;
-            case 'openai':
-                return data?.choices?.[0]?.message?.content ?? data?.choices?.[0]?.text ?? data?.text ?? data?.message?.content?.[0]?.text ?? data?.message?.tool_plan ?? '';
-            default:
-                return '';
-        }
+    switch (main_api) {
+        case 'kobold':
+            return data.results[0].text;
+        case 'koboldhorde':
+            return data.text;
+        case 'textgenerationwebui':
+            return data.choices?.[0]?.text ?? data.content ?? data.response ?? '';
+        case 'novel':
+            return data.output;
+        case 'openai':
+            return data?.choices?.[0]?.message?.content ?? data?.choices?.[0]?.text ?? data?.text ?? data?.message?.content?.[0]?.text ?? data?.message?.tool_plan ?? '';
+        default:
+            return '';
     }
+}
 
-    const content = getTextContext();
+/**
+ * Extracts the message from the response data.
+ * @param {object} data Response data
+ * @returns {string} Extracted message
+ */
+function extractMessageFromData(data) {
+    const content = String(getTextContextFromData(data) ?? '');
 
-    if (main_api === 'openai' && oai_settings.chat_completion_source === chat_completion_sources.DEEPSEEK && oai_settings.show_thoughts) {
-        const thoughts = data?.choices?.[0]?.message?.reasoning_content ?? '';
-        return [thoughts, content].filter(x => x).join('\n\n');
+    if (content.includes(THINK_BREAK)) {
+        return content.split(THINK_BREAK)[1];
     }
 
     return content;
 }
 
+/**
+ * Extracts the reasoning from the response data.
+ * @param {object} data Response data
+ * @returns {string} Extracted reasoning
+ */
+function extractReasoningFromData(data) {
+    const content = String(getTextContextFromData(data) ?? '');
+
+    if (content.includes(THINK_BREAK)) {
+        return content.split(THINK_BREAK)[0];
+    }
+
+    if (main_api === 'openai' && oai_settings.chat_completion_source === chat_completion_sources.DEEPSEEK && oai_settings.show_thoughts) {
+        return data?.choices?.[0]?.message?.reasoning_content ?? '';
+    }
+
+    return '';
+}
+
 /**
  * Extracts multiswipe swipes from the response data.
  * @param {Object} data Response data
@@ -5865,7 +5905,7 @@ export function cleanUpMessage(getMessage, isImpersonate, isContinue, displayInc
     return getMessage;
 }
 
-export async function saveReply(type, getMessage, fromStreaming, title, swipes) {
+export async function saveReply(type, getMessage, fromStreaming, title, swipes, reasoning) {
     if (type != 'append' && type != 'continue' && type != 'appendFinal' && chat.length &&
         (chat[chat.length - 1]['swipe_id'] === undefined || chat[chat.length - 1]['is_user'])) {
         type = 'normal';
@@ -5890,6 +5930,7 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes)
         chat[chat.length - 1]['send_date'] = getMessageTimeStamp();
         chat[chat.length - 1]['extra']['api'] = getGeneratingApi();
         chat[chat.length - 1]['extra']['model'] = getGeneratingModel();
+        chat[chat.length - 1]['extra']['reasoning'] = reasoning;
         if (power_user.message_token_count_enabled) {
            chat[chat.length - 1]['extra']['token_count'] = await getTokenCountAsync(chat[chat.length - 1]['mes'], 0);
         }
@@ -5910,6 +5951,7 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes)
         chat[chat.length - 1]['send_date'] = getMessageTimeStamp();
         chat[chat.length - 1]['extra']['api'] = getGeneratingApi();
         chat[chat.length - 1]['extra']['model'] = getGeneratingModel();
+        chat[chat.length - 1]['extra']['reasoning'] += reasoning;
         if (power_user.message_token_count_enabled) {
             chat[chat.length - 1]['extra']['token_count'] = await getTokenCountAsync(chat[chat.length - 1]['mes'], 0);
         }
@@ -5927,6 +5969,7 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes)
         chat[chat.length - 1]['send_date'] = getMessageTimeStamp();
         chat[chat.length - 1]['extra']['api'] = getGeneratingApi();
         chat[chat.length - 1]['extra']['model'] = getGeneratingModel();
+        chat[chat.length - 1]['extra']['reasoning'] += reasoning;
         if (power_user.message_token_count_enabled) {
             chat[chat.length - 1]['extra']['token_count'] = await getTokenCountAsync(chat[chat.length - 1]['mes'], 0);
         }
@@ -5944,6 +5987,7 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes)
         chat[chat.length - 1]['send_date'] = getMessageTimeStamp();
         chat[chat.length - 1]['extra']['api'] = getGeneratingApi();
         chat[chat.length - 1]['extra']['model'] = getGeneratingModel();
+        chat[chat.length - 1]['extra']['reasoning'] = reasoning;
         if (power_user.trim_spaces) {
             getMessage = getMessage.trim();
         }
@@ -8646,6 +8690,7 @@ const swipe_right = () => {
             // resets the timer
             swipeMessage.find('.mes_timer').html('');
             swipeMessage.find('.tokenCounterDisplay').text('');
+            swipeMessage.find('.mes_reasoning').html('');
         } else {
             //console.log('showing previously generated swipe candidate, or "..."');
             //console.log('onclick right swipe calling addOneMessage');
diff --git a/public/scripts/constants.js b/public/scripts/constants.js
index f95a8e146..935a74219 100644
--- a/public/scripts/constants.js
+++ b/public/scripts/constants.js
@@ -14,3 +14,8 @@ export const debounce_timeout = {
     /** [5 sec] For delayed tasks, like auto-saving or completing batch operations that need a significant pause. */
     extended: 5000,
 };
+
+/**
+ * Custom boundary for splitting the text between the model's reasoning and the actual response.
+ */
+export const THINK_BREAK = '##�THINK_BREAK�##';
diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js
index 6efadce87..65d47fc4b 100644
--- a/public/scripts/kai-settings.js
+++ b/public/scripts/kai-settings.js
@@ -188,7 +188,7 @@ export async function generateKoboldWithStreaming(generate_data, signal) {
             if (data?.token) {
                 text += data.token;
             }
-            yield { text, swipes: [], toolCalls: [] };
+            yield { text, swipes: [], toolCalls: [], state: {} };
         }
     };
 }
diff --git a/public/scripts/nai-settings.js b/public/scripts/nai-settings.js
index f95e7d9f6..91ff09ef6 100644
--- a/public/scripts/nai-settings.js
+++ b/public/scripts/nai-settings.js
@@ -746,7 +746,7 @@ export async function generateNovelWithStreaming(generate_data, signal) {
                 text += data.token;
             }
 
-            yield { text, swipes: [], logprobs: parseNovelAILogprobs(data.logprobs), toolCalls: [] };
+            yield { text, swipes: [], logprobs: parseNovelAILogprobs(data.logprobs), toolCalls: [], state: {} };
         }
     };
 }
diff --git a/public/scripts/openai.js b/public/scripts/openai.js
index 54e9f6125..5585f82ef 100644
--- a/public/scripts/openai.js
+++ b/public/scripts/openai.js
@@ -2095,7 +2095,7 @@ async function sendOpenAIRequest(type, messages, signal) {
             let text = '';
             const swipes = [];
             const toolCalls = [];
-            const state = {};
+            const state = { reasoning: '' };
             while (true) {
                 const { done, value } = await reader.read();
                 if (done) return;
@@ -2113,7 +2113,7 @@ async function sendOpenAIRequest(type, messages, signal) {
 
                 ToolManager.parseToolCalls(toolCalls, parsed);
 
-                yield { text, swipes: swipes, logprobs: parseChatCompletionLogprobs(parsed), toolCalls: toolCalls };
+                yield { text, swipes: swipes, logprobs: parseChatCompletionLogprobs(parsed), toolCalls: toolCalls, state: state };
             }
         };
     }
@@ -2150,16 +2150,17 @@ function getStreamingReply(data, state) {
     if (oai_settings.chat_completion_source === chat_completion_sources.CLAUDE) {
         return data?.delta?.text || '';
     } else if (oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE) {
-        return data?.candidates?.[0]?.content?.parts?.filter(x => oai_settings.show_thoughts || !x.thought)?.map(x => x.text)?.filter(x => x)?.join('\n\n') || '';
+        if (oai_settings.show_thoughts) {
+            state.reasoning += (data?.candidates?.[0]?.content?.parts?.filter(x => x.thought)?.map(x => x.text)?.[0] || '');
+        }
+        return data?.candidates?.[0]?.content?.parts?.filter(x => !x.thought)?.map(x => x.text)?.[0] || '';
     } else if (oai_settings.chat_completion_source === chat_completion_sources.COHERE) {
         return data?.delta?.message?.content?.text || data?.delta?.message?.tool_plan || '';
     } else if (oai_settings.chat_completion_source === chat_completion_sources.DEEPSEEK) {
-        const hadThoughts = state.hadThoughts;
-        const thoughts = data.choices?.filter(x => oai_settings.show_thoughts || !x?.delta?.reasoning_content)?.[0]?.delta?.reasoning_content || '';
-        const content = data.choices?.[0]?.delta?.content || '';
-        state.hadThoughts = !!thoughts;
-        const separator = hadThoughts && !thoughts ? '\n\n' : '';
-        return [thoughts, separator, content].filter(x => x).join('\n\n');
+        if (oai_settings.show_thoughts) {
+            state.reasoning += (data.choices?.filter(x => x?.delta?.reasoning_content)?.[0]?.delta?.reasoning_content || '');
+        }
+        return data.choices?.[0]?.delta?.content || '';
     } else {
         return data.choices?.[0]?.delta?.content ?? data.choices?.[0]?.message?.content ?? data.choices?.[0]?.text ?? '';
     }
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index cd1004991..19b729374 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -986,6 +986,7 @@ export async function generateTextGenWithStreaming(generate_data, signal) {
         let logprobs = null;
         const swipes = [];
         const toolCalls = [];
+        const state = {};
         while (true) {
             const { done, value } = await reader.read();
             if (done) return;
@@ -1004,7 +1005,7 @@ export async function generateTextGenWithStreaming(generate_data, signal) {
                 logprobs = parseTextgenLogprobs(newText, data.choices?.[0]?.logprobs || data?.completion_probabilities);
             }
 
-            yield { text, swipes, logprobs, toolCalls };
+            yield { text, swipes, logprobs, toolCalls, state };
         }
     };
 }
diff --git a/public/style.css b/public/style.css
index cbfc96185..ee7cf9094 100644
--- a/public/style.css
+++ b/public/style.css
@@ -332,6 +332,23 @@ input[type='checkbox']:focus-visible {
     color: var(--SmartThemeQuoteColor);
 }
 
+.mes_reasoning {
+    display: block;
+    border: 1px solid var(--SmartThemeBorderColor);
+    background-color: var(--black30a);
+    border-radius: 5px;
+    padding: 5px;
+    margin: 5px 0;
+    overflow-y: auto;
+    max-height: 100px;
+}
+
+.mes_block:has(.edit_textarea) .mes_reasoning,
+.mes_bias:empty,
+.mes_reasoning:empty {
+    display: none;
+}
+
 .mes_text i,
 .mes_text em {
     color: var(--SmartThemeEmColor);
@@ -1022,6 +1039,7 @@ body .panelControlBar {
     /*only affects bubblechat to make it sit nicely at the bottom*/
 }
 
+.last_mes .mes_reasoning,
 .last_mes .mes_text {
     padding-right: 30px;
 }
@@ -1235,14 +1253,18 @@
     overflow-y: clip;
 }
 
-.mes_text {
+.mes_text,
+.mes_reasoning {
     font-weight: 500;
     line-height: calc(var(--mainFontSize) + .5rem);
+    max-width: 100%;
+    overflow-wrap: anywhere;
+}
+
+.mes_text {
     padding-left: 0;
     padding-top: 5px;
     padding-bottom: 5px;
-    max-width: 100%;
-    overflow-wrap: anywhere;
 }
 
 br {
diff --git a/src/constants.js b/src/constants.js
index 35118a04b..faddaaf81 100644
--- a/src/constants.js
+++ b/src/constants.js
@@ -413,3 +413,8 @@ export const VLLM_KEYS = [
     'guided_decoding_backend',
     'guided_whitespace_pattern',
 ];
+
+/**
+ * Custom boundary for splitting the text between the model's reasoning and the actual response.
+ */
+export const THINK_BREAK = '##�THINK_BREAK�##';
diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js
index d67f309da..5e81b2280 100644
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -7,6 +7,7 @@ import {
     CHAT_COMPLETION_SOURCES,
     GEMINI_SAFETY,
     OPENROUTER_HEADERS,
+    THINK_BREAK,
 } from '../../constants.js';
 import {
     forwardFetchResponse,
@@ -389,7 +390,7 @@ async function sendMakerSuiteRequest(request, response) {
                 responseContent.parts = responseContent.parts.filter(part => !part.thought);
             }
 
-            const responseText = typeof responseContent === 'string' ? responseContent : responseContent?.parts?.map(part => part.text)?.join('\n\n');
+            const responseText = typeof responseContent === 'string' ? responseContent : responseContent?.parts?.map(part => part.text)?.join(THINK_BREAK);
             if (!responseText) {
                 let message = 'Google AI Studio Candidate text empty';
                 console.log(message, generateResponseJson);