diff --git a/public/index.html b/public/index.html index 8b259db96..f0ddf2013 100644 --- a/public/index.html +++ b/public/index.html @@ -260,6 +260,22 @@ +
+
+
+ +
+ + Display the response bit by bit as it is generated.
+ + When this is off, responses will be displayed all at once when they are complete. + +
+
+
diff --git a/public/script.js b/public/script.js index 55b909d31..2ea619b14 100644 --- a/public/script.js +++ b/public/script.js @@ -2,11 +2,13 @@ import { humanizedDateTime, favsToHotswap } from "./scripts/RossAscends-mods.js" import { encode } from "../scripts/gpt-2-3-tokenizer/mod.js"; import { GPT3BrowserTokenizer } from "../scripts/gpt-3-tokenizer/gpt3-tokenizer.js"; import { + generateKoboldWithStreaming, kai_settings, loadKoboldSettings, formatKoboldUrl, getKoboldGenerationData, canUseKoboldStopSequence, + canUseKoboldStreaming, } from "./scripts/kai-settings.js"; import { @@ -743,9 +745,10 @@ async function getStatus() { is_pygmalion = false; } - // determine if we can use stop sequence + // determine if we can use stop sequence and streaming if (main_api === "kobold" || main_api === "koboldhorde") { kai_settings.use_stop_sequence = canUseKoboldStopSequence(data.version); + kai_settings.can_use_streaming = canUseKoboldStreaming(data.koboldVersion); } //console.log(online_status); @@ -1587,6 +1590,7 @@ function appendToStoryString(value, prefix) { function isStreamingEnabled() { return ((main_api == 'openai' && oai_settings.stream_openai) + || (main_api == 'kobold' && kai_settings.streaming_kobold && kai_settings.can_use_streaming) || (main_api == 'novel' && nai_settings.streaming_novel) || (main_api == 'poe' && poe_settings.streaming) || (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming)) @@ -1854,6 +1858,10 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, return; } + if (main_api == 'kobold' && kai_settings.streaming_kobold && !kai_settings.can_use_streaming) { + toastr.warning('Streaming is enabled, but the version of Kobold used does not support token streaming.', undefined, { timeOut: 10000, preventDuplicates: true, }); + } + if (isHordeGenerationNotAllowed()) { is_send_press = false; return; @@ -2368,6 +2376,9 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, 
reject, else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') { streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal); } + else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') { + streamingProcessor.generator = await generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal); + } else { try { const response = await fetch(generate_url, { diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 842a571e2..d9f7c581a 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -1,4 +1,5 @@ import { + getRequestHeaders, saveSettingsDebounced, getStoppingStrings, } from "../script.js"; @@ -9,6 +10,7 @@ export { formatKoboldUrl, getKoboldGenerationData, canUseKoboldStopSequence, + canUseKoboldStreaming, }; const kai_settings = { @@ -23,9 +25,11 @@ const kai_settings = { rep_pen_slope: 0.9, single_line: false, use_stop_sequence: false, + streaming_kobold: false, }; const MIN_STOP_SEQUENCE_VERSION = '1.2.2'; +const MIN_STREAMING_KCPPVERSION = '1.30'; function formatKoboldUrl(value) { try { @@ -58,6 +62,10 @@ function loadKoboldSettings(preset) { kai_settings.single_line = preset.single_line; $('#single_line').prop('checked', kai_settings.single_line); } + if (preset.hasOwnProperty('streaming_kobold')) { + kai_settings.streaming_kobold = preset.streaming_kobold; + $('#streaming_kobold').prop('checked', kai_settings.streaming_kobold); + } } function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, this_max_context, isImpersonate) { @@ -86,10 +94,53 @@ function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, thi use_world_info: false, singleline: kai_settings.single_line, stop_sequence: kai_settings.use_stop_sequence ? 
getStoppingStrings(isImpersonate, false) : undefined, + streaming: kai_settings.streaming_kobold && kai_settings.can_use_streaming, }; return generate_data; } +export async function generateKoboldWithStreaming(generate_data, signal) { + const response = await fetch('/generate', { + headers: getRequestHeaders(), + body: JSON.stringify(generate_data), + method: 'POST', + signal: signal, + }); + + return async function* streamData() { + const decoder = new TextDecoder(); + const reader = response.body.getReader(); + let getMessage = ''; + let messageBuffer = ""; + while (true) { + const { done, value } = await reader.read(); + let response = decoder.decode(value); + let eventList = []; + + // ReadableStream's buffer is not guaranteed to contain full SSE messages as they arrive in chunks + // We need to buffer chunks until we have one or more full messages (separated by double newlines) + messageBuffer += response; + eventList = messageBuffer.split("\n\n"); + // Last element will be an empty string or a leftover partial message + messageBuffer = eventList.pop(); + + for (let event of eventList) { + for (let subEvent of event.split('\n')) { + if (subEvent.startsWith("data")) { + let data = JSON.parse(subEvent.substring(5)); + getMessage += (data?.token || ''); + yield getMessage; + } + } + } + + if (done) { + return; + } + } + } +} + const sliders = [ { name: "temp", @@ -160,6 +211,12 @@ function canUseKoboldStopSequence(version) { return (version || '0.0.0').localeCompare(MIN_STOP_SEQUENCE_VERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1; } +function canUseKoboldStreaming(koboldVersion) { + if (koboldVersion?.result == 'KoboldCpp') { + return (koboldVersion.version || '0.0').localeCompare(MIN_STREAMING_KCPPVERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1; + } else return false; +} + $(document).ready(function () { sliders.forEach(slider => { $(document).on("input", slider.sliderId, function () { @@ -176,4 +233,10 @@
$(document).ready(function () { kai_settings.single_line = value; saveSettingsDebounced(); }); -}); \ No newline at end of file + + $('#streaming_kobold').on("input", function () { + const value = $(this).prop('checked'); + kai_settings.streaming_kobold = value; + saveSettingsDebounced(); + }); +}); diff --git a/public/scripts/poe.js b/public/scripts/poe.js index 56df5b8c4..188f36f12 100644 --- a/public/scripts/poe.js +++ b/public/scripts/poe.js @@ -44,7 +44,7 @@ If you have any objections to these requirements, please mention them specifical If you accept the requirements, please confirm this by replying with "${DEFAULT_JAILBREAK_RESPONSE}", and nothing more. Upon receiving your accurate confirmation message, I will specify the context of the scene and {{char}}'s characteristics, background, and personality in the next message.`; -const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response. Don't reply with .]"; +const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response.]"; const DEFAULT_IMPERSONATION_PROMPT = "[Write 1 reply only in internet RP style from the point of view of {{user}}, using the chat history so far as a guideline for the writing style of {{user}}. 
Don't write as {{char}} or system.]"; const poe_settings = { @@ -281,7 +281,7 @@ async function generatePoe(type, finalPrompt, signal) { } async function sendChunkedMessage(finalPrompt, withStreaming, signal) { - const fastReplyPrompt = '\n[REPLY TO THIS MESSAGE WITH ONLY!!!]'; + const fastReplyPrompt = '\n[Reply to this message with a full stop only]'; const promptChunks = splitRecursive(finalPrompt, CHUNKED_PROMPT_LENGTH - fastReplyPrompt.length); console.debug(`Splitting prompt into ${promptChunks.length} chunks`, promptChunks); let reply = ''; diff --git a/server.js b/server.js index 5817c5698..53301e990 100644 --- a/server.js +++ b/server.js @@ -381,34 +381,61 @@ app.post("/generate", jsonParser, async function (request, response_generate = r console.log(this_settings); const args = { body: JSON.stringify(this_settings), - signal: controller.signal, headers: { "Content-Type": "application/json" }, + signal: controller.signal, }; - const MAX_RETRIES = 10; - const delayAmount = 3000; + const MAX_RETRIES = 50; + const delayAmount = 2500; + let fetch, url, response; for (let i = 0; i < MAX_RETRIES; i++) { try { - const data = await postAsync(api_server + "/v1/generate", args); - console.log(data); - return response_generate.send(data); - } - catch (error) { - // data - if (typeof error['text'] === 'function') { - console.log(await error.text()); - } + fetch = require('node-fetch').default; + url = request.body.streaming ? 
`${api_server}/extra/generate/stream` : `${api_server}/v1/generate`; + response = await fetch(url, { method: 'POST', timeout: 0, ...args }); + if (request.body.streaming) { + + request.socket.on('close', function () { + response.body.destroy(); // Close the remote stream + response_generate.end(); // End the Express response + }); + + response.body.on('end', function () { + console.log("Streaming request finished"); + response_generate.end(); + }); + + // Pipe remote SSE stream to Express response + return response.body.pipe(response_generate); + } else { + if (!response.ok) { + console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`); + return response_generate.status(response.status).send({ error: true }); + } + + const data = await response.json(); + return response_generate.send(data); + } + } catch (error) { // response - switch (error.statusCode) { - case 503: + switch (error?.status) { + case 403: + case 503: // retry in case of temporary service issue, possibly caused by a queue failure? + console.debug(`KoboldAI is busy. Retry attempt ${i+1} of ${MAX_RETRIES}...`); + await delay(delayAmount); break; default: + if ('status' in error) { + console.log('Status Code from Kobold:', error.status); + } return response_generate.send({ error: true }); } } } + + console.log('Max retries exceeded.
Giving up.'); + return response_generate.send({ error: true }); }); //************** Text generation web UI @@ -575,6 +602,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = }; var url = api_server + "/v1/model"; let version = ''; + let koboldVersion = {}; if (main_api == "kobold") { try { version = (await getAsync(api_server + "/v1/info/version")).result; @@ -582,6 +610,15 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = catch { version = '0.0.0'; } + try { + koboldVersion = (await getAsync(api_server + "/extra/version")); + } + catch { + koboldVersion = { + result: 'Kobold', + version: '0.0', + }; + } } client.get(url, args, function (data, response) { if (typeof data !== 'object') { @@ -589,6 +626,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = } if (response.statusCode == 200) { data.version = version; + data.koboldVersion = koboldVersion; if (data.result != "ReadOnly") { } else { data.result = "no_connection"; @@ -3120,7 +3158,7 @@ async function postAsync(url, args) { return data; } - throw new Error(response); + throw response; } function getAsync(url, args) {