diff --git a/public/script.js b/public/script.js
index 55b909d31..2ea619b14 100644
--- a/public/script.js
+++ b/public/script.js
@@ -2,11 +2,13 @@ import { humanizedDateTime, favsToHotswap } from "./scripts/RossAscends-mods.js"
import { encode } from "../scripts/gpt-2-3-tokenizer/mod.js";
import { GPT3BrowserTokenizer } from "../scripts/gpt-3-tokenizer/gpt3-tokenizer.js";
import {
+ generateKoboldWithStreaming,
kai_settings,
loadKoboldSettings,
formatKoboldUrl,
getKoboldGenerationData,
canUseKoboldStopSequence,
+ canUseKoboldStreaming,
} from "./scripts/kai-settings.js";
import {
@@ -743,9 +745,10 @@ async function getStatus() {
is_pygmalion = false;
}
- // determine if we can use stop sequence
+ // determine if we can use stop sequence and streaming
if (main_api === "kobold" || main_api === "koboldhorde") {
kai_settings.use_stop_sequence = canUseKoboldStopSequence(data.version);
+ kai_settings.can_use_streaming = canUseKoboldStreaming(data.koboldVersion);
}
//console.log(online_status);
@@ -1587,6 +1590,7 @@ function appendToStoryString(value, prefix) {
function isStreamingEnabled() {
return ((main_api == 'openai' && oai_settings.stream_openai)
+ || (main_api == 'kobold' && kai_settings.streaming_kobold && kai_settings.can_use_streaming)
|| (main_api == 'novel' && nai_settings.streaming_novel)
|| (main_api == 'poe' && poe_settings.streaming)
|| (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming))
@@ -1854,6 +1858,10 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
return;
}
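+    // Warn, but don't abort, when streaming is requested on a Kobold build that can't stream;
+    // isStreamingEnabled() will return false and generation falls back to the blocking path.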
+ if (main_api == 'kobold' && kai_settings.streaming_kobold && !kai_settings.can_use_streaming) {
+        toastr.warning('Streaming is enabled, but the connected Kobold version does not support token streaming.', undefined, { timeOut: 10000, preventDuplicates: true, });
+ }
+
if (isHordeGenerationNotAllowed()) {
is_send_press = false;
return;
@@ -2368,6 +2376,9 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') {
streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal);
}
+ else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') {
+ streamingProcessor.generator = await generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal);
+ }
else {
try {
const response = await fetch(generate_url, {
diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js
index 842a571e2..d9f7c581a 100644
--- a/public/scripts/kai-settings.js
+++ b/public/scripts/kai-settings.js
@@ -1,4 +1,5 @@
import {
+ getRequestHeaders,
saveSettingsDebounced,
getStoppingStrings,
} from "../script.js";
@@ -9,6 +10,7 @@ export {
formatKoboldUrl,
getKoboldGenerationData,
canUseKoboldStopSequence,
+ canUseKoboldStreaming,
};
const kai_settings = {
@@ -23,9 +25,11 @@ const kai_settings = {
rep_pen_slope: 0.9,
single_line: false,
use_stop_sequence: false,
+ streaming_kobold: false,
};
const MIN_STOP_SEQUENCE_VERSION = '1.2.2';
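+// Earliest KoboldCpp release that exposes the /extra/generate/stream SSE endpoint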
+const MIN_STREAMING_KCPPVERSION = '1.30';
function formatKoboldUrl(value) {
try {
@@ -58,6 +62,10 @@ function loadKoboldSettings(preset) {
kai_settings.single_line = preset.single_line;
$('#single_line').prop('checked', kai_settings.single_line);
}
+ if (preset.hasOwnProperty('streaming_kobold')) {
+ kai_settings.streaming_kobold = preset.streaming_kobold;
+ $('#streaming_kobold').prop('checked', kai_settings.streaming_kobold);
+ }
}
function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, this_max_context, isImpersonate) {
@@ -86,10 +94,53 @@ function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, thi
use_world_info: false,
singleline: kai_settings.single_line,
stop_sequence: kai_settings.use_stop_sequence ? getStoppingStrings(isImpersonate, false) : undefined,
+ streaming: kai_settings.streaming_kobold && kai_settings.can_use_streaming,
};
return generate_data;
}
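+
+// Issues a streaming generation request through the local /generate proxy and returns an async
+// generator that yields the message text accumulated so far whenever new tokens arrive over SSE.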
+export async function generateKoboldWithStreaming(generate_data, signal) {
+ const response = await fetch('/generate', {
+ headers: getRequestHeaders(),
+ body: JSON.stringify(generate_data),
+ method: 'POST',
+ signal: signal,
+ });
+
+ return async function* streamData() {
+ const decoder = new TextDecoder();
+ const reader = response.body.getReader();
+ let getMessage = '';
+ let messageBuffer = "";
+ while (true) {
+ const { done, value } = await reader.read();
+            const chunk = decoder.decode(value, { stream: true });
+            let eventList = [];
+
+            // Chunks read from the stream are not guaranteed to contain complete SSE messages,
+            // so buffer them until one or more full messages (separated by double newlines) are available
+            messageBuffer += chunk;
+ eventList = messageBuffer.split("\n\n");
+ // Last element will be an empty string or a leftover partial message
+ messageBuffer = eventList.pop();
+
+ for (let event of eventList) {
+ for (let subEvent of event.split('\n')) {
+ if (subEvent.startsWith("data")) {
+ let data = JSON.parse(subEvent.substring(5));
+ getMessage += (data?.token || '');
+ yield getMessage;
+ }
+ }
+ }
+
+ if (done) {
+ return;
+ }
+ }
+ }
+}
+
const sliders = [
{
name: "temp",
@@ -160,6 +211,12 @@ function canUseKoboldStopSequence(version) {
return (version || '0.0.0').localeCompare(MIN_STOP_SEQUENCE_VERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1;
}
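+
+// Only KoboldCpp supports token streaming; other Kobold backends report a different "result"
+// string from /extra/version (or none at all) and are treated as non-streaming.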
+function canUseKoboldStreaming(koboldVersion) {
+    if (koboldVersion?.result == 'KoboldCpp') {
+        return (koboldVersion.version || '0.0').localeCompare(MIN_STREAMING_KCPPVERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1;
+    }
+    return false;
+}
+
$(document).ready(function () {
sliders.forEach(slider => {
$(document).on("input", slider.sliderId, function () {
@@ -176,4 +233,10 @@ $(document).ready(function () {
kai_settings.single_line = value;
saveSettingsDebounced();
});
-});
\ No newline at end of file
+
+ $('#streaming_kobold').on("input", function () {
+ const value = $(this).prop('checked');
+ kai_settings.streaming_kobold = value;
+ saveSettingsDebounced();
+ });
+});
diff --git a/public/scripts/poe.js b/public/scripts/poe.js
index 56df5b8c4..188f36f12 100644
--- a/public/scripts/poe.js
+++ b/public/scripts/poe.js
@@ -44,7 +44,7 @@ If you have any objections to these requirements, please mention them specifical
If you accept the requirements, please confirm this by replying with "${DEFAULT_JAILBREAK_RESPONSE}", and nothing more. Upon receiving your accurate confirmation message, I will specify the context of the scene and {{char}}'s characteristics, background, and personality in the next message.`;
-const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response. Don't reply with .]";
+const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response.]";
const DEFAULT_IMPERSONATION_PROMPT = "[Write 1 reply only in internet RP style from the point of view of {{user}}, using the chat history so far as a guideline for the writing style of {{user}}. Don't write as {{char}} or system.]";
const poe_settings = {
@@ -281,7 +281,7 @@ async function generatePoe(type, finalPrompt, signal) {
}
async function sendChunkedMessage(finalPrompt, withStreaming, signal) {
- const fastReplyPrompt = '\n[REPLY TO THIS MESSAGE WITH ONLY!!!]';
+ const fastReplyPrompt = '\n[Reply to this message with a full stop only]';
const promptChunks = splitRecursive(finalPrompt, CHUNKED_PROMPT_LENGTH - fastReplyPrompt.length);
console.debug(`Splitting prompt into ${promptChunks.length} chunks`, promptChunks);
let reply = '';
diff --git a/server.js b/server.js
index 5817c5698..53301e990 100644
--- a/server.js
+++ b/server.js
@@ -381,34 +381,61 @@ app.post("/generate", jsonParser, async function (request, response_generate = r
console.log(this_settings);
const args = {
body: JSON.stringify(this_settings),
- signal: controller.signal,
headers: { "Content-Type": "application/json" },
+ signal: controller.signal,
};
- const MAX_RETRIES = 10;
- const delayAmount = 3000;
+ const MAX_RETRIES = 50;
+ const delayAmount = 2500;
+ let fetch, url, response;
for (let i = 0; i < MAX_RETRIES; i++) {
try {
- const data = await postAsync(api_server + "/v1/generate", args);
- console.log(data);
- return response_generate.send(data);
- }
- catch (error) {
- // data
- if (typeof error['text'] === 'function') {
- console.log(await error.text());
- }
+ fetch = require('node-fetch').default;
+ url = request.body.streaming ? `${api_server}/extra/generate/stream` : `${api_server}/v1/generate`;
+ response = await fetch(url, { method: 'POST', timeout: 0, ...args });
+ if (request.body.streaming) {
+
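+                // Proxy the SSE stream: if the client disconnects, destroy the upstream body so Kobold stops generating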
+ request.socket.on('close', function () {
+ response.body.destroy(); // Close the remote stream
+ response_generate.end(); // End the Express response
+ });
+
+ response.body.on('end', function () {
+ console.log("Streaming request finished");
+ response_generate.end();
+ });
+
+ // Pipe remote SSE stream to Express response
+ return response.body.pipe(response_generate);
+ } else {
+ if (!response.ok) {
+ console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`);
+                    return response_generate.status(response.status).send({ error: true });
+ }
+
+ const data = await response.json();
+ return response_generate.send(data);
+ }
+ } catch (error) {
// response
- switch (error.statusCode) {
- case 503:
+ switch (error?.status) {
+ case 403:
+                case 503: // retry on temporary service issues (possibly caused by a queue failure)
+ console.debug(`KoboldAI is busy. Retry attempt ${i+1} of ${MAX_RETRIES}...`);
await delay(delayAmount);
break;
default:
+ if ('status' in error) {
+ console.log('Status Code from Kobold:', error.status);
+ }
return response_generate.send({ error: true });
}
}
}
+
+ console.log('Max retries exceeded. Giving up.');
+ return response_generate.send({ error: true });
});
//************** Text generation web UI
@@ -575,6 +602,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus =
};
var url = api_server + "/v1/model";
let version = '';
+ let koboldVersion = {};
if (main_api == "kobold") {
try {
version = (await getAsync(api_server + "/v1/info/version")).result;
@@ -582,6 +610,15 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus =
catch {
version = '0.0.0';
}
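+        // Not all Kobold backends expose /extra/version; on failure, fall back to a stub that
+        // canUseKoboldStreaming() treats as non-streaming.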
+ try {
+ koboldVersion = (await getAsync(api_server + "/extra/version"));
+ }
+ catch {
+ koboldVersion = {
+ result: 'Kobold',
+ version: '0.0',
+ };
+ }
}
client.get(url, args, function (data, response) {
if (typeof data !== 'object') {
@@ -589,6 +626,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus =
}
if (response.statusCode == 200) {
data.version = version;
+ data.koboldVersion = koboldVersion;
if (data.result != "ReadOnly") {
} else {
data.result = "no_connection";
@@ -3120,7 +3158,7 @@ async function postAsync(url, args) {
return data;
}
- throw new Error(response);
+ throw response;
}
function getAsync(url, args) {