Implement support for koboldcpp SSE streaming.

koboldcpp added SSE streaming to its API as of:
https://github.com/LostRuins/koboldcpp/pull/220

This has not yet been added to release, but I figured I'd try to
get the work done to make it ready.

Work mostly hijacked from:
10bbc97069
This commit is contained in:
Zixaphir
2023-06-11 13:24:07 -07:00
parent 4bce33b1a9
commit 698461c188
4 changed files with 107 additions and 25 deletions

View File

@ -260,6 +260,22 @@
</div> </div>
</div> </div>
</div> </div>
<hr>
<div id="range_block">
<div class="range-block">
<label class="checkbox_label widthFreeExpand">
<input id="streaming_kobold" type="checkbox" />
<span data-i18n="Streaming">Streaming</span>
</label>
<div class="toggle-description justifyLeft">
<span data-i18n="Display the response bit by bit as it is generated.">
Display the response bit by bit as it is generated.</span><br>
<span data-i18n="When this is off, responses will be displayed all at once when they are complete.">
When this is off, responses will be displayed all at once when they are complete.
</span>
</div>
</div>
</div>
</div> </div>
<div id="range_block_novel"> <div id="range_block_novel">
<div class="range-block"> <div class="range-block">

View File

@ -2,6 +2,7 @@ import { humanizedDateTime, favsToHotswap } from "./scripts/RossAscends-mods.js"
import { encode } from "../scripts/gpt-2-3-tokenizer/mod.js"; import { encode } from "../scripts/gpt-2-3-tokenizer/mod.js";
import { GPT3BrowserTokenizer } from "../scripts/gpt-3-tokenizer/gpt3-tokenizer.js"; import { GPT3BrowserTokenizer } from "../scripts/gpt-3-tokenizer/gpt3-tokenizer.js";
import { import {
generateKoboldWithStreaming,
kai_settings, kai_settings,
loadKoboldSettings, loadKoboldSettings,
formatKoboldUrl, formatKoboldUrl,
@ -1586,6 +1587,7 @@ function appendToStoryString(value, prefix) {
function isStreamingEnabled() { function isStreamingEnabled() {
return ((main_api == 'openai' && oai_settings.stream_openai) return ((main_api == 'openai' && oai_settings.stream_openai)
|| (main_api == 'kobold' && kai_settings.streaming_kobold)
|| (main_api == 'novel' && nai_settings.streaming_novel) || (main_api == 'novel' && nai_settings.streaming_novel)
|| (main_api == 'poe' && poe_settings.streaming) || (main_api == 'poe' && poe_settings.streaming)
|| (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming)) || (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming))
@ -2367,6 +2369,9 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') { else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') {
streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal); streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal);
} }
else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') {
streamingProcessor.generator = await generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal);
}
else { else {
try { try {
const response = await fetch(generate_url, { const response = await fetch(generate_url, {
@ -6690,7 +6695,7 @@ $(document).ready(function () {
if (this_chid !== undefined || selected_group) { if (this_chid !== undefined || selected_group) {
// Previously system messages were allowed to be edited // Previously system messages were allowed to be edited
/*const message = $(this).closest(".mes"); /*const message = $(this).closest(".mes");
if (message.data("isSystem")) { if (message.data("isSystem")) {
return; return;
}*/ }*/

View File

@ -1,4 +1,5 @@
import { import {
getRequestHeaders,
saveSettingsDebounced, saveSettingsDebounced,
getStoppingStrings, getStoppingStrings,
} from "../script.js"; } from "../script.js";
@ -23,6 +24,7 @@ const kai_settings = {
rep_pen_slope: 0.9, rep_pen_slope: 0.9,
single_line: false, single_line: false,
use_stop_sequence: false, use_stop_sequence: false,
streaming_kobold: false,
}; };
const MIN_STOP_SEQUENCE_VERSION = '1.2.2'; const MIN_STOP_SEQUENCE_VERSION = '1.2.2';
@ -58,6 +60,10 @@ function loadKoboldSettings(preset) {
kai_settings.single_line = preset.single_line; kai_settings.single_line = preset.single_line;
$('#single_line').prop('checked', kai_settings.single_line); $('#single_line').prop('checked', kai_settings.single_line);
} }
if (preset.hasOwnProperty('streaming_kobold')) {
kai_settings.streaming_kobold = preset.streaming_kobold;
$('#streaming_kobold').prop('checked', kai_settings.streaming_kobold);
}
} }
function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, this_max_context, isImpersonate) { function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, this_max_context, isImpersonate) {
@ -86,10 +92,53 @@ function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, thi
use_world_info: false, use_world_info: false,
singleline: kai_settings.single_line, singleline: kai_settings.single_line,
stop_sequence: kai_settings.use_stop_sequence ? getStoppingStrings(isImpersonate, false) : undefined, stop_sequence: kai_settings.use_stop_sequence ? getStoppingStrings(isImpersonate, false) : undefined,
streaming: kai_settings.streaming_kobold,
}; };
return generate_data; return generate_data;
} }
/**
 * Starts a streaming generation request against the local /generate proxy
 * and returns an async generator that yields the accumulated message text
 * each time a new SSE token arrives.
 *
 * @param {object} generate_data - Kobold generation payload (see getKoboldGenerationData).
 * @param {AbortSignal} signal - Abort signal used to cancel the request.
 * @returns {Promise<AsyncGenerator<string>>} Generator yielding the message so far.
 * @throws {Error} If the server responds with a non-OK HTTP status.
 */
export async function generateKoboldWithStreaming(generate_data, signal) {
    const response = await fetch('/generate', {
        headers: getRequestHeaders(),
        body: JSON.stringify(generate_data),
        method: 'POST',
        signal: signal,
    });

    // Fail fast instead of trying to parse an HTTP error page as SSE data.
    if (!response.ok) {
        throw new Error(`Kobold streaming request failed: ${response.status} ${response.statusText}`);
    }

    return async function* streamData() {
        const decoder = new TextDecoder();
        const reader = response.body.getReader();
        let getMessage = '';
        let messageBuffer = "";
        while (true) {
            const { done, value } = await reader.read();
            // { stream: true } keeps incomplete multi-byte UTF-8 sequences
            // buffered inside the decoder instead of emitting replacement
            // characters when a character is split across two chunks.
            const chunk = decoder.decode(value, { stream: true });
            // ReadableStream's buffer is not guaranteed to contain full SSE messages as they arrive in chunks
            // We need to buffer chunks until we have one or more full messages (separated by double newlines)
            messageBuffer += chunk;
            const eventList = messageBuffer.split("\n\n");
            // Last element will be an empty string or a leftover partial message
            messageBuffer = eventList.pop();
            for (const event of eventList) {
                for (const subEvent of event.split('\n')) {
                    if (subEvent.startsWith("data")) {
                        // Strip the leading "data:"; JSON.parse tolerates the
                        // remaining leading space. Guard against a malformed
                        // line so one bad event doesn't abort the stream.
                        try {
                            const data = JSON.parse(subEvent.substring(5));
                            getMessage += (data?.token || '');
                            yield getMessage;
                        } catch (err) {
                            console.warn('Skipping malformed SSE data line:', subEvent, err);
                        }
                    }
                }
            }
            if (done) {
                return;
            }
        }
    }
}
const sliders = [ const sliders = [
{ {
name: "temp", name: "temp",
@ -176,4 +225,10 @@ $(document).ready(function () {
kai_settings.single_line = value; kai_settings.single_line = value;
saveSettingsDebounced(); saveSettingsDebounced();
}); });
});
$('#streaming_kobold').on("input", function () {
const value = $(this).prop('checked');
kai_settings.streaming_kobold = value;
saveSettingsDebounced();
});
});

View File

@ -381,33 +381,39 @@ app.post("/generate", jsonParser, async function (request, response_generate = r
console.log(this_settings); console.log(this_settings);
const args = { const args = {
body: JSON.stringify(this_settings), body: JSON.stringify(this_settings),
signal: controller.signal,
headers: { "Content-Type": "application/json" }, headers: { "Content-Type": "application/json" },
signal: controller.signal,
}; };
const MAX_RETRIES = 10; try {
const delayAmount = 3000; const fetch = require('node-fetch').default;
for (let i = 0; i < MAX_RETRIES; i++) { const url = request.body.streaming ? `${api_server}/extra/generate/stream` : `${api_server}/v1/generate`;
try { const response = await fetch(url, { method: 'POST', timeout: 0, ...args });
const data = await postAsync(api_server + "/v1/generate", args); console.log(response);
console.log(data); if (request.body.streaming) {
return response_generate.send(data); // Pipe remote SSE stream to Express response
} response.body.pipe(response_generate);
catch (error) {
// data
if (typeof error['text'] === 'function') {
console.log(await error.text());
}
// response request.socket.on('close', function () {
switch (error.statusCode) { response.body.destroy(); // Close the remote stream
case 503: response_generate.end(); // End the Express response
await delay(delayAmount); });
break;
default: response.body.on('end', function () {
return response_generate.send({ error: true }); console.log("Streaming request finished");
} response_generate.end();
} });
} else {
if (!response.ok) {
console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`);
return response.status(response.status).send({ error: true });
}
const data = await response.json();
return response_generate.send(data);
}
} catch (error) {
return response_generate.send({ error: true });
} }
}); });