From 698461c188c6d643ded1a9802be65547e89f264b Mon Sep 17 00:00:00 2001
From: Zixaphir
Date: Sun, 11 Jun 2023 13:24:07 -0700
Subject: [PATCH 1/9] Implement support for koboldcpp SSE streaming.

koboldcpp added SSE streaming to its API as of:
https://github.com/LostRuins/koboldcpp/pull/220

This has not yet been added to a release, but I figured I'd try to get the
work done to make it ready. Work mostly hijacked from:
https://github.com/SillyTavern/SillyTavern/commit/10bbc970695e10c12f31442cf6704afb92d8806e
---
 public/index.html              | 16 ++++++++++
 public/script.js               |  7 ++++-
 public/scripts/kai-settings.js | 57 +++++++++++++++++++++++++++++++++-
 server.js                      | 52 +++++++++++++++++--------------
 4 files changed, 107 insertions(+), 25 deletions(-)

diff --git a/public/index.html b/public/index.html
index a99d8ac6d..ab1a6a379 100644
--- a/public/index.html
+++ b/public/index.html
@@ -260,6 +260,22 @@
+            <label class="checkbox_label" for="streaming_kobold">
+                <input id="streaming_kobold" type="checkbox" />
+                Streaming
+            </label>
+            <div class="toggle-description justifyLeft">
+                Display the response bit by bit as it is generated.
+                When this is off, responses will be displayed all at once when they are complete.
+            </div>
diff --git a/public/script.js b/public/script.js index c17a78cd6..89f2e3646 100644 --- a/public/script.js +++ b/public/script.js @@ -2,6 +2,7 @@ import { humanizedDateTime, favsToHotswap } from "./scripts/RossAscends-mods.js" import { encode } from "../scripts/gpt-2-3-tokenizer/mod.js"; import { GPT3BrowserTokenizer } from "../scripts/gpt-3-tokenizer/gpt3-tokenizer.js"; import { + generateKoboldWithStreaming, kai_settings, loadKoboldSettings, formatKoboldUrl, @@ -1586,6 +1587,7 @@ function appendToStoryString(value, prefix) { function isStreamingEnabled() { return ((main_api == 'openai' && oai_settings.stream_openai) + || (main_api == 'kobold' && kai_settings.streaming_kobold) || (main_api == 'novel' && nai_settings.streaming_novel) || (main_api == 'poe' && poe_settings.streaming) || (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming)) @@ -2367,6 +2369,9 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') { streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal); } + else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') { + streamingProcessor.generator = await generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal); + } else { try { const response = await fetch(generate_url, { @@ -6690,7 +6695,7 @@ $(document).ready(function () { if (this_chid !== undefined || selected_group) { // Previously system messages we're allowed to be edited /*const message = $(this).closest(".mes"); - + if (message.data("isSystem")) { return; }*/ diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 842a571e2..63b523298 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -1,4 +1,5 @@ import { + getRequestHeaders, saveSettingsDebounced, getStoppingStrings, } from "../script.js"; @@ -23,6 +24,7 @@ const kai_settings = { rep_pen_slope: 0.9, single_line: false, use_stop_sequence: false, + streaming_kobold: false, }; const MIN_STOP_SEQUENCE_VERSION = '1.2.2'; @@ -58,6 +60,10 @@ function loadKoboldSettings(preset) { kai_settings.single_line = preset.single_line; $('#single_line').prop('checked', kai_settings.single_line); } + if (preset.hasOwnProperty('streaming_kobold')) { + kai_settings.streaming_kobold = preset.streaming_kobold; + $('#streaming_kobold').prop('checked', kai_settings.streaming_kobold); + } } function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, this_max_context, isImpersonate) { @@ -86,10 +92,53 @@ function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, thi use_world_info: false, singleline: kai_settings.single_line, stop_sequence: kai_settings.use_stop_sequence ? 
getStoppingStrings(isImpersonate, false) : undefined, + streaming: kai_settings.streaming_kobold, }; return generate_data; } +export async function generateKoboldWithStreaming(generate_data, signal) { + const response = await fetch('/generate', { + headers: getRequestHeaders(), + body: JSON.stringify(generate_data), + method: 'POST', + signal: signal, + }); + + return async function* streamData() { + const decoder = new TextDecoder(); + const reader = response.body.getReader(); + let getMessage = ''; + let messageBuffer = ""; + while (true) { + const { done, value } = await reader.read(); + let response = decoder.decode(value); + let eventList = []; + + // ReadableStream's buffer is not guaranteed to contain full SSE messages as they arrive in chunks + // We need to buffer chunks until we have one or more full messages (separated by double newlines) + messageBuffer += response; + eventList = messageBuffer.split("\n\n"); + // Last element will be an empty string or a leftover partial message + messageBuffer = eventList.pop(); + + for (let event of eventList) { + for (let subEvent of event.split('\n')) { + if (subEvent.startsWith("data")) { + let data = JSON.parse(subEvent.substring(5)); + getMessage += (data?.token || ''); + yield getMessage; + } + } + } + + if (done) { + return; + } + } + } +} + const sliders = [ { name: "temp", @@ -176,4 +225,10 @@ $(document).ready(function () { kai_settings.single_line = value; saveSettingsDebounced(); }); -}); \ No newline at end of file + + $('#streaming_kobold').on("input", function () { + const value = $(this).prop('checked'); + kai_settings.streaming_kobold = value; + saveSettingsDebounced(); + }); +}); diff --git a/server.js b/server.js index a42a2d52d..1b9265480 100644 --- a/server.js +++ b/server.js @@ -381,33 +381,39 @@ app.post("/generate", jsonParser, async function (request, response_generate = r console.log(this_settings); const args = { body: JSON.stringify(this_settings), - signal: controller.signal, headers: { "Content-Type": "application/json" }, + signal: controller.signal, }; - const MAX_RETRIES = 10; - const delayAmount = 3000; - for (let i = 0; i < MAX_RETRIES; i++) { - try { - const data = await postAsync(api_server + "/v1/generate", args); - console.log(data); - return response_generate.send(data); - } - catch (error) { - // data - if (typeof error['text'] === 'function') { - console.log(await error.text()); - } + try { + const fetch = require('node-fetch').default; + const url = request.body.streaming ? 
`${api_server}/extra/generate/stream` : `${api_server}/v1/generate`; + const response = await fetch(url, { method: 'POST', timeout: 0, ...args }); + console.log(response); + if (request.body.streaming) { + // Pipe remote SSE stream to Express response + response.body.pipe(response_generate); - // response - switch (error.statusCode) { - case 503: - await delay(delayAmount); - break; - default: - return response_generate.send({ error: true }); - } - } + request.socket.on('close', function () { + response.body.destroy(); // Close the remote stream + response_generate.end(); // End the Express response + }); + + response.body.on('end', function () { + console.log("Streaming request finished"); + response_generate.end(); + }); + } else { + if (!response.ok) { + console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`); + return response.status(response.status).send({ error: true }); + } + + const data = await response.json(); + return response_generate.send(data); + } + } catch (error) { + return response_generate.send({ error: true }); } }); From e9427a14d5e97c74d95518da19ac4f25589cda24 Mon Sep 17 00:00:00 2001 From: Zixaphir Date: Sun, 11 Jun 2023 18:08:36 -0700 Subject: [PATCH 2/9] Add warning if streaming is unsupported on kobold version. minimum version of koboldcpp 1.30 is the only known implementation to support SSE token streaming, assuming 1.30 releases with the feature enabled. --- public/script.js | 8 +++++++- public/scripts/kai-settings.js | 10 +++++++++- server.js | 13 ++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/public/script.js b/public/script.js index 89f2e3646..6a2c0a7db 100644 --- a/public/script.js +++ b/public/script.js @@ -8,6 +8,7 @@ import { formatKoboldUrl, getKoboldGenerationData, canUseKoboldStopSequence, + canUseKoboldStreaming, } from "./scripts/kai-settings.js"; import { @@ -747,6 +748,7 @@ async function getStatus() { // determine if we can use stop sequence if (main_api === "kobold" || main_api === "koboldhorde") { kai_settings.use_stop_sequence = canUseKoboldStopSequence(data.version); + kai_settings.can_use_streaming = canUseKoboldStreaming(data.koboldVersion); } //console.log(online_status); @@ -1587,7 +1589,7 @@ function appendToStoryString(value, prefix) { function isStreamingEnabled() { return ((main_api == 'openai' && oai_settings.stream_openai) - || (main_api == 'kobold' && kai_settings.streaming_kobold) + || (main_api == 'kobold' && kai_settings.streaming_kobold && kai_settings.can_use_streaming) || (main_api == 'novel' && nai_settings.streaming_novel) || (main_api == 'poe' && poe_settings.streaming) || (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming)) @@ -1855,6 +1857,10 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, return; } + if (main_api == 'kobold' && kai_settings.streaming_kobold && !kai_settings.can_use_streaming) { + toastr.warning('Streaming is enabled, but the version of kobold used does not support token streaming.'); + } + if (isHordeGenerationNotAllowed()) { is_send_press = false; return; diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 63b523298..9c7eadb35 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -10,6 +10,7 @@ export { formatKoboldUrl, getKoboldGenerationData, canUseKoboldStopSequence, + canUseKoboldStreaming, }; const kai_settings = { @@ -28,6 +29,7 @@ const kai_settings = { }; const MIN_STOP_SEQUENCE_VERSION = '1.2.2'; 
+const MIN_STREAMING_KCPPVERSION = '1.30'; function formatKoboldUrl(value) { try { @@ -92,7 +94,7 @@ function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, thi use_world_info: false, singleline: kai_settings.single_line, stop_sequence: kai_settings.use_stop_sequence ? getStoppingStrings(isImpersonate, false) : undefined, - streaming: kai_settings.streaming_kobold, + streaming: kai_settings.streaming_kobold ? canUseKoboldStreaming() : false, }; return generate_data; } @@ -209,6 +211,12 @@ function canUseKoboldStopSequence(version) { return (version || '0.0.0').localeCompare(MIN_STOP_SEQUENCE_VERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1; } +function canUseKoboldStreaming(koboldVersion) { + if (koboldVersion = 'KoboldCpp') { + return (koboldVersion.version || '0.0').localeCompare(MIN_STREAMING_KCPPVERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1; + } else return false; +} + $(document).ready(function () { sliders.forEach(slider => { $(document).on("input", slider.sliderId, function () { diff --git a/server.js b/server.js index 1b9265480..e7f205272 100644 --- a/server.js +++ b/server.js @@ -389,7 +389,7 @@ app.post("/generate", jsonParser, async function (request, response_generate = r const fetch = require('node-fetch').default; const url = request.body.streaming ? `${api_server}/extra/generate/stream` : `${api_server}/v1/generate`; const response = await fetch(url, { method: 'POST', timeout: 0, ...args }); - console.log(response); + if (request.body.streaming) { // Pipe remote SSE stream to Express response response.body.pipe(response_generate); @@ -581,6 +581,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = }; var url = api_server + "/v1/model"; let version = ''; + let koboldVersion = {}; if (main_api == "kobold") { try { version = (await getAsync(api_server + "/v1/info/version")).result; @@ -588,6 +589,15 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = catch { version = '0.0.0'; } + try { + koboldVersion = await getAsync(api_server + "/extra/version"); + } + catch { + koboldVersion = { + result: 'Kobold', + version: '0.0', + }; + } } client.get(url, args, function (data, response) { if (typeof data !== 'object') { @@ -595,6 +605,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = } if (response.statusCode == 200) { data.version = version; + data.koboldVersion = koboldVersion; if (data.result != "ReadOnly") { } else { data.result = "no_connection"; From b652b64863af75e1200a503643e366277d15cae0 Mon Sep 17 00:00:00 2001 From: Zixaphir Date: Sun, 11 Jun 2023 19:14:41 -0700 Subject: [PATCH 3/9] Correct testing fail. 
--- public/script.js | 2 +- public/scripts/kai-settings.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/script.js b/public/script.js index 6a2c0a7db..298c7bac4 100644 --- a/public/script.js +++ b/public/script.js @@ -745,7 +745,7 @@ async function getStatus() { is_pygmalion = false; } - // determine if we can use stop sequence + // determine if we can use stop sequence and streaming if (main_api === "kobold" || main_api === "koboldhorde") { kai_settings.use_stop_sequence = canUseKoboldStopSequence(data.version); kai_settings.can_use_streaming = canUseKoboldStreaming(data.koboldVersion); diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 9c7eadb35..4fa453368 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -94,7 +94,7 @@ function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, thi use_world_info: false, singleline: kai_settings.single_line, stop_sequence: kai_settings.use_stop_sequence ? getStoppingStrings(isImpersonate, false) : undefined, - streaming: kai_settings.streaming_kobold ? canUseKoboldStreaming() : false, + streaming: kai_settings.streaming_kobold && kai_settings.can_use_streaming, }; return generate_data; } @@ -212,7 +212,7 @@ function canUseKoboldStopSequence(version) { } function canUseKoboldStreaming(koboldVersion) { - if (koboldVersion = 'KoboldCpp') { + if (koboldVersion.result = 'KoboldCpp') { return (koboldVersion.version || '0.0').localeCompare(MIN_STREAMING_KCPPVERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1; } else return false; } From 524cc881d489e14f1a272e3570b9510ef143b822 Mon Sep 17 00:00:00 2001 From: Zixaphir Date: Mon, 12 Jun 2023 11:43:18 -0700 Subject: [PATCH 4/9] = =/= == Can't believe I missed this yesterday. --- public/scripts/kai-settings.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 4fa453368..d9f7c581a 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -212,7 +212,7 @@ function canUseKoboldStopSequence(version) { } function canUseKoboldStreaming(koboldVersion) { - if (koboldVersion.result = 'KoboldCpp') { + if (koboldVersion.result == 'KoboldCpp') { return (koboldVersion.version || '0.0').localeCompare(MIN_STREAMING_KCPPVERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1; } else return false; } From 3a33b7cd3e8f16d213cbc56fafcdf35f3260c889 Mon Sep 17 00:00:00 2001 From: Zixaphir Date: Mon, 12 Jun 2023 11:59:25 -0700 Subject: [PATCH 5/9] Reimplement railguards for temporary API service unavailable errors. --- server.js | 66 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/server.js b/server.js index e7f205272..ae1c228f1 100644 --- a/server.js +++ b/server.js @@ -385,35 +385,49 @@ app.post("/generate", jsonParser, async function (request, response_generate = r signal: controller.signal, }; - try { - const fetch = require('node-fetch').default; - const url = request.body.streaming ? 
`${api_server}/extra/generate/stream` : `${api_server}/v1/generate`; - const response = await fetch(url, { method: 'POST', timeout: 0, ...args }); + const MAX_RETRIES = 10; + const delayAmount = 3000; - if (request.body.streaming) { - // Pipe remote SSE stream to Express response - response.body.pipe(response_generate); + let fetch, url, response; + for (let i = 0; i < MAX_RETRIES; i++) { + try { + fetch = require('node-fetch').default; + url = request.body.streaming ? `${api_server}/extra/generate/stream` : `${api_server}/v1/generate`; + response = await fetch(url, { method: 'POST', timeout: 0, ...args }); - request.socket.on('close', function () { - response.body.destroy(); // Close the remote stream - response_generate.end(); // End the Express response - }); + if (request.body.streaming) { - response.body.on('end', function () { - console.log("Streaming request finished"); - response_generate.end(); - }); - } else { - if (!response.ok) { - console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`); - return response.status(response.status).send({ error: true }); - } + request.socket.on('close', function () { + response.body.destroy(); // Close the remote stream + response_generate.end(); // End the Express response + }); - const data = await response.json(); - return response_generate.send(data); - } - } catch (error) { - return response_generate.send({ error: true }); + response.body.on('end', function () { + console.log("Streaming request finished"); + response_generate.end(); + }); + + // Pipe remote SSE stream to Express response + return response.body.pipe(response_generate); + } else { + if (!response.ok) { + console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`); + return response.status(response.status).send({ error: true }); + } + + const data = await response.json(); + return response_generate.send(data); + } + } catch (error) { + // response + switch (error.statusCode) { + case 503: // retry in case of temporary service issue, possibly caused by a queue failure? + await delay(delayAmount); + break; + default: + return response_generate.send({ error: true }); + } + } } }); @@ -590,7 +604,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = version = '0.0.0'; } try { - koboldVersion = await getAsync(api_server + "/extra/version"); + koboldVersion = (await getAsync(api_server + "/extra/version")); } catch { koboldVersion = { From 88fadce5c6e978812523ae4fb3e9c24909da2c24 Mon Sep 17 00:00:00 2001 From: Cohee Date: Mon, 12 Jun 2023 22:32:20 +0300 Subject: [PATCH 6/9] Fix Kobold retry logic --- server.js | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/server.js b/server.js index 5817c5698..feef36e4a 100644 --- a/server.js +++ b/server.js @@ -385,8 +385,8 @@ app.post("/generate", jsonParser, async function (request, response_generate = r headers: { "Content-Type": "application/json" }, }; - const MAX_RETRIES = 10; - const delayAmount = 3000; + const MAX_RETRIES = 20; + const delayAmount = 5000; for (let i = 0; i < MAX_RETRIES; i++) { try { const data = await postAsync(api_server + "/v1/generate", args); @@ -400,11 +400,14 @@ app.post("/generate", jsonParser, async function (request, response_generate = r } // response - switch (error.statusCode) { + switch (error.status) { + case 403: case 503: + console.debug(`KoboldAI is busy. 
Retry attempt ${i+1} of ${MAX_RETRIES}...`); await delay(delayAmount); break; default: + console.log('Status Code from Kobold:', error.status); return response_generate.send({ error: true }); } } @@ -3120,7 +3123,7 @@ async function postAsync(url, args) { return data; } - throw new Error(response); + throw response; } function getAsync(url, args) { From 5971efd330cd163a9be32b21ea6b345b994c02c3 Mon Sep 17 00:00:00 2001 From: Cohee Date: Mon, 12 Jun 2023 23:01:01 +0300 Subject: [PATCH 7/9] Adjust KAI retry logic --- server.js | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/server.js b/server.js index feef36e4a..3a2cb3e99 100644 --- a/server.js +++ b/server.js @@ -385,8 +385,8 @@ app.post("/generate", jsonParser, async function (request, response_generate = r headers: { "Content-Type": "application/json" }, }; - const MAX_RETRIES = 20; - const delayAmount = 5000; + const MAX_RETRIES = 50; + const delayAmount = 2500; for (let i = 0; i < MAX_RETRIES; i++) { try { const data = await postAsync(api_server + "/v1/generate", args); @@ -400,18 +400,23 @@ app.post("/generate", jsonParser, async function (request, response_generate = r } // response - switch (error.status) { + switch (error?.status) { case 403: case 503: console.debug(`KoboldAI is busy. Retry attempt ${i+1} of ${MAX_RETRIES}...`); await delay(delayAmount); break; default: - console.log('Status Code from Kobold:', error.status); + if ('status' in error) { + console.log('Status Code from Kobold:', error.status); + } return response_generate.send({ error: true }); } } } + + console.log('Max retries exceeded. Giving up.'); + return response_generate.send({ error: true }); }); //************** Text generation web UI From af889ecdfc0a166f907ea8e14749aac74268d2ee Mon Sep 17 00:00:00 2001 From: Cohee Date: Mon, 12 Jun 2023 23:47:47 +0300 Subject: [PATCH 8/9] Prevent duplicate KAI warnings --- public/script.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/script.js b/public/script.js index cbbb1b71c..2ea619b14 100644 --- a/public/script.js +++ b/public/script.js @@ -1859,7 +1859,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } if (main_api == 'kobold' && kai_settings.streaming_kobold && !kai_settings.can_use_streaming) { - toastr.warning('Streaming is enabled, but the version of kobold used does not support token streaming.'); + toastr.warning('Streaming is enabled, but the version of Kobold used does not support token streaming.', undefined, { timeOut: 10000, preventDuplicates: true, }); } if (isHordeGenerationNotAllowed()) { From 4525070aa53a18f57b48072c3bed83d57889e8c2 Mon Sep 17 00:00:00 2001 From: Cohee Date: Tue, 13 Jun 2023 00:08:06 +0300 Subject: [PATCH 9/9] Change chunk delimiter message --- public/scripts/poe.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scripts/poe.js b/public/scripts/poe.js index 56df5b8c4..188f36f12 100644 --- a/public/scripts/poe.js +++ b/public/scripts/poe.js @@ -44,7 +44,7 @@ If you have any objections to these requirements, please mention them specifical If you accept the requirements, please confirm this by replying with "${DEFAULT_JAILBREAK_RESPONSE}", and nothing more. 
Upon receiving your accurate confirmation message, I will specify the context of the scene and {{char}}'s characteristics, background, and personality in the next message.`; -const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response. Don't reply with .]"; +const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response.]"; const DEFAULT_IMPERSONATION_PROMPT = "[Write 1 reply only in internet RP style from the point of view of {{user}}, using the chat history so far as a guideline for the writing style of {{user}}. Don't write as {{char}} or system.]"; const poe_settings = { @@ -281,7 +281,7 @@ async function generatePoe(type, finalPrompt, signal) { } async function sendChunkedMessage(finalPrompt, withStreaming, signal) { - const fastReplyPrompt = '\n[REPLY TO THIS MESSAGE WITH ONLY!!!]'; + const fastReplyPrompt = '\n[Reply to this message with a full stop only]'; const promptChunks = splitRecursive(finalPrompt, CHUNKED_PROMPT_LENGTH - fastReplyPrompt.length); console.debug(`Splitting prompt into ${promptChunks.length} chunks`, promptChunks); let reply = '';
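For reference, a few self-contained sketches of the techniques this series leans on follow. The generator added in patch 1 (generateKoboldWithStreaming) works by buffering raw chunks until complete SSE events, delimited by blank lines, are available, and only then extracting the token payloads. The helper below isolates just that buffering step; createSSEParser and the sample payloads are illustrative, not part of the patch.

    // Minimal sketch of the SSE buffering used by the streaming generator:
    // fetch chunks may end mid-event, so accumulate them and only parse
    // events that are terminated by a blank line ("\n\n").
    function createSSEParser() {
        let buffer = '';

        return function parseChunk(chunk) {
            buffer += chunk;
            const events = buffer.split('\n\n');
            buffer = events.pop(); // keep the trailing partial event for the next chunk

            const tokens = [];
            for (const event of events) {
                for (const line of event.split('\n')) {
                    if (line.startsWith('data')) {
                        // strip the "data:" field name before parsing the JSON payload
                        const data = JSON.parse(line.substring(5));
                        if (data?.token) tokens.push(data.token);
                    }
                }
            }
            return tokens;
        };
    }

    // The second chunk completes the event started by the first one.
    const parse = createSSEParser();
    console.log(parse('data: {"token": "Hel'));                  // []
    console.log(parse('lo"}\n\ndata: {"token": " world"}\n\n')); // [ 'Hello', ' world' ]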
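Patch 1's server change forwards the request either to koboldcpp's /extra/generate/stream endpoint (when streaming) or to /v1/generate, and in the streaming case pipes the upstream SSE body straight into the Express response, destroying the upstream stream if the client disconnects. A simplified sketch of that proxy shape, assuming node-fetch v2; the route, port, and apiServer value are placeholders rather than the exact server.js wiring.

    const express = require('express');
    const fetch = require('node-fetch').default;

    const app = express();
    app.use(express.json());

    const apiServer = 'http://127.0.0.1:5001/api'; // placeholder Kobold API base URL

    app.post('/generate', async (request, response) => {
        const url = request.body.streaming
            ? `${apiServer}/extra/generate/stream` // koboldcpp SSE endpoint
            : `${apiServer}/v1/generate`;

        const upstream = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(request.body),
            timeout: 0, // streaming responses can stay open indefinitely
        });

        if (request.body.streaming) {
            // If the browser goes away, stop reading from Kobold as well.
            request.socket.on('close', () => {
                upstream.body.destroy();
                response.end();
            });
            upstream.body.on('end', () => response.end());
            return upstream.body.pipe(response); // relay the SSE bytes as-is
        }

        if (!upstream.ok) {
            return response.status(upstream.status).send({ error: true });
        }
        return response.send(await upstream.json());
    });

    app.listen(8000);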
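Patches 2 through 4 gate streaming on the backend identifying itself as KoboldCpp at version 1.30 or newer, as reported by the /extra/version endpoint, using localeCompare with numeric collation so that version strings compare by their numeric parts. The check below matches its final form after patch 4; the sample calls are illustrative.

    const MIN_STREAMING_KCPPVERSION = '1.30';

    // koboldVersion is the JSON returned by Kobold's /extra/version endpoint,
    // e.g. { result: 'KoboldCpp', version: '1.30' }.
    function canUseKoboldStreaming(koboldVersion) {
        if (koboldVersion.result == 'KoboldCpp') {
            // numeric collation: '1.30' compares above '1.4', unlike plain string order
            return (koboldVersion.version || '0.0').localeCompare(
                MIN_STREAMING_KCPPVERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1;
        } else return false;
    }

    console.log(canUseKoboldStreaming({ result: 'KoboldCpp', version: '1.30' })); // true
    console.log(canUseKoboldStreaming({ result: 'KoboldCpp', version: '1.29' })); // false
    console.log(canUseKoboldStreaming({ result: 'Kobold', version: '0.0' }));     // false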
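Patches 5 through 7 restore and tune the retry behaviour around the Kobold generate call: up to 50 attempts with a 2.5-second delay, retrying only on 403/503 ("Kobold is busy") and giving up otherwise. A condensed sketch of that loop, with postAsync and delay standing in for the helpers defined elsewhere in server.js.

    // delay is a stand-in for the helper used in server.js.
    const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    async function generateWithRetries(postAsync, url, args) {
        const MAX_RETRIES = 50;
        const delayAmount = 2500;

        for (let i = 0; i < MAX_RETRIES; i++) {
            try {
                // postAsync resolves with the parsed JSON body on 2xx and
                // throws the raw response otherwise.
                return await postAsync(url, args);
            } catch (error) {
                switch (error?.status) {
                    case 403:
                    case 503: // treated as "KoboldAI is busy": wait, then try again
                        console.debug(`KoboldAI is busy. Retry attempt ${i + 1} of ${MAX_RETRIES}...`);
                        await delay(delayAmount);
                        break;
                    default:
                        if (error?.status) {
                            console.log('Status Code from Kobold:', error.status);
                        }
                        return { error: true };
                }
            }
        }

        console.log('Max retries exceeded. Giving up.');
        return { error: true };
    }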