diff --git a/public/index.html b/public/index.html index 8b259db96..f0ddf2013 100644 --- a/public/index.html +++ b/public/index.html @@ -260,6 +260,22 @@ +
+
+
+ +
+ + Display the response bit by bit as it is generated.
+ + When this is off, responses will be displayed all at once when they are complete. + +
+
+
diff --git a/public/script.js b/public/script.js index 55b909d31..2ea619b14 100644 --- a/public/script.js +++ b/public/script.js @@ -2,11 +2,13 @@ import { humanizedDateTime, favsToHotswap } from "./scripts/RossAscends-mods.js" import { encode } from "../scripts/gpt-2-3-tokenizer/mod.js"; import { GPT3BrowserTokenizer } from "../scripts/gpt-3-tokenizer/gpt3-tokenizer.js"; import { + generateKoboldWithStreaming, kai_settings, loadKoboldSettings, formatKoboldUrl, getKoboldGenerationData, canUseKoboldStopSequence, + canUseKoboldStreaming, } from "./scripts/kai-settings.js"; import { @@ -743,9 +745,10 @@ async function getStatus() { is_pygmalion = false; } - // determine if we can use stop sequence + // determine if we can use stop sequence and streaming if (main_api === "kobold" || main_api === "koboldhorde") { kai_settings.use_stop_sequence = canUseKoboldStopSequence(data.version); + kai_settings.can_use_streaming = canUseKoboldStreaming(data.koboldVersion); } //console.log(online_status); @@ -1587,6 +1590,7 @@ function appendToStoryString(value, prefix) { function isStreamingEnabled() { return ((main_api == 'openai' && oai_settings.stream_openai) + || (main_api == 'kobold' && kai_settings.streaming_kobold && kai_settings.can_use_streaming) || (main_api == 'novel' && nai_settings.streaming_novel) || (main_api == 'poe' && poe_settings.streaming) || (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming)) @@ -1854,6 +1858,10 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, return; } + if (main_api == 'kobold' && kai_settings.streaming_kobold && !kai_settings.can_use_streaming) { + toastr.warning('Streaming is enabled, but the version of Kobold used does not support token streaming.', undefined, { timeOut: 10000, preventDuplicates: true, }); + } + if (isHordeGenerationNotAllowed()) { is_send_press = false; return; @@ -2368,6 +2376,9 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, 
reject, else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') { streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal); } + else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') { + streamingProcessor.generator = await generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal); + } else { try { const response = await fetch(generate_url, { diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 842a571e2..d9f7c581a 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -1,4 +1,5 @@ import { + getRequestHeaders, saveSettingsDebounced, getStoppingStrings, } from "../script.js"; @@ -9,6 +10,7 @@ export { formatKoboldUrl, getKoboldGenerationData, canUseKoboldStopSequence, + canUseKoboldStreaming, }; const kai_settings = { @@ -23,9 +25,11 @@ const kai_settings = { rep_pen_slope: 0.9, single_line: false, use_stop_sequence: false, + streaming_kobold: false, }; const MIN_STOP_SEQUENCE_VERSION = '1.2.2'; +const MIN_STREAMING_KCPPVERSION = '1.30'; function formatKoboldUrl(value) { try { @@ -58,6 +62,10 @@ function loadKoboldSettings(preset) { kai_settings.single_line = preset.single_line; $('#single_line').prop('checked', kai_settings.single_line); } + if (preset.hasOwnProperty('streaming_kobold')) { + kai_settings.streaming_kobold = preset.streaming_kobold; + $('#streaming_kobold').prop('checked', kai_settings.streaming_kobold); + } } function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, this_max_context, isImpersonate) { @@ -86,10 +94,53 @@ function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, thi use_world_info: false, singleline: kai_settings.single_line, stop_sequence: kai_settings.use_stop_sequence ? 
getStoppingStrings(isImpersonate, false) : undefined, + streaming: kai_settings.streaming_kobold && kai_settings.can_use_streaming, }; return generate_data; } +export async function generateKoboldWithStreaming(generate_data, signal) { + const response = await fetch('/generate', { + headers: getRequestHeaders(), + body: JSON.stringify(generate_data), + method: 'POST', + signal: signal, + }); + + return async function* streamData() { + const decoder = new TextDecoder(); + const reader = response.body.getReader(); + let getMessage = ''; + let messageBuffer = ""; + while (true) { + const { done, value } = await reader.read(); + let response = decoder.decode(value); + let eventList = []; + + // ReadableStream's buffer is not guaranteed to contain full SSE messages as they arrive in chunks + // We need to buffer chunks until we have one or more full messages (separated by double newlines) + messageBuffer += response; + eventList = messageBuffer.split("\n\n"); + // Last element will be an empty string or a leftover partial message + messageBuffer = eventList.pop(); + + for (let event of eventList) { + for (let subEvent of event.split('\n')) { + if (subEvent.startsWith("data")) { + let data = JSON.parse(subEvent.substring(5)); + getMessage += (data?.token || ''); + yield getMessage; + } + } + } + + if (done) { + return; + } + } + } +} + const sliders = [ { name: "temp", @@ -160,6 +211,12 @@ function canUseKoboldStopSequence(version) { return (version || '0.0.0').localeCompare(MIN_STOP_SEQUENCE_VERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1; } +function canUseKoboldStreaming(koboldVersion) { + if (koboldVersion?.result == 'KoboldCpp') { + return (koboldVersion.version || '0.0').localeCompare(MIN_STREAMING_KCPPVERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1; + } else return false; +} + $(document).ready(function () { sliders.forEach(slider => { $(document).on("input", slider.sliderId, function () { @@ -176,4 +233,10 @@
$(document).ready(function () { kai_settings.single_line = value; saveSettingsDebounced(); }); -}); \ No newline at end of file + + $('#streaming_kobold').on("input", function () { + const value = $(this).prop('checked'); + kai_settings.streaming_kobold = value; + saveSettingsDebounced(); + }); +}); diff --git a/public/scripts/poe.js b/public/scripts/poe.js index 56df5b8c4..188f36f12 100644 --- a/public/scripts/poe.js +++ b/public/scripts/poe.js @@ -44,7 +44,7 @@ If you have any objections to these requirements, please mention them specifical If you accept the requirements, please confirm this by replying with "${DEFAULT_JAILBREAK_RESPONSE}", and nothing more. Upon receiving your accurate confirmation message, I will specify the context of the scene and {{char}}'s characteristics, background, and personality in the next message.`; -const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response. Don't reply with .]"; +const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response.]"; const DEFAULT_IMPERSONATION_PROMPT = "[Write 1 reply only in internet RP style from the point of view of {{user}}, using the chat history so far as a guideline for the writing style of {{user}}. 
Don't write as {{char}} or system.]"; const poe_settings = { @@ -281,7 +281,7 @@ async function generatePoe(type, finalPrompt, signal) { } async function sendChunkedMessage(finalPrompt, withStreaming, signal) { - const fastReplyPrompt = '\n[REPLY TO THIS MESSAGE WITH ONLY!!!]'; + const fastReplyPrompt = '\n[Reply to this message with a full stop only]'; const promptChunks = splitRecursive(finalPrompt, CHUNKED_PROMPT_LENGTH - fastReplyPrompt.length); console.debug(`Splitting prompt into ${promptChunks.length} chunks`, promptChunks); let reply = ''; diff --git a/server.js b/server.js index 5817c5698..53301e990 100644 --- a/server.js +++ b/server.js @@ -381,34 +381,61 @@ app.post("/generate", jsonParser, async function (request, response_generate = r console.log(this_settings); const args = { body: JSON.stringify(this_settings), - signal: controller.signal, headers: { "Content-Type": "application/json" }, + signal: controller.signal, }; - const MAX_RETRIES = 10; - const delayAmount = 3000; + const MAX_RETRIES = 50; + const delayAmount = 2500; + let fetch, url, response; for (let i = 0; i < MAX_RETRIES; i++) { try { - const data = await postAsync(api_server + "/v1/generate", args); - console.log(data); - return response_generate.send(data); - } - catch (error) { - // data - if (typeof error['text'] === 'function') { - console.log(await error.text()); - } + fetch = require('node-fetch').default; + url = request.body.streaming ? 
`${api_server}/extra/generate/stream` : `${api_server}/v1/generate`; + response = await fetch(url, { method: 'POST', timeout: 0, ...args }); + if (request.body.streaming) { + + request.socket.on('close', function () { + response.body.destroy(); // Close the remote stream + response_generate.end(); // End the Express response + }); + + response.body.on('end', function () { + console.log("Streaming request finished"); + response_generate.end(); + }); + + // Pipe remote SSE stream to Express response + return response.body.pipe(response_generate); + } else { + if (!response.ok) { + console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`); + return response_generate.status(response.status).send({ error: true }); + } + + const data = await response.json(); + return response_generate.send(data); + } + } catch (error) { // response - switch (error.statusCode) { - case 503: + switch (error?.status) { + case 403: + case 503: // retry in case of temporary service issue, possibly caused by a queue failure? + console.debug(`KoboldAI is busy. Retry attempt ${i+1} of ${MAX_RETRIES}...`); + await delay(delayAmount); break; default: + if ('status' in error) { + console.log('Status Code from Kobold:', error.status); + } return response_generate.send({ error: true }); } } } + + console.log('Max retries exceeded.
Giving up.'); + return response_generate.send({ error: true }); }); //************** Text generation web UI @@ -575,6 +602,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = }; var url = api_server + "/v1/model"; let version = ''; + let koboldVersion = {}; if (main_api == "kobold") { try { version = (await getAsync(api_server + "/v1/info/version")).result; @@ -582,6 +610,15 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = catch { version = '0.0.0'; } + try { + koboldVersion = (await getAsync(api_server + "/extra/version")); + } + catch { + koboldVersion = { + result: 'Kobold', + version: '0.0', + }; + } } client.get(url, args, function (data, response) { if (typeof data !== 'object') { @@ -589,6 +626,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus = } if (response.statusCode == 200) { data.version = version; + data.koboldVersion = koboldVersion; if (data.result != "ReadOnly") { } else { data.result = "no_connection"; @@ -3120,7 +3158,7 @@ async function postAsync(url, args) { return data; } - throw new Error(response); + throw response; } function getAsync(url, args) {