diff --git a/public/scripts/custom-request.js b/public/scripts/custom-request.js
index d6fccf1a9..d19a62563 100644
--- a/public/scripts/custom-request.js
+++ b/public/scripts/custom-request.js
@@ -3,10 +3,13 @@ import { extractMessageFromData, getGenerateUrl, getRequestHeaders } from '../sc
 import { getTextGenServer } from './textgen-settings.js';
 import { extractReasoningFromData } from './reasoning.js';
 import { formatInstructModeChat, formatInstructModePrompt, names_behavior_types } from './instruct-mode.js';
+import { getStreamingReply, tryParseStreamingError } from './openai.js';
+import EventSourceStream from './sse-stream.js';
 
 // #region Type Definitions
 /**
  * @typedef {Object} TextCompletionRequestBase
+ * @property {boolean?} [stream=false] - Whether to stream the response
  * @property {number} max_tokens - Maximum number of tokens to generate
  * @property {string} [model] - Optional model name
  * @property {string} api_type - Type of API to use
@@ -17,6 +20,7 @@ import { formatInstructModeChat, formatInstructModePrompt, names_behavior_types
 
 /**
  * @typedef {Object} TextCompletionPayloadBase
+ * @property {boolean?} [stream=false] - Whether to stream the response
  * @property {string} prompt - The text prompt for completion
  * @property {number} max_tokens - Maximum number of tokens to generate
  * @property {number} max_new_tokens - Alias for max_tokens
@@ -36,6 +40,7 @@ import { formatInstructModeChat, formatInstructModePrompt, names_behavior_types
 
 /**
  * @typedef {Object} ChatCompletionPayloadBase
+ * @property {boolean?} [stream=false] - Whether to stream the response
  * @property {ChatCompletionMessage[]} messages - Array of chat messages
  * @property {string} [model] - Optional model name to use for completion
  * @property {string} chat_completion_source - Source provider for chat completion
@@ -52,10 +57,20 @@ import { formatInstructModeChat, formatInstructModePrompt, names_behavior_types
  * @property {string} reasoning - Extracted reasoning.
  */
 
+/**
+ * @typedef {Object} StreamResponse
+ * @property {string} text - Generated text.
+ * @property {string[]} swipes - Generated swipes
+ * @property {Object} state - Generated state
+ * @property {string?} [state.reasoning] - Generated reasoning
+ * @property {string?} [state.image] - Generated image
+ * @returns {StreamResponse}
+ */
+
 // #endregion
 
 /**
- * Creates & sends a text completion request. Streaming is not supported.
+ * Creates & sends a text completion request.
  */
 export class TextCompletionService {
     static TYPE = 'textgenerationwebui';
@@ -64,9 +79,10 @@ export class TextCompletionService {
      * @param {Record<string, any> & TextCompletionRequestBase & {prompt: string}} custom
      * @returns {TextCompletionPayload}
      */
-    static createRequestData({ prompt, max_tokens, model, api_type, api_server, temperature, min_p, ...props }) {
+    static createRequestData({ stream = false, prompt, max_tokens, model, api_type, api_server, temperature, min_p, ...props }) {
         const payload = {
             ...props,
+            stream,
             prompt,
             max_tokens,
             max_new_tokens: max_tokens,
@@ -75,7 +91,6 @@ export class TextCompletionService {
             api_server: api_server ?? getTextGenServer(api_type),
             temperature,
             min_p,
-            stream: false,
         };
 
         // Remove undefined values to avoid API errors
@@ -92,34 +107,81 @@ export class TextCompletionService {
      * Sends a text completion request to the specified server
      * @param {TextCompletionPayload} data Request data
      * @param {boolean?} extractData Extract message from the response. Default true
-     * @returns {Promise<ExtractedData | any>} Extracted data or the raw response
+     * @param {AbortSignal?} signal
+     * @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
      * @throws {Error}
      */
-    static async sendRequest(data, extractData = true) {
-        const response = await fetch(getGenerateUrl(this.TYPE), {
+    static async sendRequest(data, extractData = true, signal = null) {
+        if (!data.stream) {
+            const response = await fetch(getGenerateUrl(this.TYPE), {
+                method: 'POST',
+                headers: getRequestHeaders(),
+                cache: 'no-cache',
+                body: JSON.stringify(data),
+                signal: signal ?? new AbortController().signal,
+            });
+
+            const json = await response.json();
+            if (!response.ok || json.error) {
+                throw json;
+            }
+
+            if (!extractData) {
+                return json;
+            }
+
+            return {
+                content: extractMessageFromData(json, this.TYPE),
+                reasoning: extractReasoningFromData(json, {
+                    mainApi: this.TYPE,
+                    textGenType: data.api_type,
+                    ignoreShowThoughts: true,
+                }),
+            };
+        }
+
+        const response = await fetch('/api/backends/text-completions/generate', {
             method: 'POST',
             headers: getRequestHeaders(),
             cache: 'no-cache',
             body: JSON.stringify(data),
-            signal: new AbortController().signal,
+            signal: signal ?? new AbortController().signal,
         });
 
-        const json = await response.json();
-        if (!response.ok || json.error) {
-            throw json;
+        if (!response.ok) {
+            const text = await response.text();
+            tryParseStreamingError(response, text, true);
+
+            throw new Error(`Got response status ${response.status}`);
         }
 
-        if (!extractData) {
-            return json;
-        }
+        const eventStream = new EventSourceStream();
+        response.body.pipeThrough(eventStream);
+        const reader = eventStream.readable.getReader();
+        return async function* streamData() {
+            let text = '';
+            const swipes = [];
+            const state = { reasoning: '' };
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done) return;
+                if (value.data === '[DONE]') return;
 
-        return {
-            content: extractMessageFromData(json, this.TYPE),
-            reasoning: extractReasoningFromData(json, {
-                mainApi: this.TYPE,
-                textGenType: data.api_type,
-                ignoreShowThoughts: true,
-            }),
+                tryParseStreamingError(response, value.data, true);
+
+                let data = JSON.parse(value.data);
+
+                if (data?.choices?.[0]?.index > 0) {
+                    const swipeIndex = data.choices[0].index - 1;
+                    swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.choices[0].text;
+                } else {
+                    const newText = data?.choices?.[0]?.text || data?.content || '';
+                    text += newText;
+                    state.reasoning += data?.choices?.[0]?.reasoning ?? '';
+                }
+
+                yield { text, swipes, state };
+            }
         };
     }
 
@@ -130,13 +192,15 @@
      * @param {string?} [options.presetName] - Name of the preset to use for generation settings
      * @param {string?} [options.instructName] - Name of instruct preset for message formatting
      * @param {boolean} extractData - Whether to extract structured data from response
-     * @returns {Promise<ExtractedData | any>} Extracted data or the raw response
+     * @param {AbortSignal?} [signal]
+     * @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
      * @throws {Error}
      */
     static async processRequest(
         custom,
         options = {},
         extractData = true,
+        signal = null,
     ) {
         const { presetName, instructName } = options;
         let requestData = { ...custom };
@@ -220,7 +284,7 @@
         // @ts-ignore
         const data = this.createRequestData(requestData);
 
-        return await this.sendRequest(data, extractData);
+        return await this.sendRequest(data, extractData, signal);
     }
 
     /**
@@ -256,7 +320,7 @@
 }
 
 /**
- * Creates & sends a chat completion request. Streaming is not supported.
+ * Creates & sends a chat completion request.
  */
 export class ChatCompletionService {
     static TYPE = 'openai';
@@ -265,16 +329,16 @@
      * @param {ChatCompletionPayload} custom
      * @returns {ChatCompletionPayload}
      */
-    static createRequestData({ messages, model, chat_completion_source, max_tokens, temperature, custom_url, ...props }) {
+    static createRequestData({ stream = false, messages, model, chat_completion_source, max_tokens, temperature, custom_url, ...props }) {
         const payload = {
             ...props,
+            stream,
             messages,
             model,
             chat_completion_source,
             max_tokens,
             temperature,
             custom_url,
-            stream: false,
         };
 
         // Remove undefined values to avoid API errors
@@ -291,34 +355,74 @@
      * Sends a chat completion request
      * @param {ChatCompletionPayload} data Request data
      * @param {boolean?} extractData Extract message from the response. Default true
-     * @returns {Promise<ExtractedData | any>} Extracted data or the raw response
+     * @param {AbortSignal?} signal Abort signal
+     * @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
      * @throws {Error}
      */
-    static async sendRequest(data, extractData = true) {
+    static async sendRequest(data, extractData = true, signal = null) {
         const response = await fetch('/api/backends/chat-completions/generate', {
             method: 'POST',
             headers: getRequestHeaders(),
             cache: 'no-cache',
             body: JSON.stringify(data),
-            signal: new AbortController().signal,
+            signal: signal ?? new AbortController().signal,
         });
 
-        const json = await response.json();
-        if (!response.ok || json.error) {
-            throw json;
+        if (!data.stream) {
+            const json = await response.json();
+            if (!response.ok || json.error) {
+                throw json;
+            }
+
+            if (!extractData) {
+                return json;
+            }
+
+            return {
+                content: extractMessageFromData(json, this.TYPE),
+                reasoning: extractReasoningFromData(json, {
+                    mainApi: this.TYPE,
+                    textGenType: data.chat_completion_source,
+                    ignoreShowThoughts: true,
+                }),
+            };
         }
 
-        if (!extractData) {
-            return json;
+        if (!response.ok) {
+            const text = await response.text();
+            tryParseStreamingError(response, text, true);
+
+            throw new Error(`Got response status ${response.status}`);
         }
 
-        return {
-            content: extractMessageFromData(json, this.TYPE),
-            reasoning: extractReasoningFromData(json, {
-                mainApi: this.TYPE,
-                textGenType: data.chat_completion_source,
-                ignoreShowThoughts: true,
-            }),
+        const eventStream = new EventSourceStream();
+        response.body.pipeThrough(eventStream);
+        const reader = eventStream.readable.getReader();
+        return async function* streamData() {
+            let text = '';
+            const swipes = [];
+            const state = { reasoning: '', image: '' };
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done) return;
+                const rawData = value.data;
+                if (rawData === '[DONE]') return;
+                tryParseStreamingError(response, rawData, true);
+                const parsed = JSON.parse(rawData);
+
+                const reply = getStreamingReply(parsed, state, {
+                    chatCompletionSource: data.chat_completion_source,
+                    ignoreShowThoughts: true,
+                });
+                if (Array.isArray(parsed?.choices) && parsed?.choices?.[0]?.index > 0) {
+                    const swipeIndex = parsed.choices[0].index - 1;
+                    swipes[swipeIndex] = (swipes[swipeIndex] || '') + reply;
+                } else {
+                    text += reply;
+                }
+
+                yield { text, swipes: swipes, state };
+            }
         };
     }
 
@@ -327,11 +431,12 @@
      * @param {ChatCompletionPayload} custom
      * @param {Object} options - Configuration options
      * @param {string?} [options.presetName] - Name of the preset to use for generation settings
-     * @param {boolean} extractData - Whether to extract structured data from response
-     * @returns {Promise<ExtractedData | any>} Extracted data or the raw response
+     * @param {boolean} [extractData=true] - Whether to extract structured data from response
+     * @param {AbortSignal?} [signal] - Abort signal
+     * @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
      * @throws {Error}
      */
-    static async processRequest(custom, options, extractData = true) {
+    static async processRequest(custom, options, extractData = true, signal = null) {
         const { presetName } = options;
         let requestData = { ...custom };
 
@@ -354,7 +459,7 @@
 
         const data = this.createRequestData(requestData);
 
-        return await this.sendRequest(data, extractData);
+        return await this.sendRequest(data, extractData, signal);
     }
 
     /**
diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js
index d5549c40a..144ac3d6b 100644
--- a/public/scripts/extensions/shared.js
+++ b/public/scripts/extensions/shared.js
@@ -276,10 +276,12 @@ export async function getWebLlmContextSize() {
 }
 
 /**
- * It uses the profiles to send a generate request to the API. Doesn't support streaming.
+ * It uses the profiles to send a generate request to the API.
 */
 export class ConnectionManagerRequestService {
     static defaultSendRequestParams = {
+        stream: false,
+        signal: null,
         extractData: true,
         includePreset: true,
         includeInstruct: true,
@@ -296,11 +298,11 @@ export class ConnectionManagerRequestService {
      * @param {string} profileId
      * @param {string | (import('../custom-request.js').ChatCompletionMessage & {ignoreInstruct?: boolean})[]} prompt
      * @param {number} maxTokens
-     * @param {{extractData?: boolean, includePreset?: boolean, includeInstruct?: boolean}} custom - default values are true
-     * @returns {Promise<import('../custom-request.js').ExtractedData | any>} Extracted data or the raw response
+     * @param {{stream?: boolean, signal?: AbortSignal, extractData?: boolean, includePreset?: boolean, includeInstruct?: boolean}} custom - defaults: stream=false, signal=null, extractData/includePreset/includeInstruct=true
+     * @returns {Promise<import('../custom-request.js').ExtractedData | (() => AsyncGenerator<import('../custom-request.js').StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
      */
     static async sendRequest(profileId, prompt, maxTokens, custom = this.defaultSendRequestParams) {
-        const { extractData, includePreset, includeInstruct } = { ...this.defaultSendRequestParams, ...custom };
+        const { stream, signal, extractData, includePreset, includeInstruct } = { ...this.defaultSendRequestParams, ...custom };
 
         const context = SillyTavern.getContext();
         if (context.extensionSettings.disabledExtensions.includes('connection-manager')) {
@@ -319,6 +321,7 @@
             const messages = Array.isArray(prompt) ? prompt : [{ role: 'user', content: prompt }];
 
             return await context.ChatCompletionService.processRequest({
+                stream,
                 messages,
                 max_tokens: maxTokens,
                 model: profile.model,
@@ -326,7 +329,7 @@
                 custom_url: profile['api-url'],
             }, {
                 presetName: includePreset ? profile.preset : undefined,
-            }, extractData);
+            }, extractData, signal);
         }
         case 'textgenerationwebui': {
             if (!selectedApiMap.type) {
@@ -334,6 +337,7 @@
             }
 
             return await context.TextCompletionService.processRequest({
+                stream,
                 prompt,
                 max_tokens: maxTokens,
                 model: profile.model,
@@ -342,7 +346,7 @@
             }, {
                 instructName: includeInstruct ? profile.instruct : undefined,
                 presetName: includePreset ? profile.preset : undefined,
-            }, extractData);
+            }, extractData, signal);
         }
         default: {
             throw new Error(`Unknown API type ${selectedApiMap.selected}`);
diff --git a/public/scripts/openai.js b/public/scripts/openai.js
index 36e224bf9..6626ac26e 100644
--- a/public/scripts/openai.js
+++ b/public/scripts/openai.js
@@ -1444,8 +1444,9 @@ export async function prepareOpenAIMessages({
  * Handles errors during streaming requests.
  * @param {Response} response
  * @param {string} decoded - response text or decoded stream data
+ * @param {boolean?} [suppressToastr=false]
  */
-function tryParseStreamingError(response, decoded) {
+export function tryParseStreamingError(response, decoded, suppressToastr = false) {
     try {
         const data = JSON.parse(decoded);
 
@@ -1453,19 +1454,19 @@ function tryParseStreamingError(response, decoded) {
             return;
         }
 
-        checkQuotaError(data);
-        checkModerationError(data);
+        checkQuotaError(data, suppressToastr);
+        checkModerationError(data, suppressToastr);
 
         // these do not throw correctly (equiv to Error("[object Object]"))
         // if trying to fix "[object Object]" displayed to users, start here
         if (data.error) {
-            toastr.error(data.error.message || response.statusText, 'Chat Completion API');
+            !suppressToastr && toastr.error(data.error.message || response.statusText, 'Chat Completion API');
             throw new Error(data);
         }
 
         if (data.message) {
-            toastr.error(data.message, 'Chat Completion API');
+            !suppressToastr && toastr.error(data.message, 'Chat Completion API');
             throw new Error(data);
         }
     }
@@ -1477,16 +1478,17 @@ function tryParseStreamingError(response, decoded) {
 /**
  * Checks if the response contains a quota error and displays a popup if it does.
  * @param data
+ * @param {boolean?} [suppressToastr=false]
  * @returns {void}
 * @throws {object} - response JSON
 */
-function checkQuotaError(data) {
+function checkQuotaError(data, suppressToastr = false) {
     if (!data) {
         return;
     }
 
     if (data.quota_error) {
-        renderTemplateAsync('quotaError').then((html) => Popup.show.text('Quota Error', html));
+        !suppressToastr && renderTemplateAsync('quotaError').then((html) => Popup.show.text('Quota Error', html));
 
         // this does not throw correctly (equiv to Error("[object Object]"))
         // if trying to fix "[object Object]" displayed to users, start here
@@ -1494,9 +1496,13 @@ function checkQuotaError(data) {
     }
 }
 
-function checkModerationError(data) {
+/**
+ * @param {any} data
+ * @param {boolean?} [suppressToastr=false]
+ */
+function checkModerationError(data, suppressToastr = false) {
     const moderationError = data?.error?.message?.includes('requires moderation');
-    if (moderationError) {
+    if (moderationError && !suppressToastr) {
         const moderationReason = `Reasons: ${data?.error?.metadata?.reasons?.join(', ') ?? '(N/A)'}`;
         const flaggedText = data?.error?.metadata?.flagged_input ?? '(N/A)';
         toastr.info(flaggedText, moderationReason, { timeOut: 10000 });
@@ -2255,37 +2261,43 @@ async function sendOpenAIRequest(type, messages, signal) {
  * Extracts the reply from the response data from a chat completions-like source
  * @param {object} data Response data from the chat completions-like source
  * @param {object} state Additional state to keep track of
+ * @param {object} options Additional options
+ * @param {string?} [options.chatCompletionSource] Chat completion source
+ * @param {boolean?} [options.ignoreShowThoughts] Ignore the show_thoughts setting and always extract reasoning
  * @returns {string} The reply extracted from the response data
  */
-function getStreamingReply(data, state) {
-    if (oai_settings.chat_completion_source === chat_completion_sources.CLAUDE) {
-        if (oai_settings.show_thoughts) {
+export function getStreamingReply(data, state, { chatCompletionSource = null, ignoreShowThoughts = false } = {}) {
+    const chat_completion_source = chatCompletionSource ?? oai_settings.chat_completion_source;
+    const show_thoughts = ignoreShowThoughts ? true : oai_settings.show_thoughts;
+
+    if (chat_completion_source === chat_completion_sources.CLAUDE) {
+        if (show_thoughts) {
             state.reasoning += data?.delta?.thinking || '';
         }
         return data?.delta?.text || '';
-    } else if (oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE) {
+    } else if (chat_completion_source === chat_completion_sources.MAKERSUITE) {
         const inlineData = data?.candidates?.[0]?.content?.parts?.find(x => x.inlineData)?.inlineData;
         if (inlineData) {
             state.image = `data:${inlineData.mimeType};base64,${inlineData.data}`;
         }
-        if (oai_settings.show_thoughts) {
+        if (show_thoughts) {
             state.reasoning += (data?.candidates?.[0]?.content?.parts?.filter(x => x.thought)?.map(x => x.text)?.[0] || '');
         }
         return data?.candidates?.[0]?.content?.parts?.filter(x => !x.thought)?.map(x => x.text)?.[0] || '';
-    } else if (oai_settings.chat_completion_source === chat_completion_sources.COHERE) {
+    } else if (chat_completion_source === chat_completion_sources.COHERE) {
         return data?.delta?.message?.content?.text || data?.delta?.message?.tool_plan || '';
-    } else if (oai_settings.chat_completion_source === chat_completion_sources.DEEPSEEK) {
-        if (oai_settings.show_thoughts) {
+    } else if (chat_completion_source === chat_completion_sources.DEEPSEEK) {
+        if (show_thoughts) {
             state.reasoning += (data.choices?.filter(x => x?.delta?.reasoning_content)?.[0]?.delta?.reasoning_content || '');
         }
         return data.choices?.[0]?.delta?.content || '';
-    } else if (oai_settings.chat_completion_source === chat_completion_sources.OPENROUTER) {
-        if (oai_settings.show_thoughts) {
+    } else if (chat_completion_source === chat_completion_sources.OPENROUTER) {
+        if (show_thoughts) {
             state.reasoning += (data.choices?.filter(x => x?.delta?.reasoning)?.[0]?.delta?.reasoning || '');
         }
         return data.choices?.[0]?.delta?.content ?? data.choices?.[0]?.message?.content ?? data.choices?.[0]?.text ?? '';
-    } else if (oai_settings.chat_completion_source === chat_completion_sources.CUSTOM) {
-        if (oai_settings.show_thoughts) {
+    } else if (chat_completion_source === chat_completion_sources.CUSTOM) {
+        if (show_thoughts) {
             state.reasoning += data.choices?.filter(x => x?.delta?.reasoning_content)?.[0]?.delta?.reasoning_content ?? data.choices?.filter(x => x?.delta?.reasoning)?.[0]?.delta?.reasoning ??
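
For reference, a minimal consumer of the new return contract might look like the sketch below. It is an editor's illustration, not part of the diff: `streamExample` and `profileId` are hypothetical names, and the import path assumes the caller lives next to `public/scripts/extensions/shared.js`. The discrimination on `typeof result === 'function'` and the yielded `{ text, swipes, state }` shape follow directly from the `sendRequest` JSDoc above.

// Usage sketch (assumptions noted above): stream a completion via a connection profile.
import { ConnectionManagerRequestService } from './shared.js';

async function streamExample(profileId) {
    const controller = new AbortController();
    const result = await ConnectionManagerRequestService.sendRequest(
        profileId,
        'Hello!',
        256,
        { stream: true, signal: controller.signal },
    );

    if (typeof result === 'function') {
        // Streaming: the generator yields cumulative { text, swipes, state } chunks.
        for await (const { text, state } of result()) {
            console.log(text, state.reasoning);
        }
    } else {
        // Non-streaming (stream: false): extracted { content, reasoning }.
        console.log(result.content, result.reasoning);
    }
}

Calling `controller.abort()` cancels the underlying fetch in both branches, since the signal is now threaded through `processRequest` into `sendRequest`.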