Merge branch 'staging' into gork-ai

Cohee
2025-04-11 21:15:54 +03:00
6 changed files with 107 additions and 29 deletions

View File

@@ -43,10 +43,12 @@ import EventSourceStream from './sse-stream.js';
* @property {boolean?} [stream=false] - Whether to stream the response
* @property {ChatCompletionMessage[]} messages - Array of chat messages
* @property {string} [model] - Optional model name to use for completion
- * @property {string} chat_completion_source - Source provider for chat completion
+ * @property {string} chat_completion_source - Source provider
* @property {number} max_tokens - Maximum number of tokens to generate
* @property {number} [temperature] - Optional temperature parameter for response randomness
- * @property {string} [custom_url] - Optional custom URL for chat completion
+ * @property {string} [custom_url] - Optional custom URL
+ * @property {string} [reverse_proxy] - Optional reverse proxy URL
+ * @property {string} [proxy_password] - Optional proxy password
*/
/** @typedef {Record<string, any> & ChatCompletionPayloadBase} ChatCompletionPayload */
@@ -80,7 +82,6 @@ export class TextCompletionService {
*/
static createRequestData({ stream = false, prompt, max_tokens, model, api_type, api_server, temperature, min_p, ...props }) {
const payload = {
- ...props,
stream,
prompt,
max_tokens,
@@ -90,6 +91,7 @@ export class TextCompletionService {
api_server: api_server ?? getTextGenServer(api_type),
temperature,
min_p,
+ ...props,
};
// Remove undefined values to avoid API errors
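Note on the `...props` move in the two hunks above: in a JavaScript object literal, later keys win when names collide, so spreading `...props` after the named fields means caller-supplied extras now override those fields instead of being overridden by them. A minimal sketch of the precedence rule (values are illustrative, not from the patch):

    // Later properties win when keys collide in an object literal.
    const named = { stream: false, temperature: 1 };
    const extras = { temperature: 0.2 };

    const spreadFirst = { ...extras, ...named }; // old order: { stream: false, temperature: 1 }
    const spreadLast = { ...named, ...extras };  // new order: { stream: false, temperature: 0.2 }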
@@ -387,9 +389,8 @@ export class ChatCompletionService {
* @param {ChatCompletionPayload} custom
* @returns {ChatCompletionPayload}
*/
- static createRequestData({ stream = false, messages, model, chat_completion_source, max_tokens, temperature, custom_url, ...props }) {
+ static createRequestData({ stream = false, messages, model, chat_completion_source, max_tokens, temperature, custom_url, reverse_proxy, proxy_password, ...props }) {
const payload = {
- ...props,
stream,
messages,
model,
@@ -397,6 +398,11 @@ export class ChatCompletionService {
max_tokens,
temperature,
custom_url,
+ reverse_proxy,
+ proxy_password,
+ use_makersuite_sysprompt: true,
+ claude_use_sysprompt: true,
+ ...props,
};
// Remove undefined values to avoid API errors
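Because `...props` is now spread after the hard-coded `use_makersuite_sysprompt: true` and `claude_use_sysprompt: true`, a caller can still switch those flags off by passing them explicitly. A hedged usage sketch (the source name and field values are placeholders, not taken from the patch):

    // Illustrative call only; values are placeholders.
    const payload = ChatCompletionService.createRequestData({
        messages: [{ role: 'user', content: 'Hello' }],
        chat_completion_source: 'openai',
        max_tokens: 256,
        reverse_proxy: 'http://127.0.0.1:8080', // forwarded via the new named field
        proxy_password: 'secret',
        claude_use_sysprompt: false,            // falls into ...props and overrides the true default
    });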

View File

@@ -1,7 +1,7 @@
import { CONNECT_API_MAP, getRequestHeaders } from '../../script.js';
import { extension_settings, openThirdPartyExtensionMenu } from '../extensions.js';
import { t } from '../i18n.js';
- import { oai_settings } from '../openai.js';
+ import { oai_settings, proxies } from '../openai.js';
import { SECRET_KEYS, secret_state } from '../secrets.js';
import { textgen_types, textgenerationwebui_settings } from '../textgen-settings.js';
import { getTokenCountAsync } from '../tokenizers.js';
@@ -310,9 +310,10 @@ export class ConnectionManagerRequestService {
* @param {boolean?} [custom.includePreset=true]
* @param {boolean?} [custom.includeInstruct=true]
* @param {Partial<InstructSettings>?} [custom.instructSettings] Override instruct settings
+ * @param {Record<string, any>} [overridePayload] - Override payload for the request
* @returns {Promise<import('../custom-request.js').ExtractedData | (() => AsyncGenerator<import('../custom-request.js').StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
*/
- static async sendRequest(profileId, prompt, maxTokens, custom = this.defaultSendRequestParams) {
+ static async sendRequest(profileId, prompt, maxTokens, custom = this.defaultSendRequestParams, overridePayload = {}) {
const { stream, signal, extractData, includePreset, includeInstruct, instructSettings } = { ...this.defaultSendRequestParams, ...custom };
const context = SillyTavern.getContext();
@@ -330,6 +331,8 @@ export class ConnectionManagerRequestService {
throw new Error(`API type ${selectedApiMap.selected} does not support chat completions`);
}
+ const proxyPreset = proxies.find((p) => p.name === profile.proxy);
const messages = Array.isArray(prompt) ? prompt : [{ role: 'user', content: prompt }];
return await context.ChatCompletionService.processRequest({
stream,
@@ -338,6 +341,9 @@ export class ConnectionManagerRequestService {
model: profile.model,
chat_completion_source: selectedApiMap.source,
custom_url: profile['api-url'],
+ reverse_proxy: proxyPreset?.url,
+ proxy_password: proxyPreset?.password,
+ ...overridePayload,
}, {
presetName: includePreset ? profile.preset : undefined,
}, extractData, signal);
@@ -354,6 +360,7 @@ export class ConnectionManagerRequestService {
model: profile.model,
api_type: selectedApiMap.type,
api_server: profile['api-url'],
+ ...overridePayload,
}, {
instructName: includeInstruct ? profile.instruct : undefined,
presetName: includePreset ? profile.preset : undefined,
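With the extra `overridePayload` argument, callers of `ConnectionManagerRequestService.sendRequest` can inject or override arbitrary request fields, and profiles with a proxy preset now get `reverse_proxy` and `proxy_password` filled in automatically. A hedged usage sketch (the profile id, prompt, and override values are placeholders):

    // Hypothetical extension code; the profile id and overrides are examples only.
    const data = await ConnectionManagerRequestService.sendRequest(
        profileId,                        // id of an existing Connection Manager profile
        'Summarize the last scene.',      // a string prompt is wrapped as a single user message for chat completion sources
        512,                              // maxTokens
        { stream: false },                // custom params, merged over defaultSendRequestParams
        { temperature: 0.2, top_p: 0.9 }, // overridePayload, spread last into the request body
    );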

View File

@@ -4192,6 +4192,80 @@ function getMaxContextWindowAI(value) {
}
}
/**
* Get the maximum context size for the Mistral model
* @param {string} model Model identifier
* @param {boolean} isUnlocked Whether context limits are unlocked
* @returns {number} Maximum context size in tokens
*/
function getMistralMaxContext(model, isUnlocked) {
if (isUnlocked) {
return unlocked_max;
}
if (Array.isArray(model_list) && model_list.length > 0) {
const contextLength = model_list.find((record) => record.id === model)?.max_context_length;
if (contextLength) {
return contextLength;
}
}
const contextMap = {
'codestral-2411-rc5': 262144,
'codestral-2412': 262144,
'codestral-2501': 262144,
'codestral-latest': 262144,
'codestral-mamba-2407': 262144,
'codestral-mamba-latest': 262144,
'open-codestral-mamba': 262144,
'ministral-3b-2410': 131072,
'ministral-3b-latest': 131072,
'ministral-8b-2410': 131072,
'ministral-8b-latest': 131072,
'mistral-large-2407': 131072,
'mistral-large-2411': 131072,
'mistral-large-latest': 131072,
'mistral-large-pixtral-2411': 131072,
'mistral-tiny-2407': 131072,
'mistral-tiny-latest': 131072,
'open-mistral-nemo': 131072,
'open-mistral-nemo-2407': 131072,
'pixtral-12b': 131072,
'pixtral-12b-2409': 131072,
'pixtral-12b-latest': 131072,
'pixtral-large-2411': 131072,
'pixtral-large-latest': 131072,
'open-mixtral-8x22b': 65536,
'open-mixtral-8x22b-2404': 65536,
'codestral-2405': 32768,
'mistral-embed': 32768,
'mistral-large-2402': 32768,
'mistral-medium': 32768,
'mistral-medium-2312': 32768,
'mistral-medium-latest': 32768,
'mistral-moderation-2411': 32768,
'mistral-moderation-latest': 32768,
'mistral-ocr-2503': 32768,
'mistral-ocr-latest': 32768,
'mistral-saba-2502': 32768,
'mistral-saba-latest': 32768,
'mistral-small': 32768,
'mistral-small-2312': 32768,
'mistral-small-2402': 32768,
'mistral-small-2409': 32768,
'mistral-small-2501': 32768,
'mistral-small-2503': 32768,
'mistral-small-latest': 32768,
'mistral-tiny': 32768,
'mistral-tiny-2312': 32768,
'open-mistral-7b': 32768,
'open-mixtral-8x7b': 32768,
};
// Return context size if model found, otherwise default to 32k
return Object.entries(contextMap).find(([key]) => model.includes(key))?.[1] || 32768;
}
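The new helper resolves the limit in three steps: an exact id match in the fetched `model_list`, then a substring match against the map above, then a 32k fallback (or `unlocked_max` when limits are unlocked). A rough sketch of the expected results, assuming `model_list` is empty:

    getMistralMaxContext('mistral-large-latest', false);     // 131072, exact key in the map
    getMistralMaxContext('mistral-large-latest@dev', false);  // 131072, matched via model.includes(key)
    getMistralMaxContext('some-unknown-model', false);        // 32768, fallback default
    getMistralMaxContext('some-unknown-model', true);         // unlocked_max, limits unlocked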
/**
* Get the maximum context size for the Groq model
* @param {string} model Model identifier
@@ -4460,27 +4534,10 @@ async function onModelChange() {
}
if (oai_settings.chat_completion_source === chat_completion_sources.MISTRALAI) {
- if (oai_settings.max_context_unlocked) {
- $('#openai_max_context').attr('max', unlocked_max);
- } else if (['codestral-latest', 'codestral-mamba-2407', 'codestral-2411-rc5', 'codestral-2412', 'codestral-2501'].includes(oai_settings.mistralai_model)) {
- $('#openai_max_context').attr('max', max_256k);
- } else if (['mistral-large-2407', 'mistral-large-2411', 'mistral-large-pixtral-2411', 'mistral-large-latest'].includes(oai_settings.mistralai_model)) {
- $('#openai_max_context').attr('max', max_128k);
- } else if (oai_settings.mistralai_model.includes('mistral-nemo')) {
- $('#openai_max_context').attr('max', max_128k);
- } else if (oai_settings.mistralai_model.includes('mixtral-8x22b')) {
- $('#openai_max_context').attr('max', max_64k);
- } else if (oai_settings.mistralai_model.includes('pixtral')) {
- $('#openai_max_context').attr('max', max_128k);
- } else if (oai_settings.mistralai_model.includes('ministral')) {
- $('#openai_max_context').attr('max', max_32k);
- } else {
- $('#openai_max_context').attr('max', max_32k);
- }
+ const maxContext = getMistralMaxContext(oai_settings.mistralai_model, oai_settings.max_context_unlocked);
+ $('#openai_max_context').attr('max', maxContext);
oai_settings.openai_max_context = Math.min(oai_settings.openai_max_context, Number($('#openai_max_context').attr('max')));
$('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
//mistral also caps temp at 1.0
oai_settings.temp_openai = Math.min(claude_max_temp, oai_settings.temp_openai);
$('#temp_openai').attr('max', claude_max_temp).val(oai_settings.temp_openai).trigger('input');
}
@@ -5070,7 +5127,9 @@ export function isImageInliningSupported() {
'o1-2024-12-17',
'chatgpt-4o-latest',
'yi-vision',
- 'pixtral-latest',
+ 'mistral-large-pixtral-2411',
+ 'mistral-small-2503',
+ 'mistral-small-latest',
'pixtral-12b-latest',
'pixtral-12b',
'pixtral-12b-2409',

View File

@@ -58,6 +58,8 @@ const OPENROUTER_PROVIDERS = [
'Minimax',
'Nineteen',
'Liquid',
+ 'Stealth',
+ 'NCompass',
'InferenceNet',
'Friendli',
'AionLabs',
@@ -69,6 +71,9 @@ const OPENROUTER_PROVIDERS = [
'Targon',
'Ubicloud',
'Parasail',
+ 'Phala',
+ 'Cent-ML',
+ 'Venice',
'01.AI',
'HuggingFace',
'Mancer',