diff --git a/public/index.html b/public/index.html index a1976f83b..3f54f3068 100644 --- a/public/index.html +++ b/public/index.html @@ -2896,6 +2896,7 @@ + @@ -2903,11 +2904,13 @@ + + diff --git a/public/scripts/extensions/caption/index.js b/public/scripts/extensions/caption/index.js index efd2c1a55..0f740cc9a 100644 --- a/public/scripts/extensions/caption/index.js +++ b/public/scripts/extensions/caption/index.js @@ -403,6 +403,7 @@ jQuery(async function () { (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'zerooneai' && secret_state[SECRET_KEYS.ZEROONEAI]) || + (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'mistral' && (secret_state[SECRET_KEYS.MISTRALAI] || extension_settings.caption.allow_reverse_proxy)) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && (secret_state[SECRET_KEYS.MAKERSUITE] || extension_settings.caption.allow_reverse_proxy)) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && (secret_state[SECRET_KEYS.CLAUDE] || extension_settings.caption.allow_reverse_proxy)) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) || diff --git a/public/scripts/extensions/caption/settings.html b/public/scripts/extensions/caption/settings.html index c02a18fa1..40a7273e0 100644 --- a/public/scripts/extensions/caption/settings.html +++ 
b/public/scripts/extensions/caption/settings.html @@ -23,6 +23,7 @@ + @@ -33,6 +34,8 @@
Allow reverse proxy diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js index 9fb1f980b..950105ce1 100644 --- a/public/scripts/extensions/shared.js +++ b/public/scripts/extensions/shared.js @@ -13,7 +13,7 @@ import { createThumbnail, isValidUrl } from '../utils.js'; */ export async function getMultimodalCaption(base64Img, prompt) { const useReverseProxy = - (['openai', 'anthropic', 'google'].includes(extension_settings.caption.multimodal_api)) + (['openai', 'anthropic', 'google', 'mistral'].includes(extension_settings.caption.multimodal_api)) && extension_settings.caption.allow_reverse_proxy && oai_settings.reverse_proxy && isValidUrl(oai_settings.reverse_proxy); @@ -36,7 +36,7 @@ export async function getMultimodalCaption(base64Img, prompt) { const isVllm = extension_settings.caption.multimodal_api === 'vllm'; const base64Bytes = base64Img.length * 0.75; const compressionLimit = 2 * 1024 * 1024; - if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) { + if ((['google', 'openrouter', 'mistral'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) { const maxSide = 1024; base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg'); } @@ -139,6 +139,10 @@ function throwIfInvalidModel(useReverseProxy) { throw new Error('Google AI Studio API key is not set.'); } + if (extension_settings.caption.multimodal_api === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) { + throw new Error('Mistral AI API key is not set.'); + } + if (extension_settings.caption.multimodal_api === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) { throw new Error('Ollama server URL is not set.'); } diff --git a/public/scripts/openai.js b/public/scripts/openai.js index fd2d76559..fd8f90e13 100644 --- a/public/scripts/openai.js +++ 
b/public/scripts/openai.js @@ -2490,7 +2490,7 @@ class Message { * @returns {Promise} Compressed image as a Data URL. */ async compressImage(image) { - if ([chat_completion_sources.OPENROUTER, chat_completion_sources.MAKERSUITE].includes(oai_settings.chat_completion_source)) { + if ([chat_completion_sources.OPENROUTER, chat_completion_sources.MAKERSUITE, chat_completion_sources.MISTRALAI].includes(oai_settings.chat_completion_source)) { const sizeThreshold = 2 * 1024 * 1024; const dataSize = image.length * 0.75; const maxSide = 1024; @@ -4221,6 +4221,8 @@ async function onModelChange() { $('#openai_max_context').attr('max', max_128k); } else if (oai_settings.mistralai_model.includes('mixtral-8x22b')) { $('#openai_max_context').attr('max', max_64k); + } else if (oai_settings.mistralai_model.includes('pixtral')) { + $('#openai_max_context').attr('max', max_128k); } else { $('#openai_max_context').attr('max', max_32k); } @@ -4770,6 +4772,8 @@ export function isImageInliningSupported() { 'gpt-4o-mini', 'chatgpt-4o-latest', 'yi-vision', + 'pixtral-latest', + 'pixtral-12b-2409', ]; switch (oai_settings.chat_completion_source) { @@ -4785,6 +4789,8 @@ export function isImageInliningSupported() { return true; case chat_completion_sources.ZEROONEAI: return visionSupportedModels.some(model => oai_settings.zerooneai_model.includes(model)); + case chat_completion_sources.MISTRALAI: + return visionSupportedModels.some(model => oai_settings.mistralai_model.includes(model)); default: return false; } diff --git a/src/endpoints/openai.js b/src/endpoints/openai.js index b1a36e35c..af1f107e2 100644 --- a/src/endpoints/openai.js +++ b/src/endpoints/openai.js @@ -51,6 +51,10 @@ router.post('/caption-image', jsonParser, async (request, response) => { key = readSecret(request.user.directories, SECRET_KEYS.ZEROONEAI); } + if (request.body.api === 'mistral') { + key = readSecret(request.user.directories, SECRET_KEYS.MISTRALAI); + } + if (!key && !request.body.reverse_proxy && ['custom', 
'ooba', 'koboldcpp', 'vllm'].includes(request.body.api) === false) { console.log('No key found for API', request.body.api); return response.sendStatus(400); @@ -107,6 +111,10 @@ router.post('/caption-image', jsonParser, async (request, response) => { apiUrl = 'https://api.01.ai/v1/chat/completions'; } + if (request.body.api === 'mistral') { + apiUrl = 'https://api.mistral.ai/v1/chat/completions'; + } + if (request.body.api === 'ooba') { apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`; const imgMessage = body.messages.pop();