MistralAI: Add Pixtral to models and captioning

This commit is contained in:
Cohee
2024-09-17 21:44:25 +03:00
parent 1366c2741d
commit 60df924bec
6 changed files with 29 additions and 4 deletions

View File

@@ -403,6 +403,7 @@ jQuery(async function () {
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'zerooneai' && secret_state[SECRET_KEYS.ZEROONEAI]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'mistral' && (secret_state[SECRET_KEYS.MISTRALAI] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && (secret_state[SECRET_KEYS.MAKERSUITE] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && (secret_state[SECRET_KEYS.CLAUDE] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||

View File

@@ -23,6 +23,7 @@
<option value="google">Google AI Studio</option>
<option value="koboldcpp">KoboldCpp</option>
<option value="llamacpp">llama.cpp</option>
<option value="mistral">MistralAI</option>
<option value="ollama">Ollama</option>
<option value="openai">OpenAI</option>
<option value="openrouter">OpenRouter</option>
@@ -33,6 +34,8 @@
<div class="flex1 flex-container flexFlowColumn flexNoGap">
<label for="caption_multimodal_model" data-i18n="Model">Model</label>
<select id="caption_multimodal_model" class="flex1 text_pole">
<option data-type="mistral" value="pixtral-latest">pixtral-latest</option>
<option data-type="mistral" value="pixtral-12b-2409">pixtral-12b-2409</option>
<option data-type="zerooneai" value="yi-vision">yi-vision</option>
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
<option data-type="openai" value="gpt-4-turbo">gpt-4-turbo</option>
@@ -96,7 +99,7 @@
<div data-type="ollama">
The model must be downloaded first! Do it with the <code>ollama pull</code> command or <a href="#" id="caption_ollama_pull">click here</a>.
</div>
<label data-type="openai,anthropic,google" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
<label data-type="openai,anthropic,google,mistral" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
<input id="caption_allow_reverse_proxy" type="checkbox" class="checkbox">
<span data-i18n="Allow reverse proxy">Allow reverse proxy</span>
</label>

View File

@@ -13,7 +13,7 @@ import { createThumbnail, isValidUrl } from '../utils.js';
*/
export async function getMultimodalCaption(base64Img, prompt) {
const useReverseProxy =
(['openai', 'anthropic', 'google'].includes(extension_settings.caption.multimodal_api))
(['openai', 'anthropic', 'google', 'mistral'].includes(extension_settings.caption.multimodal_api))
&& extension_settings.caption.allow_reverse_proxy
&& oai_settings.reverse_proxy
&& isValidUrl(oai_settings.reverse_proxy);
@@ -36,7 +36,7 @@ export async function getMultimodalCaption(base64Img, prompt) {
const isVllm = extension_settings.caption.multimodal_api === 'vllm';
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
if ((['google', 'openrouter', 'mistral'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
const maxSide = 1024;
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
}
@@ -139,6 +139,10 @@ function throwIfInvalidModel(useReverseProxy) {
throw new Error('Google AI Studio API key is not set.');
}
if (extension_settings.caption.multi_modal_api === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) {
throw new Error('Mistral AI API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) {
throw new Error('Ollama server URL is not set.');
}