MistralAI: Add Pixtral to models and captioning

This commit is contained in:
Cohee 2024-09-17 21:44:25 +03:00
parent 1366c2741d
commit 60df924bec
6 changed files with 29 additions and 4 deletions

View File

@ -2896,6 +2896,7 @@
<option value="mistral-large-latest">mistral-large-latest</option>
<option value="codestral-latest">codestral-latest</option>
<option value="codestral-mamba-latest">codestral-mamba-latest</option>
<option value="pixtral-latest">pixtral-latest</option>
</optgroup>
<optgroup label="Sub-versions">
<option value="open-mistral-nemo-2407">open-mistral-nemo-2407</option>
@ -2903,11 +2904,13 @@
<option value="mistral-tiny-2312">mistral-tiny-2312</option>
<option value="mistral-small-2312">mistral-small-2312</option>
<option value="mistral-small-2402">mistral-small-2402</option>
<option value="mistral-small-2409">mistral-small-2409</option>
<option value="mistral-medium-2312">mistral-medium-2312</option>
<option value="mistral-large-2402">mistral-large-2402</option>
<option value="mistral-large-2407">mistral-large-2407</option>
<option value="codestral-2405">codestral-2405</option>
<option value="codestral-mamba-2407">codestral-mamba-2407</option>
<option value="pixtral-12b-2409">pixtral-12b-2409</option>
</optgroup>
</select>
</div>

View File

@ -403,6 +403,7 @@ jQuery(async function () {
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'zerooneai' && secret_state[SECRET_KEYS.ZEROONEAI]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'mistral' && (secret_state[SECRET_KEYS.MISTRALAI] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && (secret_state[SECRET_KEYS.MAKERSUITE] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && (secret_state[SECRET_KEYS.CLAUDE] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||

View File

@ -23,6 +23,7 @@
<option value="google">Google AI Studio</option>
<option value="koboldcpp">KoboldCpp</option>
<option value="llamacpp">llama.cpp</option>
<option value="mistral">MistralAI</option>
<option value="ollama">Ollama</option>
<option value="openai">OpenAI</option>
<option value="openrouter">OpenRouter</option>
@ -33,6 +34,8 @@
<div class="flex1 flex-container flexFlowColumn flexNoGap">
<label for="caption_multimodal_model" data-i18n="Model">Model</label>
<select id="caption_multimodal_model" class="flex1 text_pole">
<option data-type="mistral" value="pixtral-latest">pixtral-latest</option>
<option data-type="mistral" value="pixtral-12b-2409">pixtral-12b-2409</option>
<option data-type="zerooneai" value="yi-vision">yi-vision</option>
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
<option data-type="openai" value="gpt-4-turbo">gpt-4-turbo</option>
@ -96,7 +99,7 @@
<div data-type="ollama">
The model must be downloaded first! Do it with the <code>ollama pull</code> command or <a href="#" id="caption_ollama_pull">click here</a>.
</div>
<label data-type="openai,anthropic,google" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
<label data-type="openai,anthropic,google,mistral" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
<input id="caption_allow_reverse_proxy" type="checkbox" class="checkbox">
<span data-i18n="Allow reverse proxy">Allow reverse proxy</span>
</label>

View File

@ -13,7 +13,7 @@ import { createThumbnail, isValidUrl } from '../utils.js';
*/
export async function getMultimodalCaption(base64Img, prompt) {
const useReverseProxy =
(['openai', 'anthropic', 'google'].includes(extension_settings.caption.multimodal_api))
(['openai', 'anthropic', 'google', 'mistral'].includes(extension_settings.caption.multimodal_api))
&& extension_settings.caption.allow_reverse_proxy
&& oai_settings.reverse_proxy
&& isValidUrl(oai_settings.reverse_proxy);
@ -36,7 +36,7 @@ export async function getMultimodalCaption(base64Img, prompt) {
const isVllm = extension_settings.caption.multimodal_api === 'vllm';
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
if ((['google', 'openrouter', 'mistral'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
const maxSide = 1024;
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
}
@ -139,6 +139,10 @@ function throwIfInvalidModel(useReverseProxy) {
throw new Error('Google AI Studio API key is not set.');
}
if (extension_settings.caption.multi_modal_api === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) {
throw new Error('Mistral AI API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) {
throw new Error('Ollama server URL is not set.');
}

View File

@ -2490,7 +2490,7 @@ class Message {
* @returns {Promise<string>} Compressed image as a Data URL.
*/
async compressImage(image) {
if ([chat_completion_sources.OPENROUTER, chat_completion_sources.MAKERSUITE].includes(oai_settings.chat_completion_source)) {
if ([chat_completion_sources.OPENROUTER, chat_completion_sources.MAKERSUITE, chat_completion_sources.MISTRALAI].includes(oai_settings.chat_completion_source)) {
const sizeThreshold = 2 * 1024 * 1024;
const dataSize = image.length * 0.75;
const maxSide = 1024;
@ -4221,6 +4221,8 @@ async function onModelChange() {
$('#openai_max_context').attr('max', max_128k);
} else if (oai_settings.mistralai_model.includes('mixtral-8x22b')) {
$('#openai_max_context').attr('max', max_64k);
} else if (oai_settings.mistralai_model.includes('pixtral')) {
$('#openai_max_context').attr('max', max_128k);
} else {
$('#openai_max_context').attr('max', max_32k);
}
@ -4770,6 +4772,8 @@ export function isImageInliningSupported() {
'gpt-4o-mini',
'chatgpt-4o-latest',
'yi-vision',
'pixtral-latest',
'pixtral-12b-2409',
];
switch (oai_settings.chat_completion_source) {
@ -4785,6 +4789,8 @@ export function isImageInliningSupported() {
return true;
case chat_completion_sources.ZEROONEAI:
return visionSupportedModels.some(model => oai_settings.zerooneai_model.includes(model));
case chat_completion_sources.MISTRALAI:
return visionSupportedModels.some(model => oai_settings.mistralai_model.includes(model));
default:
return false;
}

View File

@ -51,6 +51,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
key = readSecret(request.user.directories, SECRET_KEYS.ZEROONEAI);
}
if (request.body.api === 'mistral') {
key = readSecret(request.user.directories, SECRET_KEYS.MISTRALAI);
}
if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp', 'vllm'].includes(request.body.api) === false) {
console.log('No key found for API', request.body.api);
return response.sendStatus(400);
@ -107,6 +111,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
apiUrl = 'https://api.01.ai/v1/chat/completions';
}
if (request.body.api === 'mistral') {
apiUrl = 'https://api.mistral.ai/v1/chat/completions';
}
if (request.body.api === 'ooba') {
apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
const imgMessage = body.messages.pop();