mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add multimodal captioning for Cohere
This commit is contained in:
@ -398,23 +398,62 @@ jQuery(async function () {
|
|||||||
|
|
||||||
$('#caption_wand_container').append(sendButton);
|
$('#caption_wand_container').append(sendButton);
|
||||||
$(sendButton).on('click', () => {
|
$(sendButton).on('click', () => {
|
||||||
const hasCaptionModule =
|
const hasCaptionModule = (() => {
|
||||||
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
|
const settings = extension_settings.caption;
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
|
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
|
// Handle non-multimodal sources
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'zerooneai' && secret_state[SECRET_KEYS.ZEROONEAI]) ||
|
if (settings.source === 'extras' && modules.includes('caption')) return true;
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'groq' && secret_state[SECRET_KEYS.GROQ]) ||
|
if (settings.source === 'local' || settings.source === 'horde') return true;
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'mistral' && (secret_state[SECRET_KEYS.MISTRALAI] || extension_settings.caption.allow_reverse_proxy)) ||
|
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && (secret_state[SECRET_KEYS.MAKERSUITE] || extension_settings.caption.allow_reverse_proxy)) ||
|
// Handle multimodal sources
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && (secret_state[SECRET_KEYS.CLAUDE] || extension_settings.caption.allow_reverse_proxy)) ||
|
if (settings.source === 'multimodal') {
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
|
const api = settings.multimodal_api;
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
|
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) ||
|
// APIs that support reverse proxy
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'koboldcpp' && textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]) ||
|
const reverseProxyApis = {
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'vllm' && textgenerationwebui_settings.server_urls[textgen_types.VLLM]) ||
|
'openai': SECRET_KEYS.OPENAI,
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') ||
|
'mistral': SECRET_KEYS.MISTRALAI,
|
||||||
extension_settings.caption.source === 'local' ||
|
'google': SECRET_KEYS.MAKERSUITE,
|
||||||
extension_settings.caption.source === 'horde';
|
'anthropic': SECRET_KEYS.CLAUDE,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (reverseProxyApis[api]) {
|
||||||
|
if (secret_state[reverseProxyApis[api]] || settings.allow_reverse_proxy) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const chatCompletionApis = {
|
||||||
|
'openrouter': SECRET_KEYS.OPENROUTER,
|
||||||
|
'zerooneai': SECRET_KEYS.ZEROONEAI,
|
||||||
|
'groq': SECRET_KEYS.GROQ,
|
||||||
|
'cohere': SECRET_KEYS.COHERE,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (chatCompletionApis[api] && secret_state[chatCompletionApis[api]]) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const textCompletionApis = {
|
||||||
|
'ollama': textgen_types.OLLAMA,
|
||||||
|
'llamacpp': textgen_types.LLAMACPP,
|
||||||
|
'ooba': textgen_types.OOBA,
|
||||||
|
'koboldcpp': textgen_types.KOBOLDCPP,
|
||||||
|
'vllm': textgen_types.VLLM,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (textCompletionApis[api] && textgenerationwebui_settings.server_urls[textCompletionApis[api]]) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Custom API doesn't need additional checks
|
||||||
|
if (api === 'custom') {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
})();
|
||||||
|
|
||||||
if (!hasCaptionModule) {
|
if (!hasCaptionModule) {
|
||||||
toastr.error('Choose other captioning source in the extension settings.', 'Captioning is not available');
|
toastr.error('Choose other captioning source in the extension settings.', 'Captioning is not available');
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
<select id="caption_multimodal_api" class="flex1 text_pole">
|
<select id="caption_multimodal_api" class="flex1 text_pole">
|
||||||
<option value="zerooneai">01.AI (Yi)</option>
|
<option value="zerooneai">01.AI (Yi)</option>
|
||||||
<option value="anthropic">Anthropic</option>
|
<option value="anthropic">Anthropic</option>
|
||||||
|
<option value="cohere">Cohere</option>
|
||||||
<option value="custom" data-i18n="Custom (OpenAI-compatible)">Custom (OpenAI-compatible)</option>
|
<option value="custom" data-i18n="Custom (OpenAI-compatible)">Custom (OpenAI-compatible)</option>
|
||||||
<option value="google">Google AI Studio</option>
|
<option value="google">Google AI Studio</option>
|
||||||
<option value="groq">Groq</option>
|
<option value="groq">Groq</option>
|
||||||
@ -35,6 +36,8 @@
|
|||||||
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
||||||
<label for="caption_multimodal_model" data-i18n="Model">Model</label>
|
<label for="caption_multimodal_model" data-i18n="Model">Model</label>
|
||||||
<select id="caption_multimodal_model" class="flex1 text_pole">
|
<select id="caption_multimodal_model" class="flex1 text_pole">
|
||||||
|
<option data-type="cohere" value="c4ai-aya-vision-8b">c4ai-aya-vision-8b</option>
|
||||||
|
<option data-type="cohere" value="c4ai-aya-vision-32b">c4ai-aya-vision-32b</option>
|
||||||
<option data-type="mistral" value="pixtral-12b-latest">pixtral-12b-latest</option>
|
<option data-type="mistral" value="pixtral-12b-latest">pixtral-12b-latest</option>
|
||||||
<option data-type="mistral" value="pixtral-12b-2409">pixtral-12b-2409</option>
|
<option data-type="mistral" value="pixtral-12b-2409">pixtral-12b-2409</option>
|
||||||
<option data-type="mistral" value="pixtral-large-latest">pixtral-large-latest</option>
|
<option data-type="mistral" value="pixtral-large-latest">pixtral-large-latest</option>
|
||||||
|
@ -144,10 +144,14 @@ function throwIfInvalidModel(useReverseProxy) {
|
|||||||
throw new Error('Google AI Studio API key is not set.');
|
throw new Error('Google AI Studio API key is not set.');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (extension_settings.caption.multi_modal_api === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) {
|
if (extension_settings.caption.multimodal_api === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) {
|
||||||
throw new Error('Mistral AI API key is not set.');
|
throw new Error('Mistral AI API key is not set.');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (extension_settings.caption.multimodal_api === 'cohere' && !secret_state[SECRET_KEYS.COHERE]) {
|
||||||
|
throw new Error('Cohere API key is not set.');
|
||||||
|
}
|
||||||
|
|
||||||
if (extension_settings.caption.multimodal_api === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) {
|
if (extension_settings.caption.multimodal_api === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) {
|
||||||
throw new Error('Ollama server URL is not set.');
|
throw new Error('Ollama server URL is not set.');
|
||||||
}
|
}
|
||||||
|
@ -62,6 +62,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
|||||||
key = readSecret(request.user.directories, SECRET_KEYS.GROQ);
|
key = readSecret(request.user.directories, SECRET_KEYS.GROQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (request.body.api === 'cohere') {
|
||||||
|
key = readSecret(request.user.directories, SECRET_KEYS.COHERE);
|
||||||
|
}
|
||||||
|
|
||||||
if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp', 'vllm'].includes(request.body.api) === false) {
|
if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp', 'vllm'].includes(request.body.api) === false) {
|
||||||
console.warn('No key found for API', request.body.api);
|
console.warn('No key found for API', request.body.api);
|
||||||
return response.sendStatus(400);
|
return response.sendStatus(400);
|
||||||
@ -126,6 +130,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
|||||||
apiUrl = 'https://api.mistral.ai/v1/chat/completions';
|
apiUrl = 'https://api.mistral.ai/v1/chat/completions';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (request.body.api === 'cohere') {
|
||||||
|
apiUrl = 'https://api.cohere.ai/v2/chat';
|
||||||
|
}
|
||||||
|
|
||||||
if (request.body.api === 'ooba') {
|
if (request.body.api === 'ooba') {
|
||||||
apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
|
apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
|
||||||
const imgMessage = body.messages.pop();
|
const imgMessage = body.messages.pop();
|
||||||
@ -165,7 +173,7 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
|||||||
/** @type {any} */
|
/** @type {any} */
|
||||||
const data = await result.json();
|
const data = await result.json();
|
||||||
console.info('Multimodal captioning response', data);
|
console.info('Multimodal captioning response', data);
|
||||||
const caption = data?.choices[0]?.message?.content;
|
const caption = data?.choices?.[0]?.message?.content ?? data?.message?.content?.[0]?.text;
|
||||||
|
|
||||||
if (!caption) {
|
if (!caption) {
|
||||||
return response.status(500).send('No caption found');
|
return response.status(500).send('No caption found');
|
||||||
|
Reference in New Issue
Block a user