Mirror of https://github.com/SillyTavern/SillyTavern.git (synced 2025-03-06 20:58:04 +01:00)
Add multimodal captioning for 01.ai
This commit is contained in:
Parent: 7b9eb97c7f
Commit: 40ee236ca8
@@ -8,13 +8,12 @@ import { textgen_types, textgenerationwebui_settings } from '../../textgen-setti
|
||||
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
|
||||
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
|
||||
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
|
||||
import { SlashCommandEnumValue } from '../../slash-commands/SlashCommandEnumValue.js';
|
||||
import { commonEnumProviders } from '../../slash-commands/SlashCommandCommonEnumsProvider.js';
|
||||
export { MODULE_NAME };
|
||||
|
||||
const MODULE_NAME = 'caption';
|
||||
|
||||
const PROMPT_DEFAULT = 'What’s in this image?';
|
||||
const PROMPT_DEFAULT = 'What\'s in this image?';
|
||||
const TEMPLATE_DEFAULT = '[{{user}} sends {{char}} a picture that contains: {{caption}}]';
|
||||
|
||||
/**
|
||||
@@ -334,7 +333,7 @@ async function getCaptionForFile(file, prompt, quiet) {
|
||||
}
|
||||
catch (error) {
|
||||
const errorMessage = error.message || 'Unknown error';
|
||||
toastr.error(errorMessage, "Failed to caption image.");
|
||||
toastr.error(errorMessage, 'Failed to caption image.');
|
||||
console.error(error);
|
||||
return '';
|
||||
}
|
||||
@@ -399,6 +398,7 @@ jQuery(async function () {
|
||||
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'zerooneai' && secret_state[SECRET_KEYS.ZEROONEAI]) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && (secret_state[SECRET_KEYS.MAKERSUITE] || extension_settings.caption.allow_reverse_proxy)) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && (secret_state[SECRET_KEYS.CLAUDE] || extension_settings.caption.allow_reverse_proxy)) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
|
||||
|
@@ -17,6 +17,7 @@
|
||||
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
||||
<label for="caption_multimodal_api" data-i18n="API">API</label>
|
||||
<select id="caption_multimodal_api" class="flex1 text_pole">
|
||||
<option value="zerooneai">01.AI (Yi)</option>
|
||||
<option value="anthropic">Anthropic</option>
|
||||
<option value="custom" data-i18n="Custom (OpenAI-compatible)">Custom (OpenAI-compatible)</option>
|
||||
<option value="google">Google MakerSuite</option>
|
||||
@@ -32,6 +33,7 @@
|
||||
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
||||
<label for="caption_multimodal_model" data-i18n="Model">Model</label>
|
||||
<select id="caption_multimodal_model" class="flex1 text_pole">
|
||||
<option data-type="zerooneai" value="yi-vision">yi-vision</option>
|
||||
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
|
||||
<option data-type="openai" value="gpt-4-turbo">gpt-4-turbo</option>
|
||||
<option data-type="openai" value="gpt-4o">gpt-4o</option>
|
||||
|
@@ -136,6 +136,10 @@ function throwIfInvalidModel(useReverseProxy) {
|
||||
throw new Error('Anthropic (Claude) API key is not set.');
|
||||
}
|
||||
|
||||
if (extension_settings.caption.multimodal_api === 'zerooneai' && !secret_state[SECRET_KEYS.ZEROONEAI]) {
|
||||
throw new Error('01.AI API key is not set.');
|
||||
}
|
||||
|
||||
if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE] && !useReverseProxy) {
|
||||
throw new Error('MakerSuite API key is not set.');
|
||||
}
|
||||
|
@@ -47,6 +47,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
||||
key = readSecret(request.user.directories, SECRET_KEYS.VLLM);
|
||||
}
|
||||
|
||||
if (request.body.api === 'zerooneai') {
|
||||
key = readSecret(request.user.directories, SECRET_KEYS.ZEROONEAI);
|
||||
}
|
||||
|
||||
if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp', 'vllm'].includes(request.body.api) === false) {
|
||||
console.log('No key found for API', request.body.api);
|
||||
return response.sendStatus(400);
|
||||
@@ -100,6 +104,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
||||
apiUrl = `${request.body.server_url}/chat/completions`;
|
||||
}
|
||||
|
||||
if (request.body.api === 'zerooneai') {
|
||||
apiUrl = 'https://api.01.ai/v1/chat/completions';
|
||||
}
|
||||
|
||||
if (request.body.api === 'ooba') {
|
||||
apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
|
||||
const imgMessage = body.messages.pop();
|
||||
|
Loading…
x
Reference in New Issue
Block a user