Add multimodal captioning for 01.ai

This commit is contained in:
Cohee
2024-08-01 01:34:49 +03:00
parent 7b9eb97c7f
commit 40ee236ca8
4 changed files with 17 additions and 3 deletions

View File

@ -8,13 +8,12 @@ import { textgen_types, textgenerationwebui_settings } from '../../textgen-setti
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
import { SlashCommandEnumValue } from '../../slash-commands/SlashCommandEnumValue.js';
import { commonEnumProviders } from '../../slash-commands/SlashCommandCommonEnumsProvider.js';
export { MODULE_NAME };
const MODULE_NAME = 'caption';
const PROMPT_DEFAULT = 'Whats in this image?';
const PROMPT_DEFAULT = 'What\'s in this image?';
const TEMPLATE_DEFAULT = '[{{user}} sends {{char}} a picture that contains: {{caption}}]';
/**
@ -334,7 +333,7 @@ async function getCaptionForFile(file, prompt, quiet) {
}
catch (error) {
const errorMessage = error.message || 'Unknown error';
toastr.error(errorMessage, "Failed to caption image.");
toastr.error(errorMessage, 'Failed to caption image.');
console.error(error);
return '';
}
@ -399,6 +398,7 @@ jQuery(async function () {
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'zerooneai' && secret_state[SECRET_KEYS.ZEROONEAI]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && (secret_state[SECRET_KEYS.MAKERSUITE] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && (secret_state[SECRET_KEYS.CLAUDE] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||

View File

@ -17,6 +17,7 @@
<div class="flex1 flex-container flexFlowColumn flexNoGap">
<label for="caption_multimodal_api" data-i18n="API">API</label>
<select id="caption_multimodal_api" class="flex1 text_pole">
<option value="zerooneai">01.AI (Yi)</option>
<option value="anthropic">Anthropic</option>
<option value="custom" data-i18n="Custom (OpenAI-compatible)">Custom (OpenAI-compatible)</option>
<option value="google">Google MakerSuite</option>
@ -32,6 +33,7 @@
<div class="flex1 flex-container flexFlowColumn flexNoGap">
<label for="caption_multimodal_model" data-i18n="Model">Model</label>
<select id="caption_multimodal_model" class="flex1 text_pole">
<option data-type="zerooneai" value="yi-vision">yi-vision</option>
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
<option data-type="openai" value="gpt-4-turbo">gpt-4-turbo</option>
<option data-type="openai" value="gpt-4o">gpt-4o</option>