From 40ee236ca8302f5d9692b6a5b5dd33c96f27e5ed Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 1 Aug 2024 01:34:49 +0300 Subject: [PATCH] Add multimodal captioning for 01.ai --- public/scripts/extensions/caption/index.js | 6 +++--- public/scripts/extensions/caption/settings.html | 2 ++ public/scripts/extensions/shared.js | 4 ++++ src/endpoints/openai.js | 8 ++++++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/public/scripts/extensions/caption/index.js b/public/scripts/extensions/caption/index.js index dbfbc0d1d..b2e8c7664 100644 --- a/public/scripts/extensions/caption/index.js +++ b/public/scripts/extensions/caption/index.js @@ -8,13 +8,12 @@ import { textgen_types, textgenerationwebui_settings } from '../../textgen-setti import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js'; import { SlashCommand } from '../../slash-commands/SlashCommand.js'; import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js'; -import { SlashCommandEnumValue } from '../../slash-commands/SlashCommandEnumValue.js'; import { commonEnumProviders } from '../../slash-commands/SlashCommandCommonEnumsProvider.js'; export { MODULE_NAME }; const MODULE_NAME = 'caption'; -const PROMPT_DEFAULT = 'What’s in this image?'; +const PROMPT_DEFAULT = 'What\'s in this image?'; const TEMPLATE_DEFAULT = '[{{user}} sends {{char}} a picture that contains: {{caption}}]'; /** @@ -334,7 +333,7 @@ async function getCaptionForFile(file, prompt, quiet) { } catch (error) { const errorMessage = error.message || 'Unknown error'; - toastr.error(errorMessage, "Failed to caption image."); + toastr.error(errorMessage, 'Failed to caption image.'); console.error(error); return ''; } @@ -399,6 +398,7 @@ jQuery(async function () { (modules.includes('caption') && extension_settings.caption.source === 'extras') || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) || + (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'zerooneai' && secret_state[SECRET_KEYS.ZEROONEAI]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && (secret_state[SECRET_KEYS.MAKERSUITE] || extension_settings.caption.allow_reverse_proxy)) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && (secret_state[SECRET_KEYS.CLAUDE] || extension_settings.caption.allow_reverse_proxy)) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) || diff --git a/public/scripts/extensions/caption/settings.html b/public/scripts/extensions/caption/settings.html index 5181e8ce1..d686ca0a3 100644 --- a/public/scripts/extensions/caption/settings.html +++ b/public/scripts/extensions/caption/settings.html @@ -17,6 +17,7 @@
+ diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js index 3e671c3af..403d23f7f 100644 --- a/public/scripts/extensions/shared.js +++ b/public/scripts/extensions/shared.js @@ -136,6 +136,10 @@ function throwIfInvalidModel(useReverseProxy) { throw new Error('Anthropic (Claude) API key is not set.'); } + if (extension_settings.caption.multimodal_api === 'zerooneai' && !secret_state[SECRET_KEYS.ZEROONEAI]) { + throw new Error('01.AI API key is not set.'); + } + if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE] && !useReverseProxy) { throw new Error('MakerSuite API key is not set.'); } diff --git a/src/endpoints/openai.js b/src/endpoints/openai.js index f63d72b11..662974088 100644 --- a/src/endpoints/openai.js +++ b/src/endpoints/openai.js @@ -47,6 +47,10 @@ router.post('/caption-image', jsonParser, async (request, response) => { key = readSecret(request.user.directories, SECRET_KEYS.VLLM); } + if (request.body.api === 'zerooneai') { + key = readSecret(request.user.directories, SECRET_KEYS.ZEROONEAI); + } + if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp', 'vllm'].includes(request.body.api) === false) { console.log('No key found for API', request.body.api); return response.sendStatus(400); @@ -100,6 +104,10 @@ router.post('/caption-image', jsonParser, async (request, response) => { apiUrl = `${request.body.server_url}/chat/completions`; } + if (request.body.api === 'zerooneai') { + apiUrl = 'https://api.01.ai/v1/chat/completions'; + } + if (request.body.api === 'ooba') { apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`; const imgMessage = body.messages.pop();