diff --git a/public/scripts/extensions/caption/index.js b/public/scripts/extensions/caption/index.js index 8e499452e..6b43f3fa3 100644 --- a/public/scripts/extensions/caption/index.js +++ b/public/scripts/extensions/caption/index.js @@ -408,6 +408,8 @@ jQuery(async function () { // Handle multimodal sources if (settings.source === 'multimodal') { const api = settings.multimodal_api; + const altEndpointEnabled = settings.alt_endpoint_enabled; + const altEndpointUrl = settings.alt_endpoint_url; // APIs that support reverse proxy const reverseProxyApis = { @@ -444,7 +446,11 @@ jQuery(async function () { 'vllm': textgen_types.VLLM, }; - if (textCompletionApis[api] && textgenerationwebui_settings.server_urls[textCompletionApis[api]]) { + if (textCompletionApis[api] && altEndpointEnabled && altEndpointUrl) { + return true; + } + + if (textCompletionApis[api] && !altEndpointEnabled && textgenerationwebui_settings.server_urls[textCompletionApis[api]]) { return true; } @@ -580,6 +586,14 @@ jQuery(async function () { extension_settings.caption.multimodal_model = String($('#caption_multimodal_model').val()); saveSettingsDebounced(); }); + $('#caption_altEndpoint_url').val(extension_settings.caption.alt_endpoint_url).on('input', () => { + extension_settings.caption.alt_endpoint_url = String($('#caption_altEndpoint_url').val()); + saveSettingsDebounced(); + }); + $('#caption_altEndpoint_enabled').prop('checked', !!(extension_settings.caption.alt_endpoint_enabled)).on('input', () => { + extension_settings.caption.alt_endpoint_enabled = !!$('#caption_altEndpoint_enabled').prop('checked'); + saveSettingsDebounced(); + }); const onMessageEvent = async (index) => { if (!extension_settings.caption.auto_mode) { diff --git a/public/scripts/extensions/caption/settings.html b/public/scripts/extensions/caption/settings.html index 9eb60ba5b..d78942778 100644 --- a/public/scripts/extensions/caption/settings.html +++ b/public/scripts/extensions/caption/settings.html @@ -189,6 +189,16 @@ Hint: Set your API keys and endpoints in the 'API Connections' tab first. +
+ + + +
diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js index ad4e3589d..01a6e8219 100644 --- a/public/scripts/extensions/shared.js +++ b/public/scripts/extensions/shared.js @@ -61,7 +61,9 @@ export async function getMultimodalCaption(base64Img, prompt) { requestBody.model = textgenerationwebui_settings.ollama_model; } - requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]; + requestBody.server_url = extension_settings.caption.alt_endpoint_enabled + ? extension_settings.caption.alt_endpoint_url + : textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]; } if (isVllm) { @@ -69,19 +71,27 @@ export async function getMultimodalCaption(base64Img, prompt) { requestBody.model = textgenerationwebui_settings.vllm_model; } - requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.VLLM]; + requestBody.server_url = extension_settings.caption.alt_endpoint_enabled + ? extension_settings.caption.alt_endpoint_url + : textgenerationwebui_settings.server_urls[textgen_types.VLLM]; } if (isLlamaCpp) { - requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]; + requestBody.server_url = extension_settings.caption.alt_endpoint_enabled + ? extension_settings.caption.alt_endpoint_url + : textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]; } if (isOoba) { - requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OOBA]; + requestBody.server_url = extension_settings.caption.alt_endpoint_enabled + ? extension_settings.caption.alt_endpoint_url + : textgenerationwebui_settings.server_urls[textgen_types.OOBA]; } if (isKoboldCpp) { - requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]; + requestBody.server_url = extension_settings.caption.alt_endpoint_enabled + ? extension_settings.caption.alt_endpoint_url + : textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]; } if (isCustom) { @@ -121,75 +131,84 @@ export async function getMultimodalCaption(base64Img, prompt) { } function throwIfInvalidModel(useReverseProxy) { - if (extension_settings.caption.multimodal_api === 'openai' && !secret_state[SECRET_KEYS.OPENAI] && !useReverseProxy) { + const altEndpointEnabled = extension_settings.caption.alt_endpoint_enabled; + const altEndpointUrl = extension_settings.caption.alt_endpoint_url; + const multimodalModel = extension_settings.caption.multimodal_model; + const multimodalApi = extension_settings.caption.multimodal_api; + + if (altEndpointEnabled && ['llamacpp', 'ooba', 'koboldcpp', 'vllm', 'ollama'].includes(multimodalApi) && !altEndpointUrl) { + throw new Error('Secondary endpoint URL is not set.'); + } + + if (multimodalApi === 'openai' && !secret_state[SECRET_KEYS.OPENAI] && !useReverseProxy) { throw new Error('OpenAI API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'openrouter' && !secret_state[SECRET_KEYS.OPENROUTER]) { + if (multimodalApi === 'openrouter' && !secret_state[SECRET_KEYS.OPENROUTER]) { throw new Error('OpenRouter API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'anthropic' && !secret_state[SECRET_KEYS.CLAUDE] && !useReverseProxy) { + if (multimodalApi === 'anthropic' && !secret_state[SECRET_KEYS.CLAUDE] && !useReverseProxy) { throw new Error('Anthropic (Claude) API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'zerooneai' && !secret_state[SECRET_KEYS.ZEROONEAI]) { + if (multimodalApi === 'zerooneai' && !secret_state[SECRET_KEYS.ZEROONEAI]) { throw new Error('01.AI API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'groq' && !secret_state[SECRET_KEYS.GROQ]) { + if (multimodalApi === 'groq' && !secret_state[SECRET_KEYS.GROQ]) { throw new Error('Groq API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE] && !useReverseProxy) { + if (multimodalApi === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE] && !useReverseProxy) { throw new Error('Google AI Studio API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'vertexai' && !secret_state[SECRET_KEYS.VERTEXAI] && !useReverseProxy) { + if (multimodalApi === 'vertexai' && !secret_state[SECRET_KEYS.VERTEXAI] && !useReverseProxy) { throw new Error('Google Vertex AI API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) { + if (multimodalApi === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) { throw new Error('Mistral AI API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'cohere' && !secret_state[SECRET_KEYS.COHERE]) { + if (multimodalApi === 'cohere' && !secret_state[SECRET_KEYS.COHERE]) { throw new Error('Cohere API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'xai' && !secret_state[SECRET_KEYS.XAI] && !useReverseProxy) { + if (multimodalApi === 'xai' && !secret_state[SECRET_KEYS.XAI] && !useReverseProxy) { throw new Error('xAI API key is not set.'); } - if (extension_settings.caption.multimodal_api === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) { + if (multimodalApi === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA] && !altEndpointEnabled) { throw new Error('Ollama server URL is not set.'); } - if (extension_settings.caption.multimodal_api === 'ollama' && extension_settings.caption.multimodal_model === 'ollama_current' && !textgenerationwebui_settings.ollama_model) { + if (multimodalApi === 'ollama' && multimodalModel === 'ollama_current' && !textgenerationwebui_settings.ollama_model) { throw new Error('Ollama model is not set.'); } - if (extension_settings.caption.multimodal_api === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) { + if (multimodalApi === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP] && !altEndpointEnabled) { throw new Error('LlamaCPP server URL is not set.'); } - if (extension_settings.caption.multimodal_api === 'ooba' && !textgenerationwebui_settings.server_urls[textgen_types.OOBA]) { + if (multimodalApi === 'ooba' && !textgenerationwebui_settings.server_urls[textgen_types.OOBA] && !altEndpointEnabled) { throw new Error('Text Generation WebUI server URL is not set.'); } - if (extension_settings.caption.multimodal_api === 'koboldcpp' && !textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]) { + if (multimodalApi === 'koboldcpp' && !textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP] && !altEndpointEnabled) { throw new Error('KoboldCpp server URL is not set.'); } - if (extension_settings.caption.multimodal_api === 'vllm' && !textgenerationwebui_settings.server_urls[textgen_types.VLLM]) { + if (multimodalApi === 'vllm' && !textgenerationwebui_settings.server_urls[textgen_types.VLLM] && !altEndpointEnabled) { throw new Error('vLLM server URL is not set.'); } - if (extension_settings.caption.multimodal_api === 'vllm' && extension_settings.caption.multimodal_model === 'vllm_current' && !textgenerationwebui_settings.vllm_model) { + if (multimodalApi === 'vllm' && multimodalModel === 'vllm_current' && !textgenerationwebui_settings.vllm_model) { throw new Error('vLLM model is not set.'); } - if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) { + if (multimodalApi === 'custom' && !oai_settings.custom_url) { throw new Error('Custom API URL is not set.'); } } diff --git a/src/endpoints/openai.js b/src/endpoints/openai.js index 5af8e9976..3604634c3 100644 --- a/src/endpoints/openai.js +++ b/src/endpoints/openai.js @@ -151,8 +151,11 @@ router.post('/caption-image', async (request, response) => { apiUrl = 'https://text.pollinations.ai/openai/chat/completions'; } - if (request.body.api === 'ooba') { + if (['koboldcpp', 'vllm', 'llamacpp', 'ooba'].includes(request.body.api)) { apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`; + } + + if (request.body.api === 'ooba') { const imgMessage = body.messages.pop(); body.messages.push({ role: 'user', @@ -165,10 +168,6 @@ router.post('/caption-image', async (request, response) => { }); } - if (['koboldcpp', 'vllm', 'llamacpp'].includes(request.body.api)) { - apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`; - } - setAdditionalHeaders(request, { headers }, apiUrl); console.debug('Multimodal captioning request', body);