diff --git a/public/scripts/extensions/caption/index.js b/public/scripts/extensions/caption/index.js
index 8e499452e..6b43f3fa3 100644
--- a/public/scripts/extensions/caption/index.js
+++ b/public/scripts/extensions/caption/index.js
@@ -408,6 +408,8 @@ jQuery(async function () {
// Handle multimodal sources
if (settings.source === 'multimodal') {
const api = settings.multimodal_api;
+ const altEndpointEnabled = settings.alt_endpoint_enabled;
+ const altEndpointUrl = settings.alt_endpoint_url;
// APIs that support reverse proxy
const reverseProxyApis = {
@@ -444,7 +446,11 @@ jQuery(async function () {
'vllm': textgen_types.VLLM,
};
- if (textCompletionApis[api] && textgenerationwebui_settings.server_urls[textCompletionApis[api]]) {
+ if (textCompletionApis[api] && altEndpointEnabled && altEndpointUrl) {
+ return true;
+ }
+
+ if (textCompletionApis[api] && !altEndpointEnabled && textgenerationwebui_settings.server_urls[textCompletionApis[api]]) {
return true;
}
@@ -580,6 +586,14 @@ jQuery(async function () {
extension_settings.caption.multimodal_model = String($('#caption_multimodal_model').val());
saveSettingsDebounced();
});
+ $('#caption_altEndpoint_url').val(extension_settings.caption.alt_endpoint_url).on('input', () => {
+ extension_settings.caption.alt_endpoint_url = String($('#caption_altEndpoint_url').val());
+ saveSettingsDebounced();
+ });
+ $('#caption_altEndpoint_enabled').prop('checked', !!(extension_settings.caption.alt_endpoint_enabled)).on('input', () => {
+ extension_settings.caption.alt_endpoint_enabled = !!$('#caption_altEndpoint_enabled').prop('checked');
+ saveSettingsDebounced();
+ });
const onMessageEvent = async (index) => {
if (!extension_settings.caption.auto_mode) {
diff --git a/public/scripts/extensions/caption/settings.html b/public/scripts/extensions/caption/settings.html
index 9eb60ba5b..d78942778 100644
--- a/public/scripts/extensions/caption/settings.html
+++ b/public/scripts/extensions/caption/settings.html
@@ -189,6 +189,16 @@
Hint: Set your API keys and endpoints in the 'API Connections' tab first.
+
+
+
+
+
diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js
index ad4e3589d..01a6e8219 100644
--- a/public/scripts/extensions/shared.js
+++ b/public/scripts/extensions/shared.js
@@ -61,7 +61,9 @@ export async function getMultimodalCaption(base64Img, prompt) {
requestBody.model = textgenerationwebui_settings.ollama_model;
}
- requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OLLAMA];
+ requestBody.server_url = extension_settings.caption.alt_endpoint_enabled
+ ? extension_settings.caption.alt_endpoint_url
+ : textgenerationwebui_settings.server_urls[textgen_types.OLLAMA];
}
if (isVllm) {
@@ -69,19 +71,27 @@ export async function getMultimodalCaption(base64Img, prompt) {
requestBody.model = textgenerationwebui_settings.vllm_model;
}
- requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.VLLM];
+ requestBody.server_url = extension_settings.caption.alt_endpoint_enabled
+ ? extension_settings.caption.alt_endpoint_url
+ : textgenerationwebui_settings.server_urls[textgen_types.VLLM];
}
if (isLlamaCpp) {
- requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
+ requestBody.server_url = extension_settings.caption.alt_endpoint_enabled
+ ? extension_settings.caption.alt_endpoint_url
+ : textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
}
if (isOoba) {
- requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OOBA];
+ requestBody.server_url = extension_settings.caption.alt_endpoint_enabled
+ ? extension_settings.caption.alt_endpoint_url
+ : textgenerationwebui_settings.server_urls[textgen_types.OOBA];
}
if (isKoboldCpp) {
- requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP];
+ requestBody.server_url = extension_settings.caption.alt_endpoint_enabled
+ ? extension_settings.caption.alt_endpoint_url
+ : textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP];
}
if (isCustom) {
@@ -121,75 +131,84 @@ export async function getMultimodalCaption(base64Img, prompt) {
}
/**
 * Checks that the prerequisites for the captioning API selected in
 * `extension_settings.caption.multimodal_api` are configured, and throws on
 * the first one that is missing. Returns nothing when every check passes.
 *
 * @param useReverseProxy Only its truthiness is read here (presumably a
 *     boolean - confirm against the caller). When truthy, the API-key checks
 *     for openai, anthropic, google, vertexai, mistral and xai are skipped.
 * @throws {Error} With a message naming the missing API key, server URL,
 *     model, or secondary endpoint URL.
 */
function throwIfInvalidModel(useReverseProxy) {
- if (extension_settings.caption.multimodal_api === 'openai' && !secret_state[SECRET_KEYS.OPENAI] && !useReverseProxy) {
// Read the caption settings once; every check below uses these locals.
+ const altEndpointEnabled = extension_settings.caption.alt_endpoint_enabled;
+ const altEndpointUrl = extension_settings.caption.alt_endpoint_url;
+ const multimodalModel = extension_settings.caption.multimodal_model;
+ const multimodalApi = extension_settings.caption.multimodal_api;
+
// With the secondary endpoint enabled, the listed local backends require its
// URL. This guard runs first because the per-backend server URL checks
// further down are bypassed whenever altEndpointEnabled is truthy.
+ if (altEndpointEnabled && ['llamacpp', 'ooba', 'koboldcpp', 'vllm', 'ollama'].includes(multimodalApi) && !altEndpointUrl) {
+ throw new Error('Secondary endpoint URL is not set.');
+ }
+
// Key-based APIs: a key must be present in secret_state. Where the condition
// also tests !useReverseProxy, a reverse proxy lifts the key requirement.
+ if (multimodalApi === 'openai' && !secret_state[SECRET_KEYS.OPENAI] && !useReverseProxy) {
throw new Error('OpenAI API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'openrouter' && !secret_state[SECRET_KEYS.OPENROUTER]) {
+ if (multimodalApi === 'openrouter' && !secret_state[SECRET_KEYS.OPENROUTER]) {
throw new Error('OpenRouter API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'anthropic' && !secret_state[SECRET_KEYS.CLAUDE] && !useReverseProxy) {
+ if (multimodalApi === 'anthropic' && !secret_state[SECRET_KEYS.CLAUDE] && !useReverseProxy) {
throw new Error('Anthropic (Claude) API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'zerooneai' && !secret_state[SECRET_KEYS.ZEROONEAI]) {
+ if (multimodalApi === 'zerooneai' && !secret_state[SECRET_KEYS.ZEROONEAI]) {
throw new Error('01.AI API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'groq' && !secret_state[SECRET_KEYS.GROQ]) {
+ if (multimodalApi === 'groq' && !secret_state[SECRET_KEYS.GROQ]) {
throw new Error('Groq API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE] && !useReverseProxy) {
+ if (multimodalApi === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE] && !useReverseProxy) {
throw new Error('Google AI Studio API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'vertexai' && !secret_state[SECRET_KEYS.VERTEXAI] && !useReverseProxy) {
+ if (multimodalApi === 'vertexai' && !secret_state[SECRET_KEYS.VERTEXAI] && !useReverseProxy) {
throw new Error('Google Vertex AI API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) {
+ if (multimodalApi === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) {
throw new Error('Mistral AI API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'cohere' && !secret_state[SECRET_KEYS.COHERE]) {
+ if (multimodalApi === 'cohere' && !secret_state[SECRET_KEYS.COHERE]) {
throw new Error('Cohere API key is not set.');
}
- if (extension_settings.caption.multimodal_api === 'xai' && !secret_state[SECRET_KEYS.XAI] && !useReverseProxy) {
+ if (multimodalApi === 'xai' && !secret_state[SECRET_KEYS.XAI] && !useReverseProxy) {
throw new Error('xAI API key is not set.');
}
// Local backends: the server URL from textgenerationwebui_settings is only
// required when the secondary endpoint is not enabled.
- if (extension_settings.caption.multimodal_api === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) {
+ if (multimodalApi === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA] && !altEndpointEnabled) {
throw new Error('Ollama server URL is not set.');
}
// NOTE(review): 'ollama_current' presumably means "use the model selected in
// the text generation settings" - confirm. This check applies regardless of
// the secondary endpoint setting.
- if (extension_settings.caption.multimodal_api === 'ollama' && extension_settings.caption.multimodal_model === 'ollama_current' && !textgenerationwebui_settings.ollama_model) {
+ if (multimodalApi === 'ollama' && multimodalModel === 'ollama_current' && !textgenerationwebui_settings.ollama_model) {
throw new Error('Ollama model is not set.');
}
- if (extension_settings.caption.multimodal_api === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) {
+ if (multimodalApi === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP] && !altEndpointEnabled) {
throw new Error('LlamaCPP server URL is not set.');
}
- if (extension_settings.caption.multimodal_api === 'ooba' && !textgenerationwebui_settings.server_urls[textgen_types.OOBA]) {
+ if (multimodalApi === 'ooba' && !textgenerationwebui_settings.server_urls[textgen_types.OOBA] && !altEndpointEnabled) {
throw new Error('Text Generation WebUI server URL is not set.');
}
- if (extension_settings.caption.multimodal_api === 'koboldcpp' && !textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]) {
+ if (multimodalApi === 'koboldcpp' && !textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP] && !altEndpointEnabled) {
throw new Error('KoboldCpp server URL is not set.');
}
- if (extension_settings.caption.multimodal_api === 'vllm' && !textgenerationwebui_settings.server_urls[textgen_types.VLLM]) {
+ if (multimodalApi === 'vllm' && !textgenerationwebui_settings.server_urls[textgen_types.VLLM] && !altEndpointEnabled) {
throw new Error('vLLM server URL is not set.');
}
// NOTE(review): 'vllm_current' presumably mirrors 'ollama_current' above -
// confirm. This check also applies regardless of the secondary endpoint.
- if (extension_settings.caption.multimodal_api === 'vllm' && extension_settings.caption.multimodal_model === 'vllm_current' && !textgenerationwebui_settings.vllm_model) {
+ if (multimodalApi === 'vllm' && multimodalModel === 'vllm_current' && !textgenerationwebui_settings.vllm_model) {
throw new Error('vLLM model is not set.');
}
// The custom API takes its URL from oai_settings, not from the secondary endpoint.
- if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) {
+ if (multimodalApi === 'custom' && !oai_settings.custom_url) {
throw new Error('Custom API URL is not set.');
}
}
diff --git a/src/endpoints/openai.js b/src/endpoints/openai.js
index 5af8e9976..3604634c3 100644
--- a/src/endpoints/openai.js
+++ b/src/endpoints/openai.js
@@ -151,8 +151,11 @@ router.post('/caption-image', async (request, response) => {
apiUrl = 'https://text.pollinations.ai/openai/chat/completions';
}
- if (request.body.api === 'ooba') {
+ if (['koboldcpp', 'vllm', 'llamacpp', 'ooba'].includes(request.body.api)) {
apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
+ }
+
+ if (request.body.api === 'ooba') {
const imgMessage = body.messages.pop();
body.messages.push({
role: 'user',
@@ -165,10 +168,6 @@ router.post('/caption-image', async (request, response) => {
});
}
- if (['koboldcpp', 'vllm', 'llamacpp'].includes(request.body.api)) {
- apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
- }
-
setAdditionalHeaders(request, { headers }, apiUrl);
console.debug('Multimodal captioning request', body);