diff --git a/public/scripts/extensions/caption/index.js b/public/scripts/extensions/caption/index.js
index e06449f88..543eef7f6 100644
--- a/public/scripts/extensions/caption/index.js
+++ b/public/scripts/extensions/caption/index.js
@@ -356,6 +356,7 @@ jQuery(async function () {
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'koboldcpp' && textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]) ||
+ (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'vllm' && textgenerationwebui_settings.server_urls[textgen_types.VLLM]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') ||
extension_settings.caption.source === 'local' ||
extension_settings.caption.source === 'horde';
diff --git a/public/scripts/extensions/caption/settings.html b/public/scripts/extensions/caption/settings.html
index 3e23cfbd5..90ff673ba 100644
--- a/public/scripts/extensions/caption/settings.html
+++ b/public/scripts/extensions/caption/settings.html
@@ -26,6 +26,7 @@
+
@@ -66,6 +67,7 @@
+
diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js
index 4a4390318..3e671c3af 100644
--- a/public/scripts/extensions/shared.js
+++ b/public/scripts/extensions/shared.js
@@ -34,6 +34,7 @@ export async function getMultimodalCaption(base64Img, prompt) {
const isCustom = extension_settings.caption.multimodal_api === 'custom';
const isOoba = extension_settings.caption.multimodal_api === 'ooba';
const isKoboldCpp = extension_settings.caption.multimodal_api === 'koboldcpp';
+ const isVllm = extension_settings.caption.multimodal_api === 'vllm';
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
@@ -65,6 +66,14 @@ export async function getMultimodalCaption(base64Img, prompt) {
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OLLAMA];
}
+ if (isVllm) {
+ if (extension_settings.caption.multimodal_model === 'vllm_current') {
+ requestBody.model = textgenerationwebui_settings.vllm_model;
+ }
+
+ requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.VLLM];
+ }
+
if (isLlamaCpp) {
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
}
@@ -151,6 +160,14 @@ function throwIfInvalidModel(useReverseProxy) {
throw new Error('KoboldCpp server URL is not set.');
}
+ if (extension_settings.caption.multimodal_api === 'vllm' && !textgenerationwebui_settings.server_urls[textgen_types.VLLM]) {
+ throw new Error('vLLM server URL is not set.');
+ }
+
+ if (extension_settings.caption.multimodal_api === 'vllm' && extension_settings.caption.multimodal_model === 'vllm_current' && !textgenerationwebui_settings.vllm_model) {
+ throw new Error('vLLM model is not set.');
+ }
+
if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) {
throw new Error('Custom API URL is not set.');
}
diff --git a/src/endpoints/openai.js b/src/endpoints/openai.js
index 75de75b7b..f63d72b11 100644
--- a/src/endpoints/openai.js
+++ b/src/endpoints/openai.js
@@ -43,7 +43,11 @@ router.post('/caption-image', jsonParser, async (request, response) => {
key = readSecret(request.user.directories, SECRET_KEYS.KOBOLDCPP);
}
- if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp'].includes(request.body.api) === false) {
+ if (request.body.api === 'vllm') {
+ key = readSecret(request.user.directories, SECRET_KEYS.VLLM);
+ }
+
+ if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp', 'vllm'].includes(request.body.api) === false) {
console.log('No key found for API', request.body.api);
return response.sendStatus(400);
}
@@ -110,7 +114,7 @@ router.post('/caption-image', jsonParser, async (request, response) => {
});
}
- if (request.body.api === 'koboldcpp') {
+ if (request.body.api === 'koboldcpp' || request.body.api === 'vllm') {
apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
}