diff --git a/public/scripts/extensions/caption/index.js b/public/scripts/extensions/caption/index.js index 0483deeb2..307455a17 100644 --- a/public/scripts/extensions/caption/index.js +++ b/public/scripts/extensions/caption/index.js @@ -288,6 +288,7 @@ jQuery(function () { (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) || + (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'koboldcpp' && textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') || extension_settings.caption.source === 'local' || extension_settings.caption.source === 'horde'; @@ -355,6 +356,7 @@ jQuery(function () { diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js index a30d7967c..96400ad32 100644 --- a/public/scripts/extensions/shared.js +++ b/public/scripts/extensions/shared.js @@ -21,16 +21,17 @@ export async function getMultimodalCaption(base64Img, prompt) { } // OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy. - // Ooba requires all images to be JPEGs. + // Ooba requires all images to be JPEGs. Koboldcpp just asked nicely. const isGoogle = extension_settings.caption.multimodal_api === 'google'; const isClaude = extension_settings.caption.multimodal_api === 'anthropic'; const isOllama = extension_settings.caption.multimodal_api === 'ollama'; const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp'; const isCustom = extension_settings.caption.multimodal_api === 'custom'; const isOoba = extension_settings.caption.multimodal_api === 'ooba'; + const isKoboldCpp = extension_settings.caption.multimodal_api === 'koboldcpp'; const base64Bytes = base64Img.length * 0.75; const compressionLimit = 2 * 1024 * 1024; - if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba) { + if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) { const maxSide = 1024; base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg'); @@ -76,6 +77,10 @@ export async function getMultimodalCaption(base64Img, prompt) { requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OOBA]; } + if (isKoboldCpp) { + requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]; + } + if (isCustom) { requestBody.server_url = oai_settings.custom_url; requestBody.model = oai_settings.custom_model || 'gpt-4-vision-preview'; @@ -142,6 +147,10 @@ function throwIfInvalidModel() { throw new Error('Text Generation WebUI server URL is not set.'); } + if (extension_settings.caption.multimodal_api === 'koboldcpp' && !textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]) { + throw new Error('KoboldCpp server URL is not set.'); + } + if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) { throw new Error('Custom API URL is not set.'); } diff --git a/src/endpoints/openai.js b/src/endpoints/openai.js index 1d10537e9..f8803cfec 100644 --- a/src/endpoints/openai.js +++ b/src/endpoints/openai.js @@ -5,6 +5,7 @@ const FormData = require('form-data'); const fs = require('fs'); const { jsonParser, urlencodedParser } = require('../express-common'); const { getConfigValue, mergeObjectWithYaml, excludeKeysByYaml, trimV1 } = require('../util'); +const { setAdditionalHeaders } = require('../additional-headers'); const router = express.Router(); @@ -37,7 +38,11 @@ router.post('/caption-image', jsonParser, async (request, response) => { bodyParams.temperature = 0.1; } - if (!key && !request.body.reverse_proxy && request.body.api !== 'custom' && request.body.api !== 'ooba') { + if (request.body.api === 'koboldcpp') { + key = readSecret(SECRET_KEYS.KOBOLDCPP); + } + + if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp'].includes(request.body.api) === false) { console.log('No key found for API', request.body.api); return response.sendStatus(400); } @@ -104,6 +109,12 @@ router.post('/caption-image', jsonParser, async (request, response) => { }); } + if (request.body.api === 'koboldcpp') { + apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`; + } + + setAdditionalHeaders(request, { headers }, apiUrl); + const result = await fetch(apiUrl, { method: 'POST', headers: {