Add koboldcpp as a multimodal captioning source

This commit is contained in:
Cohee
2024-03-14 01:03:51 +02:00
parent bd223486de
commit 6ac8ef1b48
3 changed files with 26 additions and 3 deletions

View File

@ -288,6 +288,7 @@ jQuery(function () {
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'koboldcpp' && textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') ||
extension_settings.caption.source === 'local' || extension_settings.caption.source === 'local' ||
extension_settings.caption.source === 'horde'; extension_settings.caption.source === 'horde';
@ -355,6 +356,7 @@ jQuery(function () {
<select id="caption_multimodal_api" class="flex1 text_pole"> <select id="caption_multimodal_api" class="flex1 text_pole">
<option value="llamacpp">llama.cpp</option> <option value="llamacpp">llama.cpp</option>
<option value="ooba">Text Generation WebUI (oobabooga)</option> <option value="ooba">Text Generation WebUI (oobabooga)</option>
<option value="koboldcpp">KoboldCpp</option>
<option value="ollama">Ollama</option> <option value="ollama">Ollama</option>
<option value="openai">OpenAI</option> <option value="openai">OpenAI</option>
<option value="anthropic">Anthropic</option> <option value="anthropic">Anthropic</option>
@ -378,6 +380,7 @@ jQuery(function () {
<option data-type="ollama" value="llava:latest">llava:latest</option> <option data-type="ollama" value="llava:latest">llava:latest</option>
<option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option> <option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option>
<option data-type="ooba" value="ooba_current">[Currently loaded]</option> <option data-type="ooba" value="ooba_current">[Currently loaded]</option>
<option data-type="koboldcpp" value="koboldcpp_current">[Currently loaded]</option>
<option data-type="custom" value="custom_current">[Currently selected]</option> <option data-type="custom" value="custom_current">[Currently selected]</option>
</select> </select>
</div> </div>

View File

@ -21,16 +21,17 @@ export async function getMultimodalCaption(base64Img, prompt) {
} }
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy. // OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
// Ooba requires all images to be JPEGs. // Ooba requires all images to be JPEGs. Koboldcpp just asked nicely.
const isGoogle = extension_settings.caption.multimodal_api === 'google'; const isGoogle = extension_settings.caption.multimodal_api === 'google';
const isClaude = extension_settings.caption.multimodal_api === 'anthropic'; const isClaude = extension_settings.caption.multimodal_api === 'anthropic';
const isOllama = extension_settings.caption.multimodal_api === 'ollama'; const isOllama = extension_settings.caption.multimodal_api === 'ollama';
const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp'; const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp';
const isCustom = extension_settings.caption.multimodal_api === 'custom'; const isCustom = extension_settings.caption.multimodal_api === 'custom';
const isOoba = extension_settings.caption.multimodal_api === 'ooba'; const isOoba = extension_settings.caption.multimodal_api === 'ooba';
const isKoboldCpp = extension_settings.caption.multimodal_api === 'koboldcpp';
const base64Bytes = base64Img.length * 0.75; const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024; const compressionLimit = 2 * 1024 * 1024;
if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba) { if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
const maxSide = 1024; const maxSide = 1024;
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg'); base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
@ -76,6 +77,10 @@ export async function getMultimodalCaption(base64Img, prompt) {
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OOBA]; requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OOBA];
} }
if (isKoboldCpp) {
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP];
}
if (isCustom) { if (isCustom) {
requestBody.server_url = oai_settings.custom_url; requestBody.server_url = oai_settings.custom_url;
requestBody.model = oai_settings.custom_model || 'gpt-4-vision-preview'; requestBody.model = oai_settings.custom_model || 'gpt-4-vision-preview';
@ -142,6 +147,10 @@ function throwIfInvalidModel() {
throw new Error('Text Generation WebUI server URL is not set.'); throw new Error('Text Generation WebUI server URL is not set.');
} }
if (extension_settings.caption.multimodal_api === 'koboldcpp' && !textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP]) {
throw new Error('KoboldCpp server URL is not set.');
}
if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) { if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) {
throw new Error('Custom API URL is not set.'); throw new Error('Custom API URL is not set.');
} }

View File

@ -5,6 +5,7 @@ const FormData = require('form-data');
const fs = require('fs'); const fs = require('fs');
const { jsonParser, urlencodedParser } = require('../express-common'); const { jsonParser, urlencodedParser } = require('../express-common');
const { getConfigValue, mergeObjectWithYaml, excludeKeysByYaml, trimV1 } = require('../util'); const { getConfigValue, mergeObjectWithYaml, excludeKeysByYaml, trimV1 } = require('../util');
const { setAdditionalHeaders } = require('../additional-headers');
const router = express.Router(); const router = express.Router();
@ -37,7 +38,11 @@ router.post('/caption-image', jsonParser, async (request, response) => {
bodyParams.temperature = 0.1; bodyParams.temperature = 0.1;
} }
if (!key && !request.body.reverse_proxy && request.body.api !== 'custom' && request.body.api !== 'ooba') { if (request.body.api === 'koboldcpp') {
key = readSecret(SECRET_KEYS.KOBOLDCPP);
}
if (!key && !request.body.reverse_proxy && ['custom', 'ooba', 'koboldcpp'].includes(request.body.api) === false) {
console.log('No key found for API', request.body.api); console.log('No key found for API', request.body.api);
return response.sendStatus(400); return response.sendStatus(400);
} }
@ -104,6 +109,12 @@ router.post('/caption-image', jsonParser, async (request, response) => {
}); });
} }
if (request.body.api === 'koboldcpp') {
apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
}
setAdditionalHeaders(request, { headers }, apiUrl);
const result = await fetch(apiUrl, { const result = await fetch(apiUrl, {
method: 'POST', method: 'POST',
headers: { headers: {