mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-03-10 09:00:14 +01:00
Add multimodal captioning for ooba
This commit is contained in:
parent
88993bd3e8
commit
a8fb306c12
@ -286,6 +286,7 @@ jQuery(function () {
|
|||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
|
||||||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) ||
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') ||
|
||||||
extension_settings.caption.source === 'local' ||
|
extension_settings.caption.source === 'local' ||
|
||||||
extension_settings.caption.source === 'horde';
|
extension_settings.caption.source === 'horde';
|
||||||
@ -351,6 +352,7 @@ jQuery(function () {
|
|||||||
<label for="caption_multimodal_api">API</label>
|
<label for="caption_multimodal_api">API</label>
|
||||||
<select id="caption_multimodal_api" class="flex1 text_pole">
|
<select id="caption_multimodal_api" class="flex1 text_pole">
|
||||||
<option value="llamacpp">llama.cpp</option>
|
<option value="llamacpp">llama.cpp</option>
|
||||||
|
<option value="ooba">Text Generation WebUI (oobabooga)</option>
|
||||||
<option value="ollama">Ollama</option>
|
<option value="ollama">Ollama</option>
|
||||||
<option value="openai">OpenAI</option>
|
<option value="openai">OpenAI</option>
|
||||||
<option value="openrouter">OpenRouter</option>
|
<option value="openrouter">OpenRouter</option>
|
||||||
@ -369,6 +371,7 @@ jQuery(function () {
|
|||||||
<option data-type="ollama" value="bakllava:latest">bakllava:latest</option>
|
<option data-type="ollama" value="bakllava:latest">bakllava:latest</option>
|
||||||
<option data-type="ollama" value="llava:latest">llava:latest</option>
|
<option data-type="ollama" value="llava:latest">llava:latest</option>
|
||||||
<option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option>
|
<option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option>
|
||||||
|
<option data-type="ooba" value="ooba_current">[Currently loaded]</option>
|
||||||
<option data-type="custom" value="custom_current">[Currently selected]</option>
|
<option data-type="custom" value="custom_current">[Currently selected]</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
@ -21,13 +21,15 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
|
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
|
||||||
|
// Ooba requires all images to be JPEGs.
|
||||||
const isGoogle = extension_settings.caption.multimodal_api === 'google';
|
const isGoogle = extension_settings.caption.multimodal_api === 'google';
|
||||||
const isOllama = extension_settings.caption.multimodal_api === 'ollama';
|
const isOllama = extension_settings.caption.multimodal_api === 'ollama';
|
||||||
const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp';
|
const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp';
|
||||||
const isCustom = extension_settings.caption.multimodal_api === 'custom';
|
const isCustom = extension_settings.caption.multimodal_api === 'custom';
|
||||||
|
const isOoba = extension_settings.caption.multimodal_api === 'ooba';
|
||||||
const base64Bytes = base64Img.length * 0.75;
|
const base64Bytes = base64Img.length * 0.75;
|
||||||
const compressionLimit = 2 * 1024 * 1024;
|
const compressionLimit = 2 * 1024 * 1024;
|
||||||
if (['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) {
|
if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba) {
|
||||||
const maxSide = 1024;
|
const maxSide = 1024;
|
||||||
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
|
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
|
||||||
|
|
||||||
@ -69,6 +71,10 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
|||||||
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
|
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (isOoba) {
|
||||||
|
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OOBA];
|
||||||
|
}
|
||||||
|
|
||||||
if (isCustom) {
|
if (isCustom) {
|
||||||
requestBody.server_url = oai_settings.custom_url;
|
requestBody.server_url = oai_settings.custom_url;
|
||||||
requestBody.model = oai_settings.custom_model || 'gpt-4-vision-preview';
|
requestBody.model = oai_settings.custom_model || 'gpt-4-vision-preview';
|
||||||
@ -129,6 +135,10 @@ function throwIfInvalidModel() {
|
|||||||
throw new Error('LlamaCPP server URL is not set.');
|
throw new Error('LlamaCPP server URL is not set.');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (extension_settings.caption.multimodal_api === 'ooba' && !textgenerationwebui_settings.server_urls[textgen_types.OOBA]) {
|
||||||
|
throw new Error('Text Generation WebUI server URL is not set.');
|
||||||
|
}
|
||||||
|
|
||||||
if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) {
|
if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) {
|
||||||
throw new Error('Custom API URL is not set.');
|
throw new Error('Custom API URL is not set.');
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ const express = require('express');
|
|||||||
const FormData = require('form-data');
|
const FormData = require('form-data');
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const { jsonParser, urlencodedParser } = require('../express-common');
|
const { jsonParser, urlencodedParser } = require('../express-common');
|
||||||
const { getConfigValue, mergeObjectWithYaml, excludeKeysByYaml } = require('../util');
|
const { getConfigValue, mergeObjectWithYaml, excludeKeysByYaml, trimV1 } = require('../util');
|
||||||
|
|
||||||
const router = express.Router();
|
const router = express.Router();
|
||||||
|
|
||||||
@ -32,7 +32,11 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
|||||||
mergeObjectWithYaml(headers, request.body.custom_include_headers);
|
mergeObjectWithYaml(headers, request.body.custom_include_headers);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!key && !request.body.reverse_proxy && request.body.api !== 'custom') {
|
if (request.body.api === 'ooba') {
|
||||||
|
bodyParams.temperature = 0.1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!key && !request.body.reverse_proxy && request.body.api !== 'custom' && request.body.api !== 'ooba') {
|
||||||
console.log('No key found for API', request.body.api);
|
console.log('No key found for API', request.body.api);
|
||||||
return response.sendStatus(400);
|
return response.sendStatus(400);
|
||||||
}
|
}
|
||||||
@ -85,6 +89,20 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
|||||||
apiUrl = `${request.body.server_url}/chat/completions`;
|
apiUrl = `${request.body.server_url}/chat/completions`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (request.body.api === 'ooba') {
|
||||||
|
apiUrl = `${trimV1(request.body.server_url)}/v1/chat/completions`;
|
||||||
|
const imgMessage = body.messages.pop();
|
||||||
|
body.messages.push({
|
||||||
|
role: 'user',
|
||||||
|
content: imgMessage?.content?.[0]?.text,
|
||||||
|
});
|
||||||
|
body.messages.push({
|
||||||
|
role: 'user',
|
||||||
|
content: [],
|
||||||
|
image_url: imgMessage?.content?.[1]?.image_url?.url,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const result = await fetch(apiUrl, {
|
const result = await fetch(apiUrl, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user