Merge branch 'staging' into qr-rewrite

This commit is contained in:
LenAnderson
2023-12-24 00:25:27 +00:00
3 changed files with 34 additions and 3 deletions

View File

@ -286,6 +286,7 @@ jQuery(function () {
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') ||
extension_settings.caption.source === 'local' ||
extension_settings.caption.source === 'horde';
@ -351,6 +352,7 @@ jQuery(function () {
<label for="caption_multimodal_api">API</label>
<select id="caption_multimodal_api" class="flex1 text_pole">
<option value="llamacpp">llama.cpp</option>
<option value="ooba">Text Generation WebUI (oobabooga)</option>
<option value="ollama">Ollama</option>
<option value="openai">OpenAI</option>
<option value="openrouter">OpenRouter</option>
@ -369,6 +371,7 @@ jQuery(function () {
<option data-type="ollama" value="bakllava:latest">bakllava:latest</option>
<option data-type="ollama" value="llava:latest">llava:latest</option>
<option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option>
<option data-type="ooba" value="ooba_current">[Currently loaded]</option>
<option data-type="custom" value="custom_current">[Currently selected]</option>
</select>
</div>

View File

@ -21,13 +21,15 @@ export async function getMultimodalCaption(base64Img, prompt) {
}
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
// Ooba requires all images to be JPEGs.
const isGoogle = extension_settings.caption.multimodal_api === 'google';
const isOllama = extension_settings.caption.multimodal_api === 'ollama';
const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp';
const isCustom = extension_settings.caption.multimodal_api === 'custom';
const isOoba = extension_settings.caption.multimodal_api === 'ooba';
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if (['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) {
if ((['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba) {
const maxSide = 1024;
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
@ -69,6 +71,10 @@ export async function getMultimodalCaption(base64Img, prompt) {
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
}
if (isOoba) {
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OOBA];
}
if (isCustom) {
requestBody.server_url = oai_settings.custom_url;
requestBody.model = oai_settings.custom_model || 'gpt-4-vision-preview';
@ -129,6 +135,10 @@ function throwIfInvalidModel() {
throw new Error('LlamaCPP server URL is not set.');
}
if (extension_settings.caption.multimodal_api === 'ooba' && !textgenerationwebui_settings.server_urls[textgen_types.OOBA]) {
throw new Error('Text Generation WebUI server URL is not set.');
}
if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) {
throw new Error('Custom API URL is not set.');
}