mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Merge branch 'staging' into qr-crud-slash-commands
This commit is contained in:
@@ -134,7 +134,7 @@ async function doCaptionRequest(base64Img, fileData) {
|
||||
case 'horde':
|
||||
return await captionHorde(base64Img);
|
||||
case 'multimodal':
|
||||
return await captionMultimodal(fileData);
|
||||
return await captionMultimodal(extension_settings.caption.multimodal_api === 'google' ? base64Img : fileData);
|
||||
default:
|
||||
throw new Error('Unknown caption source.');
|
||||
}
|
||||
@@ -273,6 +273,7 @@ jQuery(function () {
|
||||
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && secret_state[SECRET_KEYS.OPENAI]) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
|
||||
extension_settings.caption.source === 'local' ||
|
||||
extension_settings.caption.source === 'horde';
|
||||
|
||||
@@ -328,7 +329,7 @@ jQuery(function () {
|
||||
<label for="caption_source">Source</label>
|
||||
<select id="caption_source" class="text_pole">
|
||||
<option value="local">Local</option>
|
||||
<option value="multimodal">Multimodal (OpenAI / OpenRouter)</option>
|
||||
<option value="multimodal">Multimodal (OpenAI / OpenRouter / Google)</option>
|
||||
<option value="extras">Extras</option>
|
||||
<option value="horde">Horde</option>
|
||||
</select>
|
||||
@@ -338,12 +339,14 @@ jQuery(function () {
|
||||
<select id="caption_multimodal_api" class="flex1 text_pole">
|
||||
<option value="openai">OpenAI</option>
|
||||
<option value="openrouter">OpenRouter</option>
|
||||
<option value="google">Google</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
||||
<label for="caption_multimodal_model">Model</label>
|
||||
<select id="caption_multimodal_model" class="flex1 text_pole">
|
||||
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
|
||||
<option data-type="google" value="gemini-pro-vision">gemini-pro-vision</option>
|
||||
<option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
|
||||
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
|
||||
</select>
|
||||
|
||||
@@ -18,22 +18,35 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
||||
throw new Error('OpenRouter API key is not set.');
|
||||
}
|
||||
|
||||
// OpenRouter has a payload limit of ~2MB
|
||||
const base64Bytes = base64Img.length * 0.75;
|
||||
const compressionLimit = 2 * 1024 * 1024;
|
||||
if (extension_settings.caption.multimodal_api === 'openrouter' && base64Bytes > compressionLimit) {
|
||||
const maxSide = 1024;
|
||||
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
|
||||
if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE]) {
|
||||
throw new Error('MakerSuite API key is not set.');
|
||||
}
|
||||
|
||||
const apiResult = await fetch('/api/openai/caption-image', {
|
||||
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
|
||||
const isGoogle = extension_settings.caption.multimodal_api === 'google';
|
||||
const base64Bytes = base64Img.length * 0.75;
|
||||
const compressionLimit = 2 * 1024 * 1024;
|
||||
if (['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) {
|
||||
const maxSide = 1024;
|
||||
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
|
||||
|
||||
if (isGoogle) {
|
||||
base64Img = base64Img.split(',')[1];
|
||||
}
|
||||
}
|
||||
|
||||
const apiResult = await fetch(`/api/${isGoogle ? 'google' : 'openai'}/caption-image`, {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify({
|
||||
image: base64Img,
|
||||
prompt: prompt,
|
||||
api: extension_settings.caption.multimodal_api || 'openai',
|
||||
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
|
||||
...(isGoogle
|
||||
? {}
|
||||
: {
|
||||
api: extension_settings.caption.multimodal_api || 'openai',
|
||||
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
|
||||
}),
|
||||
}),
|
||||
});
|
||||
|
||||
|
||||
@@ -1756,22 +1756,28 @@ async function generateMultimodalPrompt(generationType, quietPrompt) {
|
||||
}
|
||||
}
|
||||
|
||||
const response = await fetch(avatarUrl);
|
||||
try {
|
||||
const response = await fetch(avatarUrl);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Could not fetch avatar image.');
|
||||
}
|
||||
if (!response.ok) {
|
||||
throw new Error('Could not fetch avatar image.');
|
||||
}
|
||||
|
||||
const avatarBlob = await response.blob();
|
||||
const avatarBase64 = await getBase64Async(avatarBlob);
|
||||
const avatarBlob = await response.blob();
|
||||
const avatarBase64 = await getBase64Async(avatarBlob);
|
||||
|
||||
const caption = await getMultimodalCaption(avatarBase64, quietPrompt);
|
||||
const caption = await getMultimodalCaption(avatarBase64, quietPrompt);
|
||||
|
||||
if (!caption) {
|
||||
if (!caption) {
|
||||
throw new Error('No caption returned from the API.');
|
||||
}
|
||||
|
||||
return caption;
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
toastr.error('Multimodal captioning failed. Please try again.', 'Image Generation');
|
||||
throw new Error('Multimodal captioning failed.');
|
||||
}
|
||||
|
||||
return caption;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -394,7 +394,7 @@ async function getSavedHashes(collectionId) {
|
||||
*/
|
||||
async function insertVectorItems(collectionId, items) {
|
||||
if (settings.source === 'openai' && !secret_state[SECRET_KEYS.OPENAI] ||
|
||||
settings.source === 'palm' && !secret_state[SECRET_KEYS.PALM]) {
|
||||
settings.source === 'palm' && !secret_state[SECRET_KEYS.MAKERSUITE]) {
|
||||
throw new Error('Vectors: API key missing', { cause: 'api_key_missing' });
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user