Merge branch 'staging' into qr-crud-slash-commands

This commit is contained in:
Cohee
2023-12-15 00:42:41 +02:00
23 changed files with 554 additions and 144 deletions

View File

@@ -134,7 +134,7 @@ async function doCaptionRequest(base64Img, fileData) {
case 'horde':
return await captionHorde(base64Img);
case 'multimodal':
return await captionMultimodal(fileData);
return await captionMultimodal(extension_settings.caption.multimodal_api === 'google' ? base64Img : fileData);
default:
throw new Error('Unknown caption source.');
}
@@ -273,6 +273,7 @@ jQuery(function () {
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && secret_state[SECRET_KEYS.OPENAI]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
extension_settings.caption.source === 'local' ||
extension_settings.caption.source === 'horde';
@@ -328,7 +329,7 @@ jQuery(function () {
<label for="caption_source">Source</label>
<select id="caption_source" class="text_pole">
<option value="local">Local</option>
<option value="multimodal">Multimodal (OpenAI / OpenRouter)</option>
<option value="multimodal">Multimodal (OpenAI / OpenRouter / Google)</option>
<option value="extras">Extras</option>
<option value="horde">Horde</option>
</select>
@@ -338,12 +339,14 @@ jQuery(function () {
<select id="caption_multimodal_api" class="flex1 text_pole">
<option value="openai">OpenAI</option>
<option value="openrouter">OpenRouter</option>
<option value="google">Google</option>
</select>
</div>
<div class="flex1 flex-container flexFlowColumn flexNoGap">
<label for="caption_multimodal_model">Model</label>
<select id="caption_multimodal_model" class="flex1 text_pole">
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
<option data-type="google" value="gemini-pro-vision">gemini-pro-vision</option>
<option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
</select>

View File

@@ -18,22 +18,35 @@ export async function getMultimodalCaption(base64Img, prompt) {
throw new Error('OpenRouter API key is not set.');
}
// OpenRouter has a payload limit of ~2MB
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if (extension_settings.caption.multimodal_api === 'openrouter' && base64Bytes > compressionLimit) {
const maxSide = 1024;
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE]) {
throw new Error('MakerSuite API key is not set.');
}
const apiResult = await fetch('/api/openai/caption-image', {
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
const isGoogle = extension_settings.caption.multimodal_api === 'google';
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if (['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) {
const maxSide = 1024;
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
if (isGoogle) {
base64Img = base64Img.split(',')[1];
}
}
const apiResult = await fetch(`/api/${isGoogle ? 'google' : 'openai'}/caption-image`, {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
image: base64Img,
prompt: prompt,
api: extension_settings.caption.multimodal_api || 'openai',
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
...(isGoogle
? {}
: {
api: extension_settings.caption.multimodal_api || 'openai',
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
}),
}),
});

View File

@@ -1756,22 +1756,28 @@ async function generateMultimodalPrompt(generationType, quietPrompt) {
}
}
const response = await fetch(avatarUrl);
try {
const response = await fetch(avatarUrl);
if (!response.ok) {
throw new Error('Could not fetch avatar image.');
}
if (!response.ok) {
throw new Error('Could not fetch avatar image.');
}
const avatarBlob = await response.blob();
const avatarBase64 = await getBase64Async(avatarBlob);
const avatarBlob = await response.blob();
const avatarBase64 = await getBase64Async(avatarBlob);
const caption = await getMultimodalCaption(avatarBase64, quietPrompt);
const caption = await getMultimodalCaption(avatarBase64, quietPrompt);
if (!caption) {
if (!caption) {
throw new Error('No caption returned from the API.');
}
return caption;
} catch (error) {
console.error(error);
toastr.error('Multimodal captioning failed. Please try again.', 'Image Generation');
throw new Error('Multimodal captioning failed.');
}
return caption;
}
/**

View File

@@ -394,7 +394,7 @@ async function getSavedHashes(collectionId) {
*/
async function insertVectorItems(collectionId, items) {
if (settings.source === 'openai' && !secret_state[SECRET_KEYS.OPENAI] ||
settings.source === 'palm' && !secret_state[SECRET_KEYS.PALM]) {
settings.source === 'palm' && !secret_state[SECRET_KEYS.MAKERSUITE]) {
throw new Error('Vectors: API key missing', { cause: 'api_key_missing' });
}