Added Google Vision caption support

This commit is contained in:
based
2023-12-14 22:37:53 +10:00
parent ca87f29771
commit 0b7c1a98cd
5 changed files with 94 additions and 16 deletions

View File

@ -1,7 +1,7 @@
import { getRequestHeaders } from '../../script.js';
import { extension_settings } from '../extensions.js';
import { SECRET_KEYS, secret_state } from '../secrets.js';
import { createThumbnail } from '../utils.js';
import {getRequestHeaders} from '../../script.js';
import {extension_settings} from '../extensions.js';
import {SECRET_KEYS, secret_state} from '../secrets.js';
import {createThumbnail} from '../utils.js';
/**
* Generates a caption for an image using a multimodal model.
@ -18,6 +18,10 @@ export async function getMultimodalCaption(base64Img, prompt) {
throw new Error('OpenRouter API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE]) {
throw new Error('MakerSuite API key is not set.');
}
// OpenRouter has a payload limit of ~2MB
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
@ -26,16 +30,25 @@ export async function getMultimodalCaption(base64Img, prompt) {
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
}
const apiResult = await fetch('/api/openai/caption-image', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
image: base64Img,
prompt: prompt,
api: extension_settings.caption.multimodal_api || 'openai',
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
}),
});
const apiResult = extension_settings.caption.multimodal_api === 'google' ?
await fetch('/api/google/caption-image', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
image: base64Img,
prompt: prompt,
}),
})
: await fetch('/api/openai/caption-image', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
image: base64Img,
prompt: prompt,
api: extension_settings.caption.multimodal_api || 'openai',
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
}),
});
if (!apiResult.ok) {
throw new Error('Failed to caption image via OpenAI.');