mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
added google vision caption support
This commit is contained in:
@ -1,7 +1,7 @@
|
||||
import { getRequestHeaders } from '../../script.js';
|
||||
import { extension_settings } from '../extensions.js';
|
||||
import { SECRET_KEYS, secret_state } from '../secrets.js';
|
||||
import { createThumbnail } from '../utils.js';
|
||||
import {getRequestHeaders} from '../../script.js';
|
||||
import {extension_settings} from '../extensions.js';
|
||||
import {SECRET_KEYS, secret_state} from '../secrets.js';
|
||||
import {createThumbnail} from '../utils.js';
|
||||
|
||||
/**
|
||||
* Generates a caption for an image using a multimodal model.
|
||||
@ -18,6 +18,10 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
||||
throw new Error('OpenRouter API key is not set.');
|
||||
}
|
||||
|
||||
if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE]) {
|
||||
throw new Error('MakerSuite API key is not set.');
|
||||
}
|
||||
|
||||
// OpenRouter has a payload limit of ~2MB
|
||||
const base64Bytes = base64Img.length * 0.75;
|
||||
const compressionLimit = 2 * 1024 * 1024;
|
||||
@ -26,16 +30,25 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
||||
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
|
||||
}
|
||||
|
||||
const apiResult = await fetch('/api/openai/caption-image', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify({
|
||||
image: base64Img,
|
||||
prompt: prompt,
|
||||
api: extension_settings.caption.multimodal_api || 'openai',
|
||||
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
|
||||
}),
|
||||
});
|
||||
const apiResult = extension_settings.caption.multimodal_api === 'google' ?
|
||||
await fetch('/api/google/caption-image', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify({
|
||||
image: base64Img,
|
||||
prompt: prompt,
|
||||
}),
|
||||
})
|
||||
: await fetch('/api/openai/caption-image', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify({
|
||||
image: base64Img,
|
||||
prompt: prompt,
|
||||
api: extension_settings.caption.multimodal_api || 'openai',
|
||||
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
throw new Error('Failed to caption image via OpenAI.');
|
||||
|
Reference in New Issue
Block a user