mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Fix sending PNG/WEBP to Google captioning
This commit is contained in:
@ -1756,22 +1756,28 @@ async function generateMultimodalPrompt(generationType, quietPrompt) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await fetch(avatarUrl);
|
try {
|
||||||
|
const response = await fetch(avatarUrl);
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
throw new Error('Could not fetch avatar image.');
|
throw new Error('Could not fetch avatar image.');
|
||||||
}
|
}
|
||||||
|
|
||||||
const avatarBlob = await response.blob();
|
const avatarBlob = await response.blob();
|
||||||
const avatarBase64 = await getBase64Async(avatarBlob);
|
const avatarBase64 = await getBase64Async(avatarBlob);
|
||||||
|
|
||||||
const caption = await getMultimodalCaption(avatarBase64, quietPrompt);
|
const caption = await getMultimodalCaption(avatarBase64, quietPrompt);
|
||||||
|
|
||||||
if (!caption) {
|
if (!caption) {
|
||||||
|
throw new Error('No caption returned from the API.');
|
||||||
|
}
|
||||||
|
|
||||||
|
return caption;
|
||||||
|
} catch (error) {
|
||||||
|
console.error(error);
|
||||||
|
toastr.error('Multimodal captioning failed. Please try again.', 'Image Generation');
|
||||||
throw new Error('Multimodal captioning failed.');
|
throw new Error('Multimodal captioning failed.');
|
||||||
}
|
}
|
||||||
|
|
||||||
return caption;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -8,21 +8,26 @@ const router = express.Router();
|
|||||||
|
|
||||||
router.post('/caption-image', jsonParser, async (request, response) => {
|
router.post('/caption-image', jsonParser, async (request, response) => {
|
||||||
try {
|
try {
|
||||||
|
const mimeType = request.body.image.split(';')[0].split(':')[1];
|
||||||
|
const base64Data = request.body.image.split(',')[1];
|
||||||
const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`;
|
const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`;
|
||||||
const body = {
|
const body = {
|
||||||
contents: [{
|
contents: [{
|
||||||
parts: [
|
parts: [
|
||||||
{ text: request.body.prompt },
|
{ text: request.body.prompt },
|
||||||
{ inlineData: {
|
{
|
||||||
mimeType: 'image/png', //jpg images seem to work fine even with this mimetype set?
|
inlineData: {
|
||||||
data: request.body.image,
|
mimeType: 'image/png', // It needs to specify a MIME type in data if it's not a PNG
|
||||||
},
|
data: mimeType === 'image/png' ? base64Data : request.body.image,
|
||||||
|
},
|
||||||
}],
|
}],
|
||||||
}],
|
}],
|
||||||
safetySettings: MAKERSUITE_SAFETY,
|
safetySettings: MAKERSUITE_SAFETY,
|
||||||
generationConfig: { maxOutputTokens: 1000 },
|
generationConfig: { maxOutputTokens: 1000 },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
console.log('Multimodal captioning request', body);
|
||||||
|
|
||||||
const result = await fetch(url, {
|
const result = await fetch(url, {
|
||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
@ -32,10 +37,9 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
|||||||
timeout: 0,
|
timeout: 0,
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log('Multimodal captioning request', body);
|
|
||||||
|
|
||||||
if (!result.ok) {
|
if (!result.ok) {
|
||||||
console.log(`MakerSuite API returned error: ${result.status} ${result.statusText} ${await result.text()}`);
|
const error = await result.json();
|
||||||
|
console.log(`MakerSuite API returned error: ${result.status} ${result.statusText}`, error);
|
||||||
return response.status(result.status).send({ error: true });
|
return response.status(result.status).send({ error: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -43,7 +47,7 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
|||||||
console.log('Multimodal captioning response', data);
|
console.log('Multimodal captioning response', data);
|
||||||
|
|
||||||
const candidates = data?.candidates;
|
const candidates = data?.candidates;
|
||||||
if(!candidates) {
|
if (!candidates) {
|
||||||
return response.status(500).send('No candidates found, image was most likely filtered.');
|
return response.status(500).send('No candidates found, image was most likely filtered.');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user