Fix sending PNG/WEBP to Google captioning

This commit is contained in:
Cohee
2023-12-14 20:36:31 +02:00
parent d1be9d5347
commit bb8b8f9386
2 changed files with 28 additions and 18 deletions

View File

@ -1756,6 +1756,7 @@ async function generateMultimodalPrompt(generationType, quietPrompt) {
} }
} }
try {
const response = await fetch(avatarUrl); const response = await fetch(avatarUrl);
if (!response.ok) { if (!response.ok) {
@ -1768,10 +1769,15 @@ async function generateMultimodalPrompt(generationType, quietPrompt) {
const caption = await getMultimodalCaption(avatarBase64, quietPrompt); const caption = await getMultimodalCaption(avatarBase64, quietPrompt);
if (!caption) { if (!caption) {
throw new Error('Multimodal captioning failed.'); throw new Error('No caption returned from the API.');
} }
return caption; return caption;
} catch (error) {
console.error(error);
toastr.error('Multimodal captioning failed. Please try again.', 'Image Generation');
throw new Error('Multimodal captioning failed.');
}
} }
/** /**

View File

@ -8,14 +8,17 @@ const router = express.Router();
router.post('/caption-image', jsonParser, async (request, response) => { router.post('/caption-image', jsonParser, async (request, response) => {
try { try {
const mimeType = request.body.image.split(';')[0].split(':')[1];
const base64Data = request.body.image.split(',')[1];
const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`; const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`;
const body = { const body = {
contents: [{ contents: [{
parts: [ parts: [
{ text: request.body.prompt }, { text: request.body.prompt },
{ inlineData: { {
mimeType: 'image/png', //jpg images seem to work fine even with this mimetype set? inlineData: {
data: request.body.image, mimeType: 'image/png', // It needs to specify a MIME type in data if it's not a PNG
data: mimeType === 'image/png' ? base64Data : request.body.image,
}, },
}], }],
}], }],
@ -23,6 +26,8 @@ router.post('/caption-image', jsonParser, async (request, response) => {
generationConfig: { maxOutputTokens: 1000 }, generationConfig: { maxOutputTokens: 1000 },
}; };
console.log('Multimodal captioning request', body);
const result = await fetch(url, { const result = await fetch(url, {
body: JSON.stringify(body), body: JSON.stringify(body),
method: 'POST', method: 'POST',
@ -32,10 +37,9 @@ router.post('/caption-image', jsonParser, async (request, response) => {
timeout: 0, timeout: 0,
}); });
console.log('Multimodal captioning request', body);
if (!result.ok) { if (!result.ok) {
console.log(`MakerSuite API returned error: ${result.status} ${result.statusText} ${await result.text()}`); const error = await result.json();
console.log(`MakerSuite API returned error: ${result.status} ${result.statusText}`, error);
return response.status(result.status).send({ error: true }); return response.status(result.status).send({ error: true });
} }
@ -43,7 +47,7 @@ router.post('/caption-image', jsonParser, async (request, response) => {
console.log('Multimodal captioning response', data); console.log('Multimodal captioning response', data);
const candidates = data?.candidates; const candidates = data?.candidates;
if(!candidates) { if (!candidates) {
return response.status(500).send('No candidates found, image was most likely filtered.'); return response.status(500).send('No candidates found, image was most likely filtered.');
} }