actually convert the prompts properly

This commit is contained in:
based 2024-03-27 15:48:26 +10:00
parent 6a51855f19
commit f3b9920f22

View File

@@ -192,9 +192,6 @@ function convertClaudeMessages(messages, prefillString, useSysPrompt, humanMsgFi
function convertGooglePrompt(messages, model) { function convertGooglePrompt(messages, model) {
// This is a 1x1 transparent PNG // This is a 1x1 transparent PNG
const PNG_PIXEL = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='; const PNG_PIXEL = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=';
const contents = [];
let lastRole = '';
let currentText = '';
const visionSupportedModels = [ const visionSupportedModels = [
'gemini-1.0-pro-vision-latest', 'gemini-1.0-pro-vision-latest',
@@ -203,48 +200,65 @@ function convertGooglePrompt(messages, model) {
]; ];
const isMultimodal = visionSupportedModels.includes(model); const isMultimodal = visionSupportedModels.includes(model);
let hasImage = false;
if (isMultimodal) { const contents = [];
const combinedText = messages.map((message) => { messages.forEach((message, index) => {
const role = message.role === 'assistant' ? 'MODEL: ' : 'USER: '; // fix the roles
return role + message.content; if (message.role === 'system') {
}).join('\n\n').trim(); message.role = 'user';
} else if (message.role === 'assistant') {
message.role = 'model';
}
const imageEntry = messages.find((message) => message.content?.[1]?.image_url); // similar story as claude
const imageData = imageEntry?.content?.[1]?.image_url?.data ?? PNG_PIXEL; if (message.name) {
contents.push({ if (Array.isArray(message.content)) {
parts: [ message.content[0].text = `${message.name}: ${message.content[0].text}`;
{ text: combinedText },
{
inlineData: {
mimeType: 'image/png',
data: imageData,
},
},
],
role: 'user',
});
} else {
messages.forEach((message, index) => {
const role = message.role === 'assistant' ? 'model' : 'user';
if (lastRole === role) {
currentText += '\n\n' + message.content;
} else { } else {
if (currentText !== '') { message.content = `${message.name}: ${message.content}`;
contents.push({ }
parts: [{ text: currentText.trim() }], delete message.name;
role: lastRole, }
//create the prompt parts
const parts = [];
if (typeof message.content === 'string') {
parts.push({ text: message.content });
} else if (Array.isArray(message.content)) {
message.content.forEach((part) => {
if (part.type === 'text') {
parts.push({ text: part.text });
} else if (part.type === 'image_url' && isMultimodal) {
parts.push({
inlineData: {
mimeType: 'image/png',
data: part.image_url.url,
},
}); });
hasImage = true;
} }
currentText = message.content; });
lastRole = role; }
}
if (index === messages.length - 1) { // merge consecutive messages with the same role
contents.push({ if (index > 0 && message.role === contents[contents.length - 1].role) {
parts: [{ text: currentText.trim() }], contents[contents.length - 1].parts[0].text += '\n\n' + parts[0].text;
role: lastRole, } else {
}); contents.push({
} role: message.role,
parts: parts,
});
}
});
// pro 1.5 doesn't require a dummy image to be attached, other vision models do
if (isMultimodal && model !== 'gemini-1.5-pro-latest' && !hasImage) {
contents[0].parts.push({
inlineData: {
mimeType: 'image/png',
data: PNG_PIXEL,
},
}); });
} }