Merge pull request #1533 from kingbased/gemini

Better Google Makersuite/AI adapter + Gemini Pro & Vision Support & tokenizers
Authored by Cohee on 2023-12-14 22:42:30 +02:00; committed by GitHub.
21 changed files with 552 additions and 134 deletions

View File

@@ -105,7 +105,26 @@ const UNSAFE_EXTENSIONS = [
'.ws',
];
const PALM_SAFETY = [
const GEMINI_SAFETY = [
{
category: 'HARM_CATEGORY_HARASSMENT',
threshold: 'BLOCK_NONE',
},
{
category: 'HARM_CATEGORY_HATE_SPEECH',
threshold: 'BLOCK_NONE',
},
{
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
threshold: 'BLOCK_NONE',
},
{
category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
threshold: 'BLOCK_NONE',
},
];
const BISON_SAFETY = [
{
category: 'HARM_CATEGORY_DEROGATORY',
threshold: 'BLOCK_NONE',
@@ -139,7 +158,7 @@ const CHAT_COMPLETION_SOURCES = {
SCALE: 'scale',
OPENROUTER: 'openrouter',
AI21: 'ai21',
PALM: 'palm',
MAKERSUITE: 'makersuite',
};
const UPLOADS_PATH = './uploads';
@@ -160,7 +179,8 @@ module.exports = {
DIRECTORIES,
UNSAFE_EXTENSIONS,
UPLOADS_PATH,
PALM_SAFETY,
GEMINI_SAFETY,
BISON_SAFETY,
TEXTGEN_TYPES,
CHAT_COMPLETION_SOURCES,
AVATAR_WIDTH,
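
For context, a minimal sketch (not part of the diff) of how the two presets split by model family, mirroring the model.includes('gemini') check the backend uses further down; the safetyFor helper and the require path are assumptions:

// Illustration only: Gemini models take the HARM_CATEGORY_* presets from GEMINI_SAFETY,
// while the older PaLM/Bison models keep the categories listed in BISON_SAFETY.
const { GEMINI_SAFETY, BISON_SAFETY } = require('./src/constants');

function safetyFor(model) {
    return model.includes('gemini') ? GEMINI_SAFETY : BISON_SAFETY;
}

console.log(safetyFor('gemini-pro').map(x => x.category));
// [ 'HARM_CATEGORY_HARASSMENT', 'HARM_CATEGORY_HATE_SPEECH',
//   'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'HARM_CATEGORY_DANGEROUS_CONTENT' ]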

View File

@@ -1,10 +1,11 @@
const express = require('express');
const fetch = require('node-fetch').default;
const { Readable } = require('stream');
const { jsonParser } = require('../../express-common');
const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants');
const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY } = require('../../constants');
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util');
const { convertClaudePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
const { convertClaudePrompt, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
const { readSecret, SECRET_KEYS } = require('../secrets');
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
@@ -151,28 +152,70 @@ async function sendScaleRequest(request, response) {
* @param {express.Request} request Express request
* @param {express.Response} response Express response
*/
async function sendPalmRequest(request, response) {
const api_key_palm = readSecret(SECRET_KEYS.PALM);
async function sendMakerSuiteRequest(request, response) {
const apiKey = readSecret(SECRET_KEYS.MAKERSUITE);
if (!api_key_palm) {
console.log('Palm API key is missing.');
if (!apiKey) {
console.log('MakerSuite API key is missing.');
return response.status(400).send({ error: true });
}
const body = {
prompt: {
text: request.body.messages,
},
const model = String(request.body.model);
const isGemini = model.includes('gemini');
const isText = model.includes('text');
const stream = Boolean(request.body.stream) && isGemini;
const generationConfig = {
stopSequences: request.body.stop,
safetySettings: PALM_SAFETY,
candidateCount: 1,
maxOutputTokens: request.body.max_tokens,
temperature: request.body.temperature,
topP: request.body.top_p,
topK: request.body.top_k || undefined,
maxOutputTokens: request.body.max_tokens,
candidate_count: 1,
};
console.log('Palm request:', body);
function getGeminiBody() {
return {
contents: convertGooglePrompt(request.body.messages, model),
safetySettings: GEMINI_SAFETY,
generationConfig: generationConfig,
};
}
function getBisonBody() {
const prompt = isText
? ({ text: convertTextCompletionPrompt(request.body.messages) })
: ({ messages: convertGooglePrompt(request.body.messages, model) });
/** @type {any} Shut the lint up */
const bisonBody = {
...generationConfig,
safetySettings: BISON_SAFETY,
candidate_count: 1, // lewgacy spelling
prompt: prompt,
};
if (!isText) {
delete bisonBody.stopSequences;
delete bisonBody.maxOutputTokens;
delete bisonBody.safetySettings;
if (Array.isArray(prompt.messages)) {
for (const msg of prompt.messages) {
msg.author = msg.role;
msg.content = msg.parts[0].text;
delete msg.parts;
delete msg.role;
}
}
}
delete bisonBody.candidateCount;
return bisonBody;
}
const body = isGemini ? getGeminiBody() : getBisonBody();
console.log('MakerSuite request:', body);
try {
const controller = new AbortController();
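
As a rough illustration of what getGeminiBody() above assembles: a short chat maps to roughly the payload below. The message text and generation values are made up, and the safetySettings array is abbreviated; the real values come from request.body and the constants file.

// Illustration only: approximate shape of the body sent to a Gemini model.
const exampleGeminiBody = {
    contents: [
        // convertGooglePrompt() maps 'system' to 'user' and merges consecutive same-role messages
        { role: 'user', parts: [{ text: 'You are a helpful assistant.\n\nHello!' }] },
        { role: 'model', parts: [{ text: 'Hi! How can I help?' }] },
    ],
    safetySettings: [/* the four BLOCK_NONE entries from GEMINI_SAFETY */],
    generationConfig: {
        stopSequences: ['\nUser:'],
        candidateCount: 1,
        maxOutputTokens: 300,
        temperature: 0.9,
        topP: 0.95,
        topK: undefined, // dropped when top_k is not set
    },
};
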
@@ -181,7 +224,12 @@ async function sendPalmRequest(request, response) {
controller.abort();
});
const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, {
const apiVersion = isGemini ? 'v1beta' : 'v1beta2';
const responseType = isGemini
? (stream ? 'streamGenerateContent' : 'generateContent')
: (isText ? 'generateText' : 'generateMessage');
const generateResponse = await fetch(`https://generativelanguage.googleapis.com/${apiVersion}/models/${model}:${responseType}?key=${apiKey}`, {
body: JSON.stringify(body),
method: 'POST',
headers: {
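
For reference, these are the URL shapes the apiVersion/responseType routing above resolves to. The API key and model names are placeholders; text-bison-001 is the model the old hard-coded URL used.

// Illustration only: endpoint URLs produced by the routing above.
const base = 'https://generativelanguage.googleapis.com';
const exampleUrls = [
    `${base}/v1beta/models/gemini-pro:generateContent?key=API_KEY`,        // Gemini, non-streaming
    `${base}/v1beta/models/gemini-pro:streamGenerateContent?key=API_KEY`,  // Gemini, streaming
    `${base}/v1beta2/models/text-bison-001:generateText?key=API_KEY`,      // PaLM text model
    `${base}/v1beta2/models/chat-bison-001:generateMessage?key=API_KEY`,   // PaLM chat model
];
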
@@ -190,34 +238,79 @@ async function sendPalmRequest(request, response) {
signal: controller.signal,
timeout: 0,
});
// have to do this because of their busted ass streaming endpoint
if (stream) {
try {
let partialData = '';
generateResponse.body.on('data', (data) => {
const chunk = data.toString();
if (chunk.startsWith(',') || chunk.endsWith(',') || chunk.startsWith('[') || chunk.endsWith(']')) {
partialData = chunk.slice(1);
} else {
partialData += chunk;
}
while (true) {
let json;
try {
json = JSON.parse(partialData);
} catch (e) {
break;
}
response.write(JSON.stringify(json));
partialData = '';
}
});
if (!generateResponse.ok) {
console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(generateResponse.status).send({ error: true });
}
request.socket.on('close', function () {
if (generateResponse.body instanceof Readable) generateResponse.body.destroy();
response.end();
});
const generateResponseJson = await generateResponse.json();
const responseText = generateResponseJson?.candidates?.[0]?.output;
generateResponse.body.on('end', () => {
console.log('Streaming request finished');
response.end();
});
if (!responseText) {
console.log('Palm API returned no response', generateResponseJson);
let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`;
// Check for filters
if (generateResponseJson?.filters?.[0]?.reason) {
message = `Palm filter triggered: ${generateResponseJson.filters[0].reason}`;
} catch (error) {
console.log('Error forwarding streaming response:', error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
}
} else {
if (!generateResponse.ok) {
console.log(`MakerSuite API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(generateResponse.status).send({ error: true });
}
return response.send({ error: { message } });
const generateResponseJson = await generateResponse.json();
const candidates = generateResponseJson?.candidates;
if (!candidates || candidates.length === 0) {
let message = 'MakerSuite API returned no candidate';
console.log(message, generateResponseJson);
if (generateResponseJson?.promptFeedback?.blockReason) {
message += `\nPrompt was blocked due to : ${generateResponseJson.promptFeedback.blockReason}`;
}
return response.send({ error: { message } });
}
const responseContent = candidates[0].content ?? candidates[0].output;
const responseText = typeof responseContent === 'string' ? responseContent : responseContent.parts?.[0]?.text;
if (!responseText) {
let message = 'MakerSuite Candidate text empty';
console.log(message, generateResponseJson);
return response.send({ error: { message } });
}
console.log('MakerSuite response:', responseText);
// Wrap it back to OAI format
const reply = { choices: [{ 'message': { 'content': responseText } }] };
return response.send(reply);
}
console.log('Palm response:', responseText);
// Wrap it back to OAI format
const reply = { choices: [{ 'message': { 'content': responseText } }] };
return response.send(reply);
} catch (error) {
console.log('Error communicating with Palm API: ', error);
console.log('Error communicating with MakerSuite API: ', error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
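
Google's streamGenerateContent replies with a single JSON array delivered in pieces, which is why the streaming branch above trims array brackets and commas and keeps re-parsing the accumulated text until a complete object appears. A simplified, self-contained sketch of that trimming follows; the chunk boundaries are made up, and the real endpoint may split data differently.

// Illustration only: turning streamed JSON-array fragments into individual objects.
const chunks = [
    '[{"candidates":[{"content":{"parts":[{"text":"Hel"}]}}]}',
    ',{"candidates":[{"content":{"parts":[{"text":"lo!"}]}}]}',
    ']',
];

let partialData = '';
for (const chunk of chunks) {
    if (chunk.startsWith(',') || chunk.endsWith(',') || chunk.startsWith('[') || chunk.endsWith(']')) {
        partialData = chunk.slice(1); // drop the array punctuation, keep the object text
    } else {
        partialData += chunk;
    }
    try {
        const json = JSON.parse(partialData);
        console.log(json.candidates[0].content.parts[0].text); // "Hel", then "lo!"
        partialData = '';
    } catch {
        // not a complete JSON object yet (or just the closing bracket); wait for more data
    }
}

The real loop also handles more than one complete object per read, which this sketch skips for brevity.
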
@@ -225,7 +318,7 @@ async function sendPalmRequest(request, response) {
}
/**
* Sends a request to Google AI API.
* Sends a request to AI21 API.
* @param {express.Request} request Express request
* @param {express.Response} response Express response
*/
@@ -457,7 +550,7 @@ router.post('/generate', jsonParser, function (request, response) {
case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response);
case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response);
case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response);
case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response);
case CHAT_COMPLETION_SOURCES.MAKERSUITE: return sendMakerSuiteRequest(request, response);
}
let apiUrl;

src/endpoints/google.js (new file, 66 lines added)
View File

@@ -0,0 +1,66 @@
const { readSecret, SECRET_KEYS } = require('./secrets');
const fetch = require('node-fetch').default;
const express = require('express');
const { jsonParser } = require('../express-common');
const { GEMINI_SAFETY } = require('../constants');
const router = express.Router();
router.post('/caption-image', jsonParser, async (request, response) => {
try {
const mimeType = request.body.image.split(';')[0].split(':')[1];
const base64Data = request.body.image.split(',')[1];
const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`;
const body = {
contents: [{
parts: [
{ text: request.body.prompt },
{
inlineData: {
mimeType: 'image/png', // It needs to specify a MIME type in data if it's not a PNG
data: mimeType === 'image/png' ? base64Data : request.body.image,
},
}],
}],
safetySettings: GEMINI_SAFETY,
generationConfig: { maxOutputTokens: 1000 },
};
console.log('Multimodal captioning request', body);
const result = await fetch(url, {
body: JSON.stringify(body),
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
timeout: 0,
});
if (!result.ok) {
const error = await result.json();
console.log(`MakerSuite API returned error: ${result.status} ${result.statusText}`, error);
return response.status(result.status).send({ error: true });
}
const data = await result.json();
console.log('Multimodal captioning response', data);
const candidates = data?.candidates;
if (!candidates) {
return response.status(500).send('No candidates found, image was most likely filtered.');
}
const caption = candidates[0].content.parts[0].text;
if (!caption) {
return response.status(500).send('No caption found');
}
return response.json({ caption });
} catch (error) {
console.error(error);
response.status(500).send('Internal server error');
}
});
module.exports = { router };
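
A hedged usage sketch for the new captioning route; the local port and the /api/google mount point are assumptions about how server.js wires this router up.

// Illustration only: calling the caption endpoint from a Node script.
const fetch = require('node-fetch').default;
const fs = require('fs');

async function captionImage(path) {
    const base64 = fs.readFileSync(path).toString('base64');
    const result = await fetch('http://127.0.0.1:8000/api/google/caption-image', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            image: `data:image/png;base64,${base64}`, // data URL, as the handler expects
            prompt: 'Describe this image in one sentence.',
        }),
    });
    const { caption } = await result.json();
    return caption;
}

captionImage('./example.png').then(console.log).catch(console.error);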

View File

@@ -72,6 +72,68 @@ function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, with
return requestPrompt;
}
/**
* Convert a prompt from the ChatML objects to the format used by Google MakerSuite models.
* @param {object[]} messages Array of messages
* @param {string} model Model name
* @returns {object[]} Prompt for Google MakerSuite models
*/
function convertGooglePrompt(messages, model) {
// This is a 1x1 transparent PNG
const PNG_PIXEL = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=';
const contents = [];
let lastRole = '';
let currentText = '';
const isMultimodal = model === 'gemini-pro-vision';
if (isMultimodal) {
const combinedText = messages.map((message) => {
const role = message.role === 'assistant' ? 'MODEL: ' : 'USER: ';
return role + message.content;
}).join('\n\n').trim();
const imageEntry = messages.find((message) => message.content?.[1]?.image_url);
const imageData = imageEntry?.content?.[1]?.image_url?.data ?? PNG_PIXEL;
contents.push({
parts: [
{ text: combinedText },
{
inlineData: {
mimeType: 'image/png',
data: imageData,
},
},
],
role: 'user',
});
} else {
messages.forEach((message, index) => {
const role = message.role === 'assistant' ? 'model' : 'user';
if (lastRole === role) {
currentText += '\n\n' + message.content;
} else {
if (currentText !== '') {
contents.push({
parts: [{ text: currentText.trim() }],
role: lastRole,
});
}
currentText = message.content;
lastRole = role;
}
if (index === messages.length - 1) {
contents.push({
parts: [{ text: currentText.trim() }],
role: lastRole,
});
}
});
}
return contents;
}
/**
* Convert a prompt from the ChatML objects to the format used by Text Completion API.
* @param {object[]} messages Array of messages
@@ -99,5 +161,6 @@ function convertTextCompletionPrompt(messages) {
module.exports = {
convertClaudePrompt,
convertGooglePrompt,
convertTextCompletionPrompt,
};
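
To make the role-merging in convertGooglePrompt() concrete, here is a small sketch of the text-only transform; the messages are made up and the require path is an assumption.

// Illustration only: 'system' is treated as 'user' and consecutive same-role messages
// are merged with blank lines, so the output alternates user/model turns.
const { convertGooglePrompt } = require('./src/endpoints/prompt-converters');

const messages = [
    { role: 'system', content: 'You are a concise assistant.' },
    { role: 'user', content: 'Summarize the plot of Hamlet.' },
    { role: 'assistant', content: 'A prince avenges his father.' },
    { role: 'user', content: 'Shorter, please.' },
];

console.log(convertGooglePrompt(messages, 'gemini-pro'));
// [
//   { parts: [{ text: 'You are a concise assistant.\n\nSummarize the plot of Hamlet.' }], role: 'user' },
//   { parts: [{ text: 'A prince avenges his father.' }], role: 'model' },
//   { parts: [{ text: 'Shorter, please.' }], role: 'user' },
// ]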

View File

@@ -23,7 +23,7 @@ const SECRET_KEYS = {
SCALE_COOKIE: 'scale_cookie',
ONERING_URL: 'oneringtranslator_url',
DEEPLX_URL: 'deeplx_url',
PALM: 'api_key_palm',
MAKERSUITE: 'api_key_makersuite',
SERPAPI: 'api_key_serpapi',
};
@@ -44,6 +44,17 @@ function writeSecret(key, value) {
writeFileAtomicSync(SECRETS_FILE, JSON.stringify(secrets, null, 4), 'utf-8');
}
function deleteSecret(key) {
if (!fs.existsSync(SECRETS_FILE)) {
return;
}
const fileContents = fs.readFileSync(SECRETS_FILE, 'utf-8');
const secrets = JSON.parse(fileContents);
delete secrets[key];
writeFileAtomicSync(SECRETS_FILE, JSON.stringify(secrets, null, 4), 'utf-8');
}
/**
* Reads a secret from the secrets file
* @param {string} key Secret key
@@ -85,6 +96,13 @@ function readSecretState() {
* @returns {void}
*/
function migrateSecrets(settingsFile) {
const palmKey = readSecret('api_key_palm');
if (palmKey) {
console.log('Migrating Palm key...');
writeSecret(SECRET_KEYS.MAKERSUITE, palmKey);
deleteSecret('api_key_palm');
}
if (!fs.existsSync(settingsFile)) {
console.log('Settings file does not exist');
return;
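
The migration above simply renames the stored key; as a sketch (key names from the diff, value made up):

// secrets.json before migrateSecrets():  { "api_key_palm": "AIza...example" }
// secrets.json after migrateSecrets():   { "api_key_makersuite": "AIza...example" }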

View File

@@ -4,7 +4,7 @@ const express = require('express');
const { SentencePieceProcessor } = require('@agnai/sentencepiece-js');
const tiktoken = require('@dqbd/tiktoken');
const { Tokenizer } = require('@agnai/web-tokenizers');
const { convertClaudePrompt } = require('./prompt-converters');
const { convertClaudePrompt, convertGooglePrompt } = require('./prompt-converters');
const { readSecret, SECRET_KEYS } = require('./secrets');
const { TEXTGEN_TYPES } = require('../constants');
const { jsonParser } = require('../express-common');
@@ -387,6 +387,26 @@ router.post('/ai21/count', jsonParser, async function (req, res) {
}
});
router.post('/google/count', jsonParser, async function (req, res) {
if (!req.body) return res.sendStatus(400);
const options = {
method: 'POST',
headers: {
accept: 'application/json',
'content-type': 'application/json',
},
body: JSON.stringify({ contents: convertGooglePrompt(req.body) }),
};
try {
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${req.query.model}:countTokens?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`, options);
const data = await response.json();
return res.send({ 'token_count': data?.totalTokens || 0 });
} catch (err) {
console.error(err);
return res.send({ 'token_count': 0 });
}
});
router.post('/llama/encode', jsonParser, createSentencepieceEncodingHandler(spp_llama));
router.post('/nerdstash/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd));
router.post('/nerdstash_v2/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2));
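
A usage sketch for the new counting route; the /api/tokenizers mount point and the local port are assumptions about the rest of the server setup.

// Illustration only: counting Gemini tokens through the new endpoint.
const fetch = require('node-fetch').default;

async function countGoogleTokens(messages, model = 'gemini-pro') {
    const response = await fetch(`http://127.0.0.1:8000/api/tokenizers/google/count?model=${model}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(messages), // the handler feeds req.body straight into convertGooglePrompt
    });
    const { token_count } = await response.json();
    return token_count;
}

countGoogleTokens([{ role: 'user', content: 'Hello there!' }]).then(console.log);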

View File

@@ -17,7 +17,7 @@ async function getVector(source, text) {
case 'transformers':
return require('../embedding').getTransformersVector(text);
case 'palm':
return require('../palm-vectors').getPaLMVector(text);
return require('../makersuite-vectors').getMakerSuiteVector(text);
}
throw new Error(`Unknown vector source ${source}`);
@@ -196,7 +196,7 @@ router.post('/purge', jsonParser, async (req, res) => {
const collectionId = String(req.body.collectionId);
const sources = ['transformers', 'openai'];
const sources = ['transformers', 'openai', 'palm'];
for (const source of sources) {
const index = await getIndex(collectionId, source, false);

View File

@@ -6,15 +6,15 @@ const { SECRET_KEYS, readSecret } = require('./endpoints/secrets');
* @param {string} text - The text to get the vector for
* @returns {Promise<number[]>} - The vector for the text
*/
async function getPaLMVector(text) {
const key = readSecret(SECRET_KEYS.PALM);
async function getMakerSuiteVector(text) {
const key = readSecret(SECRET_KEYS.MAKERSUITE);
if (!key) {
console.log('No PaLM key found');
throw new Error('No PaLM key found');
console.log('No MakerSuite key found');
throw new Error('No MakerSuite key found');
}
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/embedding-gecko-001:embedText?key=${key}`, {
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/embedding-gecko-001:embedText?key=${key}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -26,8 +26,8 @@ async function getPaLMVector(text) {
if (!response.ok) {
const text = await response.text();
console.log('PaLM request failed', response.statusText, text);
throw new Error('PaLM request failed');
console.log('MakerSuite request failed', response.statusText, text);
throw new Error('MakerSuite request failed');
}
const data = await response.json();
@@ -39,5 +39,5 @@ async function getPaLMVector(text) {
}
module.exports = {
getPaLMVector,
getMakerSuiteVector,
};
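
And a minimal usage sketch of the renamed embedding helper; the module path is assumed.

// Illustration only: fetching an embedding-gecko-001 vector with the renamed helper.
const { getMakerSuiteVector } = require('./src/makersuite-vectors');

getMakerSuiteVector('The quick brown fox jumps over the lazy dog')
    .then((vector) => console.log(`Got ${vector.length} dimensions`))
    .catch((err) => console.error(err));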