Add endpoint for transformers.js TTS

Cohee committed 2024-02-02 01:51:02 +02:00
parent 10d78f9a25
commit 2f3dca2348
3 changed files with 25 additions and 5 deletions

View File

@@ -54,6 +54,7 @@ extras:
   embeddingModel: Cohee/jina-embeddings-v2-base-en
   promptExpansionModel: Cohee/fooocus_expansion-onnx
   speechToTextModel: Xenova/whisper-small
+  textToSpeechModel: Xenova/speecht5_tts
 # -- OPENAI CONFIGURATION --
 openai:
   # Will send a random user ID to OpenAI completion API
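
Note: the new extras.textToSpeechModel key is resolved the same way as the other extras.* model keys, via the task table in transformers.mjs (third file below). A minimal sketch of reading it, assuming SillyTavern's getConfigValue helper (the module path is an assumption; the key name and default come from this diff):

// Sketch only: resolve the configured TTS model like the other extras models.
const { getConfigValue } = require('./src/util.js'); // path is an assumption
const model = getConfigValue('extras.textToSpeechModel', 'Xenova/speecht5_tts');
console.log('text-to-speech model:', model); // e.g. Xenova/speecht5_tts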

View File

@@ -54,17 +54,25 @@ router.post('/recognize', jsonParser, async (req, res) => {
 router.post('/synthesize', jsonParser, async (req, res) => {
     try {
+        const wavefile = require('wavefile');
         const TASK = 'text-to-speech';
-        const { model, text, lang } = req.body;
+        const { text, model, speaker } = req.body;
         const module = await import('../transformers.mjs');
         const pipe = await module.default.getPipeline(TASK, model);
+        const speaker_embeddings = speaker
+            ? new Float32Array(new Uint8Array(Buffer.from(speaker.split(',')[1], 'base64')).buffer)
+            : null;
         const start = performance.now();
-        const result = await pipe(text, { language: lang || null });
+        const result = await pipe(text, { speaker_embeddings: speaker_embeddings });
         const end = performance.now();
         console.log(`Execution duration: ${(end - start) / 1000} seconds`);
-        console.log('Synthesized audio:', result.audio);
-        return res.json({ audio: result.audio });
+        const wav = new wavefile.WaveFile();
+        wav.fromScratch(1, result.sampling_rate, '32f', result.audio);
+        const buffer = wav.toBuffer();
+        res.set('Content-Type', 'audio/wav');
+        return res.send(Buffer.from(buffer));
     } catch (error) {
         console.error(error);
         return res.sendStatus(500);
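
A minimal client sketch for the new route follows. Only the request body shape ({ text, model, speaker }) and the audio/wav response are defined by the handler above; the base URL, the router's mount point, and the 512-value placeholder x-vector are assumptions. The speaker value must be a data URL, since the handler base64-decodes everything after the first comma:

// Sketch (Node 18+, ESM): call the synthesize route and save the WAV.
// Assumptions: server at localhost:8000, router mounted at /api/speech,
// no auth/CSRF, and a dummy 512-dim SpeechT5 speaker embedding.
import fs from 'node:fs';

const xvector = new Float32Array(512).fill(0.01); // placeholder embedding
const speaker = 'data:application/octet-stream;base64,'
    + Buffer.from(xvector.buffer).toString('base64');

const res = await fetch('http://localhost:8000/api/speech/synthesize', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text: 'Hello world', model: '', speaker }),
});
fs.writeFileSync('out.wav', Buffer.from(await res.arrayBuffer()));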

View File

@@ -17,26 +17,37 @@ const tasks = {
         defaultModel: 'Cohee/distilbert-base-uncased-go-emotions-onnx',
         pipeline: null,
         configField: 'extras.classificationModel',
+        quantized: true,
     },
     'image-to-text': {
         defaultModel: 'Xenova/vit-gpt2-image-captioning',
         pipeline: null,
         configField: 'extras.captioningModel',
+        quantized: true,
     },
     'feature-extraction': {
         defaultModel: 'Xenova/all-mpnet-base-v2',
         pipeline: null,
         configField: 'extras.embeddingModel',
+        quantized: true,
     },
     'text-generation': {
         defaultModel: 'Cohee/fooocus_expansion-onnx',
         pipeline: null,
         configField: 'extras.promptExpansionModel',
+        quantized: true,
     },
     'automatic-speech-recognition': {
         defaultModel: 'Xenova/whisper-small',
         pipeline: null,
         configField: 'extras.speechToTextModel',
+        quantized: true,
+    },
+    'text-to-speech': {
+        defaultModel: 'Xenova/speecht5_tts',
+        pipeline: null,
+        configField: 'extras.textToSpeechModel',
+        quantized: false,
     },
 }
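
Note the new per-task quantized flag: 'text-to-speech' is the only entry that opts out (the transformers.js docs likewise load Xenova/speecht5_tts with quantized: false), and getPipeline in the next hunk falls back to the old default for any task without the flag. A toy illustration of that fallback, using the table above:

// Illustration of the `?? true` fallback added in the next hunk.
const quantizedFor = (task) => tasks[task].quantized ?? true;
quantizedFor('text-to-speech');               // false (explicit opt-out)
quantizedFor('automatic-speech-recognition'); // true  (explicit)
// A task that never declared the flag would also resolve to true,
// preserving the old behavior where quantized was hard-coded.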
@@ -90,7 +101,7 @@ async function getPipeline(task, forceModel = '') {
     const model = forceModel || getModelForTask(task);
     const localOnly = getConfigValue('extras.disableAutoDownload', false);
     console.log('Initializing transformers.js pipeline for task', task, 'with model', model);
-    const instance = await pipeline(task, model, { cache_dir, quantized: true, local_files_only: localOnly });
+    const instance = await pipeline(task, model, { cache_dir, quantized: tasks[task].quantized ?? true, local_files_only: localOnly });
     tasks[task].pipeline = instance;
     return instance;
 }
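
For completeness, the same flow can be reproduced standalone with @xenova/transformers and wavefile; this sketch follows the transformers.js documentation example for SpeechT5 (the speaker-embeddings URL comes from those docs, not from this commit):

// Standalone sketch of the synthesize flow, outside the server (ESM).
import { pipeline } from '@xenova/transformers';
import wavefile from 'wavefile';
import fs from 'node:fs';

// quantized: false matches the new task-table entry above.
const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
const speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
const result = await synthesizer('Hello world', { speaker_embeddings });

// Package the Float32Array exactly as the /synthesize handler does.
const wav = new wavefile.WaveFile();
wav.fromScratch(1, result.sampling_rate, '32f', result.audio);
fs.writeFileSync('out.wav', Buffer.from(wav.toBuffer()));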