diff --git a/default/config.yaml b/default/config.yaml
index bb4847c3..5925d573 100644
--- a/default/config.yaml
+++ b/default/config.yaml
@@ -54,6 +54,7 @@ extras:
   embeddingModel: Cohee/jina-embeddings-v2-base-en
   promptExpansionModel: Cohee/fooocus_expansion-onnx
   speechToTextModel: Xenova/whisper-small
+  textToSpeechModel: Xenova/speecht5_tts
 # -- OPENAI CONFIGURATION --
 openai:
   # Will send a random user ID to OpenAI completion API
diff --git a/src/endpoints/speech.js b/src/endpoints/speech.js
index de5e758c..713c420a 100644
--- a/src/endpoints/speech.js
+++ b/src/endpoints/speech.js
@@ -54,17 +54,25 @@ router.post('/recognize', jsonParser, async (req, res) => {
 
 router.post('/synthesize', jsonParser, async (req, res) => {
     try {
+        const wavefile = require('wavefile');
         const TASK = 'text-to-speech';
-        const { model, text, lang } = req.body;
+        const { text, model, speaker } = req.body;
         const module = await import('../transformers.mjs');
         const pipe = await module.default.getPipeline(TASK, model);
+        const speaker_embeddings = speaker
+            ? new Float32Array(new Uint8Array(Buffer.from(speaker.split(',')[1], 'base64')).buffer)
+            : null;
 
         const start = performance.now();
-        const result = await pipe(text, { language: lang || null });
+        const result = await pipe(text, { speaker_embeddings: speaker_embeddings });
         const end = performance.now();
         console.log(`Execution duration: ${(end - start) / 1000} seconds`);
-        console.log('Synthesized audio:', result.audio);
-        return res.json({ audio: result.audio });
+        const wav = new wavefile.WaveFile();
+        wav.fromScratch(1, result.sampling_rate, '32f', result.audio);
+        const buffer = wav.toBuffer();
+
+        res.set('Content-Type', 'audio/wav');
+        return res.send(Buffer.from(buffer));
     } catch (error) {
         console.error(error);
         return res.sendStatus(500);
diff --git a/src/transformers.mjs b/src/transformers.mjs
index 3a30edf6..e0465f30 100644
--- a/src/transformers.mjs
+++ b/src/transformers.mjs
@@ -17,26 +17,37 @@ const tasks = {
         defaultModel: 'Cohee/distilbert-base-uncased-go-emotions-onnx',
         pipeline: null,
         configField: 'extras.classificationModel',
+        quantized: true,
     },
     'image-to-text': {
         defaultModel: 'Xenova/vit-gpt2-image-captioning',
         pipeline: null,
         configField: 'extras.captioningModel',
+        quantized: true,
     },
     'feature-extraction': {
         defaultModel: 'Xenova/all-mpnet-base-v2',
         pipeline: null,
         configField: 'extras.embeddingModel',
+        quantized: true,
     },
     'text-generation': {
         defaultModel: 'Cohee/fooocus_expansion-onnx',
         pipeline: null,
         configField: 'extras.promptExpansionModel',
+        quantized: true,
     },
     'automatic-speech-recognition': {
         defaultModel: 'Xenova/whisper-small',
         pipeline: null,
         configField: 'extras.speechToTextModel',
+        quantized: true,
+    },
+    'text-to-speech': {
+        defaultModel: 'Xenova/speecht5_tts',
+        pipeline: null,
+        configField: 'extras.textToSpeechModel',
+        quantized: false,
     },
 }
 
@@ -90,7 +101,7 @@ async function getPipeline(task, forceModel = '') {
     const model = forceModel || getModelForTask(task);
     const localOnly = getConfigValue('extras.disableAutoDownload', false);
     console.log('Initializing transformers.js pipeline for task', task, 'with model', model);
-    const instance = await pipeline(task, model, { cache_dir, quantized: true, local_files_only: localOnly });
+    const instance = await pipeline(task, model, { cache_dir, quantized: tasks[task].quantized ?? true, local_files_only: localOnly });
     tasks[task].pipeline = instance;
     return instance;
 }
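// --- Usage sketch for the new /synthesize route (not part of the diff) ---
// A minimal Node 18+ client illustrating the request/response contract above.
// The /api/speech mount path, the port, and the speaker-embeddings.bin file are
// assumptions for illustration only; the endpoint itself only requires that
// `speaker` be a data URI whose base64 payload decodes to the raw bytes of a
// Float32Array speaker embedding (a 512-value x-vector for SpeechT5), matching
// the Buffer.from(speaker.split(',')[1], 'base64') parsing added in speech.js.
const fs = require('fs');

async function synthesize(text) {
    // Encode the raw float32 embedding as a data URI; the server keeps only the
    // part after the first comma and base64-decodes it into a Float32Array.
    const embedding = fs.readFileSync('speaker-embeddings.bin');
    const speaker = 'data:application/octet-stream;base64,' + embedding.toString('base64');

    const response = await fetch('http://localhost:8000/api/speech/synthesize', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text, model: 'Xenova/speecht5_tts', speaker }),
    });

    // The endpoint now answers with a mono 32-bit float WAV body instead of JSON.
    fs.writeFileSync('output.wav', Buffer.from(await response.arrayBuffer()));
}

synthesize('Hello, this is a test.').catch(console.error);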