From 96b87641ca5655c4a1f5d1f0821ab21d80b0f709 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 19 Nov 2023 20:30:34 +0200 Subject: [PATCH] Add OpenAI Whisper API --- server.js | 2 +- src/openai.js | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/server.js b/server.js index 400045f16..8e7393b7f 100644 --- a/server.js +++ b/server.js @@ -3530,7 +3530,7 @@ async function fetchJSON(url, args = {}) { // ** END ** // OpenAI API -require('./src/openai').registerEndpoints(app, jsonParser); +require('./src/openai').registerEndpoints(app, jsonParser, urlencodedParser); // Tokenizers require('./src/tokenizers').registerEndpoints(app, jsonParser); diff --git a/src/openai.js b/src/openai.js index 654545713..6c5d551d3 100644 --- a/src/openai.js +++ b/src/openai.js @@ -1,12 +1,15 @@ const { readSecret, SECRET_KEYS } = require("./secrets"); const fetch = require('node-fetch').default; +const FormData = require('form-data'); +const fs = require('fs'); /** * Registers the OpenAI endpoints. 
- * @param {import("express").Express} app
- * @param {any} jsonParser
+ * @param {import("express").Express} app Express app
+ * @param {any} jsonParser JSON parser
+ * @param {any} urlencodedParser Form data parser
  */
-function registerEndpoints(app, jsonParser) {
+function registerEndpoints(app, jsonParser, urlencodedParser) {
     app.post('/api/openai/caption-image', jsonParser, async (request, response) => {
         try {
             let key = '';
@@ -85,6 +88,69 @@ function registerEndpoints(app, jsonParser) {
         }
     });
 
+    // Forwards an uploaded audio file to the OpenAI transcription API and relays the JSON reply.
+    app.post('/api/openai/transcribe-audio', urlencodedParser, async (request, response) => {
+        // NOTE(review): a urlencoded body parser does not populate request.file; presumably an
+        // upload middleware registered elsewhere does - confirm before relying on this route.
+        // The path is captured up front so the cleanup in `finally` sees it on every exit path.
+        const audioPath = request.file?.path;
+
+        try {
+            const key = readSecret(SECRET_KEYS.OPENAI);
+
+            if (!key) {
+                console.log('No OpenAI key found');
+                return response.sendStatus(401);
+            }
+
+            if (!audioPath) {
+                console.log('No audio file found');
+                return response.sendStatus(400);
+            }
+
+            const formData = new FormData();
+            console.log('Processing audio file', audioPath);
+            formData.append('file', fs.createReadStream(audioPath), { filename: 'audio.wav', contentType: 'audio/wav' });
+            formData.append('model', request.body.model);
+
+            if (request.body.language) {
+                formData.append('language', request.body.language);
+            }
+
+            const result = await fetch('https://api.openai.com/v1/audio/transcriptions', {
+                method: 'POST',
+                headers: {
+                    'Authorization': `Bearer ${key}`,
+                    ...formData.getHeaders(),
+                },
+                body: formData,
+            });
+
+            if (!result.ok) {
+                const text = await result.text();
+                console.log('OpenAI request failed', result.statusText, text);
+                return response.status(500).send(text);
+            }
+
+            const data = await result.json();
+            console.log('OpenAI transcription response', data);
+            return response.json(data);
+        } catch (error) {
+            console.error('OpenAI transcription failed', error);
+            response.status(500).send('Internal server error');
+        } finally {
+            // Delete the upload on every exit path (missing key, upstream error, thrown exception), not only on success.
+            // A failed delete is logged instead of thrown so it cannot escape the handler as an unhandled rejection.
+            if (audioPath) {
+                try {
+                    fs.rmSync(audioPath, { force: true });
+                } catch (cleanupError) {
+                    console.error('Failed to remove uploaded audio file', cleanupError);
+                }
+            }
+        }
+    });
+
     app.post('/api/openai/generate-voice', jsonParser, async (request, response) => {
         try {
             const key = 
readSecret(SECRET_KEYS.OPENAI);