diff --git a/public/script.js b/public/script.js
index aa5d120b8..f3baf1b8f 100644
--- a/public/script.js
+++ b/public/script.js
@@ -1562,7 +1562,7 @@ function appendToStoryString(value, prefix) {
 }
 
 function isStreamingEnabled() {
-    return ((main_api == 'openai' && oai_settings.stream_openai && oai_settings.chat_completion_source !== chat_completion_sources.CLAUDE)
+    return ((main_api == 'openai' && oai_settings.stream_openai)
         || (main_api == 'poe' && poe_settings.streaming)
         || (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming))
         && !isMultigenEnabled(); // Multigen has a quasi-streaming mode which breaks the real streaming
diff --git a/public/scripts/openai.js b/public/scripts/openai.js
index c67ab39c5..d7ac69fc9 100644
--- a/public/scripts/openai.js
+++ b/public/scripts/openai.js
@@ -638,7 +638,7 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
     let logit_bias = {};
     const isClaude = oai_settings.chat_completion_source == chat_completion_sources.CLAUDE;
 
-    const stream = type !== 'quiet' && oai_settings.stream_openai && !isClaude;
+    const stream = type !== 'quiet' && oai_settings.stream_openai;
 
     // If we're using the window.ai extension, use that instead
     // Doesn't support logit bias yet
@@ -687,6 +687,11 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
             const { done, value } = await reader.read();
             let response = decoder.decode(value);
 
+            // Claude's streaming SSE messages are separated by \r
+            if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
+                response = response.replace(/\r/g, "");
+            }
+
             tryParseStreamingError(response);
 
             let eventList = [];
@@ -710,7 +715,7 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
                 }
                 let data = JSON.parse(event.substring(6));
                 // the first and last messages are undefined, protect against that
-                getMessage += data.choices[0]["delta"]["content"] || "";
+                getMessage = getStreamingReply(getMessage, data);
 
                 yield getMessage;
             }
@@ -734,6 +739,15 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
     }
 }
 
+function getStreamingReply(getMessage, data) {
+    if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
+        getMessage = data.completion || "";
+    } else {
+        getMessage += data.choices[0]["delta"]["content"] || "";
+    }
+    return getMessage;
+}
+
 function handleWindowError(err) {
     const text = parseWindowError(err);
     toastr.error(text, 'Window.ai returned an error');
diff --git a/server.js b/server.js
index af1536f30..c1ebbefcf 100644
--- a/server.js
+++ b/server.js
@@ -760,7 +760,7 @@ function charaFormatData(data) {
         [d => Array.isArray(d.alternate_greetings), d => d.alternate_greetings],
         [d => typeof d.alternate_greetings === 'string', d => [d.alternate_greetings]],
         [_.stubTrue, _.constant([])]
-    ])(data); 
+    ])(data);
 
     // Spec V1 fields
     _.set(char, 'name', data.ch_name);
@@ -2699,7 +2699,7 @@ app.post("/deletepreset_openai", jsonParser, function (request, response) {
 function convertClaudePrompt(messages) {
     let requestPrompt = messages.map((v) => {
         let prefix = '';
-        switch (v.role){
+        switch (v.role) {
             case "assistant":
                 prefix = "\n\nAssistant: ";
                 break
@@ -2725,43 +2725,66 @@ async function sendClaudeRequest(request, response) {
         return response.status(401).send({ error: true });
     }
 
-    const controller = new AbortController();
-    request.socket.removeAllListeners('close');
-    request.socket.on('close', function () {
-        controller.abort();
-    });
+    try {
+        const controller = new AbortController();
+        request.socket.removeAllListeners('close');
+        request.socket.on('close', function () {
+            controller.abort();
+        });
 
-    const requestPrompt = convertClaudePrompt(request.body.messages);
-    console.log('Claude request:', requestPrompt);
+        const requestPrompt = convertClaudePrompt(request.body.messages);
+        console.log('Claude request:', requestPrompt);
 
-    const generateResponse = await fetch(api_url + '/complete', {
-        method: "POST",
-        signal: controller.signal,
-        body: JSON.stringify({
-            prompt : "\n\nHuman: " + requestPrompt,
-            model: request.body.model,
-            max_tokens_to_sample: request.body.max_tokens,
-            stop_sequences: ["\n\nHuman:", "\n\nSystem:", "\n\nAssistant:"],
-            temperature: request.body.temperature,
-        }),
-        headers: {
-            "Content-Type": "application/json",
-            "x-api-key": api_key_claude,
+        const generateResponse = await fetch(api_url + '/complete', {
+            method: "POST",
+            signal: controller.signal,
+            body: JSON.stringify({
+                prompt: "\n\nHuman: " + requestPrompt,
+                model: request.body.model,
+                max_tokens_to_sample: request.body.max_tokens,
+                stop_sequences: ["\n\nHuman:", "\n\nSystem:", "\n\nAssistant:"],
+                temperature: request.body.temperature,
+                stream: request.body.stream,
+            }),
+            headers: {
+                "Content-Type": "application/json",
+                "x-api-key": api_key_claude,
+            }
+        });
+
+        if (request.body.stream) {
+            // Pipe remote SSE stream to Express response
+            generateResponse.body.pipe(response);
+
+            request.socket.on('close', function () {
+                generateResponse.body.destroy(); // Close the remote stream
+                response.end(); // End the Express response
+            });
+
+            generateResponse.body.on('end', function () {
+                console.log("Streaming request finished");
+                response.end();
+            });
+        } else {
+            if (!generateResponse.ok) {
+                console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
+                return response.status(generateResponse.status).send({ error: true });
+            }
+
+            const generateResponseJson = await generateResponse.json();
+            const responseText = generateResponseJson.completion;
+            console.log('Claude response:', responseText);
+
+            // Wrap it back to OAI format
+            const reply = { choices: [{ "message": { "content": responseText, } }] };
+            return response.send(reply);
+        }
+    } catch (error) {
+        console.log('Error communicating with Claude: ', error);
+        if (!response.headersSent) {
+            return response.status(500).send({ error: true });
         }
-    });
-
-    if (!generateResponse.ok) {
-        console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
-        return response.status(generateResponse.status).send({ error: true });
     }
-
-    const generateResponseJson = await generateResponse.json();
-    const responseText = generateResponseJson.completion;
-    console.log('Claude response:', responseText);
-
-    // Wrap it back to OAI format
-    const reply = { choices: [{ "message": { "content": responseText, } }] };
-    return response.send(reply);
 }
 
 app.post("/generate_openai", jsonParser, function (request, response_generate_openai) {