mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Streaming for Claude.
This commit is contained in:
@@ -1562,7 +1562,7 @@ function appendToStoryString(value, prefix) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function isStreamingEnabled() {
|
function isStreamingEnabled() {
|
||||||
return ((main_api == 'openai' && oai_settings.stream_openai && oai_settings.chat_completion_source !== chat_completion_sources.CLAUDE)
|
return ((main_api == 'openai' && oai_settings.stream_openai)
|
||||||
|| (main_api == 'poe' && poe_settings.streaming)
|
|| (main_api == 'poe' && poe_settings.streaming)
|
||||||
|| (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming))
|
|| (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming))
|
||||||
&& !isMultigenEnabled(); // Multigen has a quasi-streaming mode which breaks the real streaming
|
&& !isMultigenEnabled(); // Multigen has a quasi-streaming mode which breaks the real streaming
|
||||||
|
@@ -638,7 +638,7 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
|
|||||||
|
|
||||||
let logit_bias = {};
|
let logit_bias = {};
|
||||||
const isClaude = oai_settings.chat_completion_source == chat_completion_sources.CLAUDE;
|
const isClaude = oai_settings.chat_completion_source == chat_completion_sources.CLAUDE;
|
||||||
const stream = type !== 'quiet' && oai_settings.stream_openai && !isClaude;
|
const stream = type !== 'quiet' && oai_settings.stream_openai;
|
||||||
|
|
||||||
// If we're using the window.ai extension, use that instead
|
// If we're using the window.ai extension, use that instead
|
||||||
// Doesn't support logit bias yet
|
// Doesn't support logit bias yet
|
||||||
@@ -687,6 +687,11 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
|
|||||||
const { done, value } = await reader.read();
|
const { done, value } = await reader.read();
|
||||||
let response = decoder.decode(value);
|
let response = decoder.decode(value);
|
||||||
|
|
||||||
|
// Claude's streaming SSE messages are separated by \r
|
||||||
|
if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
|
||||||
|
response = response.replace(/\r/g, "");
|
||||||
|
}
|
||||||
|
|
||||||
tryParseStreamingError(response);
|
tryParseStreamingError(response);
|
||||||
|
|
||||||
let eventList = [];
|
let eventList = [];
|
||||||
@@ -710,7 +715,7 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
|
|||||||
}
|
}
|
||||||
let data = JSON.parse(event.substring(6));
|
let data = JSON.parse(event.substring(6));
|
||||||
// the first and last messages are undefined, protect against that
|
// the first and last messages are undefined, protect against that
|
||||||
getMessage += data.choices[0]["delta"]["content"] || "";
|
getMessage = getStreamingReply(getMessage, data);
|
||||||
yield getMessage;
|
yield getMessage;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -734,6 +739,15 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Folds one parsed streaming event into the reply text built so far.
 *
 * For the Claude source the event's `completion` string replaces the text
 * (presumably each Claude event carries the full completion so far - confirm
 * against the API version in use). For every other source the event's
 * `choices[0].delta.content` is appended.
 *
 * @param {string} getMessage - Reply text accumulated from earlier events.
 * @param {object} data - JSON-decoded payload of a single streaming event.
 * @returns {string} The updated reply text.
 */
function getStreamingReply(getMessage, data) {
    if (oai_settings.chat_completion_source === chat_completion_sources.CLAUDE) {
        // An event without a string `completion` must not wipe what was already received.
        const completion = data ? data.completion : undefined;
        return typeof completion === "string" ? completion : getMessage;
    }

    // The first and last messages carry no content; tolerate a missing
    // `choices` entry or `delta` as well instead of throwing mid-stream.
    const choice = data && Array.isArray(data.choices) ? data.choices[0] : undefined;
    const delta = choice ? choice.delta : undefined;
    return getMessage + ((delta && delta.content) || "");
}
|
||||||
|
|
||||||
function handleWindowError(err) {
|
function handleWindowError(err) {
|
||||||
const text = parseWindowError(err);
|
const text = parseWindowError(err);
|
||||||
toastr.error(text, 'Window.ai returned an error');
|
toastr.error(text, 'Window.ai returned an error');
|
||||||
|
93
server.js
93
server.js
@@ -760,7 +760,7 @@ function charaFormatData(data) {
|
|||||||
[d => Array.isArray(d.alternate_greetings), d => d.alternate_greetings],
|
[d => Array.isArray(d.alternate_greetings), d => d.alternate_greetings],
|
||||||
[d => typeof d.alternate_greetings === 'string', d => [d.alternate_greetings]],
|
[d => typeof d.alternate_greetings === 'string', d => [d.alternate_greetings]],
|
||||||
[_.stubTrue, _.constant([])]
|
[_.stubTrue, _.constant([])]
|
||||||
])(data);
|
])(data);
|
||||||
|
|
||||||
// Spec V1 fields
|
// Spec V1 fields
|
||||||
_.set(char, 'name', data.ch_name);
|
_.set(char, 'name', data.ch_name);
|
||||||
@@ -2699,7 +2699,7 @@ app.post("/deletepreset_openai", jsonParser, function (request, response) {
|
|||||||
function convertClaudePrompt(messages) {
|
function convertClaudePrompt(messages) {
|
||||||
let requestPrompt = messages.map((v) => {
|
let requestPrompt = messages.map((v) => {
|
||||||
let prefix = '';
|
let prefix = '';
|
||||||
switch (v.role){
|
switch (v.role) {
|
||||||
case "assistant":
|
case "assistant":
|
||||||
prefix = "\n\nAssistant: ";
|
prefix = "\n\nAssistant: ";
|
||||||
break
|
break
|
||||||
@@ -2725,43 +2725,66 @@ async function sendClaudeRequest(request, response) {
|
|||||||
return response.status(401).send({ error: true });
|
return response.status(401).send({ error: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
const controller = new AbortController();
|
try {
|
||||||
request.socket.removeAllListeners('close');
|
const controller = new AbortController();
|
||||||
request.socket.on('close', function () {
|
request.socket.removeAllListeners('close');
|
||||||
controller.abort();
|
request.socket.on('close', function () {
|
||||||
});
|
controller.abort();
|
||||||
|
});
|
||||||
|
|
||||||
const requestPrompt = convertClaudePrompt(request.body.messages);
|
const requestPrompt = convertClaudePrompt(request.body.messages);
|
||||||
console.log('Claude request:', requestPrompt);
|
console.log('Claude request:', requestPrompt);
|
||||||
|
|
||||||
const generateResponse = await fetch(api_url + '/complete', {
|
const generateResponse = await fetch(api_url + '/complete', {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
signal: controller.signal,
|
signal: controller.signal,
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
prompt : "\n\nHuman: " + requestPrompt,
|
prompt: "\n\nHuman: " + requestPrompt,
|
||||||
model: request.body.model,
|
model: request.body.model,
|
||||||
max_tokens_to_sample: request.body.max_tokens,
|
max_tokens_to_sample: request.body.max_tokens,
|
||||||
stop_sequences: ["\n\nHuman:", "\n\nSystem:", "\n\nAssistant:"],
|
stop_sequences: ["\n\nHuman:", "\n\nSystem:", "\n\nAssistant:"],
|
||||||
temperature: request.body.temperature,
|
temperature: request.body.temperature,
|
||||||
}),
|
stream: request.body.stream,
|
||||||
headers: {
|
}),
|
||||||
"Content-Type": "application/json",
|
headers: {
|
||||||
"x-api-key": api_key_claude,
|
"Content-Type": "application/json",
|
||||||
|
"x-api-key": api_key_claude,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (request.body.stream) {
|
||||||
|
// Pipe remote SSE stream to Express response
|
||||||
|
generateResponse.body.pipe(response);
|
||||||
|
|
||||||
|
request.socket.on('close', function () {
|
||||||
|
generateResponse.body.destroy(); // Close the remote stream
|
||||||
|
response.end(); // End the Express response
|
||||||
|
});
|
||||||
|
|
||||||
|
generateResponse.body.on('end', function () {
|
||||||
|
console.log("Streaming request finished");
|
||||||
|
response.end();
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
if (!generateResponse.ok) {
|
||||||
|
console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
|
||||||
|
return response.status(generateResponse.status).send({ error: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
const generateResponseJson = await generateResponse.json();
|
||||||
|
const responseText = generateResponseJson.completion;
|
||||||
|
console.log('Claude response:', responseText);
|
||||||
|
|
||||||
|
// Wrap it back to OAI format
|
||||||
|
const reply = { choices: [{ "message": { "content": responseText, } }] };
|
||||||
|
return response.send(reply);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.log('Error communicating with Claude: ', error);
|
||||||
|
if (!response.headersSent) {
|
||||||
|
return response.status(500).send({ error: true });
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
if (!generateResponse.ok) {
|
|
||||||
console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
|
|
||||||
return response.status(generateResponse.status).send({ error: true });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const generateResponseJson = await generateResponse.json();
|
|
||||||
const responseText = generateResponseJson.completion;
|
|
||||||
console.log('Claude response:', responseText);
|
|
||||||
|
|
||||||
// Wrap it back to OAI format
|
|
||||||
const reply = { choices: [{ "message": { "content": responseText, } }] };
|
|
||||||
return response.send(reply);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
app.post("/generate_openai", jsonParser, function (request, response_generate_openai) {
|
app.post("/generate_openai", jsonParser, function (request, response_generate_openai) {
|
||||||
|
Reference in New Issue
Block a user