Implement support for koboldcpp SSE streaming.

koboldcpp added SSE streaming to its API as of:
https://github.com/LostRuins/koboldcpp/pull/220

This has not yet been added to a release, but I figured I'd try to
get the work done to make it ready.

Work mostly hijacked from:
10bbc97069
This commit is contained in:
Zixaphir
2023-06-11 13:24:07 -07:00
parent 4bce33b1a9
commit 698461c188
4 changed files with 107 additions and 25 deletions

View File

@@ -381,33 +381,39 @@ app.post("/generate", jsonParser, async function (request, response_generate = r
console.log(this_settings);
const args = {
body: JSON.stringify(this_settings),
signal: controller.signal,
headers: { "Content-Type": "application/json" },
signal: controller.signal,
};
const MAX_RETRIES = 10;
const delayAmount = 3000;
for (let i = 0; i < MAX_RETRIES; i++) {
try {
const data = await postAsync(api_server + "/v1/generate", args);
console.log(data);
return response_generate.send(data);
}
catch (error) {
// data
if (typeof error['text'] === 'function') {
console.log(await error.text());
}
try {
const fetch = require('node-fetch').default;
const url = request.body.streaming ? `${api_server}/extra/generate/stream` : `${api_server}/v1/generate`;
const response = await fetch(url, { method: 'POST', timeout: 0, ...args });
console.log(response);
if (request.body.streaming) {
// Pipe remote SSE stream to Express response
response.body.pipe(response_generate);
// response
switch (error.statusCode) {
case 503:
await delay(delayAmount);
break;
default:
return response_generate.send({ error: true });
}
}
request.socket.on('close', function () {
response.body.destroy(); // Close the remote stream
response_generate.end(); // End the Express response
});
response.body.on('end', function () {
console.log("Streaming request finished");
response_generate.end();
});
} else {
if (!response.ok) {
console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`);
return response.status(response.status).send({ error: true });
}
const data = await response.json();
return response_generate.send(data);
}
} catch (error) {
return response_generate.send({ error: true });
}
});