mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Merge branch 'dev' of https://github.com/Cohee1207/SillyTavern into dev
This commit is contained in:
@ -260,6 +260,22 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<hr>
|
||||||
|
<div id="range_block">
|
||||||
|
<div class="range-block">
|
||||||
|
<label class="checkbox_label widthFreeExpand">
|
||||||
|
<input id="streaming_kobold" type="checkbox" />
|
||||||
|
<span data-i18n="Streaming">Streaming</span>
|
||||||
|
</label>
|
||||||
|
<div class="toggle-description justifyLeft">
|
||||||
|
<span data-i18n="Display the response bit by bit as it is generated.">
|
||||||
|
Display the response bit by bit as it is generated.</span><br>
|
||||||
|
<span data-i18n="When this is off, responses will be displayed all at once when they are complete.">
|
||||||
|
When this is off, responses will be displayed all at once when they are complete.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div id="range_block_novel">
|
<div id="range_block_novel">
|
||||||
<div class="range-block">
|
<div class="range-block">
|
||||||
|
@ -2,11 +2,13 @@ import { humanizedDateTime, favsToHotswap } from "./scripts/RossAscends-mods.js"
|
|||||||
import { encode } from "../scripts/gpt-2-3-tokenizer/mod.js";
|
import { encode } from "../scripts/gpt-2-3-tokenizer/mod.js";
|
||||||
import { GPT3BrowserTokenizer } from "../scripts/gpt-3-tokenizer/gpt3-tokenizer.js";
|
import { GPT3BrowserTokenizer } from "../scripts/gpt-3-tokenizer/gpt3-tokenizer.js";
|
||||||
import {
|
import {
|
||||||
|
generateKoboldWithStreaming,
|
||||||
kai_settings,
|
kai_settings,
|
||||||
loadKoboldSettings,
|
loadKoboldSettings,
|
||||||
formatKoboldUrl,
|
formatKoboldUrl,
|
||||||
getKoboldGenerationData,
|
getKoboldGenerationData,
|
||||||
canUseKoboldStopSequence,
|
canUseKoboldStopSequence,
|
||||||
|
canUseKoboldStreaming,
|
||||||
} from "./scripts/kai-settings.js";
|
} from "./scripts/kai-settings.js";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
@ -743,9 +745,10 @@ async function getStatus() {
|
|||||||
is_pygmalion = false;
|
is_pygmalion = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// determine if we can use stop sequence
|
// determine if we can use stop sequence and streaming
|
||||||
if (main_api === "kobold" || main_api === "koboldhorde") {
|
if (main_api === "kobold" || main_api === "koboldhorde") {
|
||||||
kai_settings.use_stop_sequence = canUseKoboldStopSequence(data.version);
|
kai_settings.use_stop_sequence = canUseKoboldStopSequence(data.version);
|
||||||
|
kai_settings.can_use_streaming = canUseKoboldStreaming(data.koboldVersion);
|
||||||
}
|
}
|
||||||
|
|
||||||
//console.log(online_status);
|
//console.log(online_status);
|
||||||
@ -1587,6 +1590,7 @@ function appendToStoryString(value, prefix) {
|
|||||||
|
|
||||||
function isStreamingEnabled() {
|
function isStreamingEnabled() {
|
||||||
return ((main_api == 'openai' && oai_settings.stream_openai)
|
return ((main_api == 'openai' && oai_settings.stream_openai)
|
||||||
|
|| (main_api == 'kobold' && kai_settings.streaming_kobold && kai_settings.can_use_streaming)
|
||||||
|| (main_api == 'novel' && nai_settings.streaming_novel)
|
|| (main_api == 'novel' && nai_settings.streaming_novel)
|
||||||
|| (main_api == 'poe' && poe_settings.streaming)
|
|| (main_api == 'poe' && poe_settings.streaming)
|
||||||
|| (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming))
|
|| (main_api == 'textgenerationwebui' && textgenerationwebui_settings.streaming))
|
||||||
@ -1854,6 +1858,10 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (main_api == 'kobold' && kai_settings.streaming_kobold && !kai_settings.can_use_streaming) {
|
||||||
|
toastr.warning('Streaming is enabled, but the version of Kobold used does not support token streaming.', undefined, { timeOut: 10000, preventDuplicates: true, });
|
||||||
|
}
|
||||||
|
|
||||||
if (isHordeGenerationNotAllowed()) {
|
if (isHordeGenerationNotAllowed()) {
|
||||||
is_send_press = false;
|
is_send_press = false;
|
||||||
return;
|
return;
|
||||||
@ -2368,6 +2376,9 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
|||||||
else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') {
|
else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') {
|
||||||
streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal);
|
streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal);
|
||||||
}
|
}
|
||||||
|
else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') {
|
||||||
|
streamingProcessor.generator = await generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal);
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
try {
|
try {
|
||||||
const response = await fetch(generate_url, {
|
const response = await fetch(generate_url, {
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import {
|
import {
|
||||||
|
getRequestHeaders,
|
||||||
saveSettingsDebounced,
|
saveSettingsDebounced,
|
||||||
getStoppingStrings,
|
getStoppingStrings,
|
||||||
} from "../script.js";
|
} from "../script.js";
|
||||||
@ -9,6 +10,7 @@ export {
|
|||||||
formatKoboldUrl,
|
formatKoboldUrl,
|
||||||
getKoboldGenerationData,
|
getKoboldGenerationData,
|
||||||
canUseKoboldStopSequence,
|
canUseKoboldStopSequence,
|
||||||
|
canUseKoboldStreaming,
|
||||||
};
|
};
|
||||||
|
|
||||||
const kai_settings = {
|
const kai_settings = {
|
||||||
@ -23,9 +25,11 @@ const kai_settings = {
|
|||||||
rep_pen_slope: 0.9,
|
rep_pen_slope: 0.9,
|
||||||
single_line: false,
|
single_line: false,
|
||||||
use_stop_sequence: false,
|
use_stop_sequence: false,
|
||||||
|
streaming_kobold: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
const MIN_STOP_SEQUENCE_VERSION = '1.2.2';
|
const MIN_STOP_SEQUENCE_VERSION = '1.2.2';
|
||||||
|
const MIN_STREAMING_KCPPVERSION = '1.30';
|
||||||
|
|
||||||
function formatKoboldUrl(value) {
|
function formatKoboldUrl(value) {
|
||||||
try {
|
try {
|
||||||
@ -58,6 +62,10 @@ function loadKoboldSettings(preset) {
|
|||||||
kai_settings.single_line = preset.single_line;
|
kai_settings.single_line = preset.single_line;
|
||||||
$('#single_line').prop('checked', kai_settings.single_line);
|
$('#single_line').prop('checked', kai_settings.single_line);
|
||||||
}
|
}
|
||||||
|
if (preset.hasOwnProperty('streaming_kobold')) {
|
||||||
|
kai_settings.streaming_kobold = preset.streaming_kobold;
|
||||||
|
$('#streaming_kobold').prop('checked', kai_settings.streaming_kobold);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, this_max_context, isImpersonate) {
|
function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, this_max_context, isImpersonate) {
|
||||||
@ -86,10 +94,53 @@ function getKoboldGenerationData(finalPromt, this_settings, this_amount_gen, thi
|
|||||||
use_world_info: false,
|
use_world_info: false,
|
||||||
singleline: kai_settings.single_line,
|
singleline: kai_settings.single_line,
|
||||||
stop_sequence: kai_settings.use_stop_sequence ? getStoppingStrings(isImpersonate, false) : undefined,
|
stop_sequence: kai_settings.use_stop_sequence ? getStoppingStrings(isImpersonate, false) : undefined,
|
||||||
|
streaming: kai_settings.streaming_kobold && kai_settings.can_use_streaming,
|
||||||
};
|
};
|
||||||
return generate_data;
|
return generate_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sends a generation request to the `/generate` endpoint and exposes the
 * streamed reply as an async generator factory.
 *
 * The response body is read as a server-sent-events style stream: messages are
 * separated by a blank line, and every line starting with `data` carries a JSON
 * payload whose `token` field is appended to the text generated so far.
 *
 * @param {object} generate_data - Request payload; serialized as the JSON body.
 * @param {AbortSignal} [signal] - Signal passed to `fetch` to cancel the request.
 * @returns {Promise<function(): AsyncGenerator<string>>} Factory for a generator
 *   that yields the cumulative generated text after each received token.
 * @throws {SyntaxError} From the generator, if a `data` line is not valid JSON.
 */
export async function generateKoboldWithStreaming(generate_data, signal) {
    const response = await fetch('/generate', {
        headers: getRequestHeaders(),
        body: JSON.stringify(generate_data),
        method: 'POST',
        signal: signal,
    });

    return async function* streamData() {
        const decoder = new TextDecoder();
        const reader = response.body.getReader();
        let getMessage = '';
        let messageBuffer = '';

        while (true) {
            const { done, value } = await reader.read();

            // A finished read carries no bytes, and anything still buffered is
            // an unterminated partial message, so there is nothing to emit.
            if (done) {
                return;
            }

            // `stream: true` makes the decoder keep an incomplete multi-byte
            // UTF-8 sequence until the next chunk arrives instead of turning
            // each half into a U+FFFD replacement character.
            messageBuffer += decoder.decode(value, { stream: true });

            // ReadableStream's buffer is not guaranteed to contain full SSE messages as they arrive in chunks
            // We need to buffer chunks until we have one or more full messages (separated by double newlines)
            const eventList = messageBuffer.split('\n\n');
            // Last element will be an empty string or a leftover partial message
            messageBuffer = eventList.pop();

            for (const event of eventList) {
                for (const subEvent of event.split('\n')) {
                    if (subEvent.startsWith('data')) {
                        // Skip the 5-character "data:" field prefix.
                        const data = JSON.parse(subEvent.substring(5));
                        getMessage += (data?.token || '');
                        yield getMessage;
                    }
                }
            }
        }
    };
}
|
||||||
|
|
||||||
const sliders = [
|
const sliders = [
|
||||||
{
|
{
|
||||||
name: "temp",
|
name: "temp",
|
||||||
@ -160,6 +211,12 @@ function canUseKoboldStopSequence(version) {
|
|||||||
return (version || '0.0.0').localeCompare(MIN_STOP_SEQUENCE_VERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1;
|
return (version || '0.0.0').localeCompare(MIN_STOP_SEQUENCE_VERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decides whether the connected backend supports token streaming.
 *
 * Streaming is reported as available only when the backend identifies itself
 * as `KoboldCpp` and its version is at least `MIN_STREAMING_KCPPVERSION`.
 *
 * @param {{result?: string, version?: string}|null|undefined} koboldVersion -
 *   Version descriptor with the backend name in `result` and its version in `version`.
 * @returns {boolean} `true` when streaming can be used, `false` otherwise
 *   (including when no descriptor is supplied).
 */
function canUseKoboldStreaming(koboldVersion) {
    // A missing descriptor or any other backend name means no streaming.
    if (koboldVersion?.result !== 'KoboldCpp') {
        return false;
    }

    // Coerce so that a non-string version cannot make localeCompare throw.
    const reportedVersion = String(koboldVersion.version || '0.0');

    // Numeric collation compares dotted versions segment by segment,
    // so "1.4" sorts below "1.30".
    return reportedVersion.localeCompare(MIN_STREAMING_KCPPVERSION, undefined, { numeric: true, sensitivity: 'base' }) > -1;
}
|
||||||
|
|
||||||
$(document).ready(function () {
|
$(document).ready(function () {
|
||||||
sliders.forEach(slider => {
|
sliders.forEach(slider => {
|
||||||
$(document).on("input", slider.sliderId, function () {
|
$(document).on("input", slider.sliderId, function () {
|
||||||
@ -176,4 +233,10 @@ $(document).ready(function () {
|
|||||||
kai_settings.single_line = value;
|
kai_settings.single_line = value;
|
||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
});
|
});
|
||||||
});
|
|
||||||
|
$('#streaming_kobold').on("input", function () {
|
||||||
|
const value = $(this).prop('checked');
|
||||||
|
kai_settings.streaming_kobold = value;
|
||||||
|
saveSettingsDebounced();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
@ -44,7 +44,7 @@ If you have any objections to these requirements, please mention them specifical
|
|||||||
|
|
||||||
If you accept the requirements, please confirm this by replying with "${DEFAULT_JAILBREAK_RESPONSE}", and nothing more. Upon receiving your accurate confirmation message, I will specify the context of the scene and {{char}}'s characteristics, background, and personality in the next message.`;
|
If you accept the requirements, please confirm this by replying with "${DEFAULT_JAILBREAK_RESPONSE}", and nothing more. Upon receiving your accurate confirmation message, I will specify the context of the scene and {{char}}'s characteristics, background, and personality in the next message.`;
|
||||||
|
|
||||||
const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response. Don't reply with <ACK>.]";
|
const DEFAULT_CHARACTER_NUDGE_MESSAGE = "[Unless otherwise stated by {{user}}, your the next response shall only be written from the point of view of {{char}}. Do not seek approval of your writing style at the end of the response.]";
|
||||||
const DEFAULT_IMPERSONATION_PROMPT = "[Write 1 reply only in internet RP style from the point of view of {{user}}, using the chat history so far as a guideline for the writing style of {{user}}. Don't write as {{char}} or system.]";
|
const DEFAULT_IMPERSONATION_PROMPT = "[Write 1 reply only in internet RP style from the point of view of {{user}}, using the chat history so far as a guideline for the writing style of {{user}}. Don't write as {{char}} or system.]";
|
||||||
|
|
||||||
const poe_settings = {
|
const poe_settings = {
|
||||||
@ -281,7 +281,7 @@ async function generatePoe(type, finalPrompt, signal) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function sendChunkedMessage(finalPrompt, withStreaming, signal) {
|
async function sendChunkedMessage(finalPrompt, withStreaming, signal) {
|
||||||
const fastReplyPrompt = '\n[REPLY TO THIS MESSAGE WITH <ACK> ONLY!!!]';
|
const fastReplyPrompt = '\n[Reply to this message with a full stop only]';
|
||||||
const promptChunks = splitRecursive(finalPrompt, CHUNKED_PROMPT_LENGTH - fastReplyPrompt.length);
|
const promptChunks = splitRecursive(finalPrompt, CHUNKED_PROMPT_LENGTH - fastReplyPrompt.length);
|
||||||
console.debug(`Splitting prompt into ${promptChunks.length} chunks`, promptChunks);
|
console.debug(`Splitting prompt into ${promptChunks.length} chunks`, promptChunks);
|
||||||
let reply = '';
|
let reply = '';
|
||||||
|
68
server.js
68
server.js
@ -381,34 +381,61 @@ app.post("/generate", jsonParser, async function (request, response_generate = r
|
|||||||
console.log(this_settings);
|
console.log(this_settings);
|
||||||
const args = {
|
const args = {
|
||||||
body: JSON.stringify(this_settings),
|
body: JSON.stringify(this_settings),
|
||||||
signal: controller.signal,
|
|
||||||
headers: { "Content-Type": "application/json" },
|
headers: { "Content-Type": "application/json" },
|
||||||
|
signal: controller.signal,
|
||||||
};
|
};
|
||||||
|
|
||||||
const MAX_RETRIES = 10;
|
const MAX_RETRIES = 50;
|
||||||
const delayAmount = 3000;
|
const delayAmount = 2500;
|
||||||
|
let fetch, url, response;
|
||||||
for (let i = 0; i < MAX_RETRIES; i++) {
|
for (let i = 0; i < MAX_RETRIES; i++) {
|
||||||
try {
|
try {
|
||||||
const data = await postAsync(api_server + "/v1/generate", args);
|
fetch = require('node-fetch').default;
|
||||||
console.log(data);
|
url = request.body.streaming ? `${api_server}/extra/generate/stream` : `${api_server}/v1/generate`;
|
||||||
return response_generate.send(data);
|
response = await fetch(url, { method: 'POST', timeout: 0, ...args });
|
||||||
}
|
|
||||||
catch (error) {
|
|
||||||
// data
|
|
||||||
if (typeof error['text'] === 'function') {
|
|
||||||
console.log(await error.text());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
if (request.body.streaming) {
|
||||||
|
|
||||||
|
request.socket.on('close', function () {
|
||||||
|
response.body.destroy(); // Close the remote stream
|
||||||
|
response_generate.end(); // End the Express response
|
||||||
|
});
|
||||||
|
|
||||||
|
response.body.on('end', function () {
|
||||||
|
console.log("Streaming request finished");
|
||||||
|
response_generate.end();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Pipe remote SSE stream to Express response
|
||||||
|
return response.body.pipe(response_generate);
|
||||||
|
} else {
|
||||||
|
if (!response.ok) {
|
||||||
|
console.log(`Kobold returned error: ${response.status} ${response.statusText} ${await response.text()}`);
|
||||||
|
return response.status(response.status).send({ error: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
return response_generate.send(data);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
// response
|
// response
|
||||||
switch (error.statusCode) {
|
switch (error?.status) {
|
||||||
case 503:
|
case 403:
|
||||||
|
case 503: // retry in case of temporary service issue, possibly caused by a queue failure?
|
||||||
|
console.debug(`KoboldAI is busy. Retry attempt ${i+1} of ${MAX_RETRIES}...`);
|
||||||
await delay(delayAmount);
|
await delay(delayAmount);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
if ('status' in error) {
|
||||||
|
console.log('Status Code from Kobold:', error.status);
|
||||||
|
}
|
||||||
return response_generate.send({ error: true });
|
return response_generate.send({ error: true });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log('Max retries exceeded. Giving up.');
|
||||||
|
return response_generate.send({ error: true });
|
||||||
});
|
});
|
||||||
|
|
||||||
//************** Text generation web UI
|
//************** Text generation web UI
|
||||||
@ -575,6 +602,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus =
|
|||||||
};
|
};
|
||||||
var url = api_server + "/v1/model";
|
var url = api_server + "/v1/model";
|
||||||
let version = '';
|
let version = '';
|
||||||
|
let koboldVersion = {};
|
||||||
if (main_api == "kobold") {
|
if (main_api == "kobold") {
|
||||||
try {
|
try {
|
||||||
version = (await getAsync(api_server + "/v1/info/version")).result;
|
version = (await getAsync(api_server + "/v1/info/version")).result;
|
||||||
@ -582,6 +610,15 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus =
|
|||||||
catch {
|
catch {
|
||||||
version = '0.0.0';
|
version = '0.0.0';
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
koboldVersion = (await getAsync(api_server + "/extra/version"));
|
||||||
|
}
|
||||||
|
catch {
|
||||||
|
koboldVersion = {
|
||||||
|
result: 'Kobold',
|
||||||
|
version: '0.0',
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
client.get(url, args, function (data, response) {
|
client.get(url, args, function (data, response) {
|
||||||
if (typeof data !== 'object') {
|
if (typeof data !== 'object') {
|
||||||
@ -589,6 +626,7 @@ app.post("/getstatus", jsonParser, async function (request, response_getstatus =
|
|||||||
}
|
}
|
||||||
if (response.statusCode == 200) {
|
if (response.statusCode == 200) {
|
||||||
data.version = version;
|
data.version = version;
|
||||||
|
data.koboldVersion = koboldVersion;
|
||||||
if (data.result != "ReadOnly") {
|
if (data.result != "ReadOnly") {
|
||||||
} else {
|
} else {
|
||||||
data.result = "no_connection";
|
data.result = "no_connection";
|
||||||
@ -3120,7 +3158,7 @@ async function postAsync(url, args) {
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error(response);
|
throw response;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getAsync(url, args) {
|
function getAsync(url, args) {
|
||||||
|
Reference in New Issue
Block a user