mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Merge branch 'staging' into separate-kobold-endpoints
This commit is contained in:
@ -2629,12 +2629,12 @@ class StreamingProcessor {
|
|||||||
|
|
||||||
if (!isImpersonate && !isContinue && Array.isArray(this.swipes) && this.swipes.length > 0) {
|
if (!isImpersonate && !isContinue && Array.isArray(this.swipes) && this.swipes.length > 0) {
|
||||||
for (let i = 0; i < this.swipes.length; i++) {
|
for (let i = 0; i < this.swipes.length; i++) {
|
||||||
this.swipes[i] = cleanUpMessage(this.removePrefix(this.swipes[i]), false, false, true, !isFinal);
|
this.swipes[i] = cleanUpMessage(this.removePrefix(this.swipes[i]), false, false, true, this.stoppingStrings);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
text = this.removePrefix(text);
|
text = this.removePrefix(text);
|
||||||
let processedText = cleanUpMessage(text, isImpersonate, isContinue, !isFinal, !isFinal);
|
let processedText = cleanUpMessage(text, isImpersonate, isContinue, !isFinal, this.stoppingStrings);
|
||||||
|
|
||||||
// Predict unbalanced asterisks / quotes during streaming
|
// Predict unbalanced asterisks / quotes during streaming
|
||||||
const charsToBalance = ['*', '"', '```'];
|
const charsToBalance = ['*', '"', '```'];
|
||||||
@ -2805,6 +2805,12 @@ class StreamingProcessor {
|
|||||||
scrollLock = false;
|
scrollLock = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stopping strings are expensive to calculate, especially with macros enabled. To remove stopping strings
|
||||||
|
// when streaming, we cache the result of getStoppingStrings instead of calling it once per token.
|
||||||
|
const isImpersonate = this.type == 'impersonate';
|
||||||
|
const isContinue = this.type == 'continue';
|
||||||
|
this.stoppingStrings = getStoppingStrings(isImpersonate, isContinue);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const sw = new Stopwatch(1000 / power_user.streaming_fps);
|
const sw = new Stopwatch(1000 / power_user.streaming_fps);
|
||||||
const timestamps = [];
|
const timestamps = [];
|
||||||
@ -2907,7 +2913,7 @@ export async function generateRaw(prompt, api, instructOverride) {
|
|||||||
throw new Error(data.error);
|
throw new Error(data.error);
|
||||||
}
|
}
|
||||||
|
|
||||||
const message = cleanUpMessage(extractMessageFromData(data), false, false, true, false);
|
const message = cleanUpMessage(extractMessageFromData(data), false, false, true);
|
||||||
|
|
||||||
if (!message) {
|
if (!message) {
|
||||||
throw new Error('No message generated');
|
throw new Error('No message generated');
|
||||||
@ -3814,7 +3820,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu
|
|||||||
streamingProcessor.generator = streamingGenerator;
|
streamingProcessor.generator = streamingGenerator;
|
||||||
hideSwipeButtons();
|
hideSwipeButtons();
|
||||||
let getMessage = await streamingProcessor.generate();
|
let getMessage = await streamingProcessor.generate();
|
||||||
let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false, false);
|
let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false);
|
||||||
|
|
||||||
if (isContinue) {
|
if (isContinue) {
|
||||||
getMessage = continue_mag + getMessage;
|
getMessage = continue_mag + getMessage;
|
||||||
@ -3849,7 +3855,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu
|
|||||||
|
|
||||||
const swipes = extractMultiSwipes(data, type);
|
const swipes = extractMultiSwipes(data, type);
|
||||||
|
|
||||||
messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false, false);
|
messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false);
|
||||||
|
|
||||||
if (isContinue) {
|
if (isContinue) {
|
||||||
getMessage = continue_mag + getMessage;
|
getMessage = continue_mag + getMessage;
|
||||||
@ -3857,7 +3863,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu
|
|||||||
|
|
||||||
//Formating
|
//Formating
|
||||||
const displayIncomplete = type === 'quiet' && !quietToLoud;
|
const displayIncomplete = type === 'quiet' && !quietToLoud;
|
||||||
getMessage = cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete, false);
|
getMessage = cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete);
|
||||||
|
|
||||||
if (getMessage.length > 0) {
|
if (getMessage.length > 0) {
|
||||||
if (isImpersonate) {
|
if (isImpersonate) {
|
||||||
@ -4487,7 +4493,7 @@ function extractMultiSwipes(data, type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (let i = 1; i < data.choices.length; i++) {
|
for (let i = 1; i < data.choices.length; i++) {
|
||||||
const text = cleanUpMessage(data.choices[i].text, false, false, false, false);
|
const text = cleanUpMessage(data.choices[i].text, false, false, false);
|
||||||
swipes.push(text);
|
swipes.push(text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -4495,7 +4501,7 @@ function extractMultiSwipes(data, type) {
|
|||||||
return swipes;
|
return swipes;
|
||||||
}
|
}
|
||||||
|
|
||||||
function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncompleteSentences = false, skipStopStringCleanup = false) {
|
function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncompleteSentences = false, stoppingStrings = null) {
|
||||||
if (!getMessage) {
|
if (!getMessage) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
@ -4510,16 +4516,18 @@ function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete
|
|||||||
getMessage = substituteParams(power_user.user_prompt_bias) + getMessage;
|
getMessage = substituteParams(power_user.user_prompt_bias) + getMessage;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!skipStopStringCleanup) {
|
// Allow for caching of stopping strings. getStoppingStrings is an expensive function, especially with macros
|
||||||
const stoppingStrings = getStoppingStrings(isImpersonate, isContinue);
|
// enabled, so for streaming, we call it once and then pass it into each cleanUpMessage call.
|
||||||
|
if (!stoppingStrings) {
|
||||||
|
stoppingStrings = getStoppingStrings(isImpersonate, isContinue);
|
||||||
|
}
|
||||||
|
|
||||||
for (const stoppingString of stoppingStrings) {
|
for (const stoppingString of stoppingStrings) {
|
||||||
if (stoppingString.length) {
|
if (stoppingString.length) {
|
||||||
for (let j = stoppingString.length; j > 0; j--) {
|
for (let j = stoppingString.length; j > 0; j--) {
|
||||||
if (getMessage.slice(-j) === stoppingString.slice(0, j)) {
|
if (getMessage.slice(-j) === stoppingString.slice(0, j)) {
|
||||||
getMessage = getMessage.slice(0, -j);
|
getMessage = getMessage.slice(0, -j);
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,6 +63,7 @@ const TOKENIZER_URLS = {
|
|||||||
},
|
},
|
||||||
[tokenizers.API_KOBOLD]: {
|
[tokenizers.API_KOBOLD]: {
|
||||||
count: '/api/tokenizers/remote/kobold/count',
|
count: '/api/tokenizers/remote/kobold/count',
|
||||||
|
encode: '/api/tokenizers/remote/kobold/count',
|
||||||
},
|
},
|
||||||
[tokenizers.MISTRAL]: {
|
[tokenizers.MISTRAL]: {
|
||||||
encode: '/api/tokenizers/mistral/encode',
|
encode: '/api/tokenizers/mistral/encode',
|
||||||
@ -617,6 +618,32 @@ function getTextTokensFromTextgenAPI(str) {
|
|||||||
return ids;
|
return ids;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Calls the AI provider's tokenize API to encode a string to tokens.
 *
 * The request is issued synchronously (`async: false`) so the token ids can be
 * returned directly to the caller instead of through a promise or callback.
 *
 * @param {string} str String to tokenize.
 * @returns {number[]} Array of token ids. Empty when the request fails or the
 * response does not contain an `ids` array.
 */
function getTextTokensFromKoboldAPI(str) {
    let ids = [];

    jQuery.ajax({
        async: false,
        type: 'POST',
        url: TOKENIZER_URLS[tokenizers.API_KOBOLD].encode,
        data: JSON.stringify({
            text: str,
            // NOTE(review): presumably the address of the Kobold backend the server should query — confirm.
            url: api_server,
        }),
        dataType: 'json',
        contentType: 'application/json',
        success: function (data) {
            // A successful HTTP response is not guaranteed to carry token ids
            // (e.g. an error payload such as `{ error: true }`). Only accept a real
            // array so the documented `number[]` return type always holds.
            if (Array.isArray(data?.ids)) {
                ids = data.ids;
            }
        },
    });

    return ids;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calls the underlying tokenizer model to decode token ids to text.
|
* Calls the underlying tokenizer model to decode token ids to text.
|
||||||
* @param {string} endpoint API endpoint.
|
* @param {string} endpoint API endpoint.
|
||||||
@ -650,6 +677,8 @@ export function getTextTokens(tokenizerType, str) {
|
|||||||
return getTextTokens(currentRemoteTokenizerAPI(), str);
|
return getTextTokens(currentRemoteTokenizerAPI(), str);
|
||||||
case tokenizers.API_TEXTGENERATIONWEBUI:
|
case tokenizers.API_TEXTGENERATIONWEBUI:
|
||||||
return getTextTokensFromTextgenAPI(str);
|
return getTextTokensFromTextgenAPI(str);
|
||||||
|
case tokenizers.API_KOBOLD:
|
||||||
|
return getTextTokensFromKoboldAPI(str);
|
||||||
default: {
|
default: {
|
||||||
const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType];
|
const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType];
|
||||||
if (!tokenizerEndpoints) {
|
if (!tokenizerEndpoints) {
|
||||||
|
@ -1438,7 +1438,7 @@ app.use('/api/serpapi', require('./src/endpoints/serpapi').router);
|
|||||||
// The different text generation APIs
|
// The different text generation APIs
|
||||||
|
|
||||||
// Ooba/OpenAI text completions
|
// Ooba/OpenAI text completions
|
||||||
app.use('/api/backends/ooba', require('./src/endpoints/backends/ooba').router);
|
app.use('/api/backends/text-completions', require('./src/endpoints/backends/text-completions').router);
|
||||||
|
|
||||||
// KoboldAI
|
// KoboldAI
|
||||||
app.use('/api/textgen/kobold', require('./src/endpoints/textgen/kobold').router);
|
app.use('/api/textgen/kobold', require('./src/endpoints/textgen/kobold').router);
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
const express = require('express');
|
const express = require('express');
|
||||||
|
const fetch = require('node-fetch').default;
|
||||||
|
|
||||||
const { jsonParser } = require('../../express-common');
|
const { jsonParser } = require('../../express-common');
|
||||||
const { TEXTGEN_TYPES } = require('../../constants');
|
const { TEXTGEN_TYPES } = require('../../constants');
|
@ -562,7 +562,8 @@ router.post('/remote/kobold/count', jsonParser, async function (request, respons
|
|||||||
|
|
||||||
const data = await result.json();
|
const data = await result.json();
|
||||||
const count = data['value'];
|
const count = data['value'];
|
||||||
return response.send({ count, ids: [] });
|
const ids = data['ids'] ?? [];
|
||||||
|
return response.send({ count, ids });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
return response.send({ error: true });
|
return response.send({ error: true });
|
||||||
@ -617,7 +618,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
|
|||||||
|
|
||||||
const data = await result.json();
|
const data = await result.json();
|
||||||
const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value);
|
const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value);
|
||||||
const ids = legacyApi ? [] : (data?.tokens ?? []);
|
const ids = legacyApi ? [] : (data?.tokens ?? data?.ids ?? []);
|
||||||
|
|
||||||
return response.send({ count, ids });
|
return response.send({ count, ids });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -349,7 +349,7 @@ function getImages(path) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Pipe a fetch() response to an Express.js Response, including status code.
|
* Pipe a fetch() response to an Express.js Response, including status code.
|
||||||
* @param {Response} from The Fetch API response to pipe from.
|
* @param {import('node-fetch').Response} from The Fetch API response to pipe from.
|
||||||
* @param {Express.Response} to The Express response to pipe to.
|
* @param {Express.Response} to The Express response to pipe to.
|
||||||
*/
|
*/
|
||||||
function forwardFetchResponse(from, to) {
|
function forwardFetchResponse(from, to) {
|
||||||
|
Reference in New Issue
Block a user