Add support for Koboldcpp tokenization endpoint
parent cd2faea2b2 · commit ab52af4fb5
@@ -2245,7 +2245,7 @@
                 <option value="3">Sentencepiece (LLaMA)</option>
                 <option value="4">NerdStash (NovelAI Clio)</option>
                 <option value="5">NerdStash v2 (NovelAI Kayra)</option>
-                <option value="6">API (WebUI)</option>
+                <option value="6">API (WebUI / koboldcpp)</option>
             </select>
         </div>
         <div class="range-block">
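Note that the option keeps value 6; only the label changes, so the existing "API" tokenizer choice now covers both text-generation-webui and koboldcpp backends. A minimal sketch of how that selection could drive a count; the '#tokenizer' selector is an assumption for illustration and not part of this diff, while getTokenCount comes from the tokenizer module changed below:

// Illustrative only: '#tokenizer' is an assumed id for the <select> above.
const tokenizerType = Number(document.querySelector('#tokenizer').value);
if (tokenizerType === 6) {
    // Value 6 is the option that now routes through the remote tokenization endpoint.
    console.log(getTokenCount('Hello there!'));
}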
@@ -24,6 +24,15 @@ const gpt3 = new GPT3BrowserTokenizer({ type: 'gpt3' });
 
 let tokenCache = {};
 
+/**
+ * Guesstimates the token count for a string.
+ * @param {string} str String to tokenize.
+ * @returns {number} Token count.
+ */
+export function guesstimate(str) {
+    return Math.ceil(str.length / CHARACTERS_PER_TOKEN_RATIO);
+}
+
 async function loadTokenCache() {
     try {
         console.debug('Chat Completions: loading token cache')
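guesstimate() is a pure character-ratio heuristic, so it can be reused anywhere an exact tokenizer is unavailable (see the fallback added to countTokensRemote() further down). A quick worked example, assuming CHARACTERS_PER_TOKEN_RATIO is 3.35 purely for illustration; the real constant is defined elsewhere in this file:

// 'The quick brown fox' is 19 characters long.
// Math.ceil(19 / 3.35) = 6, so the estimate is 6 tokens under the assumed ratio.
const estimate = guesstimate('The quick brown fox');
console.log(estimate);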
@@ -89,7 +98,7 @@ export function getTokenCount(str, padding = undefined) {
     function calculate(type) {
         switch (type) {
             case tokenizers.NONE:
-                return Math.ceil(str.length / CHARACTERS_PER_TOKEN_RATIO) + padding;
+                return guesstimate(str) + padding;
             case tokenizers.GPT3:
                 return gpt3.encode(str).bpe.length + padding;
             case tokenizers.CLASSIC:
@@ -291,8 +300,16 @@ function getTokenCacheObject() {
     return tokenCache[String(chatId)];
 }
 
+/**
+ * Counts token using the remote server API.
+ * @param {string} endpoint API endpoint.
+ * @param {string} str String to tokenize.
+ * @param {number} padding Number of padding tokens.
+ * @returns {number} Token count with padding.
+ */
 function countTokensRemote(endpoint, str, padding) {
     let tokenCount = 0;
 
     jQuery.ajax({
         async: false,
         type: 'POST',
@@ -301,9 +318,25 @@ function countTokensRemote(endpoint, str, padding) {
         dataType: "json",
         contentType: "application/json",
         success: function (data) {
+            if (typeof data.count === 'number') {
+                tokenCount = data.count;
+            } else {
+                tokenCount = guesstimate(str);
+                console.error("Error counting tokens");
+
+                if (!sessionStorage.getItem('tokenizationWarningShown')) {
+                    toastr.warning(
+                        "Your selected API doesn't support the tokenization endpoint. Using estimated counts.",
+                        "Error counting tokens",
+                        { timeOut: 10000, preventDuplicates: true },
+                    );
+
+                    sessionStorage.setItem('tokenizationWarningShown', String(true));
+                }
+            }
         }
     });
 
     return tokenCount + padding;
 }
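countTokensRemote() stays a blocking call (async: false) so existing callers keep receiving a plain number, and it now falls back to guesstimate() with a one-time toastr warning when the backend has no usable tokenization endpoint. For comparison, a non-blocking sketch of the same flow; the { text } request body shape is an assumption, and the server is only assumed to reply with a count field as the /tokenize_via_api handler below does:

// Hypothetical async variant of the same idea, illustration only.
async function countTokensRemoteAsync(endpoint, str, padding = 0) {
    try {
        const response = await fetch(endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: str }),
        });
        const data = await response.json();
        if (typeof data.count === 'number') {
            return data.count + padding;
        }
    } catch (error) {
        console.error('Error counting tokens', error);
    }
    // Fall back to the character-ratio estimate, matching the jQuery version above.
    return guesstimate(str) + padding;
}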
server.js (14 changed lines)
@@ -3842,11 +3842,19 @@ app.post("/tokenize_via_api", jsonParser, async function (request, response) {
 
         if (main_api == 'textgenerationwebui' && request.body.use_mancer) {
             args.headers = Object.assign(args.headers, get_mancer_headers());
+            const data = await postAsync(api_server + "/v1/token-count", args);
+            return response.send({ count: data['results'][0]['tokens'] });
         }
-
-        const data = await postAsync(api_server + "/v1/token-count", args);
-        console.log(data);
-        return response.send({ count: data['results'][0]['tokens'] });
+        else if (main_api == 'kobold') {
+            const data = await postAsync(api_server + "/extra/tokencount", args);
+            const count = data['value'];
+            return response.send({ count: count });
+        }
+
+        else {
+            return response.send({ error: true });
+        }
     } catch (error) {
         console.log(error);
         return response.send({ error: true });
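The new kobold branch reads the token count from the `value` field that koboldcpp's /extra/tokencount route returns. A standalone sketch of that round trip, assuming a local koboldcpp instance on its default port and a { prompt } request body; only the path suffix and the `value` field are taken from this diff:

// Illustrative check against a local koboldcpp server; URL prefix and body shape are assumptions.
(async () => {
    const resp = await fetch('http://127.0.0.1:5001/api/extra/tokencount', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ prompt: 'The quick brown fox' }),
    });
    const data = await resp.json();
    console.log(data['value']); // this is what server.js forwards as `count`
})();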