Fix tokenizer override

I searched for all users of tokenizers.API, but missed that the menu
converts the numerical select values directly to enum values. I've used
the special tokenizer value 98 to represent "the tokenizer API for
whichever backend we're currently using".
This commit is contained in:
valadaptive 2023-12-09 23:57:21 -05:00
parent 499d158c11
commit 55976e61a3
2 changed files with 23 additions and 1 deletions

View File

@ -2438,7 +2438,7 @@
<option value="5">NerdStash v2 (NovelAI Kayra)</option>
<option value="7">Mistral</option>
<option value="8">Yi</option>
<option value="6">API (WebUI / koboldcpp)</option>
<option value="98">API (WebUI / koboldcpp)</option>
</select>
</div>
<div class="range-block" data-newbie-hidden>

View File

@ -22,6 +22,7 @@ export const tokenizers = {
MISTRAL: 7,
YI: 8,
API_TEXTGENERATIONWEBUI: 9,
API_CURRENT: 98,
BEST_MATCH: 99,
};
@ -195,6 +196,19 @@ export function getTokenizerBestMatch(forApi) {
return tokenizers.NONE;
}
/**
 * Picks the remote tokenizer API that corresponds to the text generation API
 * currently held in `main_api`.
 * @returns {number} `tokenizers.API_KOBOLD` for 'kobold' and 'koboldhorde',
 * `tokenizers.API_TEXTGENERATIONWEBUI` for 'textgenerationwebui',
 * and `tokenizers.NONE` for any other value.
 */
function currentRemoteTokenizerAPI() {
    // Both Kobold-flavoured values share the same tokenizer entry.
    if (main_api === 'kobold' || main_api === 'koboldhorde') {
        return tokenizers.API_KOBOLD;
    }

    if (main_api === 'textgenerationwebui') {
        return tokenizers.API_TEXTGENERATIONWEBUI;
    }

    // Any other value has no remote tokenizer mapping here.
    return tokenizers.NONE;
}
/**
* Calls the underlying tokenizer model to get the token count for a string.
* @param {number} type Tokenizer type.
@ -205,6 +219,8 @@ function callTokenizer(type, str) {
if (type === tokenizers.NONE) return guesstimate(str);
switch (type) {
case tokenizers.API_CURRENT:
return callTokenizer(currentRemoteTokenizerAPI(), str);
case tokenizers.API_KOBOLD:
return countTokensFromKoboldAPI(str);
case tokenizers.API_TEXTGENERATIONWEBUI:
@ -620,6 +636,8 @@ function decodeTextTokensFromServer(endpoint, ids) {
*/
export function getTextTokens(tokenizerType, str) {
switch (tokenizerType) {
case tokenizers.API_CURRENT:
return callTokenizer(currentRemoteTokenizerAPI(), str);
case tokenizers.API_TEXTGENERATIONWEBUI:
return getTextTokensFromTextgenAPI(str);
default: {
@ -647,6 +665,10 @@ export function getTextTokens(tokenizerType, str) {
* @param {number[]} ids Array of token ids
*/
export function decodeTextTokens(tokenizerType, ids) {
// Currently, neither remote API can decode, but this may change in the future. Put this guard here to be safe
if (tokenizerType === tokenizers.API_CURRENT) {
return decodeTextTokens(tokenizers.NONE);
}
const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType];
if (!tokenizerEndpoints) {
console.warn('Unknown tokenizer type', tokenizerType);