From 5f92b8a09e3638475756b575e4859f7f4e9f907a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eug=C3=A9n=20Cowie?= Date: Mon, 19 Aug 2024 00:13:26 +0100 Subject: [PATCH] Add slash command to set tokenizer --- public/script.js | 43 +++++++++++++++++++++++++++++++++++- public/scripts/tokenizers.js | 18 +++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/public/script.js b/public/script.js index daea9cc50..4599e1cdc 100644 --- a/public/script.js +++ b/public/script.js @@ -212,7 +212,7 @@ import { selectContextPreset, } from './scripts/instruct-mode.js'; import { initLocales, t, translate } from './scripts/i18n.js'; -import { getFriendlyTokenizerName, getTokenCount, getTokenCountAsync, getTokenizerModel, initTokenizers, saveTokenCache } from './scripts/tokenizers.js'; +import { getFriendlyTokenizerName, getTokenCount, getTokenCountAsync, getTokenizerModel, initTokenizers, saveTokenCache, selectTokenizer, TOKENIZER_NAME_MAP, tokenizers } from './scripts/tokenizers.js'; import { user_avatar, getUserAvatars, @@ -8451,6 +8451,25 @@ async function selectInstructCallback(_, name) { return foundName; } +async function selectTokenizerCallback(_, name) { + if (!name) { + return TOKENIZER_NAME_MAP[power_user.tokenizer]; + } + + const tokenizerNames = Object.values(TOKENIZER_NAME_MAP); + const fuse = new Fuse(tokenizerNames); + const result = fuse.search(name); + + if (result.length === 0) { + toastr.warning(`Tokenizer "${name}" not found`); + return ''; + } + + const foundName = result[0].item; + selectTokenizer(tokenizers[foundName.toUpperCase()]); + return foundName; +} + async function enableInstructCallback() { $('#instruct_enabled').prop('checked', true).trigger('change'); return ''; @@ -9095,6 +9114,28 @@ jQuery(async function () { `, })); + SlashCommandParser.addCommandObject(SlashCommand.fromProps({ + name: 'tokenizer', + callback: selectTokenizerCallback, + returns: 'current tokenizer', + unnamedArgumentList: [ + SlashCommandArgument.fromProps({ + description: 'tokenizer name', + typeList: [ARGUMENT_TYPE.STRING], + enumList: Object.values(TOKENIZER_NAME_MAP).map(tokenizer => + new SlashCommandEnumValue(tokenizer, null, enumTypes.enum, enumIcons.default)), + }), + ], + helpString: ` +
+ Selects tokenizer by name. Gets the current tokenizer if no name is provided. +
+
+ Available tokenizers: +
${Object.values(TOKENIZER_NAME_MAP).join(', ')}
+
+ ` + })); SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'instruct-on', callback: enableInstructCallback, diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index c27775acb..5732706d3 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -147,6 +147,24 @@ async function resetTokenCache() { } } +/** + * Maps tokenizer IDs to their names. + * @example { 0: 'none', 1: 'gpt2', ... } + */ +export const TOKENIZER_NAME_MAP = Object.fromEntries( + Object.entries(tokenizers).map(([name, id]) => [id, name.toLowerCase()])); + +/** + * Selects tokenizer if not already selected. + * @param {number} tokenizerId Tokenizer ID. + */ +export function selectTokenizer(tokenizerId) { + if (tokenizerId !== power_user.tokenizer) { + $('#tokenizer').val(tokenizerId).trigger('change'); + toastr.info(`Tokenizer: "${TOKENIZER_NAME_MAP[tokenizerId]}" selected`); + } +} + /** * Gets the friendly name of the current tokenizer. * @param {string} forApi API to get the tokenizer for. Defaults to the main API.