Mirror of https://github.com/SillyTavern/SillyTavern.git (synced 2025-01-31 11:35:37 +01:00)

Add NovelAI hypebot plugin

This commit is contained in:
parent 8ec9b64be4
commit 9660aaa2c2
@@ -2277,7 +2277,7 @@
     <select id="tokenizer">
         <option value="99">Best match (recommended)</option>
         <option value="0">None / Estimated</option>
-        <option value="1">GPT-3 (OpenAI)</option>
+        <option value="1">GPT-2</option>
         <option value="2">GPT-3 (Alternative / Classic)</option>
         <option value="3">Sentencepiece (LLaMA)</option>
         <option value="4">NerdStash (NovelAI Clio)</option>
@@ -1,210 +0,0 @@
/*
# Implementation strategy

Create a tree of `Map`s, such that indexing the tree recursively (with items
of a key array, sequentially), traverses the tree, so that when the key array
is exhausted, the tree node we arrive at contains the value for that key
array under the guaranteed-unique `Symbol` key `dataSymbol`.

## Example

Start with an empty `ArrayKeyedMap` tree:

    {
    }

Add ['a'] → 1:

    {
      'a': {
        [dataSymbol]: 1,
      },
    }

Add [] → 0:

    {
      [dataSymbol]: 0,
      'a': {
        [dataSymbol]: 1,
      },
    }

Add ['a', 'b', 'c', 'd'] → 4:

    {
      [dataSymbol]: 0,
      'a': {
        [dataSymbol]: 1,
        'b': {
          'c': {
            'd': {
              [dataSymbol]: 4,
            },
          },
        },
      },
    }

String array keys are used in the above example for simplicity. In reality,
we can support any values in array keys, because `Map`s do.
*/

const dataSymbol = Symbol('path-store-trunk')

//
// This class represents the external API
//

class ArrayKeyedMap {
  constructor (initialEntries = []) {
    this._root = new Map()
    this._size = 0
    for (const [k, v] of initialEntries) { this.set(k, v) }
  }

  set (path, value) { return set.call(this, path, value) }

  has (path) { return has.call(this, path) }

  get (path) { return get.call(this, path) }

  delete (path) { return del.call(this, path) }

  get size () { return this._size }

  clear () {
    this._root.clear()
    this._size = 0
  }

  hasPrefix (path) { return hasPrefix.call(this, path) }

  get [Symbol.toStringTag] () { return 'ArrayKeyedMap' }

  * [Symbol.iterator] () { yield * entries.call(this) }

  * entries () { yield * entries.call(this) }

  * keys () { yield * keys.call(this) }

  * values () { yield * values.call(this) }

  forEach (callback, thisArg) { forEach.call(this, callback, thisArg) }
}

//
// These stateless functions implement the internals
//

function set (path, value) {
  let map = this._root
  for (const item of path) {
    let nextMap = map.get(item)
    if (!nextMap) {
      // Create next map if none exists
      nextMap = new Map()
      map.set(item, nextMap)
    }
    map = nextMap
  }

  // Reached end of path. Set the data symbol to the given value, and
  // increment size if nothing was here before.
  if (!map.has(dataSymbol)) this._size += 1
  map.set(dataSymbol, value)
  return this
}

function has (path) {
  let map = this._root
  for (const item of path) {
    const nextMap = map.get(item)
    if (nextMap) {
      map = nextMap
    } else {
      return false
    }
  }
  return map.has(dataSymbol)
}

function get (path) {
  let map = this._root
  for (const item of path) {
    map = map.get(item)
    if (!map) return undefined
  }
  return map.get(dataSymbol)
}

function del (path) {
  let map = this._root

  // Maintain a stack of maps we visited, so we can go back and trim empty ones
  // if we delete something.
  const stack = []

  for (const item of path) {
    const nextMap = map.get(item)
    if (nextMap) {
      stack.unshift({ parent: map, child: nextMap, item })
      map = nextMap
    } else {
      // Nothing to delete
      return false
    }
  }

  // Reached end of path. Delete data, if it exists.
  const hadPreviousValue = map.delete(dataSymbol)

  // If something was deleted, decrement size and go through the stack of
  // visited maps, trimming any that are now empty.
  if (hadPreviousValue) {
    this._size -= 1

    for (const { parent, child, item } of stack) {
      if (child.size === 0) {
        parent.delete(item)
      }
    }
  }
  return hadPreviousValue
}

function hasPrefix (path) {
  let map = this._root
  for (const item of path) {
    map = map.get(item)
    if (!map) return false
  }
  return true
}

function * entries () {
  const stack = [{ path: [], map: this._root }]
  while (stack.length > 0) {
    const { path, map } = stack.pop()
    for (const [k, v] of map.entries()) {
      if (k === dataSymbol) yield [path, v]
      else stack.push({ path: path.concat([k]), map: v })
    }
  }
}

function * keys () {
  for (const [k] of this.entries()) yield k
}

function * values () {
  for (const [, v] of this.entries()) yield v
}

function forEach (callback, thisArg) {
  for (const [k, v] of this.entries()) callback.call(thisArg, v, k, this)
}

export {
  ArrayKeyedMap
}
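For reference, a minimal usage sketch of the ArrayKeyedMap class deleted above (not part of the diff; the import path is hypothetical). It illustrates the behavior described in the implementation-strategy comment: values live under a hidden Symbol at each tree node, so an exact key and a mere prefix are distinct.

import { ArrayKeyedMap } from './array-keyed-map.js' // hypothetical path for the deleted module

const m = new ArrayKeyedMap([[['a'], 1]])
m.set([], 0)                          // the empty path is a valid key
m.set(['a', 'b', 'c', 'd'], 4)
console.log(m.get(['a']))             // 1
console.log(m.has(['a', 'b']))        // false: no value stored at that exact path
console.log(m.hasPrefix(['a', 'b']))  // true: a longer key passes through this node
m.delete(['a', 'b', 'c', 'd'])        // also trims the now-empty 'b'/'c'/'d' chain
console.log(m.size)                   // 2 (keys [] and ['a'] remain)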
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
195  public/scripts/extensions/hypebot/index.js  (Normal file)
@@ -0,0 +1,195 @@
import { eventSource, event_types, getRequestHeaders, is_send_press, saveSettingsDebounced } from "../../../script.js";
import { extension_settings, getContext, renderExtensionTemplate } from "../../extensions.js";
import { SECRET_KEYS, secret_state } from "../../secrets.js";
import { collapseNewlines } from "../../power-user.js";
import { bufferToBase64, debounce } from "../../utils.js";
import { decodeTextTokens, getTextTokens, tokenizers } from "../../tokenizers.js";

const MODULE_NAME = 'hypebot';
const MAX_PROMPT = 1024;
const MAX_LENGTH = 50;
const MAX_STRING_LENGTH = MAX_PROMPT * 4;

const settings = {
    enabled: false,
    name: 'Goose',
};

/**
 * Returns a random waiting verb
 * @returns {string} Random waiting verb
 */
function getWaitingVerb() {
    const waitingVerbs = ['thinking', 'typing', 'brainstorming', 'cooking', 'conjuring'];
    return waitingVerbs[Math.floor(Math.random() * waitingVerbs.length)];
}

/**
 * Returns a random verb based on the text
 * @param {string} text Text to generate a verb for
 * @returns {string} Random verb
 */
function getVerb(text) {
    let verbList = ['says', 'notes', 'states', 'whispers', 'murmurs', 'mumbles'];

    if (text.endsWith('!')) {
        verbList = ['proclaims', 'declares', 'salutes', 'exclaims', 'cheers'];
    }

    if (text.endsWith('?')) {
        verbList = ['asks', 'suggests', 'ponders', 'wonders', 'inquires', 'questions'];
    }

    return verbList[Math.floor(Math.random() * verbList.length)];
}

/**
 * Formats the HypeBot reply text
 * @param {string} text HypeBot output text
 * @returns {string} Formatted HTML text
 */
function formatReply(text) {
    const verb = getVerb(text);
    return DOMPurify.sanitize(`<span class="hypebot_name">${settings.name} ${verb}:</span> <span class="hypebot_text">${text}</span>`);
}

let hypeBotBar;
let abortController;

const generateDebounced = debounce(() => generateHypeBot(), 500);

/**
 * Called when a chat event occurs to generate a HypeBot reply.
 * @param {boolean} clear Clear the hypebot bar.
 */
function onChatEvent(clear) {
    if (clear) {
        hypeBotBar.text('');
    }

    abortController?.abort();
    generateDebounced();
}

/**
 * Generates a HypeBot reply.
 */
async function generateHypeBot() {
    if (!settings.enabled || is_send_press) {
        return;
    }

    if (!secret_state[SECRET_KEYS.NOVEL]) {
        hypeBotBar.html('<span class="hypebot_nokey">No API key found. Please enter your API key in the NovelAI API Settings</span>');
        return;
    }

    console.debug('Generating HypeBot reply');
    hypeBotBar.html(DOMPurify.sanitize(`<span class="hypebot_name">${settings.name}</span> is ${getWaitingVerb()}...`));

    const context = getContext();
    const chat = context.chat.slice();
    let prompt = '';

    for (let index = chat.length - 1; index >= 0; index--) {
        const message = chat[index];

        if (message.is_system || !message.mes) {
            continue;
        }

        prompt = `\n${message.mes}\n${prompt}`;

        if (prompt.length >= MAX_STRING_LENGTH) {
            break;
        }
    }

    prompt = collapseNewlines(prompt.replaceAll(/[\*\[\]\{\}]/g, ''));

    if (!prompt) {
        return;
    }

    const sliceLength = MAX_PROMPT - MAX_LENGTH;
    const encoded = getTextTokens(tokenizers.GPT2, prompt).slice(-sliceLength);

    // Add a stop string token to the end of the prompt
    encoded.push(49527);

    const base64String = await bufferToBase64(new Uint16Array(encoded).buffer);

    const parameters = {
        input: base64String,
        model: "hypebot",
        streaming: false,
        temperature: 1,
        max_length: MAX_LENGTH,
        min_length: 1,
        top_k: 0,
        top_p: 1,
        tail_free_sampling: 0.95,
        repetition_penalty: 1,
        repetition_penalty_range: 2048,
        repetition_penalty_slope: 0.18,
        repetition_penalty_frequency: 0,
        repetition_penalty_presence: 0,
        phrase_rep_pen: "off",
        bad_words_ids: [],
        stop_sequences: [[48585]],
        generate_until_sentence: true,
        use_cache: false,
        use_string: false,
        return_full_text: false,
        prefix: "vanilla",
        logit_bias_exp: [],
        order: [0, 1, 2, 3],
    };

    abortController = new AbortController();

    const response = await fetch('/generate_novelai', {
        headers: getRequestHeaders(),
        body: JSON.stringify(parameters),
        method: 'POST',
        signal: abortController.signal,
    });

    if (response.ok) {
        const data = await response.json();
        const ids = Array.from(new Uint16Array(Uint8Array.from(atob(data.output), c => c.charCodeAt(0)).buffer));
        // Strip replacement characters left over from byte-level decoding
        const output = decodeTextTokens(tokenizers.GPT2, ids).replace(/�/g, '').trim();

        hypeBotBar.html(formatReply(output));
    }
}

jQuery(() => {
    if (!extension_settings.hypebot) {
        extension_settings.hypebot = settings;
    }

    Object.assign(settings, extension_settings.hypebot);
    $('#extensions_settings2').append(renderExtensionTemplate(MODULE_NAME, 'settings'));
    hypeBotBar = $(`<div id="hypeBotBar"></div>`).toggle(settings.enabled);
    $('#send_form').append(hypeBotBar);

    $('#hypebot_enabled').prop('checked', settings.enabled).on('input', () => {
        settings.enabled = $('#hypebot_enabled').prop('checked');
        hypeBotBar.toggle(settings.enabled);
        abortController?.abort();
        saveSettingsDebounced();
    });

    $('#hypebot_name').val(settings.name).on('input', () => {
        settings.name = String($('#hypebot_name').val());
        saveSettingsDebounced();
    });

    eventSource.on(event_types.CHAT_CHANGED, () => onChatEvent(true));
    eventSource.on(event_types.MESSAGE_DELETED, () => onChatEvent(true));
    eventSource.on(event_types.MESSAGE_EDITED, () => onChatEvent(true));
    eventSource.on(event_types.MESSAGE_SENT, () => onChatEvent(false));
    eventSource.on(event_types.MESSAGE_RECEIVED, () => onChatEvent(false));
    eventSource.on(event_types.MESSAGE_SWIPED, () => onChatEvent(false));
});
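The plugin packs GPT-2 token ids into a Uint16Array and base64-encodes its bytes for the NovelAI API, then reverses the process on the response. A standalone Node.js sketch of that round trip; the ids are made up, and Buffer stands in for the browser-side bufferToBase64()/atob() pair used above. Uint16Array suffices because GPT-2 ids fit in 16 bits (vocab size 50257).

const ids = [15496, 995, 49527]  // illustrative GPT-2 ids plus the 49527 stop token
const base64String = Buffer.from(new Uint16Array(ids).buffer).toString('base64')

// Response handling mirrors this in reverse: base64 -> bytes -> Uint16Array -> ids
const bytes = Uint8Array.from(Buffer.from(base64String, 'base64'))
const decodedIds = Array.from(new Uint16Array(bytes.buffer))
console.log(decodedIds)  // [15496, 995, 49527]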
11  public/scripts/extensions/hypebot/manifest.json  (Normal file)
@@ -0,0 +1,11 @@
{
    "display_name": "HypeBot",
    "loading_order": 1000,
    "requires": [],
    "optional": [],
    "js": "index.js",
    "css": "style.css",
    "author": "Cohee#1207",
    "version": "1.0.0",
    "homePage": "https://github.com/SillyTavern/SillyTavern"
}
18  public/scripts/extensions/hypebot/settings.html  (Normal file)
@@ -0,0 +1,18 @@
<div class="hypebot_settings">
    <div class="inline-drawer">
        <div class="inline-drawer-toggle inline-drawer-header">
            <b>HypeBot</b>
            <div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
        </div>
        <div class="inline-drawer-content">
            <div>Show personalized suggestions based on your recent chats using NovelAI's HypeBot engine.</div>
            <small><i>Hint: Save an API key in the NovelAI API settings to use it here.</i></small>
            <label class="checkbox_label" for="hypebot_enabled">
                <input id="hypebot_enabled" type="checkbox" class="checkbox">
                Enabled
            </label>
            <label>Name:</label>
            <input id="hypebot_name" type="text" class="text_pole" placeholder="Goose">
        </div>
    </div>
</div>
17  public/scripts/extensions/hypebot/style.css  (Normal file)
@@ -0,0 +1,17 @@
#hypeBotBar {
    width: 100%;
    max-width: 100%;
    padding: 0.5em;
    white-space: normal;
    font-size: calc(var(--mainFontSize) * 0.85);
    order: 20;
}

.hypebot_nokey {
    text-align: center;
    font-style: italic;
}

.hypebot_name {
    font-weight: 600;
}
@@ -12,6 +12,7 @@
     display: none;
     max-width: 100%;
     overflow-x: auto;
+    order: 10;
 }

 #quickReplies {
@@ -1,7 +1,6 @@
 import { characters, main_api, nai_settings, online_status, this_chid } from "../script.js";
 import { power_user } from "./power-user.js";
 import { encode } from "../lib/gpt-2-3-tokenizer/mod.js";
-import { GPT3BrowserTokenizer } from "../lib/gpt-3-tokenizer/gpt3-tokenizer.js";
 import { chat_completion_sources, oai_settings } from "./openai.js";
 import { groups, selected_group } from "./group-chats.js";
 import { getStringHash } from "./utils.js";

@@ -12,7 +11,7 @@ const TOKENIZER_WARNING_KEY = 'tokenizationWarningShown';

 export const tokenizers = {
     NONE: 0,
-    GPT3: 1,
+    GPT2: 1,
     CLASSIC: 2,
     LLAMA: 3,
     NERD: 4,

@@ -22,7 +21,6 @@ export const tokenizers = {
 };

 const objectStore = new localforage.createInstance({ name: "SillyTavern_ChatCompletions" });
-const gpt3 = new GPT3BrowserTokenizer({ type: 'gpt3' });

 let tokenCache = {};

@@ -94,23 +92,18 @@ function getTokenizerBestMatch() {
 }

 /**
- * Gets the token count for a string using the current model tokenizer.
- * @param {string} str String to tokenize
- * @param {number | undefined} padding Optional padding tokens. Defaults to 0.
+ * Calls the underlying tokenizer model to get the token count for a string.
+ * @param {number} type Tokenizer type.
+ * @param {string} str String to tokenize.
+ * @param {number} padding Number of padding tokens.
  * @returns {number} Token count.
  */
-export function getTokenCount(str, padding = undefined) {
-    /**
-     * Calculates the token count for a string.
-     * @param {number} [type] Tokenizer type.
-     * @returns {number} Token count.
-     */
-    function calculate(type) {
+function callTokenizer(type, str, padding) {
     switch (type) {
         case tokenizers.NONE:
             return guesstimate(str) + padding;
-        case tokenizers.GPT3:
-            return gpt3.encode(str).bpe.length + padding;
+        case tokenizers.GPT2:
+            return countTokensRemote('/tokenize_gpt2', str, padding);
         case tokenizers.CLASSIC:
             return encode(str).length + padding;
         case tokenizers.LLAMA:

@@ -123,10 +116,17 @@ export function getTokenCount(str, padding = undefined) {
             return countTokensRemote('/tokenize_via_api', str, padding);
         default:
             console.warn("Unknown tokenizer type", type);
-            return calculate(tokenizers.NONE);
+            return callTokenizer(tokenizers.NONE, str, padding);
     }
 }

+/**
+ * Gets the token count for a string using the current model tokenizer.
+ * @param {string} str String to tokenize
+ * @param {number | undefined} padding Optional padding tokens. Defaults to 0.
+ * @returns {number} Token count.
+ */
+export function getTokenCount(str, padding = undefined) {
     if (typeof str !== 'string' || !str?.length) {
         return 0;
     }

@@ -159,7 +159,7 @@ export function getTokenCount(str, padding = undefined) {
         return cacheObject[cacheKey];
     }

-    const result = calculate(tokenizerType);
+    const result = callTokenizer(tokenizerType, str, padding);

     if (isNaN(result)) {
         console.warn("Token count calculation returned NaN");

@@ -350,6 +350,12 @@ function countTokensRemote(endpoint, str, padding) {
     return tokenCount + padding;
 }

+/**
+ * Calls the underlying tokenizer model to encode a string to tokens.
+ * @param {string} endpoint API endpoint.
+ * @param {string} str String to tokenize.
+ * @returns {number[]} Array of token ids.
+ */
 function getTextTokensRemote(endpoint, str) {
     let ids = [];
     jQuery.ajax({

@@ -366,8 +372,37 @@ function getTextTokensRemote(endpoint, str) {
     return ids;
 }

+/**
+ * Calls the underlying tokenizer model to decode token ids to text.
+ * @param {string} endpoint API endpoint.
+ * @param {number[]} ids Array of token ids
+ */
+function decodeTextTokensRemote(endpoint, ids) {
+    let text = '';
+    jQuery.ajax({
+        async: false,
+        type: 'POST',
+        url: endpoint,
+        data: JSON.stringify({ ids: ids }),
+        dataType: "json",
+        contentType: "application/json",
+        success: function (data) {
+            text = data.text;
+        }
+    });
+    return text;
+}
+
+/**
+ * Encodes a string to tokens using the remote server API.
+ * @param {number} tokenizerType Tokenizer type.
+ * @param {string} str String to tokenize.
+ * @returns {number[]} Array of token ids.
+ */
 export function getTextTokens(tokenizerType, str) {
     switch (tokenizerType) {
+        case tokenizers.GPT2:
+            return getTextTokensRemote('/tokenize_gpt2', str);
         case tokenizers.LLAMA:
             return getTextTokensRemote('/tokenize_llama', str);
         case tokenizers.NERD:

@@ -380,6 +415,27 @@ export function getTextTokens(tokenizerType, str) {
     }
 }

+/**
+ * Decodes token ids to text using the remote server API.
+ * @param {any} tokenizerType Tokenizer type.
+ * @param {number[]} ids Array of token ids
+ */
+export function decodeTextTokens(tokenizerType, ids) {
+    switch (tokenizerType) {
+        case tokenizers.GPT2:
+            return decodeTextTokensRemote('/decode_gpt2', ids);
+        case tokenizers.LLAMA:
+            return decodeTextTokensRemote('/decode_llama', ids);
+        case tokenizers.NERD:
+            return decodeTextTokensRemote('/decode_nerdstash', ids);
+        case tokenizers.NERD2:
+            return decodeTextTokensRemote('/decode_nerdstash_v2', ids);
+        default:
+            console.warn("Calling decodeTextTokens with unsupported tokenizer type", tokenizerType);
+            return '';
+    }
+}
+
 jQuery(async () => {
     await loadTokenCache();
 });
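With the decode endpoints wired up, getTextTokens and decodeTextTokens form a round trip through the server-side tokenizers. A usage sketch (the import path and ids shown are illustrative):

import { getTextTokens, decodeTextTokens, tokenizers } from './tokenizers.js'

const ids = getTextTokens(tokenizers.GPT2, 'Hello world')  // e.g. [15496, 995]
const text = decodeTextTokens(tokenizers.GPT2, ids)        // 'Hello world'

Both helpers are synchronous because the underlying jQuery calls pass async: false, which keeps call sites simple at the cost of blocking the UI thread while the request is in flight.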
@@ -45,6 +45,17 @@ export function getSortableDelay() {
     return isMobile() ? 750 : 50;
 }

+export async function bufferToBase64(buffer) {
+    // use a FileReader to generate a base64 data URI:
+    const base64url = await new Promise(resolve => {
+        const reader = new FileReader()
+        reader.onload = () => resolve(reader.result)
+        reader.readAsDataURL(new Blob([buffer]))
+    });
+    // remove the `data:...;base64,` part from the start
+    return base64url.slice(base64url.indexOf(',') + 1);
+}
+
 /**
  * Rearranges an array in a random order.
  * @param {any[]} array The array to shuffle.
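A quick check of the new bufferToBase64 helper. The FileReader/data-URI approach avoids building one giant intermediate string, which a naive btoa(String.fromCharCode(...bytes)) would need and which can overflow the call-argument limit on large buffers:

// Uint16Array [1, 2, 3] serializes (little-endian) as bytes 01 00 02 00 03 00
bufferToBase64(new Uint16Array([1, 2, 3]).buffer).then(b64 => {
    console.log(b64)  // "AQACAAMA"
})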
@@ -531,6 +531,7 @@ hr {
     column-gap: 5px;
     font-size: var(--bottomFormIconSize);
     overflow: hidden;
+    order: 1003;
 }

 #send_but_sheld>div {

@@ -581,6 +582,7 @@ hr {
     transition: 0.3s;
     display: flex;
     align-items: center;
+    order: 1001;
 }

 .font-family-reset {

@@ -904,6 +906,7 @@ select {
     margin: 0;
     text-shadow: 0px 0px calc(var(--shadowWidth) * 1px) var(--SmartThemeShadowColor);
     flex: 1;
+    order: 1002;
 }

 .text_pole::placeholder {
80  server.js
@@ -1894,8 +1894,7 @@ app.post("/generate_novelai", jsonParser, async function (request, response_gene

     const novelai = require('./src/novelai');
     const isNewModel = (request.body.model.includes('clio') || request.body.model.includes('kayra'));
-    const isKrake = request.body.model.includes('krake');
-    const badWordsList = (isNewModel ? novelai.badWordsList : (isKrake ? novelai.krakeBadWordsList : novelai.euterpeBadWordsList)).slice();
+    const badWordsList = novelai.getBadWordsList(request.body.model);

     // Add customized bad words for Clio and Kayra
     if (isNewModel && Array.isArray(request.body.bad_words_ids)) {

@@ -1907,7 +1906,7 @@ app.post("/generate_novelai", jsonParser, async function (request, response_gene
     }

     // Add default biases for dinkus and asterism
-    const logit_bias_exp = isNewModel ? novelai.logitBiasExp.slice() : null;
+    const logit_bias_exp = isNewModel ? novelai.logitBiasExp.slice() : [];

     if (Array.isArray(logit_bias_exp) && Array.isArray(request.body.logit_bias_exp)) {
         logit_bias_exp.push(...request.body.logit_bias_exp);

@@ -1942,7 +1941,7 @@ app.post("/generate_novelai", jsonParser, async function (request, response_gene
         "logit_bias_exp": logit_bias_exp,
         "generate_until_sentence": request.body.generate_until_sentence,
         "use_cache": request.body.use_cache,
-        "use_string": true,
+        "use_string": request.body.use_string ?? true,
         "return_full_text": request.body.return_full_text,
         "prefix": request.body.prefix,
         "order": request.body.order

@@ -3845,8 +3844,9 @@ function getPresetSettingsByAPI(apiId) {
     }
 }

-function createTokenizationHandler(getTokenizerFn) {
+function createSentencepieceEncodingHandler(getTokenizerFn) {
     return async function (request, response) {
         try {
             if (!request.body) {
                 return response.sendStatus(400);
             }

@@ -3855,12 +3855,76 @@ function createTokenizationHandler(getTokenizerFn) {
             const tokenizer = getTokenizerFn();
             const { ids, count } = await countSentencepieceTokens(tokenizer, text);
             return response.send({ ids, count });
         } catch (error) {
             console.log(error);
             return response.send({ ids: [], count: 0 });
         }
     };
 }

-app.post("/tokenize_llama", jsonParser, createTokenizationHandler(() => spp_llama));
-app.post("/tokenize_nerdstash", jsonParser, createTokenizationHandler(() => spp_nerd));
-app.post("/tokenize_nerdstash_v2", jsonParser, createTokenizationHandler(() => spp_nerd_v2));
+function createSentencepieceDecodingHandler(getTokenizerFn) {
+    return async function (request, response) {
+        try {
+            if (!request.body) {
+                return response.sendStatus(400);
+            }
+
+            const ids = request.body.ids || [];
+            const tokenizer = getTokenizerFn();
+            const text = await tokenizer.decodeIds(ids);
+            return response.send({ text });
+        } catch (error) {
+            console.log(error);
+            return response.send({ text: '' });
+        }
+    };
+}
+
+function createTiktokenEncodingHandler(modelId) {
+    return async function (request, response) {
+        try {
+            if (!request.body) {
+                return response.sendStatus(400);
+            }
+
+            const text = request.body.text || '';
+            const tokenizer = getTiktokenTokenizer(modelId);
+            const tokens = Object.values(tokenizer.encode(text));
+            return response.send({ ids: tokens, count: tokens.length });
+        } catch (error) {
+            console.log(error);
+            return response.send({ ids: [], count: 0 });
+        }
+    }
+}
+
+function createTiktokenDecodingHandler(modelId) {
+    return async function (request, response) {
+        try {
+            if (!request.body) {
+                return response.sendStatus(400);
+            }
+
+            const ids = request.body.ids || [];
+            const tokenizer = getTiktokenTokenizer(modelId);
+            const textBytes = tokenizer.decode(new Uint32Array(ids));
+            const text = new TextDecoder().decode(textBytes);
+            return response.send({ text });
+        } catch (error) {
+            console.log(error);
+            return response.send({ text: '' });
+        }
+    }
+}
+
+app.post("/tokenize_llama", jsonParser, createSentencepieceEncodingHandler(() => spp_llama));
+app.post("/tokenize_nerdstash", jsonParser, createSentencepieceEncodingHandler(() => spp_nerd));
+app.post("/tokenize_nerdstash_v2", jsonParser, createSentencepieceEncodingHandler(() => spp_nerd_v2));
+app.post("/tokenize_gpt2", jsonParser, createTiktokenEncodingHandler('gpt2'));
+app.post("/decode_llama", jsonParser, createSentencepieceDecodingHandler(() => spp_llama));
+app.post("/decode_nerdstash", jsonParser, createSentencepieceDecodingHandler(() => spp_nerd));
+app.post("/decode_nerdstash_v2", jsonParser, createSentencepieceDecodingHandler(() => spp_nerd_v2));
+app.post("/decode_gpt2", jsonParser, createTiktokenDecodingHandler('gpt2'));
+
 app.post("/tokenize_via_api", jsonParser, async function (request, response) {
     if (!request.body) {
         return response.sendStatus(400);
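A sketch of how a client can exercise one of the new decode routes. The headers are simplified here; a real SillyTavern caller would use getRequestHeaders() as index.js does for /generate_novelai, and the ids are illustrative:

// e.g. from the browser while the server is running (inside an async context)
const res = await fetch('/decode_gpt2', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ ids: [15496, 995] }),
})
const { text } = await res.json()
console.log(text)  // e.g. 'Hello world'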
@@ -52,6 +52,25 @@ const badWordsList = [
     [20765], [30702], [10691], [49333], [1266], [26523], [41471], [2936], [85, 85], [49332], [7286], [1115]
 ]

+const hypeBotBadWordsList = [
+    [58], [60], [90], [92], [685], [1391], [1782], [2361], [3693], [4083], [4357], [4895],
+    [5512], [5974], [7131], [8183], [8351], [8762], [8964], [8973], [9063], [11208],
+    [11709], [11907], [11919], [12878], [12962], [13018], [13412], [14631], [14692],
+    [14980], [15090], [15437], [16151], [16410], [16589], [17241], [17414], [17635],
+    [17816], [17912], [18083], [18161], [18477], [19629], [19779], [19953], [20520],
+    [20598], [20662], [20740], [21476], [21737], [22133], [22241], [22345], [22935],
+    [23330], [23785], [23834], [23884], [25295], [25597], [25719], [25787], [25915],
+    [26076], [26358], [26398], [26894], [26933], [27007], [27422], [28013], [29164],
+    [29225], [29342], [29565], [29795], [30072], [30109], [30138], [30866], [31161],
+    [31478], [32092], [32239], [32509], [33116], [33250], [33761], [34171], [34758],
+    [34949], [35944], [36338], [36463], [36563], [36786], [36796], [36937], [37250],
+    [37913], [37981], [38165], [38362], [38381], [38430], [38892], [39850], [39893],
+    [41832], [41888], [42535], [42669], [42785], [42924], [43839], [44438], [44587],
+    [44926], [45144], [45297], [46110], [46570], [46581], [46956], [47175], [47182],
+    [47527], [47715], [48600], [48683], [48688], [48874], [48999], [49074], [49082],
+    [49146], [49946], [10221], [4841], [1427], [2602, 834], [29343], [37405], [35780], [2602], [50256]
+];
+
 // Used for phrase repetition penalty
 const repPenaltyAllowList = [
     [49256, 49264, 49231, 49230, 49287, 85, 49255, 49399, 49262, 336, 333, 432, 363, 468, 492, 745, 401, 426, 623, 794,

@@ -66,10 +85,41 @@ const logitBiasExp = [
     { "sequence": [21], "bias": -0.08, "ensure_sequence_finish": false, "generate_once": false }
 ]

+const hypeBotLogitBiasExp = [
+    { "sequence": [8162], "bias": -0.12, "ensure_sequence_finish": false, "generate_once": false },
+    { "sequence": [46256, 224], "bias": -0.12, "ensure_sequence_finish": false, "generate_once": false }
+];
+
+function getBadWordsList(model) {
+    let list = [];
+
+    if (model.includes('euterpe')) {
+        list = euterpeBadWordsList;
+    }
+
+    if (model.includes('krake')) {
+        list = krakeBadWordsList;
+    }
+
+    if (model.includes('hypebot')) {
+        list = hypeBotBadWordsList;
+    }
+
+    if (model.includes('clio') || model.includes('kayra')) {
+        list = badWordsList;
+    }
+
+    // Clone the list so we don't modify the original
+    return list.slice();
+}
+
 module.exports = {
     euterpeBadWordsList,
     krakeBadWordsList,
     badWordsList,
     repPenaltyAllowList,
-    logitBiasExp
+    logitBiasExp,
+    hypeBotBadWordsList,
+    hypeBotLogitBiasExp,
+    getBadWordsList,
 };
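getBadWordsList() replaces the inline ternary chain that server.js used before: unknown models fall through to an empty list, and the trailing .slice() gives callers a private copy they can extend. A usage sketch (the pushed entry is illustrative):

const novelai = require('./src/novelai');

const badWords = novelai.getBadWordsList('hypebot');   // copy of hypeBotBadWordsList
badWords.push([198]);                                  // safe: the module's own list is untouched
console.log(novelai.getBadWordsList('mystery-model')); // [] for unrecognized models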