Mirror of https://github.com/SillyTavern/SillyTavern.git, synced 2025-06-05 21:59:27 +02:00
Merge branch 'staging' into continue-from-reasoning
@@ -1962,7 +1962,7 @@
 </span>
 </div>
 </div>
-<div class="range-block" data-source="openai,cohere,mistralai,custom,claude,openrouter,groq,deepseek,makersuite">
+<div class="range-block" data-source="openai,cohere,mistralai,custom,claude,openrouter,groq,deepseek,makersuite,ai21">
 <label for="openai_function_calling" class="checkbox_label flexWrap widthFreeExpand">
 <input id="openai_function_calling" type="checkbox" />
 <span data-i18n="Enable function calling">Enable function calling</span>
@@ -3078,7 +3078,15 @@
 <div>
 <h4 data-i18n="AI21 Model">AI21 Model</h4>
 <select id="model_ai21_select">
-<optgroup label="Jamba 1.5">
+<optgroup label="Jamba (Latest)">
+<option value="jamba-mini">jamba-mini</option>
+<option value="jamba-large">jamba-large</option>
+</optgroup>
+<optgroup label="Jamba 1.6">
+<option value="jamba-1.6-mini">jamba-1.6-mini</option>
+<option value="jamba-1.6-large">jamba-1.6-large</option>
+</optgroup>
+<optgroup label="Jamba 1.5 (Deprecated)">
 <option value="jamba-1.5-mini">jamba-1.5-mini</option>
 <option value="jamba-1.5-large">jamba-1.5-large</option>
 </optgroup>
@@ -271,7 +271,7 @@ import { initSettingsSearch } from './scripts/setting-search.js';
 import { initBulkEdit } from './scripts/bulk-edit.js';
 import { deriveTemplatesFromChatTemplate } from './scripts/chat-templates.js';
 import { getContext } from './scripts/st-context.js';
-import { extractReasoningFromData, initReasoning, PromptReasoning, ReasoningHandler, removeReasoningFromString, updateReasoningUI } from './scripts/reasoning.js';
+import { extractReasoningFromData, initReasoning, parseReasoningInSwipes, PromptReasoning, ReasoningHandler, removeReasoningFromString, updateReasoningUI } from './scripts/reasoning.js';
 import { accountStorage } from './scripts/util/AccountStorage.js';

 // API OBJECT FOR EXTERNAL WIRING
@@ -3346,15 +3346,18 @@ class StreamingProcessor {
 if (Array.isArray(this.swipes) && this.swipes.length > 0) {
 const message = chat[messageId];
+const swipeInfoExtra = structuredClone(message.extra ?? {});
+delete swipeInfoExtra.token_count;
+delete swipeInfoExtra.reasoning;
+delete swipeInfoExtra.reasoning_duration;
 const swipeInfo = {
 send_date: message.send_date,
 gen_started: message.gen_started,
 gen_finished: message.gen_finished,
-extra: structuredClone(message.extra),
+extra: swipeInfoExtra,
 };
-const swipeInfoArray = [];
-swipeInfoArray.length = this.swipes.length;
-swipeInfoArray.fill(swipeInfo);
+const swipeInfoArray = Array(this.swipes.length).fill().map(() => structuredClone(swipeInfo));
+parseReasoningInSwipes(this.swipes, swipeInfoArray, message.extra?.reasoning_duration);
 chat[messageId].swipes.push(...this.swipes);
 chat[messageId].swipe_info.push(...swipeInfoArray);
 }
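The swipe_info change above fixes a shared-reference bug: `Array(n).fill(obj)` places the same object in every slot, so reasoning parsed into one swipe's `extra` would show up in all of them. A standalone sketch of the difference:

// Minimal sketch of why the swipe_info change matters, not SillyTavern code.
const swipeInfo = { extra: {} };

// Before: every slot holds the SAME object reference.
const shared = Array(3).fill(swipeInfo);
shared[0].extra.reasoning = 'oops';
console.log(shared[1].extra.reasoning); // 'oops' — leaked into every swipe

// After: each slot gets its own deep copy, so per-swipe edits stay isolated.
const cloned = Array(3).fill().map(() => structuredClone(swipeInfo));
cloned[0].extra.reasoning = 'only here';
console.log(cloned[1].extra.reasoning); // undefined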
@@ -3366,6 +3369,7 @@ class StreamingProcessor {
 await eventSource.emit(event_types.IMPERSONATE_READY, text);
 }

+syncMesToSwipe(messageId);
 saveLogprobsForActiveMessage(this.messageLogprobs.filter(Boolean), this.continueMessage);
 await saveChatConditional();
 unblockGeneration();
@@ -6117,15 +6121,18 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes,
 }

 if (Array.isArray(swipes) && swipes.length > 0) {
+const swipeInfoExtra = structuredClone(item.extra ?? {});
+delete swipeInfoExtra.token_count;
+delete swipeInfoExtra.reasoning;
+delete swipeInfoExtra.reasoning_duration;
 const swipeInfo = {
 send_date: item.send_date,
 gen_started: item.gen_started,
 gen_finished: item.gen_finished,
-extra: structuredClone(item.extra),
+extra: swipeInfoExtra,
 };
-const swipeInfoArray = [];
-swipeInfoArray.length = swipes.length;
-swipeInfoArray.fill(swipeInfo, 0, swipes.length);
+const swipeInfoArray = Array(swipes.length).fill().map(() => structuredClone(swipeInfo));
+parseReasoningInSwipes(swipes, swipeInfoArray, item.extra?.reasoning_duration);
 item.swipes.push(...swipes);
 item.swipe_info.push(...swipeInfoArray);
 }
@@ -1070,7 +1070,7 @@ export async function installExtension(url, global) {
 toastr.success(t`Extension '${response.display_name}' by ${response.author} (version ${response.version}) has been installed successfully!`, t`Extension installation successful`);
 console.debug(`Extension "${response.display_name}" has been installed successfully at ${response.extensionPath}`);
 await loadExtensionSettings({}, false, false);
-await eventSource.emit(event_types.EXTENSION_SETTINGS_LOADED);
+await eventSource.emit(event_types.EXTENSION_SETTINGS_LOADED, response);
 }

 /**
@@ -19,6 +19,7 @@ import {
 modules,
 renderExtensionTemplateAsync,
 doExtrasFetch, getApiUrl,
+openThirdPartyExtensionMenu,
 } from '../../extensions.js';
 import { collapseNewlines, registerDebugFunction } from '../../power-user.js';
 import { SECRET_KEYS, secret_state, writeSecret } from '../../secrets.js';
@@ -34,6 +35,7 @@ import { SlashCommandEnumValue, enumTypes } from '../../slash-commands/SlashComm
 import { slashCommandReturnHelper } from '../../slash-commands/SlashCommandReturnHelper.js';
 import { callGenericPopup, POPUP_RESULT, POPUP_TYPE } from '../../popup.js';
 import { generateWebLlmChatPrompt, isWebLlmSupported } from '../shared.js';
+import { WebLlmVectorProvider } from './webllm.js';

 /**
  * @typedef {object} HashedMessage
@@ -60,6 +62,7 @@ const settings = {
 ollama_model: 'mxbai-embed-large',
 ollama_keep: false,
 vllm_model: '',
+webllm_model: '',
 summarize: false,
 summarize_sent: false,
 summary_source: 'main',
@@ -103,7 +106,7 @@ const settings = {
 };

 const moduleWorker = new ModuleWorkerWrapper(synchronizeChat);
+const webllmProvider = new WebLlmVectorProvider();
 const cachedSummaries = new Map();

 /**
@@ -373,6 +376,8 @@ async function synchronizeChat(batchSize = 5) {
 return 'Vectorization Source Model is required, but not set.';
 case 'extras_module_missing':
 return 'Extras API must provide an "embeddings" module.';
+case 'webllm_not_supported':
+return 'WebLLM extension is not installed or the model is not set.';
 default:
 return 'Check server console for more details';
 }
@@ -747,14 +752,15 @@ async function getQueryText(chat, initiator) {

 /**
  * Gets common body parameters for vector requests.
- * @returns {object}
+ * @param {object} args Additional arguments
+ * @returns {object} Request body
  */
-function getVectorsRequestBody() {
-const body = {};
+function getVectorsRequestBody(args = {}) {
+const body = Object.assign({}, args);
 switch (settings.source) {
 case 'extras':
 body.extrasUrl = extension_settings.apiUrl;
 body.extrasKey = extension_settings.apiKey;
 break;
 case 'togetherai':
 body.model = extension_settings.vectors.togetherai_model;
@@ -777,12 +783,30 @@ function getVectorsRequestBody() {
 body.apiUrl = textgenerationwebui_settings.server_urls[textgen_types.VLLM];
 body.model = extension_settings.vectors.vllm_model;
 break;
+case 'webllm':
+body.model = extension_settings.vectors.webllm_model;
+break;
 default:
 break;
 }
 return body;
 }

+/**
+ * Gets additional arguments for vector requests.
+ * @param {string[]} items Items to embed
+ * @returns {Promise<object>} Additional arguments
+ */
+async function getAdditionalArgs(items) {
+const args = {};
+switch (settings.source) {
+case 'webllm':
+args.embeddings = await createWebLlmEmbeddings(items);
+break;
+}
+return args;
+}

 /**
  * Gets the saved hashes for a collection
  * @param {string} collectionId
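Together, `getAdditionalArgs` and the new `args` parameter let client-computed data ride along in the request body; for WebLLM the embeddings are produced in the browser, so the server only receives them. A hypothetical condensation of the insert path (`buildInsertPayload` is not a function in this diff):

// Hypothetical condensed flow: WebLLM embeds client-side, so the vectors are
// shipped to the server inside the request body instead of computed there.
async function buildInsertPayload(collectionId, items) {
    const args = await getAdditionalArgs(items.map(x => x.text)); // { embeddings: { [text]: number[] } } for webllm
    return JSON.stringify({
        ...getVectorsRequestBody(args), // args seed the body, then source settings are layered on
        collectionId,
        items,
        source: settings.source,
    });
}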
@@ -816,11 +840,12 @@ async function getSavedHashes(collectionId) {
 async function insertVectorItems(collectionId, items) {
 throwIfSourceInvalid();

+const args = await getAdditionalArgs(items.map(x => x.text));
 const response = await fetch('/api/vector/insert', {
 method: 'POST',
 headers: getRequestHeaders(),
 body: JSON.stringify({
-...getVectorsRequestBody(),
+...getVectorsRequestBody(args),
 collectionId: collectionId,
 items: items,
 source: settings.source,
@@ -858,6 +883,10 @@ function throwIfSourceInvalid() {
 if (settings.source === 'extras' && !modules.includes('embeddings')) {
 throw new Error('Vectors: Embeddings module missing', { cause: 'extras_module_missing' });
 }
+
+if (settings.source === 'webllm' && (!isWebLlmSupported() || !settings.webllm_model)) {
+throw new Error('Vectors: WebLLM is not supported', { cause: 'webllm_not_supported' });
+}
 }

 /**
@@ -890,11 +919,12 @@ async function deleteVectorItems(collectionId, hashes) {
 * @returns {Promise<{ hashes: number[], metadata: object[]}>} - Hashes of the results
 */
 async function queryCollection(collectionId, searchText, topK) {
+const args = await getAdditionalArgs([searchText]);
 const response = await fetch('/api/vector/query', {
 method: 'POST',
 headers: getRequestHeaders(),
 body: JSON.stringify({
-...getVectorsRequestBody(),
+...getVectorsRequestBody(args),
 collectionId: collectionId,
 searchText: searchText,
 topK: topK,
@@ -919,11 +949,12 @@ async function queryCollection(collectionId, searchText, topK) {
 * @returns {Promise<Record<string, { hashes: number[], metadata: object[] }>>} - Results mapped to collection IDs
 */
 async function queryMultipleCollections(collectionIds, searchText, topK, threshold) {
+const args = await getAdditionalArgs([searchText]);
 const response = await fetch('/api/vector/query-multi', {
 method: 'POST',
 headers: getRequestHeaders(),
 body: JSON.stringify({
-...getVectorsRequestBody(),
+...getVectorsRequestBody(args),
 collectionIds: collectionIds,
 searchText: searchText,
 topK: topK,
@@ -1039,6 +1070,72 @@ function toggleSettings() {
 $('#llamacpp_vectorsModel').toggle(settings.source === 'llamacpp');
 $('#vllm_vectorsModel').toggle(settings.source === 'vllm');
 $('#nomicai_apiKey').toggle(settings.source === 'nomicai');
+$('#webllm_vectorsModel').toggle(settings.source === 'webllm');
+if (settings.source === 'webllm') {
+loadWebLlmModels();
+}
 }
+
+/**
+ * Executes a function with WebLLM error handling.
+ * @param {function(): Promise<T>} func Function to execute
+ * @returns {Promise<T>}
+ * @template T
+ */
+async function executeWithWebLlmErrorHandling(func) {
+try {
+return await func();
+} catch (error) {
+console.log('Vectors: Failed to load WebLLM models', error);
+if (!(error instanceof Error)) {
+return;
+}
+switch (error.cause) {
+case 'webllm-not-available':
+toastr.warning('WebLLM is not available. Please install the extension.', 'WebLLM not installed');
+break;
+case 'webllm-not-updated':
+toastr.warning('The installed extension version does not support embeddings.', 'WebLLM update required');
+break;
+}
+}
+}
+
+/**
+ * Loads and displays WebLLM models in the settings.
+ * @returns {Promise<void>}
+ */
+function loadWebLlmModels() {
+return executeWithWebLlmErrorHandling(() => {
+const models = webllmProvider.getModels();
+$('#vectors_webllm_model').empty();
+for (const model of models) {
+$('#vectors_webllm_model').append($('<option>', { value: model.id, text: model.toString() }));
+}
+if (!settings.webllm_model || !models.some(x => x.id === settings.webllm_model)) {
+if (models.length) {
+settings.webllm_model = models[0].id;
+}
+}
+$('#vectors_webllm_model').val(settings.webllm_model);
+return Promise.resolve();
+});
+}
+
+/**
+ * Creates WebLLM embeddings for a list of items.
+ * @param {string[]} items Items to embed
+ * @returns {Promise<Record<string, number[]>>} Calculated embeddings
+ */
+async function createWebLlmEmbeddings(items) {
+return executeWithWebLlmErrorHandling(async () => {
+const embeddings = await webllmProvider.embedTexts(items, settings.webllm_model);
+const result = /** @type {Record<string, number[]>} */ ({});
+for (let i = 0; i < items.length; i++) {
+result[items[i]] = embeddings[i];
+}
+return result;
+});
+}

 async function onPurgeClick() {
@@ -1567,6 +1664,30 @@ jQuery(async () => {
 $('#dialogue_popup_input').val(presetModel);
 });

+$('#vectors_webllm_install').on('click', (e) => {
+e.preventDefault();
+e.stopPropagation();
+
+if (Object.hasOwn(SillyTavern, 'llm')) {
+toastr.info('WebLLM is already installed');
+return;
+}
+
+openThirdPartyExtensionMenu('https://github.com/SillyTavern/Extension-WebLLM');
+});
+
+$('#vectors_webllm_model').on('input', () => {
+settings.webllm_model = String($('#vectors_webllm_model').val());
+Object.assign(extension_settings.vectors, settings);
+saveSettingsDebounced();
+});
+
+$('#vectors_webllm_load').on('click', async () => {
+if (!settings.webllm_model) return;
+await webllmProvider.loadModel(settings.webllm_model);
+toastr.success('WebLLM model loaded');
+});
+
 $('#api_key_nomicai').toggleClass('success', !!secret_state[SECRET_KEYS.NOMICAI]);

 toggleSettings();
@@ -1578,6 +1699,11 @@ jQuery(async () => {
 eventSource.on(event_types.CHAT_DELETED, purgeVectorIndex);
 eventSource.on(event_types.GROUP_CHAT_DELETED, purgeVectorIndex);
 eventSource.on(event_types.FILE_ATTACHMENT_DELETED, purgeFileVectorIndex);
+eventSource.on(event_types.EXTENSION_SETTINGS_LOADED, async (manifest) => {
+if (settings.source === 'webllm' && manifest?.display_name === 'WebLLM') {
+await loadWebLlmModels();
+}
+});

 SlashCommandParser.addCommandObject(SlashCommand.fromProps({
 name: 'db-ingest',
@@ -21,8 +21,24 @@
 <option value="openai">OpenAI</option>
 <option value="togetherai">TogetherAI</option>
 <option value="vllm">vLLM</option>
+<option value="webllm" data-i18n="WebLLM Extension">WebLLM Extension</option>
 </select>
 </div>
+<div class="flex-container flexFlowColumn" id="webllm_vectorsModel">
+<label for="vectors_webllm_model" data-i18n="Vectorization Model">
+Vectorization Model
+</label>
+<div class="flex-container">
+<select id="vectors_webllm_model" class="text_pole flex1">
+</select>
+<div id="vectors_webllm_load" class="menu_button menu_button_icon" title="Verify and load the selected model.">
+<i class="fa-solid fa-check-to-slot"></i>
+</div>
+</div>
+<div>
+Requires the WebLLM extension to be installed. Click <a href="#" id="vectors_webllm_install">here</a> to install.
+</div>
+</div>
 <div class="flex-container flexFlowColumn" id="ollama_vectorsModel">
 <label for="vectors_ollama_model" data-i18n="Vectorization Model">
 Vectorization Model
public/scripts/extensions/vectors/webllm.js (new file, 64 lines)
@@ -0,0 +1,64 @@
+export class WebLlmVectorProvider {
+/** @type {object?} WebLLM engine */
+#engine = null;
+
+constructor() {
+this.#engine = null;
+}
+
+/**
+ * Check if WebLLM is available and up-to-date
+ * @throws {Error} If WebLLM is not available or not up-to-date
+ */
+#checkWebLlm() {
+if (!Object.hasOwn(SillyTavern, 'llm')) {
+throw new Error('WebLLM is not available', { cause: 'webllm-not-available' });
+}
+
+if (typeof SillyTavern.llm.generateEmbedding !== 'function') {
+throw new Error('WebLLM is not updated', { cause: 'webllm-not-updated' });
+}
+}
+
+/**
+ * Initialize the engine with a model.
+ * @param {string} modelId Model ID to initialize the engine with
+ * @returns {Promise<void>} Promise that resolves when the engine is initialized
+ */
+#initEngine(modelId) {
+this.#checkWebLlm();
+if (!this.#engine) {
+this.#engine = SillyTavern.llm.getEngine();
+}
+
+return this.#engine.loadModel(modelId);
+}
+
+/**
+ * Get available models.
+ * @returns {{id:string, toString: function(): string}[]} Array of available models
+ */
+getModels() {
+this.#checkWebLlm();
+return SillyTavern.llm.getEmbeddingModels();
+}
+
+/**
+ * Generate embeddings for a list of texts.
+ * @param {string[]} texts Array of texts to generate embeddings for
+ * @param {string} modelId Model to use for generating embeddings
+ * @returns {Promise<number[][]>} Array of embeddings for each text
+ */
+async embedTexts(texts, modelId) {
+await this.#initEngine(modelId);
+return this.#engine.generateEmbedding(texts);
+}
+
+/**
+ * Loads a model into the engine.
+ * @param {string} modelId Model ID to load
+ */
+async loadModel(modelId) {
+await this.#initEngine(modelId);
+}
+}
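A hedged usage sketch of the new provider; it assumes the WebLLM extension is installed so `SillyTavern.llm` exists (otherwise `#checkWebLlm` throws with the causes handled by the vectors extension above):

// Hypothetical usage, mirroring what the vectors extension does.
const provider = new WebLlmVectorProvider();
const models = provider.getModels();    // throws if SillyTavern.llm is absent or outdated
const modelId = models[0]?.id;
const [vector] = await provider.embedTexts(['hello world'], modelId);
console.log(vector.length);             // embedding dimensionality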
@@ -337,7 +337,7 @@ const default_settings = {
 openai_model: 'gpt-4-turbo',
 claude_model: 'claude-3-5-sonnet-20240620',
 google_model: 'gemini-1.5-pro',
-ai21_model: 'jamba-1.5-large',
+ai21_model: 'jamba-1.6-large',
 mistralai_model: 'mistral-large-latest',
 cohere_model: 'command-r-plus',
 perplexity_model: 'sonar-pro',
@@ -417,7 +417,7 @@ const oai_settings = {
 openai_model: 'gpt-4-turbo',
 claude_model: 'claude-3-5-sonnet-20240620',
 google_model: 'gemini-1.5-pro',
-ai21_model: 'jamba-1.5-large',
+ai21_model: 'jamba-1.6-large',
 mistralai_model: 'mistral-large-latest',
 cohere_model: 'command-r-plus',
 perplexity_model: 'sonar-pro',
@@ -2027,12 +2027,16 @@ async function sendOpenAIRequest(type, messages, signal) {
 generate_data['logprobs'] = 5;
 }

-// Remove logit bias, logprobs and stop strings if it's not supported by the model
-if (isOAI && oai_settings.openai_model.includes('vision') || isOpenRouter && oai_settings.openrouter_model.includes('vision') || isOAI && oai_settings.openai_model.includes('gpt-4.5-preview')) {
+// Remove logit bias/logprobs/stop-strings if not supported by the model
+const isVision = (m) => ['gpt', 'vision'].every(x => m.includes(x));
+if (isOAI && isVision(oai_settings.openai_model) || isOpenRouter && isVision(oai_settings.openrouter_model)) {
 delete generate_data.logit_bias;
 delete generate_data.stop;
 delete generate_data.logprobs;
 }
+if (isOAI && oai_settings.openai_model.includes('gpt-4.5-preview') || isOpenRouter && oai_settings.openrouter_model.includes('gpt-4.5-preview')) {
+delete generate_data.logprobs;
+}

 if (isClaude) {
 generate_data['top_k'] = Number(oai_settings.top_k_openai);
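The extracted `isVision` predicate requires both substrings, which keeps the match tight. A quick illustration of its behavior:

// Behavior of the predicate on a few illustrative model IDs.
const isVision = (m) => ['gpt', 'vision'].every(x => m.includes(x));
console.log(isVision('gpt-4-vision-preview')); // true — both substrings present
console.log(isVision('gpt-4-turbo'));          // false — no 'vision'
console.log(isVision('some-vision-model'));    // false — no 'gpt'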
@@ -3251,7 +3255,7 @@ function loadOpenAISettings(data, settings) {
 }

 if (oai_settings.ai21_model.startsWith('j2-')) {
-oai_settings.ai21_model = 'jamba-1.5-large';
+oai_settings.ai21_model = 'jamba-1.6-large';
 }

 if (settings.wrap_in_quotes !== undefined) oai_settings.wrap_in_quotes = !!settings.wrap_in_quotes;
@@ -4208,7 +4212,7 @@ async function onModelChange() {

 if ($(this).is('#model_ai21_select')) {
 if (value === '' || value.startsWith('j2-')) {
-value = 'jamba-1.5-large';
+value = 'jamba-1.6-large';
 $('#model_ai21_select').val(value);
 }
@@ -4485,7 +4489,7 @@ async function onModelChange() {
 if (oai_settings.chat_completion_source == chat_completion_sources.AI21) {
 if (oai_settings.max_context_unlocked) {
 $('#openai_max_context').attr('max', unlocked_max);
-} else if (oai_settings.ai21_model.includes('jamba-1.5') || oai_settings.ai21_model.includes('jamba-instruct')) {
+} else if (oai_settings.ai21_model.startsWith('jamba-')) {
 $('#openai_max_context').attr('max', max_256k);
 }
@@ -1104,6 +1104,32 @@ function parseReasoningFromString(str, { strict = true } = {}) {
 }
 }

+/**
+ * Parse reasoning in an array of swipe strings if auto-parsing is enabled.
+ * @param {string[]} swipes Array of swipe strings
+ * @param {{extra: {reasoning: string, reasoning_duration: number}}[]} swipeInfoArray Array of swipe info objects
+ * @param {number?} duration Duration of the reasoning
+ */
+export function parseReasoningInSwipes(swipes, swipeInfoArray, duration) {
+if (!power_user.reasoning.auto_parse) {
+return;
+}
+
+// Something ain't right, don't parse
+if (!Array.isArray(swipes) || !Array.isArray(swipeInfoArray) || swipes.length !== swipeInfoArray.length) {
+return;
+}
+
+for (let index = 0; index < swipes.length; index++) {
+const parsedReasoning = parseReasoningFromString(swipes[index]);
+if (parsedReasoning) {
+swipes[index] = parsedReasoning.content;
+swipeInfoArray[index].extra.reasoning = parsedReasoning.reasoning;
+swipeInfoArray[index].extra.reasoning_duration = duration;
+}
+}
+}

 function registerReasoningAppEvents() {
 const eventHandler = (/** @type {string} */ type, /** @type {number} */ idx) => {
 if (!power_user.reasoning.auto_parse) {
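A small sketch of what `parseReasoningInSwipes` does to its inputs, assuming `power_user.reasoning.auto_parse` is enabled and the delimiters match the configured reasoning prefix/suffix (the `<think>` tags here are illustrative):

// Hypothetical in/out example; mutation happens in place on both arrays.
const swipes = ['<think>check the math first</think>The answer is 4.'];
const swipeInfoArray = [{ extra: { reasoning: '', reasoning_duration: null } }];

parseReasoningInSwipes(swipes, swipeInfoArray, 1200);
// swipes[0]                                  -> 'The answer is 4.'
// swipeInfoArray[0].extra.reasoning          -> 'check the math first'
// swipeInfoArray[0].extra.reasoning_duration -> 1200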
@@ -585,6 +585,7 @@ export class ToolManager {
 chat_completion_sources.COHERE,
 chat_completion_sources.DEEPSEEK,
 chat_completion_sources.MAKERSUITE,
+chat_completion_sources.AI21,
 ];
 return supportedSources.includes(oai_settings.chat_completion_source);
 }
@@ -499,6 +499,12 @@ async function sendMakerSuiteRequest(request, response) {
 async function sendAI21Request(request, response) {
 if (!request.body) return response.sendStatus(400);

+const apiKey = readSecret(request.user.directories, SECRET_KEYS.AI21);
+if (!apiKey) {
+console.warn('AI21 API key is missing.');
+return response.status(400).send({ error: true });
+}
+
 const controller = new AbortController();
 console.debug(request.body.messages);
 request.socket.removeAllListeners('close');
@@ -514,13 +520,14 @@ async function sendAI21Request(request, response) {
 top_p: request.body.top_p,
 stop: request.body.stop,
 stream: request.body.stream,
+tools: request.body.tools,
 };
 const options = {
 method: 'POST',
 headers: {
 accept: 'application/json',
 'content-type': 'application/json',
-Authorization: `Bearer ${readSecret(request.user.directories, SECRET_KEYS.AI21)}`,
+Authorization: `Bearer ${apiKey}`,
 },
 body: JSON.stringify(body),
 signal: controller.signal,
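Forwarding `tools` means AI21 now receives the function-calling schema end to end, matching the `chat_completion_sources.AI21` entry added to `ToolManager` above. An illustrative body shape, assuming OpenAI-style tool definitions (not taken from this diff):

// Illustrative request body; the tool schema shown is a generic
// OpenAI-style example, not part of this commit.
const body = {
    model: 'jamba-1.6-large',
    messages: [{ role: 'user', content: 'What is the weather in Paris?' }],
    tools: [{
        type: 'function',
        function: {
            name: 'get_weather',
            description: 'Get current weather for a city',
            parameters: { type: 'object', properties: { city: { type: 'string' } } },
        },
    }],
    stream: false,
};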
@@ -218,11 +218,13 @@ const toShallow = (character) => {
 date_last_chat: character.date_last_chat,
 chat_size: character.chat_size,
 data_size: character.data_size,
+tags: character.tags,
 data: {
 name: _.get(character, 'data.name', ''),
 character_version: _.get(character, 'data.character_version', ''),
 creator: _.get(character, 'data.creator', ''),
 creator_notes: _.get(character, 'data.creator_notes', ''),
+tags: _.get(character, 'data.tags', []),
 extensions: {
 fav: _.get(character, 'data.extensions.fav', false),
 },
@@ -4,7 +4,7 @@ import storage from 'node-persist';
 import express from 'express';
 import lodash from 'lodash';
 import { jsonParser } from '../express-common.js';
-import { checkForNewContent } from './content-manager.js';
+import { checkForNewContent, CONTENT_TYPES } from './content-manager.js';
 import {
 KEY_PREFIX,
 toKey,
@@ -195,7 +195,7 @@ router.post('/create', requireAdminMiddleware, jsonParser, async (request, respo
 console.info('Creating data directories for', newUser.handle);
 await ensurePublicDirectoriesExist();
 const directories = getUserDirectories(newUser.handle);
-await checkForNewContent([directories]);
+await checkForNewContent([directories], [CONTENT_TYPES.SETTINGS]);
 return response.json({ handle: newUser.handle });
 } catch (error) {
 console.error('User create failed:', error);
@@ -31,6 +31,7 @@ const SOURCES = [
 'ollama',
 'llamacpp',
 'vllm',
+'webllm',
 ];

 /**
@@ -64,6 +65,8 @@ async function getVector(source, sourceSettings, text, isQuery, directories) {
 return getVllmVector(text, sourceSettings.apiUrl, sourceSettings.model, directories);
 case 'ollama':
 return getOllamaVector(text, sourceSettings.apiUrl, sourceSettings.model, sourceSettings.keep, directories);
+case 'webllm':
+return sourceSettings.embeddings[text];
 }

 throw new Error(`Unknown vector source ${source}`);
@@ -114,6 +117,9 @@ async function getBatchVector(source, sourceSettings, texts, isQuery, directorie
 case 'ollama':
 results.push(...await getOllamaBatchVector(batch, sourceSettings.apiUrl, sourceSettings.model, sourceSettings.keep, directories));
 break;
+case 'webllm':
+results.push(...texts.map(x => sourceSettings.embeddings[x]));
+break;
 default:
 throw new Error(`Unknown vector source ${source}`);
 }
@@ -179,6 +185,11 @@ function getSourceSettings(source, request) {
 return {
 model: 'nomic-embed-text-v1.5',
 };
+case 'webllm':
+return {
+model: String(request.body.model),
+embeddings: request.body.embeddings ?? {},
+};
 default:
 return {};
 }
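On the server, 'webllm' is the only source whose vectors arrive precomputed: the request body carries an `embeddings` map keyed by raw text, and `getVector`/`getBatchVector` simply look entries up. A sketch of the expected shape (the model ID and texts are illustrative):

// Hypothetical request body for a webllm insert/query; the embeddings map is
// produced client-side by createWebLlmEmbeddings and keyed by the raw text.
const requestBody = {
    source: 'webllm',
    model: 'illustrative-embedding-model',
    embeddings: {
        'first chunk of chat text': [0.12, -0.03 /* , ... */],
        'second chunk of chat text': [0.08, 0.41 /* , ... */],
    },
};
// Server side: getVector('webllm', settings, text) === settings.embeddings[text]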