Vectors WebLLM (#3631)

* Add WebLLM support for vectorization * Load models when WebLLM extension installed * Consistency updated * Move checkWebLlm to initEngine * Refactor vector request handling to use getAdditionalArgs * Add error handling for unsupported WebLLM extension * Add prefix to error causes
2025-06-05 21:59:27 +02:00 · 2025-03-09 00:51:44 +02:00
parent 0ea64050ff
commit 1cb9287684
5 changed files with 227 additions and 10 deletions
--- a/public/scripts/extensions.js
+++ b/public/scripts/extensions.js
@ -1070,7 +1070,7 @@ export async function installExtension(url, global) {
    toastr.success(t`Extension '${response.display_name}' by ${response.author} (version ${response.version}) has been installed successfully!`, t`Extension installation successful`);
    console.debug(`Extension "${response.display_name}" has been installed successfully at ${response.extensionPath}`);
    await loadExtensionSettings({}, false, false);
-    await eventSource.emit(event_types.EXTENSION_SETTINGS_LOADED);
+    await eventSource.emit(event_types.EXTENSION_SETTINGS_LOADED, response);
 }
 /**
--- a/public/scripts/extensions/vectors/index.js
+++ b/public/scripts/extensions/vectors/index.js
@ -19,6 +19,7 @@ import {
    modules,
    renderExtensionTemplateAsync,
    doExtrasFetch, getApiUrl,
    openThirdPartyExtensionMenu,
 } from '../../extensions.js';
 import { collapseNewlines, registerDebugFunction } from '../../power-user.js';
 import { SECRET_KEYS, secret_state, writeSecret } from '../../secrets.js';
@ -34,6 +35,7 @@ import { SlashCommandEnumValue, enumTypes } from '../../slash-commands/SlashComm
 import { slashCommandReturnHelper } from '../../slash-commands/SlashCommandReturnHelper.js';
 import { callGenericPopup, POPUP_RESULT, POPUP_TYPE } from '../../popup.js';
 import { generateWebLlmChatPrompt, isWebLlmSupported } from '../shared.js';
 import { WebLlmVectorProvider } from './webllm.js';
 /**
 * @typedef {object} HashedMessage
@ -60,6 +62,7 @@ const settings = {
    ollama_model: 'mxbai-embed-large',
    ollama_keep: false,
    vllm_model: '',
    webllm_model: '',
    summarize: false,
    summarize_sent: false,
    summary_source: 'main',
@ -103,7 +106,7 @@ const settings = {
 };
 const moduleWorker = new ModuleWorkerWrapper(synchronizeChat);
-
+const webllmProvider = new WebLlmVectorProvider();
 const cachedSummaries = new Map();
 /**
@ -373,6 +376,8 @@ async function synchronizeChat(batchSize = 5) {
                    return 'Vectorization Source Model is required, but not set.';
                case 'extras_module_missing':
                    return 'Extras API must provide an "embeddings" module.';
                case 'webllm_not_supported':
                    return 'WebLLM extension is not installed or the model is not set.';
                default:
                    return 'Check server console for more details';
            }
@ -747,10 +752,11 @@ async function getQueryText(chat, initiator) {
 /**
 * Gets common body parameters for vector requests.
- * @returns {object}
+ * @param {object} args Additional arguments
 * @returns {object} Request body
 */
-function getVectorsRequestBody() {
+function getVectorsRequestBody(args = {}) {
-    const body = {};
+    const body = Object.assign({}, args);
    switch (settings.source) {
        case 'extras':
            body.extrasUrl = extension_settings.apiUrl;
@ -777,12 +783,30 @@ function getVectorsRequestBody() {
            body.apiUrl = textgenerationwebui_settings.server_urls[textgen_types.VLLM];
            body.model = extension_settings.vectors.vllm_model;
            break;
        case 'webllm':
            body.model = extension_settings.vectors.webllm_model;
            break;
        default:
            break;
    }
    return body;
 }
 /**
 * Collects the source-specific extras that have to accompany a vector request.
 * Only the 'webllm' source contributes anything: its embeddings are computed via
 * createWebLlmEmbeddings and attached under the `embeddings` key.
 * @param {string[]} items Texts that are going to be embedded
 * @returns {Promise<object>} Extra arguments; an empty object for every other source
 */
 async function getAdditionalArgs(items) {
    const extras = {};
    if (settings.source === 'webllm') {
        extras.embeddings = await createWebLlmEmbeddings(items);
    }
    return extras;
 }
 /**
 * Gets the saved hashes for a collection
 * @param {string} collectionId
@ -816,11 +840,12 @@ async function getSavedHashes(collectionId) {
 async function insertVectorItems(collectionId, items) {
    throwIfSourceInvalid();
    const args = await getAdditionalArgs(items.map(x => x.text));
    const response = await fetch('/api/vector/insert', {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({
-            ...getVectorsRequestBody(),
+            ...getVectorsRequestBody(args),
            collectionId: collectionId,
            items: items,
            source: settings.source,
@ -858,6 +883,10 @@ function throwIfSourceInvalid() {
    if (settings.source === 'extras' && !modules.includes('embeddings')) {
        throw new Error('Vectors: Embeddings module missing', { cause: 'extras_module_missing' });
    }
    if (settings.source === 'webllm' && (!isWebLlmSupported() || !settings.webllm_model)) {
        throw new Error('Vectors: WebLLM is not supported', { cause: 'webllm_not_supported' });
    }
 }
 /**
@ -890,11 +919,12 @@ async function deleteVectorItems(collectionId, hashes) {
 * @returns {Promise<{ hashes: number[], metadata: object[]}>} - Hashes of the results
 */
 async function queryCollection(collectionId, searchText, topK) {
    const args = await getAdditionalArgs([searchText]);
    const response = await fetch('/api/vector/query', {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({
-            ...getVectorsRequestBody(),
+            ...getVectorsRequestBody(args),
            collectionId: collectionId,
            searchText: searchText,
            topK: topK,
@ -919,11 +949,12 @@ async function queryCollection(collectionId, searchText, topK) {
 * @returns {Promise<Record<string, { hashes: number[], metadata: object[] }>>} - Results mapped to collection IDs
 */
 async function queryMultipleCollections(collectionIds, searchText, topK, threshold) {
    const args = await getAdditionalArgs([searchText]);
    const response = await fetch('/api/vector/query-multi', {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({
-            ...getVectorsRequestBody(),
+            ...getVectorsRequestBody(args),
            collectionIds: collectionIds,
            searchText: searchText,
            topK: topK,
@ -1039,6 +1070,72 @@ function toggleSettings() {
    $('#llamacpp_vectorsModel').toggle(settings.source === 'llamacpp');
    $('#vllm_vectorsModel').toggle(settings.source === 'vllm');
    $('#nomicai_apiKey').toggle(settings.source === 'nomicai');
    $('#webllm_vectorsModel').toggle(settings.source === 'webllm');
    if (settings.source === 'webllm') {
        loadWebLlmModels();
    }
 }
 /**
 * Runs a callback and absorbs any failure it produces.
 * Every failure is logged to the console. When the failure is an Error whose `cause` is
 * 'webllm-not-available' or 'webllm-not-updated', a matching warning toast is shown as well.
 * Nothing is rethrown, so after a failure the returned promise resolves with undefined.
 * @param {function(): Promise<T>} func Function to execute
 * @returns {Promise<T>} Result of `func` (undefined if it failed)
 * @template T
 */
 async function executeWithWebLlmErrorHandling(func) {
    try {
        const outcome = await func();
        return outcome;
    } catch (failure) {
        console.log('Vectors: Failed to load WebLLM models', failure);
        // Only real Error objects are inspected for a known cause.
        if (failure instanceof Error) {
            const reason = failure.cause;
            if (reason === 'webllm-not-available') {
                toastr.warning('WebLLM is not available. Please install the extension.', 'WebLLM not installed');
            } else if (reason === 'webllm-not-updated') {
                toastr.warning('The installed extension version does not support embeddings.', 'WebLLM update required');
            }
        }
    }
 }
 /**
 * Rebuilds the WebLLM model dropdown from the models the provider reports.
 * If the saved model is unset or not among the reported ones, the first reported
 * model (when there is one) becomes the saved model. The dropdown is then set to
 * the saved model. Failures are handled by executeWithWebLlmErrorHandling.
 * @returns {Promise<void>}
 */
 function loadWebLlmModels() {
    return executeWithWebLlmErrorHandling(() => {
        // Ask the provider first: if it throws, the dropdown is left untouched.
        const available = webllmProvider.getModels();
        const $select = $('#vectors_webllm_model');
        $select.empty();
        for (const entry of available) {
            const option = $('<option>', { value: entry.id, text: entry.toString() });
            $select.append(option);
        }
        const isSavedModelListed = Boolean(settings.webllm_model) && available.some(({ id }) => id === settings.webllm_model);
        if (!isSavedModelListed && available.length) {
            settings.webllm_model = available[0].id;
        }
        $select.val(settings.webllm_model);
        return Promise.resolve();
    });
 }
 /**
 * Creates WebLLM embeddings for a list of items.
 * The texts are embedded with the currently selected WebLLM model and returned as a
 * lookup keyed by the text itself; duplicate texts collapse into one entry (last one wins).
 * Failures are handled by executeWithWebLlmErrorHandling, in which case the promise
 * resolves with undefined instead of a lookup.
 * @param {string[]} items Items to embed
 * @returns {Promise<Record<string, number[]>>} Calculated embeddings
 */
 async function createWebLlmEmbeddings(items) {
    return executeWithWebLlmErrorHandling(async () => {
        const embeddings = await webllmProvider.embedTexts(items, settings.webllm_model);
        // Object.fromEntries defines own data properties. A plain `result[text] = vector`
        // would hit the `__proto__` setter for a text that is exactly '__proto__', replacing
        // the lookup's prototype and dropping that embedding from the serialized request.
        return Object.fromEntries(items.map((text, index) => [text, embeddings[index]]));
    });
 }
 async function onPurgeClick() {
@ -1567,6 +1664,30 @@ jQuery(async () => {
        $('#dialogue_popup_input').val(presetModel);
    });
    // Install link: opens the third-party extension installer for WebLLM unless
    // SillyTavern already exposes an `llm` property, in which case only an info toast is shown.
    $('#vectors_webllm_install').on('click', (event) => {
        event.preventDefault();
        event.stopPropagation();
        const isAlreadyInstalled = Object.hasOwn(SillyTavern, 'llm');
        if (!isAlreadyInstalled) {
            openThirdPartyExtensionMenu('https://github.com/SillyTavern/Extension-WebLLM');
            return;
        }
        toastr.info('WebLLM is already installed');
    });
    // Model dropdown: store the picked model id (coerced to a string), mirror the
    // local settings into extension_settings.vectors and schedule a save.
    $('#vectors_webllm_model').on('input', () => {
        const pickedModel = $('#vectors_webllm_model').val();
        settings.webllm_model = String(pickedModel);
        Object.assign(extension_settings.vectors, settings);
        saveSettingsDebounced();
    });
    // Load button: loads the selected model into the WebLLM engine.
    // The work runs through executeWithWebLlmErrorHandling so that a failing load
    // (extension missing or outdated, model load error) is logged and, for known causes,
    // reported with a warning toast instead of surfacing as an unhandled promise rejection.
    // The success toast sits inside the callback so it only appears when loading succeeded.
    $('#vectors_webllm_load').on('click', async () => {
        if (!settings.webllm_model) return;
        await executeWithWebLlmErrorHandling(async () => {
            await webllmProvider.loadModel(settings.webllm_model);
            toastr.success('WebLLM model loaded');
        });
    });
    $('#api_key_nomicai').toggleClass('success', !!secret_state[SECRET_KEYS.NOMICAI]);
    toggleSettings();
@ -1578,6 +1699,11 @@ jQuery(async () => {
    eventSource.on(event_types.CHAT_DELETED, purgeVectorIndex);
    eventSource.on(event_types.GROUP_CHAT_DELETED, purgeVectorIndex);
    eventSource.on(event_types.FILE_ATTACHMENT_DELETED, purgeFileVectorIndex);
    // Refresh the WebLLM model list when settings are reloaded with a manifest whose
    // display name is 'WebLLM', but only while WebLLM is the selected vector source.
    eventSource.on(event_types.EXTENSION_SETTINGS_LOADED, async (manifest) => {
        if (settings.source !== 'webllm') {
            return;
        }
        if (manifest?.display_name !== 'WebLLM') {
            return;
        }
        await loadWebLlmModels();
    });
    SlashCommandParser.addCommandObject(SlashCommand.fromProps({
        name: 'db-ingest',
--- a/public/scripts/extensions/vectors/settings.html
+++ b/public/scripts/extensions/vectors/settings.html
@ -21,8 +21,24 @@
                    <option value="openai">OpenAI</option>
                    <option value="togetherai">TogetherAI</option>
                    <option value="vllm">vLLM</option>
                    <option value="webllm" data-i18n="WebLLM Extension">WebLLM Extension</option>
                </select>
            </div>
            <div class="flex-container flexFlowColumn" id="webllm_vectorsModel">
                <label for="vectors_webllm_model" data-i18n="Vectorization Model">
                    Vectorization Model
                </label>
                <div class="flex-container">
                    <select id="vectors_webllm_model" class="text_pole flex1">
                    </select>
                    <div id="vectors_webllm_load" class="menu_button menu_button_icon" title="Verify and load the selected model.">
                        <i class="fa-solid fa-check-to-slot"></i>
                    </div>
                </div>
                <div>
                    Requires the WebLLM extension to be installed. Click <a href="#" id="vectors_webllm_install">here</a> to install.
                </div>
            </div>
            <div class="flex-container flexFlowColumn" id="ollama_vectorsModel">
                <label for="vectors_ollama_model" data-i18n="Vectorization Model">
                    Vectorization Model
--- a/public/scripts/extensions/vectors/webllm.js
+++ b/public/scripts/extensions/vectors/webllm.js
@ -0,0 +1,64 @@
 /**
 * Thin wrapper around the `SillyTavern.llm` API for listing embedding models,
 * loading a model and generating embeddings.
 */
 export class WebLlmVectorProvider {
    /** @type {object?} Engine obtained from SillyTavern.llm.getEngine(); null until first needed */
    #engine = null;
    /**
     * Verifies that the WebLLM API is present and exposes embedding support.
     * @throws {Error} With cause 'webllm-not-available' when SillyTavern has no own `llm` property
     * @throws {Error} With cause 'webllm-not-updated' when `llm.generateEmbedding` is not a function
     */
    #assertWebLlmUsable() {
        const isInstalled = Object.hasOwn(SillyTavern, 'llm');
        if (!isInstalled) {
            throw new Error('WebLLM is not available', { cause: 'webllm-not-available' });
        }
        const canEmbed = typeof SillyTavern.llm.generateEmbedding === 'function';
        if (!canEmbed) {
            throw new Error('WebLLM is not updated', { cause: 'webllm-not-updated' });
        }
    }
    /**
     * Makes sure an engine exists and asks it to load the given model.
     * The engine is fetched once and reused on later calls.
     * @param {string} modelId Model ID to initialize the engine with
     * @returns {Promise<void>} Whatever the engine's loadModel returns
     */
    #initEngine(modelId) {
        this.#assertWebLlmUsable();
        this.#engine ||= SillyTavern.llm.getEngine();
        return this.#engine.loadModel(modelId);
    }
    /**
     * Lists the embedding models reported by the WebLLM API.
     * @returns {{id:string, toString: function(): string}[]} Array of available models
     * @throws {Error} If WebLLM is missing or lacks embedding support
     */
    getModels() {
        this.#assertWebLlmUsable();
        return SillyTavern.llm.getEmbeddingModels();
    }
    /**
     * Loads the model into the engine, then embeds the given texts with it.
     * @param {string[]} texts Array of texts to generate embeddings for
     * @param {string} modelId Model to use for generating embeddings
     * @returns {Promise<number[][]>} Array of embeddings for each text
     */
    async embedTexts(texts, modelId) {
        await this.#initEngine(modelId);
        return this.#engine.generateEmbedding(texts);
    }
    /**
     * Loads a model into the engine without generating anything.
     * @param {string} modelId Model ID to load
     * @returns {Promise<void>}
     */
    async loadModel(modelId) {
        await this.#initEngine(modelId);
    }
 }
--- a/src/endpoints/vectors.js
+++ b/src/endpoints/vectors.js
@ -31,6 +31,7 @@ const SOURCES = [
    'ollama',
    'llamacpp',
    'vllm',
    'webllm',
 ];
 /**
@ -64,6 +65,8 @@ async function getVector(source, sourceSettings, text, isQuery, directories) {
            return getVllmVector(text, sourceSettings.apiUrl, sourceSettings.model, directories);
        case 'ollama':
            return getOllamaVector(text, sourceSettings.apiUrl, sourceSettings.model, sourceSettings.keep, directories);
        case 'webllm':
            return sourceSettings.embeddings[text];
    }
    throw new Error(`Unknown vector source ${source}`);
@ -114,6 +117,9 @@ async function getBatchVector(source, sourceSettings, texts, isQuery, directorie
            case 'ollama':
                results.push(...await getOllamaBatchVector(batch, sourceSettings.apiUrl, sourceSettings.model, sourceSettings.keep, directories));
                break;
            case 'webllm':
                results.push(...texts.map(x => sourceSettings.embeddings[x]));
                break;
            default:
                throw new Error(`Unknown vector source ${source}`);
        }
@ -179,6 +185,11 @@ function getSourceSettings(source, request) {
            return {
                model: 'nomic-embed-text-v1.5',
            };
        case 'webllm':
            return {
                model: String(request.body.model),
                embeddings: request.body.embeddings ?? {},
            };
        default:
            return {};
    }