Add support for KoboldCpp embeddings in Vector Storage (#3795)

* Add support for KoboldCpp embeddings in vector processing * Add validation for KoboldCpp embeddings to handle empty data * Improve toast handling
2025-06-05 21:59:27 +02:00 · 2025-04-01 21:21:29 +03:00
parent 9c4404cae9
commit 80e821d12d
4 changed files with 113 additions and 1 deletions
--- a/public/scripts/extensions/vectors/index.js
+++ b/public/scripts/extensions/vectors/index.js
@@ -565,6 +565,8 @@ async function retrieveFileChunks(queryText, collectionId) {
 * @returns {Promise<boolean>} True if successful, false if not
 */
 async function vectorizeFile(fileText, fileName, collectionId, chunkSize, overlapPercent) {
+    let toast = jQuery();
+
    try {
        if (settings.translate_files && typeof globalThis.translate === 'function') {
            console.log(`Vectors: Translating file ${fileName} to English...`);
@@ -574,7 +576,7 @@ async function vectorizeFile(fileText, fileName, collectionId, chunkSize, overla

        const batchSize = getBatchSize();
        const toastBody = $('<span>').text('This may take a while. Please wait...');
-        const toast = toastr.info(toastBody, `Ingesting file ${escapeHtml(fileName)}`, { closeButton: false, escapeHtml: false, timeOut: 0, extendedTimeOut: 0 });
+        toast = toastr.info(toastBody, `Ingesting file ${escapeHtml(fileName)}`, { closeButton: false, escapeHtml: false, timeOut: 0, extendedTimeOut: 0 });
        const overlapSize = Math.round(chunkSize * overlapPercent / 100);
        const delimiters = getChunkDelimiters();
        // Overlap should not be included in chunk size. It will be later compensated by overlapChunks
@@ -596,6 +598,7 @@ async function vectorizeFile(fileText, fileName, collectionId, chunkSize, overla
        console.log(`Vectors: Inserted ${chunks.length} vector items for file ${fileName} into ${collectionId}`);
        return true;
    } catch (error) {
+        toastr.clear(toast);
        toastr.error(String(error), 'Failed to vectorize file', { preventDuplicates: true });
        console.error('Vectors: Failed to vectorize file', error);
        return false;
@@ -803,6 +806,12 @@ async function getAdditionalArgs(items) {
        case 'webllm':
            args.embeddings = await createWebLlmEmbeddings(items);
            break;
+        case 'koboldcpp': {
+            const { embeddings, model } = await createKoboldCppEmbeddings(items);
+            args.embeddings = embeddings;
+            args.model = model;
+            break;
+        }
    }
    return args;
 }
@@ -872,6 +881,7 @@ function throwIfSourceInvalid() {

    if (settings.source === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA] ||
        settings.source === 'vllm' && !textgenerationwebui_settings.server_urls[textgen_types.VLLM] ||
+        settings.source === 'koboldcpp' && !textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP] ||
        settings.source === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) {
        throw new Error('Vectors: API URL missing', { cause: 'api_url_missing' });
    }
@@ -1071,6 +1081,7 @@ function toggleSettings() {
    $('#vllm_vectorsModel').toggle(settings.source === 'vllm');
    $('#nomicai_apiKey').toggle(settings.source === 'nomicai');
    $('#webllm_vectorsModel').toggle(settings.source === 'webllm');
+    $('#koboldcpp_vectorsModel').toggle(settings.source === 'koboldcpp');
    if (settings.source === 'webllm') {
        loadWebLlmModels();
    }
@@ -1138,6 +1149,45 @@ async function createWebLlmEmbeddings(items) {
    });
 }

+/**
+ * Creates KoboldCpp embeddings for a list of items.
+ * @param {string[]} items Items to embed
+ * @returns {Promise<{embeddings: Record<string, number[]>, model: string}>} Calculated embeddings
+ */
+async function createKoboldCppEmbeddings(items) {
+    const response = await fetch('/api/backends/kobold/embed', {
+        method: 'POST',
+        headers: getRequestHeaders(),
+        body: JSON.stringify({
+            items: items,
+            server: textgenerationwebui_settings.server_urls[textgen_types.KOBOLDCPP],
+        }),
+    });
+
+    if (!response.ok) {
+        throw new Error('Failed to get KoboldCpp embeddings');
+    }
+
+    const data = await response.json();
+    if (!Array.isArray(data.embeddings) || !data.model || data.embeddings.length !== items.length) {
+        throw new Error('Invalid response from KoboldCpp embeddings');
+    }
+
+    const embeddings = /** @type {Record<string, number[]>} */ ({});
+    for (let i = 0; i < data.embeddings.length; i++) {
+        if (!Array.isArray(data.embeddings[i]) || data.embeddings[i].length === 0) {
+            throw new Error('KoboldCpp returned an empty embedding. Reduce the chunk size and/or size threshold and try again.');
+        }
+
+        embeddings[items[i]] = data.embeddings[i];
+    }
+
+    return {
+        embeddings: embeddings,
+        model: data.model,
+    };
+}
+
 async function onPurgeClick() {
    const chatId = getCurrentChatId();
    if (!chatId) {