Add VLLM as vector source
commit 4e822eeebb (parent 1dd21caa66)
@@ -44,6 +44,7 @@ const settings = {
     cohere_model: 'embed-english-v3.0',
     ollama_model: 'mxbai-embed-large',
     ollama_keep: false,
+    vllm_model: '',
     summarize: false,
     summarize_sent: false,
     summary_source: 'main',

@@ -691,6 +692,9 @@ function getVectorHeaders() {
         case 'llamacpp':
             addLlamaCppHeaders(headers);
             break;
+        case 'vllm':
+            addVllmHeaders(headers);
+            break;
         default:
             break;
     }

@@ -761,6 +765,17 @@ function addLlamaCppHeaders(headers) {
     });
 }
 
+/**
+ * Add headers for the VLLM API source.
+ * @param {object} headers Header object
+ */
+function addVllmHeaders(headers) {
+    Object.assign(headers, {
+        'X-Vllm-URL': textgenerationwebui_settings.server_urls[textgen_types.VLLM],
+        'X-Vllm-Model': extension_settings.vectors.vllm_model,
+    });
+}
+
 /**
  * Inserts vector items into a collection
  * @param {string} collectionId - The collection to insert into

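For reference, a minimal sketch of what the extension now attaches to vectorization requests when the source is vLLM. The URL and model values are hypothetical examples; the real values come from the API connection settings and the extension settings shown above:

// Sketch (assumed values): the headers contributed by addVllmHeaders().
const headers = { 'Content-Type': 'application/json' };
Object.assign(headers, {
    'X-Vllm-URL': 'http://localhost:8000',               // server_urls[textgen_types.VLLM]
    'X-Vllm-Model': 'intfloat/e5-mistral-7b-instruct',   // extension_settings.vectors.vllm_model
});
// The server reads these back as request.headers['x-vllm-url'] / ['x-vllm-model'].
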
@@ -801,11 +816,12 @@ function throwIfSourceInvalid() {
     }
 
     if (settings.source === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA] ||
+        settings.source === 'vllm' && !textgenerationwebui_settings.server_urls[textgen_types.VLLM] ||
         settings.source === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) {
         throw new Error('Vectors: API URL missing', { cause: 'api_url_missing' });
     }
 
-    if (settings.source === 'ollama' && !settings.ollama_model) {
+    if (settings.source === 'ollama' && !settings.ollama_model || settings.source === 'vllm' && !settings.vllm_model) {
         throw new Error('Vectors: API model missing', { cause: 'api_model_missing' });
     }
 

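The hunk above extends the existing validation so that vLLM requires both a server URL and a model name before vectorization starts. A standalone sketch of the same logic, with the module state stubbed out as plain objects:

// Sketch: stand-ins for the real settings / server_urls module state.
const settings = { source: 'vllm', vllm_model: '' };
const server_urls = { vllm: 'http://localhost:8000' };

if (settings.source === 'vllm' && !server_urls.vllm) {
    throw new Error('Vectors: API URL missing', { cause: 'api_url_missing' });
}
if (settings.source === 'vllm' && !settings.vllm_model) {
    // Thrown here, since vllm_model is empty in this sketch.
    throw new Error('Vectors: API model missing', { cause: 'api_model_missing' });
}
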
@@ -965,6 +981,7 @@ function toggleSettings() {
     $('#cohere_vectorsModel').toggle(settings.source === 'cohere');
     $('#ollama_vectorsModel').toggle(settings.source === 'ollama');
     $('#llamacpp_vectorsModel').toggle(settings.source === 'llamacpp');
+    $('#vllm_vectorsModel').toggle(settings.source === 'vllm');
     $('#nomicai_apiKey').toggle(settings.source === 'nomicai');
 }

@@ -1274,6 +1291,12 @@ jQuery(async () => {
         Object.assign(extension_settings.vectors, settings);
         saveSettingsDebounced();
     });
+    $('#vectors_vllm_model').val(settings.vllm_model).on('input', () => {
+        $('#vectors_modelWarning').show();
+        settings.vllm_model = String($('#vectors_vllm_model').val());
+        Object.assign(extension_settings.vectors, settings);
+        saveSettingsDebounced();
+    });
     $('#vectors_ollama_keep').prop('checked', settings.ollama_keep).on('input', () => {
         settings.ollama_keep = $('#vectors_ollama_keep').prop('checked');
         Object.assign(extension_settings.vectors, settings);

@@ -20,6 +20,7 @@
         <option value="ollama">Ollama</option>
         <option value="openai">OpenAI</option>
         <option value="togetherai">TogetherAI</option>
+        <option value="vllm">vLLM</option>
     </select>
 </div>
 <div class="flex-container flexFlowColumn" id="ollama_vectorsModel">

@@ -82,6 +83,15 @@
         <option value="bert-base-uncased">Bert Base Uncased</option>
     </select>
 </div>
+<div class="flex-container flexFlowColumn" id="vllm_vectorsModel">
+    <label for="vectors_vllm_model">
+        Vectorization Model
+    </label>
+    <input id="vectors_vllm_model" class="text_pole" type="text" placeholder="Model name, e.g. intfloat/e5-mistral-7b-instruct" />
+    <i>
+        Hint: Set the URL in the API connection settings.
+    </i>
+</div>
 
 <small id="vectors_modelWarning">
     <i class="fa-solid fa-exclamation-triangle"></i>

@@ -16,6 +16,7 @@ const SOURCES = [
     'cohere',
     'ollama',
     'llamacpp',
+    'vllm',
 ];
 
 /**

@@ -45,6 +46,8 @@ async function getVector(source, sourceSettings, text, isQuery, directories) {
             return require('../vectors/cohere-vectors').getCohereVector(text, isQuery, directories, sourceSettings.model);
         case 'llamacpp':
             return require('../vectors/llamacpp-vectors').getLlamaCppVector(text, sourceSettings.apiUrl, directories);
+        case 'vllm':
+            return require('../vectors/vllm-vectors').getVllmVector(text, sourceSettings.apiUrl, sourceSettings.model, directories);
         case 'ollama':
             return require('../vectors/ollama-vectors').getOllamaVector(text, sourceSettings.apiUrl, sourceSettings.model, sourceSettings.keep, directories);
     }

@@ -91,6 +94,9 @@ async function getBatchVector(source, sourceSettings, texts, isQuery, directories) {
         case 'llamacpp':
             results.push(...await require('../vectors/llamacpp-vectors').getLlamaCppBatchVector(batch, sourceSettings.apiUrl, directories));
             break;
+        case 'vllm':
+            results.push(...await require('../vectors/vllm-vectors').getVllmBatchVector(batch, sourceSettings.apiUrl, sourceSettings.model, directories));
+            break;
         case 'ollama':
             results.push(...await require('../vectors/ollama-vectors').getOllamaBatchVector(batch, sourceSettings.apiUrl, sourceSettings.model, sourceSettings.keep, directories));
             break;

@@ -278,6 +284,14 @@ function getSourceSettings(source, request) {
         return {
             apiUrl: apiUrl,
         };
+    } else if (source === 'vllm') {
+        const apiUrl = String(request.headers['x-vllm-url']);
+        const model = String(request.headers['x-vllm-model']);
+
+        return {
+            apiUrl: apiUrl,
+            model: model,
+        };
     } else if (source === 'ollama') {
         const apiUrl = String(request.headers['x-ollama-url']);
         const model = String(request.headers['x-ollama-model']);

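On the server side, the headers sent by the extension are folded into the sourceSettings object that the dispatch functions above pass along. A minimal sketch with a hypothetical request object:

// Sketch: deriving vLLM source settings from an incoming request (hypothetical values).
const request = {
    headers: {
        'x-vllm-url': 'http://localhost:8000',
        'x-vllm-model': 'intfloat/e5-mistral-7b-instruct',
    },
};
const sourceSettings = {
    apiUrl: String(request.headers['x-vllm-url']),
    model: String(request.headers['x-vllm-model']),
};
// sourceSettings is then handed to getVllmVector() / getVllmBatchVector().
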
@@ -0,0 +1,63 @@
+const fetch = require('node-fetch').default;
+const { setAdditionalHeadersByType } = require('../additional-headers');
+const { TEXTGEN_TYPES } = require('../constants');
+
+/**
+ * Gets the vectors for the given texts from VLLM
+ * @param {string[]} texts - The array of texts to get the vectors for
+ * @param {string} apiUrl - The API URL
+ * @param {string} model - The model to use
+ * @param {import('../users').UserDirectoryList} directories - The directories object for the user
+ * @returns {Promise<number[][]>} - The array of vectors for the texts
+ */
+async function getVllmBatchVector(texts, apiUrl, model, directories) {
+    const url = new URL(apiUrl);
+    url.pathname = '/v1/embeddings';
+
+    const headers = {};
+    setAdditionalHeadersByType(headers, TEXTGEN_TYPES.VLLM, apiUrl, directories);
+
+    const response = await fetch(url, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            ...headers,
+        },
+        body: JSON.stringify({ input: texts, model }),
+    });
+
+    if (!response.ok) {
+        const responseText = await response.text();
+        throw new Error(`VLLM: Failed to get vector for text: ${response.statusText} ${responseText}`);
+    }
+
+    const data = await response.json();
+
+    if (!Array.isArray(data?.data)) {
+        throw new Error('API response was not an array');
+    }
+
+    // Sort data by x.index to ensure the order is correct
+    data.data.sort((a, b) => a.index - b.index);
+
+    const vectors = data.data.map(x => x.embedding);
+    return vectors;
+}
+
+/**
+ * Gets the vector for the given text from VLLM
+ * @param {string} text - The text to get the vector for
+ * @param {string} apiUrl - The API URL
+ * @param {string} model - The model to use
+ * @param {import('../users').UserDirectoryList} directories - The directories object for the user
+ * @returns {Promise<number[]>} - The vector for the text
+ */
+async function getVllmVector(text, apiUrl, model, directories) {
+    const vectors = await getVllmBatchVector([text], apiUrl, model, directories);
+    return vectors[0];
+}
+
+module.exports = {
+    getVllmBatchVector,
+    getVllmVector,
+};
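The new module targets vLLM's OpenAI-compatible /v1/embeddings endpoint: it POSTs { input, model }, sorts the returned items by index, and collects their embedding arrays. A usage sketch follows; the URL, model, and directories stub are assumptions for illustration, not part of the commit:

// Usage sketch: embedding a single text and a batch via the new module.
const { getVllmVector, getVllmBatchVector } = require('./vllm-vectors');

async function demo() {
    const apiUrl = 'http://localhost:8000';              // assumed local vLLM server
    const model = 'intfloat/e5-mistral-7b-instruct';     // placeholder from the settings hint
    const directories = {};                              // stub; normally the user's directory list

    const one = await getVllmVector('hello world', apiUrl, model, directories);
    const many = await getVllmBatchVector(['first', 'second'], apiUrl, model, directories);
    console.log(one.length, many.length);                // embedding dimension, then 2
}

A successful response has the usual OpenAI embeddings shape, roughly:

// Illustrative response body consumed by getVllmBatchVector():
// {
//   "object": "list",
//   "data": [
//     { "object": "embedding", "index": 0, "embedding": [0.01, -0.02, ...] },
//     { "object": "embedding", "index": 1, "embedding": [0.03, 0.04, ...] }
//   ],
//   "model": "intfloat/e5-mistral-7b-instruct"
// }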