Add VLLM as vector source
commit 4e822eeebb (parent 1dd21caa66)
@@ -44,6 +44,7 @@ const settings = {
     cohere_model: 'embed-english-v3.0',
     ollama_model: 'mxbai-embed-large',
     ollama_keep: false,
+    vllm_model: '',
     summarize: false,
     summarize_sent: false,
     summary_source: 'main',

@@ -691,6 +692,9 @@ function getVectorHeaders() {
         case 'llamacpp':
             addLlamaCppHeaders(headers);
             break;
+        case 'vllm':
+            addVllmHeaders(headers);
+            break;
         default:
             break;
     }

@@ -761,6 +765,17 @@ function addLlamaCppHeaders(headers) {
     });
 }
 
+/**
+ * Add headers for the VLLM API source.
+ * @param {object} headers Header object
+ */
+function addVllmHeaders(headers) {
+    Object.assign(headers, {
+        'X-Vllm-URL': textgenerationwebui_settings.server_urls[textgen_types.VLLM],
+        'X-Vllm-Model': extension_settings.vectors.vllm_model,
+    });
+}
+
 /**
  * Inserts vector items into a collection
  * @param {string} collectionId - The collection to insert into

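For reference, a minimal sketch of what the extension now attaches to vectorization requests when the source is vLLM. The URL and model values are hypothetical examples; the real values come from the API connection settings and the extension settings shown above:

// Sketch (assumed values): the headers contributed by addVllmHeaders().
const headers = { 'Content-Type': 'application/json' };
Object.assign(headers, {
    'X-Vllm-URL': 'http://localhost:8000',               // server_urls[textgen_types.VLLM]
    'X-Vllm-Model': 'intfloat/e5-mistral-7b-instruct',   // extension_settings.vectors.vllm_model
});
// The server reads these back as request.headers['x-vllm-url'] / ['x-vllm-model'].
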
@@ -801,11 +816,12 @@ function throwIfSourceInvalid() {
     }
 
     if (settings.source === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA] ||
+        settings.source === 'vllm' && !textgenerationwebui_settings.server_urls[textgen_types.VLLM] ||
         settings.source === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) {
         throw new Error('Vectors: API URL missing', { cause: 'api_url_missing' });
     }
 
-    if (settings.source === 'ollama' && !settings.ollama_model) {
+    if (settings.source === 'ollama' && !settings.ollama_model || settings.source === 'vllm' && !settings.vllm_model) {
         throw new Error('Vectors: API model missing', { cause: 'api_model_missing' });
     }
 

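The hunk above extends the existing validation so that vLLM requires both a server URL and a model name before vectorization starts. A standalone sketch of the same logic, with the module state stubbed out as plain objects:

// Sketch: stand-ins for the real settings / server_urls module state.
const settings = { source: 'vllm', vllm_model: '' };
const server_urls = { vllm: 'http://localhost:8000' };

if (settings.source === 'vllm' && !server_urls.vllm) {
    throw new Error('Vectors: API URL missing', { cause: 'api_url_missing' });
}
if (settings.source === 'vllm' && !settings.vllm_model) {
    // Thrown here, since vllm_model is empty in this sketch.
    throw new Error('Vectors: API model missing', { cause: 'api_model_missing' });
}
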
@@ -965,6 +981,7 @@ function toggleSettings() {
     $('#cohere_vectorsModel').toggle(settings.source === 'cohere');
     $('#ollama_vectorsModel').toggle(settings.source === 'ollama');
     $('#llamacpp_vectorsModel').toggle(settings.source === 'llamacpp');
+    $('#vllm_vectorsModel').toggle(settings.source === 'vllm');
     $('#nomicai_apiKey').toggle(settings.source === 'nomicai');
 }

@@ -1274,6 +1291,12 @@ jQuery(async () => {
         Object.assign(extension_settings.vectors, settings);
         saveSettingsDebounced();
     });
+    $('#vectors_vllm_model').val(settings.vllm_model).on('input', () => {
+        $('#vectors_modelWarning').show();
+        settings.vllm_model = String($('#vectors_vllm_model').val());
+        Object.assign(extension_settings.vectors, settings);
+        saveSettingsDebounced();
+    });
     $('#vectors_ollama_keep').prop('checked', settings.ollama_keep).on('input', () => {
         settings.ollama_keep = $('#vectors_ollama_keep').prop('checked');
         Object.assign(extension_settings.vectors, settings);

@@ -20,6 +20,7 @@
         <option value="ollama">Ollama</option>
         <option value="openai">OpenAI</option>
         <option value="togetherai">TogetherAI</option>
+        <option value="vllm">vLLM</option>
     </select>
 </div>
 <div class="flex-container flexFlowColumn" id="ollama_vectorsModel">

@@ -82,6 +83,15 @@
         <option value="bert-base-uncased">Bert Base Uncased</option>
     </select>
 </div>
+<div class="flex-container flexFlowColumn" id="vllm_vectorsModel">
+    <label for="vectors_vllm_model">
+        Vectorization Model
+    </label>
+    <input id="vectors_vllm_model" class="text_pole" type="text" placeholder="Model name, e.g. intfloat/e5-mistral-7b-instruct" />
+    <i>
+        Hint: Set the URL in the API connection settings.
+    </i>
+</div>
 
 <small id="vectors_modelWarning">
     <i class="fa-solid fa-exclamation-triangle"></i>

@@ -16,6 +16,7 @@ const SOURCES = [
     'cohere',
     'ollama',
     'llamacpp',
+    'vllm',
 ];
 
 /**

@@ -45,6 +46,8 @@ async function getVector(source, sourceSettings, text, isQuery, directories) {
             return require('../vectors/cohere-vectors').getCohereVector(text, isQuery, directories, sourceSettings.model);
         case 'llamacpp':
             return require('../vectors/llamacpp-vectors').getLlamaCppVector(text, sourceSettings.apiUrl, directories);
+        case 'vllm':
+            return require('../vectors/vllm-vectors').getVllmVector(text, sourceSettings.apiUrl, sourceSettings.model, directories);
         case 'ollama':
             return require('../vectors/ollama-vectors').getOllamaVector(text, sourceSettings.apiUrl, sourceSettings.model, sourceSettings.keep, directories);
     }

@@ -91,6 +94,9 @@ async function getBatchVector(source, sourceSettings, texts, isQuery, directories) {
         case 'llamacpp':
             results.push(...await require('../vectors/llamacpp-vectors').getLlamaCppBatchVector(batch, sourceSettings.apiUrl, directories));
             break;
+        case 'vllm':
+            results.push(...await require('../vectors/vllm-vectors').getVllmBatchVector(batch, sourceSettings.apiUrl, sourceSettings.model, directories));
+            break;
         case 'ollama':
             results.push(...await require('../vectors/ollama-vectors').getOllamaBatchVector(batch, sourceSettings.apiUrl, sourceSettings.model, sourceSettings.keep, directories));
             break;

@@ -278,6 +284,14 @@ function getSourceSettings(source, request) {
         return {
             apiUrl: apiUrl,
         };
+    } else if (source === 'vllm') {
+        const apiUrl = String(request.headers['x-vllm-url']);
+        const model = String(request.headers['x-vllm-model']);
+
+        return {
+            apiUrl: apiUrl,
+            model: model,
+        };
     } else if (source === 'ollama') {
         const apiUrl = String(request.headers['x-ollama-url']);
         const model = String(request.headers['x-ollama-model']);

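On the server side, the headers sent by the extension are folded into the sourceSettings object that the dispatch functions above pass along. A minimal sketch with a hypothetical request object:

// Sketch: deriving vLLM source settings from an incoming request (hypothetical values).
const request = {
    headers: {
        'x-vllm-url': 'http://localhost:8000',
        'x-vllm-model': 'intfloat/e5-mistral-7b-instruct',
    },
};
const sourceSettings = {
    apiUrl: String(request.headers['x-vllm-url']),
    model: String(request.headers['x-vllm-model']),
};
// sourceSettings is then handed to getVllmVector() / getVllmBatchVector().
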
@@ -0,0 +1,63 @@
+const fetch = require('node-fetch').default;
+const { setAdditionalHeadersByType } = require('../additional-headers');
+const { TEXTGEN_TYPES } = require('../constants');
+
+/**
+ * Gets the vectors for the given texts from VLLM
+ * @param {string[]} texts - The array of texts to get the vectors for
+ * @param {string} apiUrl - The API URL
+ * @param {string} model - The model to use
+ * @param {import('../users').UserDirectoryList} directories - The directories object for the user
+ * @returns {Promise<number[][]>} - The array of vectors for the texts
+ */
+async function getVllmBatchVector(texts, apiUrl, model, directories) {
+    const url = new URL(apiUrl);
+    url.pathname = '/v1/embeddings';
+
+    const headers = {};
+    setAdditionalHeadersByType(headers, TEXTGEN_TYPES.VLLM, apiUrl, directories);
+
+    const response = await fetch(url, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            ...headers,
+        },
+        body: JSON.stringify({ input: texts, model }),
+    });
+
+    if (!response.ok) {
+        const responseText = await response.text();
+        throw new Error(`VLLM: Failed to get vector for text: ${response.statusText} ${responseText}`);
+    }
+
+    const data = await response.json();
+
+    if (!Array.isArray(data?.data)) {
+        throw new Error('API response was not an array');
+    }
+
+    // Sort data by x.index to ensure the order is correct
+    data.data.sort((a, b) => a.index - b.index);
+
+    const vectors = data.data.map(x => x.embedding);
+    return vectors;
+}
+
+/**
+ * Gets the vector for the given text from VLLM
+ * @param {string} text - The text to get the vector for
+ * @param {string} apiUrl - The API URL
+ * @param {string} model - The model to use
+ * @param {import('../users').UserDirectoryList} directories - The directories object for the user
+ * @returns {Promise<number[]>} - The vector for the text
+ */
+async function getVllmVector(text, apiUrl, model, directories) {
+    const vectors = await getVllmBatchVector([text], apiUrl, model, directories);
+    return vectors[0];
+}
+
+module.exports = {
+    getVllmBatchVector,
+    getVllmVector,
+};
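The new module targets vLLM's OpenAI-compatible /v1/embeddings endpoint: it POSTs { input, model }, sorts the returned items by index, and collects their embedding arrays. A usage sketch follows; the URL, model, and directories stub are assumptions for illustration, not part of the commit:

// Usage sketch: embedding a single text and a batch via the new module.
const { getVllmVector, getVllmBatchVector } = require('./vllm-vectors');

async function demo() {
    const apiUrl = 'http://localhost:8000';              // assumed local vLLM server
    const model = 'intfloat/e5-mistral-7b-instruct';     // placeholder from the settings hint
    const directories = {};                              // stub; normally the user's directory list

    const one = await getVllmVector('hello world', apiUrl, model, directories);
    const many = await getVllmBatchVector(['first', 'second'], apiUrl, model, directories);
    console.log(one.length, many.length);                // embedding dimension, then 2
}

A successful response has the usual OpenAI embeddings shape, roughly:

// Illustrative response body consumed by getVllmBatchVector():
// {
//   "object": "list",
//   "data": [
//     { "object": "embedding", "index": 0, "embedding": [0.01, -0.02, ...] },
//     { "object": "embedding", "index": 1, "embedding": [0.03, 0.04, ...] }
//   ],
//   "model": "intfloat/e5-mistral-7b-instruct"
// }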