#1671 Add batch vectorization

This commit is contained in:
Cohee
2024-01-24 13:56:13 +02:00
parent 3d2c8bf674
commit cfdf43a26e
4 changed files with 77 additions and 15 deletions

View File

@@ -24,6 +24,26 @@ async function getVector(source, text) {
throw new Error(`Unknown vector source ${source}`);
}
/**
* Gets the vector for the given text batch from the given source.
* @param {string} source - The source of the vector
* @param {string[]} texts - The array of texts to get the vector for
* @returns {Promise<number[][]>} - The array of vectors for the texts
*/
async function getBatchVector(source, texts) {
switch (source) {
case 'mistral':
case 'openai':
return require('../openai-vectors').getOpenAIBatchVector(texts, source);
case 'transformers':
return require('../embedding').getTransformersBatchVector(texts);
case 'palm':
return require('../makersuite-vectors').getMakerSuiteBatchVector(texts);
}
throw new Error(`Unknown vector source ${source}`);
}
/**
* Gets the index for the vector collection
* @param {string} collectionId - The collection ID
@@ -52,12 +72,12 @@ async function insertVectorItems(collectionId, source, items) {
await store.beginUpdate();
for (const item of items) {
const text = item.text;
const hash = item.hash;
const index = item.index;
const vector = await getVector(source, text);
await store.upsertItem({ vector: vector, metadata: { hash, text, index } });
const vectors = await getBatchVector(source, items.map(x => x.text));
for (let i = 0; i < items.length; i++) {
const item = items[i];
const vector = vectors[i];
await store.upsertItem({ vector: vector, metadata: { hash: item.hash, text: item.text, index: item.index } });
}
await store.endUpdate();