Data Bank: display file ingestion progress

Cohee 2024-11-18 15:18:45 +00:00
parent 7aad053e70
commit aaed09f606

@@ -44,6 +44,9 @@ const MODULE_NAME = 'vectors';
export const EXTENSION_PROMPT_TAG = '3_vectors';
export const EXTENSION_PROMPT_TAG_DB = '4_vectors_data_bank';
+// Force solo chunks for sources that don't support batching.
+const getBatchSize = () => ['transformers', 'palm', 'ollama'].includes(settings.source) ? 1 : 5;
const settings = {
// For both
source: 'transformers',
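
The new getBatchSize helper drops the batch size to 1 for embedding sources that cannot batch requests. A minimal standalone sketch of that pattern (getBatchSizeFor and SOLO_CHUNK_SOURCES are illustrative names, not the extension's; the source list and the fallback of 5 come from the hunk above):

// Sketch only: per-source batch sizing, decoupled from the settings object.
const SOLO_CHUNK_SOURCES = ['transformers', 'palm', 'ollama'];
const getBatchSizeFor = (source) => SOLO_CHUNK_SOURCES.includes(source) ? 1 : 5;

console.log(getBatchSizeFor('ollama'));   // 1 - one chunk per request
console.log(getBatchSizeFor('other'));    // 5 - batching supported
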
@@ -125,7 +128,7 @@ async function onVectorizeAllClick() {
// upon request of a full vectorise
cachedSummaries.clear();
-const batchSize = 5;
+const batchSize = getBatchSize();
const elapsedLog = [];
let finished = false;
$('#vectorize_progress').show();
@@ -560,7 +563,9 @@ async function vectorizeFile(fileText, fileName, collectionId, chunkSize, overla
fileText = translatedText;
}
-const toast = toastr.info('Vectorization may take some time, please wait...', `Ingesting file ${fileName}`);
+const batchSize = getBatchSize();
+const toastBody = $('<span>').text('This may take a while. Please wait...');
+const toast = toastr.info(toastBody, `Ingesting file ${fileName}`, { closeButton: false, escapeHtml: false, timeOut: 0, extendedTimeOut: 0 });
const overlapSize = Math.round(chunkSize * overlapPercent / 100);
const delimiters = getChunkDelimiters();
// Overlap should not be included in chunk size. It will be later compensated by overlapChunks
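
The replacement toast is created with timeOut: 0 and extendedTimeOut: 0, so it stays on screen until it is cleared explicitly once ingestion finishes. For the overlap arithmetic above, a small worked example (the numbers are made up; the later reduction of chunkSize is what the trailing comment describes):

// Illustrative values only.
const chunkSize = 1000;
const overlapPercent = 20;
const overlapSize = Math.round(chunkSize * overlapPercent / 100);                 // 200
// Per the comment above, the overlap is taken out of the chunk size here
// and re-added later by overlapChunks.
const effectiveChunkSize = overlapSize > 0 ? chunkSize - overlapSize : chunkSize; // 800
console.log(overlapSize, effectiveChunkSize);                                     // 200 800
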
@@ -569,7 +574,12 @@ async function vectorizeFile(fileText, fileName, collectionId, chunkSize, overla
console.debug(`Vectors: Split file ${fileName} into ${chunks.length} chunks with ${overlapPercent}% overlap`, chunks);
const items = chunks.map((chunk, index) => ({ hash: getStringHash(chunk), text: chunk, index: index }));
-await insertVectorItems(collectionId, items);
+for (let i = 0; i < items.length; i += batchSize) {
+    toastBody.text(`${i}/${items.length} (${Math.round((i / items.length) * 100)}%) chunks processed`);
+    const chunkedBatch = items.slice(i, i + batchSize);
+    await insertVectorItems(collectionId, chunkedBatch);
+}
toastr.clear(toast);
console.log(`Vectors: Inserted ${chunks.length} vector items for file ${fileName} into ${collectionId}`);
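
Shape of the new loop in isolation, with the extension's insertVectorItems and the toast body replaced by stand-ins (a sketch, not the module's code):

// Inserts items in batches and reports progress before each batch.
// insert() and report() are hypothetical stand-ins for insertVectorItems and toastBody.text.
async function insertInBatches(items, batchSize, insert, report) {
    for (let i = 0; i < items.length; i += batchSize) {
        report(`${i}/${items.length} (${Math.round((i / items.length) * 100)}%) chunks processed`);
        await insert(items.slice(i, i + batchSize));
    }
}

// Example run: 12 fake items in batches of 5 -> reports 0/12 (0%), 5/12 (42%), 10/12 (83%).
insertInBatches(
    Array.from({ length: 12 }, (_, index) => ({ index })),
    5,
    async () => {},
    (text) => console.log(text),
);

Because the text is set before each batch is sent, the readout tops out just short of 100%; in the commit itself the toast is then dismissed by toastr.clear(toast) as soon as the loop completes.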