mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Data Bank: display file ingestion progress
This commit is contained in:
@ -44,6 +44,9 @@ const MODULE_NAME = 'vectors';
|
|||||||
export const EXTENSION_PROMPT_TAG = '3_vectors';
|
export const EXTENSION_PROMPT_TAG = '3_vectors';
|
||||||
export const EXTENSION_PROMPT_TAG_DB = '4_vectors_data_bank';
|
export const EXTENSION_PROMPT_TAG_DB = '4_vectors_data_bank';
|
||||||
|
|
||||||
|
// Force solo chunks for sources that don't support batching.
|
||||||
|
const getBatchSize = () => ['transformers', 'palm', 'ollama'].includes(settings.source) ? 1 : 5;
|
||||||
|
|
||||||
const settings = {
|
const settings = {
|
||||||
// For both
|
// For both
|
||||||
source: 'transformers',
|
source: 'transformers',
|
||||||
@ -125,7 +128,7 @@ async function onVectorizeAllClick() {
|
|||||||
// upon request of a full vectorise
|
// upon request of a full vectorise
|
||||||
cachedSummaries.clear();
|
cachedSummaries.clear();
|
||||||
|
|
||||||
const batchSize = 5;
|
const batchSize = getBatchSize();
|
||||||
const elapsedLog = [];
|
const elapsedLog = [];
|
||||||
let finished = false;
|
let finished = false;
|
||||||
$('#vectorize_progress').show();
|
$('#vectorize_progress').show();
|
||||||
@ -560,7 +563,9 @@ async function vectorizeFile(fileText, fileName, collectionId, chunkSize, overla
|
|||||||
fileText = translatedText;
|
fileText = translatedText;
|
||||||
}
|
}
|
||||||
|
|
||||||
const toast = toastr.info('Vectorization may take some time, please wait...', `Ingesting file ${fileName}`);
|
const batchSize = getBatchSize();
|
||||||
|
const toastBody = $('<span>').text('This may take a while. Please wait...');
|
||||||
|
const toast = toastr.info(toastBody, `Ingesting file ${fileName}`, { closeButton: false, escapeHtml: false, timeOut: 0, extendedTimeOut: 0 });
|
||||||
const overlapSize = Math.round(chunkSize * overlapPercent / 100);
|
const overlapSize = Math.round(chunkSize * overlapPercent / 100);
|
||||||
const delimiters = getChunkDelimiters();
|
const delimiters = getChunkDelimiters();
|
||||||
// Overlap should not be included in chunk size. It will be later compensated by overlapChunks
|
// Overlap should not be included in chunk size. It will be later compensated by overlapChunks
|
||||||
@ -569,7 +574,12 @@ async function vectorizeFile(fileText, fileName, collectionId, chunkSize, overla
|
|||||||
console.debug(`Vectors: Split file ${fileName} into ${chunks.length} chunks with ${overlapPercent}% overlap`, chunks);
|
console.debug(`Vectors: Split file ${fileName} into ${chunks.length} chunks with ${overlapPercent}% overlap`, chunks);
|
||||||
|
|
||||||
const items = chunks.map((chunk, index) => ({ hash: getStringHash(chunk), text: chunk, index: index }));
|
const items = chunks.map((chunk, index) => ({ hash: getStringHash(chunk), text: chunk, index: index }));
|
||||||
await insertVectorItems(collectionId, items);
|
|
||||||
|
for (let i = 0; i < items.length; i += batchSize) {
|
||||||
|
toastBody.text(`${i}/${items.length} (${Math.round((i / items.length) * 100)}%) chunks processed`);
|
||||||
|
const chunkedBatch = items.slice(i, i + batchSize);
|
||||||
|
await insertVectorItems(collectionId, chunkedBatch);
|
||||||
|
}
|
||||||
|
|
||||||
toastr.clear(toast);
|
toastr.clear(toast);
|
||||||
console.log(`Vectors: Inserted ${chunks.length} vector items for file ${fileName} into ${collectionId}`);
|
console.log(`Vectors: Inserted ${chunks.length} vector items for file ${fileName} into ${collectionId}`);
|
||||||
|
Reference in New Issue
Block a user