Add more Data Bank script commands

This commit is contained in:
Cohee 2024-05-30 14:49:57 +03:00
parent 6a832bdf2a
commit 2c911a3ea2
4 changed files with 120 additions and 44 deletions

View File

@ -1279,23 +1279,28 @@ export function getDataBankAttachments(includeDisabled = false) {
}
/**
* Gets all attachments for a specific source. Includes disabled attachments.
* Gets all attachments for a specific source.
* @param {string} source Attachment source
* @param {boolean} [includeDisabled=true] If true, include disabled attachments
* @returns {FileAttachment[]} List of attachments
*/
export function getDataBankAttachmentsForSource(source) {
export function getDataBankAttachmentsForSource(source, includeDisabled = true) {
ensureAttachmentsExist();
switch (source) {
case ATTACHMENT_SOURCE.GLOBAL:
return extension_settings.attachments ?? [];
case ATTACHMENT_SOURCE.CHAT:
return chat_metadata.attachments ?? [];
case ATTACHMENT_SOURCE.CHARACTER:
return extension_settings.character_attachments?.[characters[this_chid]?.avatar] ?? [];
function getBySource() {
switch (source) {
case ATTACHMENT_SOURCE.GLOBAL:
return extension_settings.attachments ?? [];
case ATTACHMENT_SOURCE.CHAT:
return chat_metadata.attachments ?? [];
case ATTACHMENT_SOURCE.CHARACTER:
return extension_settings.character_attachments?.[characters[this_chid]?.avatar] ?? [];
}
return [];
}
return [];
return getBySource().filter(x => includeDisabled || !isAttachmentDisabled(x));
}
/**

View File

@ -138,7 +138,7 @@ jQuery(async () => {
name: 'db-list',
callback: listDataBankAttachments,
aliases: ['databank-list', 'data-bank-list'],
helpString: 'List attachments in the data bank as a JSON-serialized array. Optionally, provide the source of the attachments and the field to list by.',
helpString: 'List attachments in the Data Bank as a JSON-serialized array. Optionally, provide the source of the attachments and the field to list by.',
namedArgumentList: [
new SlashCommandNamedArgument('source', 'The source of the attachments.', ARGUMENT_TYPE.STRING, false, false, '', TYPES),
new SlashCommandNamedArgument('field', 'The field to list by.', ARGUMENT_TYPE.STRING, false, false, 'url', FIELDS),
@ -150,7 +150,7 @@ jQuery(async () => {
name: 'db-get',
callback: getDataBankText,
aliases: ['databank-get', 'data-bank-get'],
helpString: 'Get attachment text from the data bank. Either provide the name or URL of the attachment. Optionally, provide the source of the attachment.',
helpString: 'Get attachment text from the Data Bank. Either provide the name or URL of the attachment. Optionally, provide the source of the attachment.',
namedArgumentList: [
new SlashCommandNamedArgument('source', 'The source of the attachment.', ARGUMENT_TYPE.STRING, false, false, '', TYPES),
],
@ -164,7 +164,7 @@ jQuery(async () => {
name: 'db-add',
callback: uploadDataBankAttachment,
aliases: ['databank-add', 'data-bank-add'],
helpString: 'Add an attachment to the data bank. If name is not provided, it will be generated automatically. Returns the URL of the attachment.',
helpString: 'Add an attachment to the Data Bank. If name is not provided, it will be generated automatically. Returns the URL of the attachment.',
namedArgumentList: [
new SlashCommandNamedArgument('source', 'The source for the attachment.', ARGUMENT_TYPE.STRING, false, false, 'chat', TYPES),
new SlashCommandNamedArgument('name', 'The name of the attachment.', ARGUMENT_TYPE.STRING, false, false),
@ -179,7 +179,7 @@ jQuery(async () => {
name: 'db-update',
callback: updateDataBankAttachment,
aliases: ['databank-update', 'data-bank-update'],
helpString: 'Update an attachment in the data bank, preserving its name. Returns a new URL of the attachment.',
helpString: 'Update an attachment in the Data Bank, preserving its name. Returns a new URL of the attachment.',
namedArgumentList: [
new SlashCommandNamedArgument('source', 'The source for the attachment.', ARGUMENT_TYPE.STRING, false, false, 'chat', TYPES),
new SlashCommandNamedArgument('name', 'The name of the attachment.', ARGUMENT_TYPE.STRING, false, false),
@ -195,7 +195,7 @@ jQuery(async () => {
name: 'db-delete',
callback: deleteDataBankAttachment,
aliases: ['databank-delete', 'data-bank-delete'],
helpString: 'Delete an attachment from the data bank.',
helpString: 'Delete an attachment from the Data Bank.',
namedArgumentList: [
new SlashCommandNamedArgument('source', 'The source of the attachment.', ARGUMENT_TYPE.STRING, false, false, 'chat', TYPES),
],

View File

@ -21,11 +21,14 @@ import {
} from '../../extensions.js';
import { collapseNewlines } from '../../power-user.js';
import { SECRET_KEYS, secret_state, writeSecret } from '../../secrets.js';
import { getDataBankAttachments, getFileAttachment } from '../../chats.js';
import { getDataBankAttachments, getDataBankAttachmentsForSource, getFileAttachment } from '../../chats.js';
import { debounce, getStringHash as calculateHash, waitUntilCondition, onlyUnique, splitRecursive } from '../../utils.js';
import { debounce_timeout } from '../../constants.js';
import { getSortedEntries } from '../../world-info.js';
import { textgen_types, textgenerationwebui_settings } from '../../textgen-settings.js';
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
const MODULE_NAME = 'vectors';
@ -332,28 +335,7 @@ async function processFiles(chat) {
return;
}
const dataBank = getDataBankAttachments();
const dataBankCollectionIds = [];
for (const file of dataBank) {
const collectionId = getFileCollectionId(file.url);
const hashesInCollection = await getSavedHashes(collectionId);
dataBankCollectionIds.push(collectionId);
// File is already in the collection
if (hashesInCollection.length) {
continue;
}
// Download and process the file
file.text = await getFileAttachment(file.url);
console.log(`Vectors: Retrieved file ${file.name} from Data Bank`);
// Convert kilobytes to string length
const thresholdLength = settings.size_threshold_db * 1024;
// Use chunk size from settings if file is larger than threshold
const chunkSize = file.size > thresholdLength ? settings.chunk_size_db : -1;
await vectorizeFile(file.text, file.name, collectionId, chunkSize);
}
const dataBankCollectionIds = await ingestDataBankAttachments();
if (dataBankCollectionIds.length) {
const queryText = await getQueryText(chat);
@ -400,6 +382,39 @@ async function processFiles(chat) {
}
}
/**
 * Ensures that Data Bank attachments are ingested and inserted into the vector index.
 * Disabled attachments are excluded. An attachment whose collection already has saved hashes
 * is not downloaded or vectorized again, but its collection ID is still returned.
 * @param {string} [source] Optional source filter for Data Bank attachments. When omitted or empty, attachments from all sources are used.
 * @returns {Promise<string[]>} Collection IDs of all considered attachments, in attachment order
 */
async function ingestDataBankAttachments(source) {
    // Exclude disabled files
    const dataBank = source ? getDataBankAttachmentsForSource(source, false) : getDataBankAttachments(false);
    const dataBankCollectionIds = [];

    for (const file of dataBank) {
        const collectionId = getFileCollectionId(file.url);
        const hashesInCollection = await getSavedHashes(collectionId);
        dataBankCollectionIds.push(collectionId);

        // File is already in the collection
        if (hashesInCollection.length) {
            continue;
        }

        // Download the file into a local variable. The attachment objects can be the very same
        // references that live in the shared attachment lists (filtering does not copy them),
        // so the file text must not be stored on the attachment itself.
        const fileText = await getFileAttachment(file.url);
        console.log(`Vectors: Retrieved file ${file.name} from Data Bank`);
        // Convert kilobytes to string length
        const thresholdLength = settings.size_threshold_db * 1024;
        // Use chunk size from settings if file is larger than threshold
        const chunkSize = file.size > thresholdLength ? settings.chunk_size_db : -1;
        await vectorizeFile(fileText, file.name, collectionId, chunkSize);
    }

    return dataBankCollectionIds;
}
/**
* Inserts file chunks from the Data Bank into the prompt.
* @param {string} queryText Text to query
@ -408,7 +423,7 @@ async function processFiles(chat) {
*/
async function injectDataBankChunks(queryText, collectionIds) {
try {
const queryResults = await queryMultipleCollections(collectionIds, queryText, settings.chunk_count_db);
const queryResults = await queryMultipleCollections(collectionIds, queryText, settings.chunk_count_db, settings.score_threshold);
console.debug(`Vectors: Retrieved ${collectionIds.length} Data Bank collections`, queryResults);
let textResult = '';
@ -828,9 +843,10 @@ async function queryCollection(collectionId, searchText, topK) {
* @param {string[]} collectionIds - Collection IDs to query
* @param {string} searchText - Text to query
* @param {number} topK - Number of results to return
* @param {number} threshold - Score threshold
* @returns {Promise<Record<string, { hashes: number[], metadata: object[] }>>} - Results mapped to collection IDs
*/
async function queryMultipleCollections(collectionIds, searchText, topK) {
async function queryMultipleCollections(collectionIds, searchText, topK, threshold) {
const headers = getVectorHeaders();
const response = await fetch('/api/vector/query-multi', {
@ -841,7 +857,7 @@ async function queryMultipleCollections(collectionIds, searchText, topK) {
searchText: searchText,
topK: topK,
source: settings.source,
threshold: settings.score_threshold,
threshold: threshold ?? settings.score_threshold,
}),
});
@ -1125,7 +1141,7 @@ async function activateWorldInfo(chat) {
return;
}
const queryResults = await queryMultipleCollections(collectionIds, queryText, settings.max_entries);
const queryResults = await queryMultipleCollections(collectionIds, queryText, settings.max_entries, settings.score_threshold);
const activatedHashes = Object.values(queryResults).flatMap(x => x.hashes).filter(onlyUnique);
const activatedEntries = [];
@ -1396,4 +1412,59 @@ jQuery(async () => {
eventSource.on(event_types.CHAT_DELETED, purgeVectorIndex);
eventSource.on(event_types.GROUP_CHAT_DELETED, purgeVectorIndex);
eventSource.on(event_types.FILE_ATTACHMENT_DELETED, purgeFileVectorIndex);
// /db-ingest: run the Data Bank ingestion on demand
SlashCommandParser.addCommandObject(SlashCommand.fromProps({
    name: 'db-ingest',
    // No source filter is passed, so every enabled attachment is considered.
    // The collection IDs are discarded; the command resolves to an empty string.
    callback: () => ingestDataBankAttachments().then(() => ''),
    aliases: ['databank-ingest', 'data-bank-ingest'],
    helpString: 'Force the ingestion of all Data Bank attachments.',
}));
// /db-purge: drop the vector index of every Data Bank attachment
SlashCommandParser.addCommandObject(SlashCommand.fromProps({
    name: 'db-purge',
    callback: async () => {
        // Include disabled attachments as well: ingestion skips any file whose collection
        // already has saved hashes, so vectors left behind for a disabled file would be
        // reused as-is once that file is enabled again.
        const dataBank = getDataBankAttachments(true);

        // Purge one file at a time, in list order
        for (const file of dataBank) {
            await purgeFileVectorIndex(file.url);
        }

        return '';
    },
    aliases: ['databank-purge', 'data-bank-purge'],
    helpString: 'Purge the vector index for all Data Bank attachments.',
}));
// /db-search: vector similarity search over the Data Bank, returns matching file URLs
SlashCommandParser.addCommandObject(SlashCommand.fromProps({
    name: 'db-search',
    /**
     * @param {object} args Named arguments; `threshold` and `source` are read
     * @param {string} query Text to search by
     * @returns {Promise<string>} JSON-serialized array of attachment URLs
     */
    callback: async (args, query) => {
        // Use the global threshold when the argument is missing, blank or not a finite number.
        // (Number('') is 0 and Number('abc') is NaN; neither is a meaningful override.)
        const rawThreshold = String(args?.threshold ?? '').trim();
        const parsedThreshold = rawThreshold === '' ? NaN : Number(rawThreshold);
        const threshold = Number.isFinite(parsedThreshold) ? parsedThreshold : settings.score_threshold;

        const source = String(args?.source ?? '');
        // Same selection that the ingestion step uses: enabled attachments, optionally of one source
        const attachments = source ? getDataBankAttachmentsForSource(source, false) : getDataBankAttachments(false);
        const collectionIds = await ingestDataBankAttachments(source);

        // Nothing to search in: skip the query request entirely
        if (!collectionIds.length) {
            return JSON.stringify([]);
        }

        const queryResults = await queryMultipleCollections(collectionIds, String(query), settings.chunk_count_db, threshold);

        // Index attachment URLs by collection ID once instead of scanning the list per result.
        // The first attachment wins for a given ID, which is what a linear find() would return.
        const urlByCollectionId = new Map();
        for (const attachment of attachments) {
            const collectionId = getFileCollectionId(attachment.url);
            if (!urlByCollectionId.has(collectionId)) {
                urlByCollectionId.set(collectionId, attachment.url);
            }
        }

        // Map collection IDs to file URLs, dropping results without a known attachment
        const urls = Object
            .keys(queryResults)
            .filter(collectionId => urlByCollectionId.has(collectionId))
            .map(collectionId => urlByCollectionId.get(collectionId));

        return JSON.stringify(urls);
    },
    aliases: ['databank-search', 'data-bank-search'],
    helpString: 'Search the Data Bank for a specific query using vector similarity. Returns a list of file URLs with the most relevant content.',
    namedArgumentList: [
        new SlashCommandNamedArgument('threshold', 'Threshold for the similarity score. Uses the global config value if not set.', ARGUMENT_TYPE.NUMBER, false, false, ''),
        new SlashCommandNamedArgument('source', 'Optional filter for the attachments by source.', ARGUMENT_TYPE.STRING, false, false, '', ['global', 'character', 'chat']),
    ],
    unnamedArgumentList: [
        new SlashCommandArgument('Query to search by.', ARGUMENT_TYPE.STRING, true, false),
    ],
    returns: ARGUMENT_TYPE.LIST,
}));
});

View File

@ -21,7 +21,7 @@ export const ARGUMENT_TYPE = {
export class SlashCommandArgument {
/**
* Creates an unnamed argument from a poperties object.
* Creates an unnamed argument from a properties object.
* @param {Object} props
* @param {string} props.description description of the argument
* @param {ARGUMENT_TYPE|ARGUMENT_TYPE[]} props.typeList default: ARGUMENT_TYPE.STRING - list of accepted types (from ARGUMENT_TYPE)
@ -75,7 +75,7 @@ export class SlashCommandArgument {
export class SlashCommandNamedArgument extends SlashCommandArgument {
/**
* Creates an unnamed argument from a poperties object.
* Creates a named argument from a properties object.
* @param {Object} props
* @param {string} props.name the argument's name
* @param {string[]} [props.aliasList] list of aliases