Compare commits

...

15 Commits

Author SHA1 Message Date
Juha Jeronen 3666af2c85
Merge 54c1c5912f into c52bdb9a4a 2024-05-17 17:59:32 +00:00
Cohee c52bdb9a4a Use new command names in examples 2024-05-17 20:59:00 +03:00
Cohee bbd9c89357 Add aliases for group member commands 2024-05-17 20:57:03 +03:00
Cohee fb2190ace1 #2254 Don't suppress abort in subcommands 2024-05-17 18:21:13 +03:00
Cohee deb09bf5bf Fix console errors on not found command autocomplete 2024-05-17 17:47:40 +03:00
Cohee d951beb626 #2260 Handle window resize in script editor 2024-05-17 17:47:18 +03:00
Juha Jeronen 54c1c5912f Merge remote-tracking branch 'upstream/staging' into vectors-client-improvements 2024-05-03 19:42:27 +03:00
Juha Jeronen 6874e361ae Merge branch 'staging' into vectors-client-improvements 2024-03-07 16:07:13 +02:00
Juha Jeronen 820dbc97a5 add a first draft of Science Mode
When enabled, this attempts to heuristically sanitize the input text,
and to strip the reference list.
2024-02-29 13:25:42 +02:00
Juha Jeronen 7c0b944d81 add some debug console messages to `processFiles` 2024-02-29 13:24:45 +02:00
Juha Jeronen e59a337303 Error message cosmetic fix: add a full-stop. 2024-02-29 13:24:11 +02:00
Juha Jeronen 4102705a8e add toast message when "Vectorize all" fails 2024-02-29 13:23:49 +02:00
Juha Jeronen 7432e00bb6 add toast message on completion of vectorization (both success/fail) 2024-02-29 13:23:05 +02:00
Juha Jeronen 82dbfa9ac5 vectors toast messages: have "Vector Storage" in the title
This makes explicit which part of SillyTavern those messages come from.
2024-02-29 13:22:10 +02:00
Juha Jeronen 7672b5260c remove blank line (consistency of formatting) 2024-02-29 13:20:37 +02:00
6 changed files with 126 additions and 24 deletions

View File

@ -342,6 +342,16 @@ export class QuickReply {
message.addEventListener('scroll', (evt)=>{
updateScrollDebounced();
});
/** @type {any} */
const resizeListener = debounce((evt) => {
updateSyntax();
updateScrollDebounced(evt);
if (document.activeElement == message) {
message.blur();
message.focus();
}
});
window.addEventListener('resize', resizeListener);
message.style.color = 'transparent';
message.style.background = 'transparent';
message.style.setProperty('text-shadow', 'none', 'important');
@ -514,6 +524,8 @@ export class QuickReply {
});
await popupResult;
window.removeEventListener('resize', resizeListener);
} else {
warn('failed to fetch qrEditor template');
}

View File

@ -55,6 +55,7 @@ const settings = {
// For files
enabled_files: false,
science_mode: false,
translate_files: false,
size_threshold: 10,
chunk_size: 5000,
@ -95,7 +96,7 @@ async function onVectorizeAllClick() {
const chatId = getCurrentChatId();
if (!chatId) {
toastr.info('No chat selected', 'Vectorization aborted');
toastr.info('No chat selected. Vectorization aborted.', 'Vector Storage');
return;
}
@ -108,7 +109,7 @@ async function onVectorizeAllClick() {
while (!finished) {
if (is_send_press) {
toastr.info('Message generation is in progress.', 'Vectorization aborted');
toastr.info('Message generation is in progress. Vectorization aborted.', 'Vector Storage');
throw new Error('Message generation is in progress.');
}
@ -135,6 +136,7 @@ async function onVectorizeAllClick() {
}
} catch (error) {
console.error('Vectors: Failed to vectorize all', error);
toastr.error(`Vectorize all failed. ${new String(error)}`, 'Vector Storage')
} finally {
$('#vectorize_progress').hide();
}
@ -274,14 +276,14 @@ async function synchronizeChat(batchSize = 5) {
case 'extras_module_missing':
return 'Extras API must provide an "embeddings" module.';
default:
return 'Check server console for more details';
return 'Check server console for more details.';
}
}
console.error('Vectors: Failed to synchronize chat', error);
const message = getErrorMessage(error.cause);
toastr.error(message, 'Vectorization failed', { preventDuplicates: true });
toastr.error(`Vectorization failed. ${message}`, 'Vector Storage', { preventDuplicates: true });
return -1;
} finally {
syncBlocked = false;
@ -357,6 +359,7 @@ async function processFiles(chat) {
if (!message?.extra?.file) {
continue;
}
console.debug(`Vectors: processFiles: message ${message.index}: has a file attachment, processing.`)
// Trim file inserted by the script
const fileText = String(message.mes)
@ -367,6 +370,7 @@ async function processFiles(chat) {
// File is too small
if (fileText.length < thresholdLength) {
console.debug(`Vectors: processFiles: message ${message.index}: text of file "${message.extra.file.name}" shorter than vectorization threshold (${fileText.length} < ${thresholdLength} chars), keeping inlined.`)
continue;
}
@ -379,11 +383,16 @@ async function processFiles(chat) {
// File is already in the collection
if (!hashesInCollection.length) {
console.debug(`Vectors: processFiles: message ${message.index}: file "${fileName}" not yet in collection, vectorizing.`)
await vectorizeFile(fileText, fileName, collectionId, settings.chunk_size);
} else {
console.debug(`Vectors: processFiles: message ${message.index}: file "${fileName}" found in collection.`)
}
console.debug(`Vectors: processFiles: message ${message.index}: querying vector DB.`)
const queryText = await getQueryText(chat);
const fileChunks = await retrieveFileChunks(queryText, collectionId);
console.debug(`Vectors: processFiles: message ${message.index}: retrieved ${fileChunks.length} chars.`);
message.mes = `${fileChunks}\n\n${message.mes}`;
}
@ -438,6 +447,62 @@ async function retrieveFileChunks(queryText, collectionId) {
return fileText;
}
/**
 * Sanitizes the text content of a scientific paper to obtain higher-quality text for vectorization.
 *
 * This is a really simplistic, classical regex-based algorithm. An LLM could likely do better, but that would be slow.
 * We hope to get a result that's not horribly broken and that won't include irrelevant RAG query poisoning stuff.
 *
 * Currently, we:
 *
 *   - F IX H EADINGS T HAT L OOK L IKE T HIS.
 *
 *     This is a rather common issue in text extraction from a PDF.
 *
 *   - Strip the reference list.
 *
 *     The reference list contains the highest concentration of keywords of any kind (in the titles of the cited studies),
 *     so it usually poisons RAG queries so that no matter what you search for, you'll only get chunks of the reference list.
 *     Omitting the reference list from the text to be vectorized, RAG will look for matches in the paper content only.
 *
 *     If an appendix heading is detected *after* the reference list, everything from that heading onward is kept
 *     and re-attached to the body, separated by a blank line. Mentions of appendices that occur before the
 *     reference list (e.g. "see APPENDIX A") are ignored when looking for the appendix heading.
 *
 * @param {string} fileText The text to sanitize
 * @returns {string} The sanitized text
 */
function sanitizeScientificInput(fileText) {
    const originalLength = fileText.length;

    // --- Fix section headings ---

    // "H EADING" -> "HEADING", skipping a fragment that directly follows a lone capital letter.
    // NOTE(review): in "C H EADING" this pass still joins the leading pair into "CH", so the appendix pass
    // below may never see such headings. Left as-is because "C H ..." cannot be told apart from a broken
    // two-letter word such as "O N" -> "ON" — confirm the intended behavior.
    const brokenUppercaseWordsFinder = /(?<!\b[A-Z]\s+)\b([A-Z])\s+([A-Z]+)\b/g;
    fileText = fileText.replaceAll(brokenUppercaseWordsFinder, '$1$2');

    // "C H EADING" -> "C HEADING" (lettered appendix section).
    const brokenAppendixHeadingFinder = /([A-Z])\s+([A-Z])\s+([A-Z]+)\b/g;
    fileText = fileText.replaceAll(brokenAppendixHeadingFinder, '$1 $2$3');

    // "H eading" on a line of its own -> "Heading".
    const brokenHeadingsFinder = /^\s*([A-Z])\s+([a-z]+)\s*$/mg;
    fileText = fileText.replaceAll(brokenHeadingsFinder, '$1$2');

    // --- Strip reference list (easier now that the headings are already fixed) ---
    //
    // Linefeeds are sometimes lost, so the references may begin in the middle of a line.
    // Since we can't trigger on any random mention of the word "References", we trigger in the middle of a line
    // only for an all-uppercase "REFERENCES".
    const referencesFinder = /(^\s*References\s*$|^\s*REFERENCES\s*$|\bREFERENCES\s*)/m;
    const referencesMatch = referencesFinder.exec(fileText);

    if (referencesMatch !== null) { // Detected a reference list
        const referencesStart = referencesMatch.index;
        const referencesHeadingEnd = referencesStart + referencesMatch[0].length;
        const body = fileText.substring(0, referencesStart).trim();

        // Some documents just start appendices like "A Some stuff..." without a heading, but there's not much we can do about that.
        // In those cases, we will simply ignore the appendices.
        const appendixFinder = /(^\s*Appendi(x|ces)\s*$|^\s*A\s*PPENDI(X|CES)\s*$|\bAPPENDI(X|CES)\s*)/mg;

        // Only an appendix heading located after the reference heading can mark the end of the reference list.
        // Using an earlier mention would duplicate body text and leave the references in place.
        const appendixMatch = [...fileText.matchAll(appendixFinder)]
            .find((match) => match.index >= referencesHeadingEnd);

        if (appendixMatch !== undefined) { // Detected both a reference list and trailing appendices
            const appendices = fileText.substring(appendixMatch.index).trimStart();
            // Keep a blank line between body and appendices so the two are not glued into a single line.
            fileText = `${body}\n\n${appendices}`;
        } else { // Detected only a reference list, no appendices after it
            fileText = body;
        }
    }

    // Log a size summary only; dumping the whole document would flood the console.
    console.debug(`Vectors: Science mode sanitized input from ${originalLength} to ${fileText.length} chars.`);
    return fileText;
}
/**
* Vectorizes a file and inserts it into the vector index.
* @param {string} fileText File text
@ -454,12 +519,19 @@ async function vectorizeFile(fileText, fileName, collectionId, chunkSize) {
fileText = translatedText;
}
const toast = toastr.info('Vectorization may take some time, please wait...', `Ingesting file ${fileName}`);
const chunks = splitRecursive(fileText, chunkSize);
const toast = toastr.info(`Ingesting file ${fileName}. Vectorization may take some time, please wait...`, 'Vector Storage');
if (settings.science_mode) {
console.debug(`Vectors: Science mode is enabled. Sanitizing input ${fileName}.`);
fileText = sanitizeScientificInput(fileText);
}
const chunks = splitRecursive(fileText, settings.chunk_size);
console.debug(`Vectors: Split file ${fileName} into ${chunks.length} chunks`, chunks);
const items = chunks.map((chunk, index) => ({ hash: getStringHash(chunk), text: chunk, index: index }));
await insertVectorItems(collectionId, items);
toastr.info(`Vectorization complete for ${fileName}.`, `Vector Storage`);
toastr.clear(toast);
console.log(`Vectors: Inserted ${chunks.length} vector items for file ${fileName} into ${collectionId}`);
@ -467,6 +539,7 @@ async function vectorizeFile(fileText, fileName, collectionId, chunkSize) {
} catch (error) {
toastr.error(String(error), 'Failed to vectorize file', { preventDuplicates: true });
console.error('Vectors: Failed to vectorize file', error);
toastr.error(`Vectorization failed for ${fileName}. ${new String(error)}`, 'Vector Storage');
return false;
}
}
@ -873,20 +946,20 @@ function toggleSettings() {
async function onPurgeClick() {
const chatId = getCurrentChatId();
if (!chatId) {
toastr.info('No chat selected', 'Purge aborted');
toastr.info('No chat selected. Purge aborted.', 'Vector Storage');
return;
}
if (await purgeVectorIndex(chatId)) {
toastr.success('Vector index purged', 'Purge successful');
toastr.success('Vector index purged successfully.', 'Vector Storage');
} else {
toastr.error('Failed to purge vector index', 'Purge failed');
toastr.error('Failed to purge vector index', 'Vector Storage');
}
}
async function onViewStatsClick() {
const chatId = getCurrentChatId();
if (!chatId) {
toastr.info('No chat selected');
toastr.info('No chat selected', 'Vector Storage');
return;
}
@ -1120,6 +1193,11 @@ jQuery(async () => {
saveSettingsDebounced();
toggleSettings();
});
$('#vectors_science_mode').prop('checked', settings.science_mode).on('input', () => {
settings.science_mode = $('#vectors_science_mode').prop('checked');
Object.assign(extension_settings.vectors, settings);
saveSettingsDebounced();
});
$('#vectors_source').val(settings.source).on('change', () => {
settings.source = String($('#vectors_source').val());
Object.assign(extension_settings.vectors, settings);

View File

@ -200,6 +200,10 @@
<input id="vectors_chunk_count_db" type="number" class="text_pole widthUnset" min="1" max="99999" />
</div>
</div>
<label class="checkbox_label" for="vectors_science_mode" title="Sanitize input text to improve retrieval quality for scientific paper inputs.">
<input id="vectors_science_mode" type="checkbox" class="checkbox">
Science mode
</label>
<div class="flex-container flexFlowColumn">
<label for="vectors_file_template_db">
<span>Injection Template</span>

View File

@ -447,8 +447,9 @@ SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'unhide',
],
helpString: 'Unhides a message from the prompt.',
}));
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'disable',
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'member-disable',
callback: disableGroupMemberCallback,
aliases: ['disable', 'disablemember', 'memberdisable'],
unnamedArgumentList: [
new SlashCommandArgument(
'member index or name', [ARGUMENT_TYPE.NUMBER, ARGUMENT_TYPE.STRING], true,
@ -456,7 +457,8 @@ SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'disable',
],
helpString: 'Disables a group member from being drafted for replies.',
}));
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'enable',
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'member-enable',
aliases: ['enable', 'enablemember', 'memberenable'],
callback: enableGroupMemberCallback,
unnamedArgumentList: [
new SlashCommandArgument(
@ -465,9 +467,9 @@ SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'enable',
],
helpString: 'Enables a group member to be drafted for replies.',
}));
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'memberadd',
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'member-add',
callback: addGroupMemberCallback,
aliases: ['addmember'],
aliases: ['addmember', 'memberadd'],
unnamedArgumentList: [
new SlashCommandArgument(
'character name', [ARGUMENT_TYPE.STRING], true,
@ -481,15 +483,15 @@ SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'memberadd',
<strong>Example:</strong>
<ul>
<li>
<pre><code>/memberadd John Doe</code></pre>
<pre><code>/member-add John Doe</code></pre>
</li>
</ul>
</div>
`,
}));
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'memberremove',
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'member-remove',
callback: removeGroupMemberCallback,
aliases: ['removemember'],
aliases: ['removemember', 'memberremove'],
unnamedArgumentList: [
new SlashCommandArgument(
'member index or name', [ARGUMENT_TYPE.NUMBER, ARGUMENT_TYPE.STRING], true,
@ -503,16 +505,16 @@ SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'memberremove
<strong>Example:</strong>
<ul>
<li>
<pre><code>/memberremove 2</code></pre>
<pre><code>/memberremove John Doe</code></pre>
<pre><code>/member-remove 2</code></pre>
<pre><code>/member-remove John Doe</code></pre>
</li>
</ul>
</div>
`,
}));
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'memberup',
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'member-up',
callback: moveGroupMemberUpCallback,
aliases: ['upmember'],
aliases: ['upmember', 'memberup'],
unnamedArgumentList: [
new SlashCommandArgument(
'member index or name', [ARGUMENT_TYPE.NUMBER, ARGUMENT_TYPE.STRING], true,
@ -520,9 +522,9 @@ SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'memberup',
],
helpString: 'Moves a group member up in the group chat list.',
}));
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'memberdown',
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'member-down',
callback: moveGroupMemberDownCallback,
aliases: ['downmember'],
aliases: ['downmember', 'memberdown'],
unnamedArgumentList: [
new SlashCommandArgument(
'member index or name', [ARGUMENT_TYPE.NUMBER, ARGUMENT_TYPE.STRING], true,

View File

@ -58,6 +58,9 @@ export class SlashCommandAutoCompleteNameResult extends AutoCompleteNameResult {
return new RegExp('=(.*)');
}
}
if (!Array.isArray(this.executor.command?.namedArgumentList)) {
return null;
}
const notProvidedNamedArguments = this.executor.command.namedArgumentList.filter(arg=>!this.executor.namedArgumentList.find(it=>it.name == arg.name));
let name;
let value;
@ -130,6 +133,9 @@ export class SlashCommandAutoCompleteNameResult extends AutoCompleteNameResult {
}
getUnnamedArgumentAt(text, index, isSelect) {
if (!Array.isArray(this.executor.command?.unnamedArgumentList)) {
return null;
}
const lastArgIsBlank = this.executor.unnamedArgumentList.slice(-1)[0]?.value == '';
const notProvidedArguments = this.executor.command.unnamedArgumentList.slice(this.executor.unnamedArgumentList.length - (lastArgIsBlank ? 1 : 0));
let value;

View File

@ -518,7 +518,7 @@ async function executeSubCommands(command, scope = null, parserFlags = null) {
command = command.slice(1, -1);
}
const result = await executeSlashCommands(command, true, scope, true, parserFlags);
const result = await executeSlashCommands(command, true, scope, false, parserFlags);
if (!result || typeof result !== 'object') {
return '';