mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add option to auto-adjust number of chroma messages to keep / query based on context size.
This commit is contained in:
@ -558,7 +558,7 @@ function getCurrentChatId() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const CHARACTERS_PER_TOKEN_RATIO = 3.35;
|
export const CHARACTERS_PER_TOKEN_RATIO = 3.35;
|
||||||
const talkativeness_default = 0.5;
|
const talkativeness_default = 0.5;
|
||||||
|
|
||||||
var is_advanced_char_open = false;
|
var is_advanced_char_open = false;
|
||||||
@ -1945,8 +1945,11 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
|||||||
coreChat.pop();
|
coreChat.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determine token limit
|
||||||
|
let this_max_context = getMaxContextSize();
|
||||||
|
|
||||||
if (extension_settings.chromadb.n_results !== 0) {
|
if (extension_settings.chromadb.n_results !== 0) {
|
||||||
await runGenerationInterceptors(coreChat);
|
await runGenerationInterceptors(coreChat, this_max_context);
|
||||||
console.log(`Core/all messages: ${coreChat.length}/${chat.length}`);
|
console.log(`Core/all messages: ${coreChat.length}/${chat.length}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1993,9 +1996,6 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
|||||||
chat2[i] = formatMessageHistoryItem(coreChat[j], isInstruct);
|
chat2[i] = formatMessageHistoryItem(coreChat[j], isInstruct);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine token limit
|
|
||||||
let this_max_context = getMaxContextSize();
|
|
||||||
|
|
||||||
// Adjust token limit for Horde
|
// Adjust token limit for Horde
|
||||||
let adjustedParams;
|
let adjustedParams;
|
||||||
if (main_api == 'koboldhorde' && (horde_settings.auto_adjust_context_length || horde_settings.auto_adjust_response_length)) {
|
if (main_api == 'koboldhorde' && (horde_settings.auto_adjust_context_length || horde_settings.auto_adjust_response_length)) {
|
||||||
|
@ -422,12 +422,12 @@ async function loadExtensionSettings(settings) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runGenerationInterceptors(chat) {
|
async function runGenerationInterceptors(chat, contextSize) {
|
||||||
for (const manifest of Object.values(manifests)) {
|
for (const manifest of Object.values(manifests)) {
|
||||||
const interceptorKey = manifest.generate_interceptor;
|
const interceptorKey = manifest.generate_interceptor;
|
||||||
if (typeof window[interceptorKey] === 'function') {
|
if (typeof window[interceptorKey] === 'function') {
|
||||||
try {
|
try {
|
||||||
await window[interceptorKey](chat);
|
await window[interceptorKey](chat, contextSize);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(`Failed running interceptor for ${manifest.display_name}`, e);
|
console.error(`Failed running interceptor for ${manifest.display_name}`, e);
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import { saveSettingsDebounced, getCurrentChatId, system_message_types, eventSource, event_types } from "../../../script.js";
|
import { saveSettingsDebounced, getCurrentChatId, system_message_types, eventSource, event_types, CHARACTERS_PER_TOKEN_RATIO } from "../../../script.js";
|
||||||
import { humanizedDateTime } from "../../RossAscends-mods.js";
|
import { humanizedDateTime } from "../../RossAscends-mods.js";
|
||||||
import { getApiUrl, extension_settings, getContext, doExtrasFetch } from "../../extensions.js";
|
import { getApiUrl, extension_settings, getContext, doExtrasFetch } from "../../extensions.js";
|
||||||
import { getFileText, onlyUnique, splitRecursive, IndexedDBStore } from "../../utils.js";
|
import { getFileText, onlyUnique, splitRecursive, IndexedDBStore } from "../../utils.js";
|
||||||
@ -29,6 +29,14 @@ const defaultSettings = {
|
|||||||
file_split_length_min: 512,
|
file_split_length_min: 512,
|
||||||
file_split_length_max: 4096,
|
file_split_length_max: 4096,
|
||||||
file_split_length_step: 128,
|
file_split_length_step: 128,
|
||||||
|
|
||||||
|
keep_context_proportion: 0.5,
|
||||||
|
keep_context_proportion_min: 0.0,
|
||||||
|
keep_context_proportion_max: 1.0,
|
||||||
|
keep_context_proportion_step: 0.05,
|
||||||
|
|
||||||
|
auto_adjust: true,
|
||||||
|
freeze: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
const postHeaders = {
|
const postHeaders = {
|
||||||
@ -92,6 +100,8 @@ async function loadSettings() {
|
|||||||
$('#chromadb_n_results').val(extension_settings.chromadb.n_results).trigger('input');
|
$('#chromadb_n_results').val(extension_settings.chromadb.n_results).trigger('input');
|
||||||
$('#chromadb_split_length').val(extension_settings.chromadb.split_length).trigger('input');
|
$('#chromadb_split_length').val(extension_settings.chromadb.split_length).trigger('input');
|
||||||
$('#chromadb_file_split_length').val(extension_settings.chromadb.file_split_length).trigger('input');
|
$('#chromadb_file_split_length').val(extension_settings.chromadb.file_split_length).trigger('input');
|
||||||
|
$('#chromadb_keep_context_proportion').val(extension_settings.chromadb.keep_context_proportion).trigger('input');
|
||||||
|
$('#chromadb_auto_adjust').prop('checked', extension_settings.chromadb.auto_adjust);
|
||||||
$('#chromadb_freeze').prop('checked', extension_settings.chromadb.freeze);
|
$('#chromadb_freeze').prop('checked', extension_settings.chromadb.freeze);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -390,7 +400,51 @@ async function onSelectInjectFile(e) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
window.chromadb_interceptGeneration = async (chat) => {
|
/**
 * Automatically adjusts the extension settings for the number of messages to keep and to query,
 * based on the chat history and a specified maximum context length.
 *
 * The mean message length (in characters) is converted to an approximate token count using
 * CHARACTERS_PER_TOKEN_RATIO, which gives an estimate of how many messages fit in the context.
 * That estimate is split between "keep" and "query" according to
 * extension_settings.chromadb.keep_context_proportion. The results are written to
 * extension_settings.chromadb.keep_context / n_results and mirrored into the sliders and labels.
 *
 * Settings are left untouched when no usable estimate can be made (empty chat, only empty
 * messages, or a non-numeric context size).
 *
 * @param {Array<{mes?: string}>} chat - Chat messages; only the length of each `mes` string is read.
 * @param {number} maxContext - Maximum context size, in tokens.
 * @returns {void}
 */
function doAutoAdjust(chat, maxContext) {
    console.debug('CHROMADB: Auto-adjusting sliders (messages: %o, maxContext: %o)', chat.length, maxContext);

    // Get mean message length; entries without a text body count as zero characters instead of throwing
    const totalCharacters = chat.reduce(
        (acc, cur) => acc + (cur && typeof cur.mes === 'string' ? cur.mes.length : 0),
        0,
    );
    const meanMessageLength = totalCharacters / chat.length;

    // An empty chat yields NaN (0 / 0); a chat of empty messages yields 0, which would
    // turn the division below into Infinity and push Infinity/NaN into the settings.
    if (!Number.isFinite(meanMessageLength) || meanMessageLength <= 0) {
        console.debug('CHROMADB: Mean message length is zero or NaN, aborting auto-adjust');
        return;
    }

    const contextSize = Number(maxContext);
    if (!Number.isFinite(contextSize)) {
        console.debug('CHROMADB: Maximum context size is not a finite number, aborting auto-adjust');
        return;
    }

    console.debug('CHROMADB: Mean message length (characters): %o', meanMessageLength);

    // Convert to number of "tokens" (always >= 1 here because the mean length is positive)
    const meanMessageLengthTokens = Math.ceil(meanMessageLength / CHARACTERS_PER_TOKEN_RATIO);
    console.debug('CHROMADB: Mean message length (tokens): %o', meanMessageLengthTokens);

    // Get number of messages in context
    const contextMessages = Math.max(1, Math.ceil(contextSize / meanMessageLengthTokens));

    // Round up to the next multiple of 10
    const contextMessagesRounded = Math.ceil(contextMessages / 10) * 10;
    console.debug('CHROMADB: Estimated context messages (rounded): %o', contextMessagesRounded);

    // The proportion may arrive as a string (slider value) or be missing; normalize it to [0, 1]
    const rawProportion = Number(extension_settings.chromadb.keep_context_proportion);
    const keepProportion = Number.isFinite(rawProportion)
        ? Math.min(1, Math.max(0, rawProportion))
        : defaultSettings.keep_context_proportion;

    // Messages to keep (proportional, rounded up to a multiple of 5, minimum 10, capped at the slider maximum)
    const messagesToKeep = Math.min(
        defaultSettings.keep_context_max,
        Math.max(10, Math.ceil(contextMessagesRounded * keepProportion / 5) * 5),
    );
    console.debug('CHROMADB: Estimated messages to keep: %o', messagesToKeep);

    // Messages to query (the remainder, never negative, capped at the slider maximum)
    const messagesToQuery = Math.min(
        defaultSettings.n_results_max,
        Math.max(0, contextMessagesRounded - messagesToKeep),
    );
    console.debug('CHROMADB: Estimated messages to query: %o', messagesToQuery);

    // Set extension settings
    extension_settings.chromadb.keep_context = messagesToKeep;
    extension_settings.chromadb.n_results = messagesToQuery;

    // Update sliders
    $('#chromadb_keep_context').val(messagesToKeep);
    $('#chromadb_n_results').val(messagesToQuery);

    // Update labels
    $('#chromadb_keep_context_value').text(extension_settings.chromadb.keep_context);
    $('#chromadb_n_results_value').text(extension_settings.chromadb.n_results);
}
|
||||||
|
|
||||||
|
window.chromadb_interceptGeneration = async (chat, maxContext) => {
|
||||||
|
if (extension_settings.chromadb.auto_adjust) {
|
||||||
|
doAutoAdjust(chat, maxContext);
|
||||||
|
}
|
||||||
|
|
||||||
const currentChatId = getCurrentChatId();
|
const currentChatId = getCurrentChatId();
|
||||||
const selectedStrategy = extension_settings.chromadb.strategy;
|
const selectedStrategy = extension_settings.chromadb.strategy;
|
||||||
if (currentChatId) {
|
if (currentChatId) {
|
||||||
@ -452,6 +506,17 @@ function onFreezeInput() {
|
|||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Input handler for the auto-adjust checkbox.
 * Copies the checkbox's checked state into extension_settings.chromadb.auto_adjust
 * and then calls saveSettingsDebounced().
 */
function onAutoAdjustInput() {
    const autoAdjustEnabled = $('#chromadb_auto_adjust').is(':checked');
    extension_settings.chromadb.auto_adjust = autoAdjustEnabled;
    saveSettingsDebounced();
}
|
||||||
|
|
||||||
|
/**
 * Input handler for the "keep context proportion" slider.
 * Stores the slider value in extension_settings.chromadb.keep_context_proportion,
 * shows it as a whole percentage in the label, and calls saveSettingsDebounced().
 */
function onKeepContextProportionInput() {
    // jQuery's .val() returns a string; convert it so the stored setting stays a number,
    // matching the numeric default in defaultSettings.
    const proportion = Number($('#chromadb_keep_context_proportion').val());

    // Ignore unusable values (e.g. the element is missing) rather than persisting NaN
    if (!Number.isFinite(proportion)) {
        return;
    }

    extension_settings.chromadb.keep_context_proportion = proportion;
    $('#chromadb_keep_context_proportion_value').text(Math.round(proportion * 100));
    saveSettingsDebounced();
}
|
||||||
|
|
||||||
jQuery(async () => {
|
jQuery(async () => {
|
||||||
const settingsHtml = `
|
const settingsHtml = `
|
||||||
<div class="chromadb_settings">
|
<div class="chromadb_settings">
|
||||||
@ -472,6 +537,8 @@ jQuery(async () => {
|
|||||||
<input id="chromadb_keep_context" type="range" min="${defaultSettings.keep_context_min}" max="${defaultSettings.keep_context_max}" step="${defaultSettings.keep_context_step}" value="${defaultSettings.keep_context}" />
|
<input id="chromadb_keep_context" type="range" min="${defaultSettings.keep_context_min}" max="${defaultSettings.keep_context_max}" step="${defaultSettings.keep_context_step}" value="${defaultSettings.keep_context}" />
|
||||||
<label for="chromadb_n_results"><small>Maximum number of ChromaDB 'memories' to inject: (<span id="chromadb_n_results_value"></span>) messages</small></label>
|
<label for="chromadb_n_results"><small>Maximum number of ChromaDB 'memories' to inject: (<span id="chromadb_n_results_value"></span>) messages</small></label>
|
||||||
<input id="chromadb_n_results" type="range" min="${defaultSettings.n_results_min}" max="${defaultSettings.n_results_max}" step="${defaultSettings.n_results_step}" value="${defaultSettings.n_results}" />
|
<input id="chromadb_n_results" type="range" min="${defaultSettings.n_results_min}" max="${defaultSettings.n_results_max}" step="${defaultSettings.n_results_step}" value="${defaultSettings.n_results}" />
|
||||||
|
<label for="chromadb_keep_context_proportion"><small>Auto-adjust proportion of messages to keep / inject: (<span id="chromadb_keep_context_proportion_value"></span>% kept)</small></label>
|
||||||
|
<input id="chromadb_keep_context_proportion" type="range" min="${defaultSettings.keep_context_proportion_min}" max="${defaultSettings.keep_context_proportion_max}" step="${defaultSettings.keep_context_proportion_step}" value="${defaultSettings.keep_context_proportion}" />
|
||||||
<label for="chromadb_split_length"><small>Max length for each 'memory' pulled from the current chat history: (<span id="chromadb_split_length_value"></span>) characters</small></label>
|
<label for="chromadb_split_length"><small>Max length for each 'memory' pulled from the current chat history: (<span id="chromadb_split_length_value"></span>) characters</small></label>
|
||||||
<input id="chromadb_split_length" type="range" min="${defaultSettings.split_length_min}" max="${defaultSettings.split_length_max}" step="${defaultSettings.split_length_step}" value="${defaultSettings.split_length}" />
|
<input id="chromadb_split_length" type="range" min="${defaultSettings.split_length_min}" max="${defaultSettings.split_length_max}" step="${defaultSettings.split_length_step}" value="${defaultSettings.split_length}" />
|
||||||
<label for="chromadb_file_split_length"><small>Max length for each 'memory' pulled from imported text files: (<span id="chromadb_file_split_length_value"></span>) characters</small></label>
|
<label for="chromadb_file_split_length"><small>Max length for each 'memory' pulled from imported text files: (<span id="chromadb_file_split_length_value"></span>) characters</small></label>
|
||||||
@ -480,6 +547,10 @@ jQuery(async () => {
|
|||||||
<input type="checkbox" id="chromadb_freeze" />
|
<input type="checkbox" id="chromadb_freeze" />
|
||||||
<span>Freeze ChromaDB state</span>
|
<span>Freeze ChromaDB state</span>
|
||||||
</label>
|
</label>
|
||||||
|
<label class="checkbox_label" for="chromadb_auto_adjust" title="Automatically adjusts the number of messages to keep based on the average number of messages in the current chat and the chosen proportion.">
|
||||||
|
<input type="checkbox" id="chromadb_auto_adjust" />
|
||||||
|
<span>Auto-adjust number of messages to keep / query</span>
|
||||||
|
</label>
|
||||||
<div class="flex-container spaceEvenly">
|
<div class="flex-container spaceEvenly">
|
||||||
<div id="chromadb_inject" title="Upload custom textual data to use in the context of the current chat" class="menu_button">
|
<div id="chromadb_inject" title="Upload custom textual data to use in the context of the current chat" class="menu_button">
|
||||||
<i class="fa-solid fa-file-arrow-up"></i>
|
<i class="fa-solid fa-file-arrow-up"></i>
|
||||||
@ -517,6 +588,8 @@ jQuery(async () => {
|
|||||||
$('#chromadb_purge').on('click', onPurgeClick);
|
$('#chromadb_purge').on('click', onPurgeClick);
|
||||||
$('#chromadb_export').on('click', onExportClick);
|
$('#chromadb_export').on('click', onExportClick);
|
||||||
$('#chromadb_freeze').on('input', onFreezeInput);
|
$('#chromadb_freeze').on('input', onFreezeInput);
|
||||||
|
$('#chromadb_auto_adjust').on('input', onAutoAdjustInput);
|
||||||
|
$('#chromadb_keep_context_proportion').on('input', onKeepContextProportionInput);
|
||||||
await loadSettings();
|
await loadSettings();
|
||||||
|
|
||||||
// Not sure if this is needed, but it's here just in case
|
// Not sure if this is needed, but it's here just in case
|
||||||
|
Reference in New Issue
Block a user