mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add option to auto-adjust number of chroma messages to keep / query based on context size.
This commit is contained in:
@ -558,7 +558,7 @@ function getCurrentChatId() {
|
||||
}
|
||||
}
|
||||
|
||||
const CHARACTERS_PER_TOKEN_RATIO = 3.35;
|
||||
export const CHARACTERS_PER_TOKEN_RATIO = 3.35;
|
||||
const talkativeness_default = 0.5;
|
||||
|
||||
var is_advanced_char_open = false;
|
||||
@ -1945,8 +1945,11 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
||||
coreChat.pop();
|
||||
}
|
||||
|
||||
// Determine token limit
|
||||
let this_max_context = getMaxContextSize();
|
||||
|
||||
if (extension_settings.chromadb.n_results !== 0) {
|
||||
await runGenerationInterceptors(coreChat);
|
||||
await runGenerationInterceptors(coreChat, this_max_context);
|
||||
console.log(`Core/all messages: ${coreChat.length}/${chat.length}`);
|
||||
}
|
||||
|
||||
@ -1993,9 +1996,6 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
||||
chat2[i] = formatMessageHistoryItem(coreChat[j], isInstruct);
|
||||
}
|
||||
|
||||
// Determine token limit
|
||||
let this_max_context = getMaxContextSize();
|
||||
|
||||
// Adjust token limit for Horde
|
||||
let adjustedParams;
|
||||
if (main_api == 'koboldhorde' && (horde_settings.auto_adjust_context_length || horde_settings.auto_adjust_response_length)) {
|
||||
|
@ -422,12 +422,12 @@ async function loadExtensionSettings(settings) {
|
||||
}
|
||||
}
|
||||
|
||||
async function runGenerationInterceptors(chat) {
|
||||
async function runGenerationInterceptors(chat, contextSize) {
|
||||
for (const manifest of Object.values(manifests)) {
|
||||
const interceptorKey = manifest.generate_interceptor;
|
||||
if (typeof window[interceptorKey] === 'function') {
|
||||
try {
|
||||
await window[interceptorKey](chat);
|
||||
await window[interceptorKey](chat, contextSize);
|
||||
} catch (e) {
|
||||
console.error(`Failed running interceptor for ${manifest.display_name}`, e);
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { saveSettingsDebounced, getCurrentChatId, system_message_types, eventSource, event_types } from "../../../script.js";
|
||||
import { saveSettingsDebounced, getCurrentChatId, system_message_types, eventSource, event_types, CHARACTERS_PER_TOKEN_RATIO } from "../../../script.js";
|
||||
import { humanizedDateTime } from "../../RossAscends-mods.js";
|
||||
import { getApiUrl, extension_settings, getContext, doExtrasFetch } from "../../extensions.js";
|
||||
import { getFileText, onlyUnique, splitRecursive, IndexedDBStore } from "../../utils.js";
|
||||
@ -29,6 +29,14 @@ const defaultSettings = {
|
||||
file_split_length_min: 512,
|
||||
file_split_length_max: 4096,
|
||||
file_split_length_step: 128,
|
||||
|
||||
keep_context_proportion: 0.5,
|
||||
keep_context_proportion_min: 0.0,
|
||||
keep_context_proportion_max: 1.0,
|
||||
keep_context_proportion_step: 0.05,
|
||||
|
||||
auto_adjust: true,
|
||||
freeze: false,
|
||||
};
|
||||
|
||||
const postHeaders = {
|
||||
@ -92,6 +100,8 @@ async function loadSettings() {
|
||||
$('#chromadb_n_results').val(extension_settings.chromadb.n_results).trigger('input');
|
||||
$('#chromadb_split_length').val(extension_settings.chromadb.split_length).trigger('input');
|
||||
$('#chromadb_file_split_length').val(extension_settings.chromadb.file_split_length).trigger('input');
|
||||
$('#chromadb_keep_context_proportion').val(extension_settings.chromadb.keep_context_proportion).trigger('input');
|
||||
$('#chromadb_auto_adjust').prop('checked', extension_settings.chromadb.auto_adjust);
|
||||
$('#chromadb_freeze').prop('checked', extension_settings.chromadb.freeze);
|
||||
}
|
||||
|
||||
@ -390,7 +400,51 @@ async function onSelectInjectFile(e) {
|
||||
}
|
||||
}
|
||||
|
||||
window.chromadb_interceptGeneration = async (chat) => {
|
||||
/**
 * Automatically adjusts the extension settings for the number of messages to keep and to query,
 * based on the chat history and a specified maximum context length.
 *
 * The mean message length (in characters) is converted to an approximate token count using
 * CHARACTERS_PER_TOKEN_RATIO, which gives an estimate of how many messages fit into the context.
 * That estimate is split between "keep" and "query" according to
 * `extension_settings.chromadb.keep_context_proportion`. The results are written to
 * `extension_settings.chromadb.keep_context` / `n_results` and mirrored to the sliders and labels.
 *
 * The function leaves all settings untouched when the inputs do not allow a meaningful estimate
 * (empty chat, only empty messages, missing/invalid context size or proportion).
 *
 * @param {Array<{mes: string}>} chat - Chat messages; only the length of each `mes` string is read.
 * @param {number} maxContext - Maximum context size, in tokens.
 * @returns {void}
 */
function doAutoAdjust(chat, maxContext) {
    console.debug('CHROMADB: Auto-adjusting sliders (messages: %o, maxContext: %o)', chat.length, maxContext);

    // A caller that does not pass a context size would otherwise propagate NaN into the settings.
    const contextSize = Number(maxContext);
    if (!Number.isFinite(contextSize) || contextSize <= 0) {
        console.debug('CHROMADB: Invalid maximum context size, aborting auto-adjust');
        return;
    }

    // The proportion may have been stored as a string (slider value), so coerce before validating.
    const rawProportion = Number(extension_settings.chromadb.keep_context_proportion);
    if (Number.isNaN(rawProportion)) {
        console.debug('CHROMADB: Invalid keep proportion, aborting auto-adjust');
        return;
    }
    // Clamping to [0, 1] guarantees that the number of messages to query can never become negative.
    const keepProportion = Math.min(1, Math.max(0, rawProportion));

    // Get mean message length; messages without a text body count as empty instead of throwing.
    const totalLength = chat.reduce((acc, cur) => acc + (typeof cur.mes === 'string' ? cur.mes.length : 0), 0);
    const meanMessageLength = totalLength / chat.length;

    // Empty chat: 0 / 0.
    if (Number.isNaN(meanMessageLength)) {
        console.debug('CHROMADB: Mean message length is NaN, aborting auto-adjust');
        return;
    }

    // Only empty messages: dividing the context size by zero tokens would yield Infinity.
    if (meanMessageLength <= 0) {
        console.debug('CHROMADB: Mean message length is zero, aborting auto-adjust');
        return;
    }

    console.debug('CHROMADB: Mean message length (characters): %o', meanMessageLength);
    // Convert to number of "tokens"
    const meanMessageLengthTokens = Math.ceil(meanMessageLength / CHARACTERS_PER_TOKEN_RATIO);
    console.debug('CHROMADB: Mean message length (tokens): %o', meanMessageLengthTokens);
    // Get number of messages in context
    const contextMessages = Math.max(1, Math.ceil(contextSize / meanMessageLengthTokens));
    // Round up to nearest 10
    const contextMessagesRounded = Math.ceil(contextMessages / 10) * 10;
    console.debug('CHROMADB: Estimated context messages (rounded): %o', contextMessagesRounded);
    // Messages to keep (proportional, rounded up to a multiple of 5, minimum 10)
    const messagesToKeep = Math.max(10, Math.ceil(contextMessagesRounded * keepProportion / 5) * 5);
    console.debug('CHROMADB: Estimated messages to keep: %o', messagesToKeep);
    // Messages to query (the remainder of the estimate, never negative)
    const messagesToQuery = Math.max(0, contextMessagesRounded - messagesToKeep);
    console.debug('CHROMADB: Estimated messages to query: %o', messagesToQuery);

    // Set extension settings
    extension_settings.chromadb.keep_context = messagesToKeep;
    extension_settings.chromadb.n_results = messagesToQuery;
    // Update sliders
    $('#chromadb_keep_context').val(messagesToKeep);
    $('#chromadb_n_results').val(messagesToQuery);
    // Update labels
    $('#chromadb_keep_context_value').text(extension_settings.chromadb.keep_context);
    $('#chromadb_n_results_value').text(extension_settings.chromadb.n_results);
}
|
||||
|
||||
window.chromadb_interceptGeneration = async (chat, maxContext) => {
|
||||
if (extension_settings.chromadb.auto_adjust) {
|
||||
doAutoAdjust(chat, maxContext);
|
||||
}
|
||||
|
||||
const currentChatId = getCurrentChatId();
|
||||
const selectedStrategy = extension_settings.chromadb.strategy;
|
||||
if (currentChatId) {
|
||||
@ -452,6 +506,17 @@ function onFreezeInput() {
|
||||
saveSettingsDebounced();
|
||||
}
|
||||
|
||||
/**
 * Input handler for the "auto-adjust" checkbox: copies the checkbox state into
 * `extension_settings.chromadb.auto_adjust` and schedules a settings save.
 */
function onAutoAdjustInput() {
    const isChecked = $('#chromadb_auto_adjust').is(':checked');
    extension_settings.chromadb.auto_adjust = isChecked;
    saveSettingsDebounced();
}
|
||||
|
||||
/**
 * Input handler for the "keep context proportion" slider.
 *
 * Stores the slider value in `extension_settings.chromadb.keep_context_proportion`, shows it as a
 * whole-number percentage in the slider's label and schedules a settings save.
 */
function onKeepContextProportionInput() {
    // jQuery's .val() returns a string; convert it so the setting is persisted as a number,
    // matching the numeric default value.
    const proportion = Number($('#chromadb_keep_context_proportion').val());
    extension_settings.chromadb.keep_context_proportion = proportion;
    $('#chromadb_keep_context_proportion_value').text(Math.round(proportion * 100));
    saveSettingsDebounced();
}
|
||||
|
||||
jQuery(async () => {
|
||||
const settingsHtml = `
|
||||
<div class="chromadb_settings">
|
||||
@ -472,6 +537,8 @@ jQuery(async () => {
|
||||
<input id="chromadb_keep_context" type="range" min="${defaultSettings.keep_context_min}" max="${defaultSettings.keep_context_max}" step="${defaultSettings.keep_context_step}" value="${defaultSettings.keep_context}" />
|
||||
<label for="chromadb_n_results"><small>Maximum number of ChromaDB 'memories' to inject: (<span id="chromadb_n_results_value"></span>) messages</small></label>
|
||||
<input id="chromadb_n_results" type="range" min="${defaultSettings.n_results_min}" max="${defaultSettings.n_results_max}" step="${defaultSettings.n_results_step}" value="${defaultSettings.n_results}" />
|
||||
<label for="chromadb_keep_context_proportion"><small>Auto-adjust proportion of messages to keep / inject: (<span id="chromadb_keep_context_proportion_value"></span>% kept)</small></label>
|
||||
<input id="chromadb_keep_context_proportion" type="range" min="${defaultSettings.keep_context_proportion_min}" max="${defaultSettings.keep_context_proportion_max}" step="${defaultSettings.keep_context_proportion_step}" value="${defaultSettings.keep_context_proportion}" />
|
||||
<label for="chromadb_split_length"><small>Max length for each 'memory' pulled from the current chat history: (<span id="chromadb_split_length_value"></span>) characters</small></label>
|
||||
<input id="chromadb_split_length" type="range" min="${defaultSettings.split_length_min}" max="${defaultSettings.split_length_max}" step="${defaultSettings.split_length_step}" value="${defaultSettings.split_length}" />
|
||||
<label for="chromadb_file_split_length"><small>Max length for each 'memory' pulled from imported text files: (<span id="chromadb_file_split_length_value"></span>) characters</small></label>
|
||||
@ -480,6 +547,10 @@ jQuery(async () => {
|
||||
<input type="checkbox" id="chromadb_freeze" />
|
||||
<span>Freeze ChromaDB state</span>
|
||||
</label>
|
||||
<label class="checkbox_label" for="chromadb_auto_adjust" title="Automatically adjusts the number of messages to keep based on the average number of messages in the current chat and the chosen proportion.">
|
||||
<input type="checkbox" id="chromadb_auto_adjust" />
|
||||
<span>Auto-adjust number of messages to keep / query</span>
|
||||
</label>
|
||||
<div class="flex-container spaceEvenly">
|
||||
<div id="chromadb_inject" title="Upload custom textual data to use in the context of the current chat" class="menu_button">
|
||||
<i class="fa-solid fa-file-arrow-up"></i>
|
||||
@ -517,6 +588,8 @@ jQuery(async () => {
|
||||
$('#chromadb_purge').on('click', onPurgeClick);
|
||||
$('#chromadb_export').on('click', onExportClick);
|
||||
$('#chromadb_freeze').on('input', onFreezeInput);
|
||||
$('#chromadb_auto_adjust').on('input', onAutoAdjustInput);
|
||||
$('#chromadb_keep_context_proportion').on('input', onKeepContextProportionInput);
|
||||
await loadSettings();
|
||||
|
||||
// Not sure if this is needed, but it's here just in case
|
||||
|
Reference in New Issue
Block a user