mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add ability to temporarily override response length for /gen and /genraw
This commit is contained in:
203
public/script.js
203
public/script.js
@ -2372,21 +2372,31 @@ function getStoppingStrings(isImpersonate, isContinue) {
|
|||||||
* @param {boolean} skipWIAN whether to skip addition of World Info and Author's Note into the prompt
|
* @param {boolean} skipWIAN whether to skip addition of World Info and Author's Note into the prompt
|
||||||
* @param {string} quietImage Image to use for the quiet prompt
|
* @param {string} quietImage Image to use for the quiet prompt
|
||||||
* @param {string} quietName Name to use for the quiet prompt (defaults to "System:")
|
* @param {string} quietName Name to use for the quiet prompt (defaults to "System:")
|
||||||
|
* @param {number} [responseLength] Maximum response length. If unset, the global default value is used.
|
||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
export async function generateQuietPrompt(quiet_prompt, quietToLoud, skipWIAN, quietImage = null, quietName = null, responseLength = null) {
    console.log('got into genQuietPrompt');

    // Only a positive number counts as an override; any other value keeps the configured response length.
    const responseLengthCustomized = typeof responseLength === 'number' && responseLength > 0;

    // Capture the API once: main_api is mutable shared state and may change while the
    // generation is awaited. Saving and restoring must target the same API, otherwise the
    // saved value would be written into a different API's setting.
    const api = main_api;
    let originalResponseLength = -1;

    try {
        /** @type {GenerateOptions} */
        const options = {
            quiet_prompt,
            quietToLoud,
            skipWIAN,
            force_name2: true,
            quietImage,
            quietName,
        };

        if (responseLengthCustomized) {
            originalResponseLength = saveResponseLength(api, responseLength);
        }

        return await Generate('quiet', options);
    } finally {
        // Runs on success and on failure, so the temporary override never outlives this call.
        if (responseLengthCustomized) {
            restoreResponseLength(api, originalResponseLength);
        }
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2912,14 +2922,17 @@ class StreamingProcessor {
|
|||||||
* @param {boolean} instructOverride true to override instruct mode, false to use the default value
|
* @param {boolean} instructOverride true to override instruct mode, false to use the default value
|
||||||
* @param {boolean} quietToLoud true to generate a message in system mode, false to generate a message in character mode
|
* @param {boolean} quietToLoud true to generate a message in system mode, false to generate a message in character mode
|
||||||
* @param {string} [systemPrompt] System prompt to use. Only Instruct mode or OpenAI.
|
* @param {string} [systemPrompt] System prompt to use. Only Instruct mode or OpenAI.
|
||||||
|
* @param {number} [responseLength] Maximum response length. If unset, the global default value is used.
|
||||||
* @returns {Promise<string>} Generated message
|
* @returns {Promise<string>} Generated message
|
||||||
*/
|
*/
|
||||||
export async function generateRaw(prompt, api, instructOverride, quietToLoud, systemPrompt) {
|
export async function generateRaw(prompt, api, instructOverride, quietToLoud, systemPrompt, responseLength) {
|
||||||
if (!api) {
|
if (!api) {
|
||||||
api = main_api;
|
api = main_api;
|
||||||
}
|
}
|
||||||
|
|
||||||
const abortController = new AbortController();
|
const abortController = new AbortController();
|
||||||
|
const responseLengthCustomized = typeof responseLength === 'number' && responseLength > 0;
|
||||||
|
let originalResponseLength = -1;
|
||||||
const isInstruct = power_user.instruct.enabled && api !== 'openai' && api !== 'novel' && !instructOverride;
|
const isInstruct = power_user.instruct.enabled && api !== 'openai' && api !== 'novel' && !instructOverride;
|
||||||
const isQuiet = true;
|
const isQuiet = true;
|
||||||
|
|
||||||
@ -2934,70 +2947,109 @@ export async function generateRaw(prompt, api, instructOverride, quietToLoud, sy
|
|||||||
prompt = isInstruct ? formatInstructModeChat(name1, prompt, false, true, '', name1, name2, false) : prompt;
|
prompt = isInstruct ? formatInstructModeChat(name1, prompt, false, true, '', name1, name2, false) : prompt;
|
||||||
prompt = isInstruct ? (prompt + formatInstructModePrompt(name2, false, '', name1, name2, isQuiet, quietToLoud)) : (prompt + '\n');
|
prompt = isInstruct ? (prompt + formatInstructModePrompt(name2, false, '', name1, name2, isQuiet, quietToLoud)) : (prompt + '\n');
|
||||||
|
|
||||||
let generateData = {};
|
try {
|
||||||
|
originalResponseLength = responseLengthCustomized ? saveResponseLength(api, responseLength) : -1;
|
||||||
|
let generateData = {};
|
||||||
|
|
||||||
switch (api) {
|
switch (api) {
|
||||||
case 'kobold':
|
case 'kobold':
|
||||||
case 'koboldhorde':
|
case 'koboldhorde':
|
||||||
if (preset_settings === 'gui') {
|
if (preset_settings === 'gui') {
|
||||||
generateData = { prompt: prompt, gui_settings: true, max_length: amount_gen, max_context_length: max_context, api_server };
|
generateData = { prompt: prompt, gui_settings: true, max_length: amount_gen, max_context_length: max_context, api_server };
|
||||||
} else {
|
} else {
|
||||||
const isHorde = api === 'koboldhorde';
|
const isHorde = api === 'koboldhorde';
|
||||||
const koboldSettings = koboldai_settings[koboldai_setting_names[preset_settings]];
|
const koboldSettings = koboldai_settings[koboldai_setting_names[preset_settings]];
|
||||||
generateData = getKoboldGenerationData(prompt, koboldSettings, amount_gen, max_context, isHorde, 'quiet');
|
generateData = getKoboldGenerationData(prompt, koboldSettings, amount_gen, max_context, isHorde, 'quiet');
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'novel': {
|
||||||
|
const novelSettings = novelai_settings[novelai_setting_names[nai_settings.preset_settings_novel]];
|
||||||
|
generateData = getNovelGenerationData(prompt, novelSettings, amount_gen, false, false, null, 'quiet');
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
case 'textgenerationwebui':
|
||||||
case 'novel': {
|
generateData = getTextGenGenerationData(prompt, amount_gen, false, false, null, 'quiet');
|
||||||
const novelSettings = novelai_settings[novelai_setting_names[nai_settings.preset_settings_novel]];
|
break;
|
||||||
generateData = getNovelGenerationData(prompt, novelSettings, amount_gen, false, false, null, 'quiet');
|
case 'openai': {
|
||||||
break;
|
generateData = [{ role: 'user', content: prompt.trim() }];
|
||||||
|
if (systemPrompt) {
|
||||||
|
generateData.unshift({ role: 'system', content: systemPrompt.trim() });
|
||||||
|
}
|
||||||
|
} break;
|
||||||
}
|
}
|
||||||
case 'textgenerationwebui':
|
|
||||||
generateData = getTextGenGenerationData(prompt, amount_gen, false, false, null, 'quiet');
|
let data = {};
|
||||||
break;
|
|
||||||
case 'openai': {
|
if (api == 'koboldhorde') {
|
||||||
generateData = [{ role: 'user', content: prompt.trim() }];
|
data = await generateHorde(prompt, generateData, abortController.signal, false);
|
||||||
if (systemPrompt) {
|
} else if (api == 'openai') {
|
||||||
generateData.unshift({ role: 'system', content: systemPrompt.trim() });
|
data = await sendOpenAIRequest('quiet', generateData, abortController.signal);
|
||||||
|
} else {
|
||||||
|
const generateUrl = getGenerateUrl(api);
|
||||||
|
const response = await fetch(generateUrl, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: getRequestHeaders(),
|
||||||
|
cache: 'no-cache',
|
||||||
|
body: JSON.stringify(generateData),
|
||||||
|
signal: abortController.signal,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const error = await response.json();
|
||||||
|
throw error;
|
||||||
}
|
}
|
||||||
} break;
|
|
||||||
|
data = await response.json();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.error) {
|
||||||
|
throw new Error(data.error);
|
||||||
|
}
|
||||||
|
|
||||||
|
const message = cleanUpMessage(extractMessageFromData(data), false, false, true);
|
||||||
|
|
||||||
|
if (!message) {
|
||||||
|
throw new Error('No message generated');
|
||||||
|
}
|
||||||
|
|
||||||
|
return message;
|
||||||
|
} finally {
|
||||||
|
if (responseLengthCustomized) {
|
||||||
|
restoreResponseLength(api, originalResponseLength);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let data = {};
|
/**
 * Temporarily change the response length for the specified API.
 * Pair every call with restoreResponseLength(), passing the value returned here.
 * @param {string} api API to use.
 * @param {number} responseLength Target response length.
 * @returns {number} The original response length.
 */
function saveResponseLength(api, responseLength) {
    let oldValue = -1;

    if (api === 'openai') {
        oldValue = oai_settings.openai_max_tokens;
        oai_settings.openai_max_tokens = responseLength;
    } else {
        // For the other APIs the response length is amount_gen. max_context is the context
        // window size and must not be touched, or the override would shrink the prompt
        // budget instead of limiting the reply.
        oldValue = amount_gen;
        amount_gen = responseLength;
    }

    return oldValue;
}

/**
 * Restore the original response length for the specified API.
 * @param {string} api API to use. Must be the same API that was passed to saveResponseLength().
 * @param {number} responseLength Original response length, as returned by saveResponseLength().
 * @returns {void}
 */
function restoreResponseLength(api, responseLength) {
    if (api === 'openai') {
        oai_settings.openai_max_tokens = responseLength;
    } else {
        // Mirrors saveResponseLength(): the overridden setting is amount_gen, not max_context.
        amount_gen = responseLength;
    }
}
|
||||||
|
|
||||||
const message = cleanUpMessage(extractMessageFromData(data), false, false, true);
|
|
||||||
|
|
||||||
if (!message) {
|
|
||||||
throw new Error('No message generated');
|
|
||||||
}
|
|
||||||
|
|
||||||
return message;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -4390,10 +4442,19 @@ export async function sendMessageAsUser(messageText, messageBias, insertAt = nul
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getMaxContextSize() {
|
/**
|
||||||
|
* Gets the maximum usable context size for the current API.
|
||||||
|
* @param {number|null} overrideResponseLength Optional override for the response length.
|
||||||
|
* @returns {number} Maximum usable context size.
|
||||||
|
*/
|
||||||
|
export function getMaxContextSize(overrideResponseLength = null) {
|
||||||
|
if (typeof overrideResponseLength !== 'number' || overrideResponseLength <= 0 || isNaN(overrideResponseLength)) {
|
||||||
|
overrideResponseLength = null;
|
||||||
|
}
|
||||||
|
|
||||||
let this_max_context = 1487;
|
let this_max_context = 1487;
|
||||||
if (main_api == 'kobold' || main_api == 'koboldhorde' || main_api == 'textgenerationwebui') {
|
if (main_api == 'kobold' || main_api == 'koboldhorde' || main_api == 'textgenerationwebui') {
|
||||||
this_max_context = (max_context - amount_gen);
|
this_max_context = (max_context - (overrideResponseLength || amount_gen));
|
||||||
}
|
}
|
||||||
if (main_api == 'novel') {
|
if (main_api == 'novel') {
|
||||||
this_max_context = Number(max_context);
|
this_max_context = Number(max_context);
|
||||||
@ -4410,10 +4471,10 @@ export function getMaxContextSize() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this_max_context = this_max_context - amount_gen;
|
this_max_context = this_max_context - (overrideResponseLength || amount_gen);
|
||||||
}
|
}
|
||||||
if (main_api == 'openai') {
|
if (main_api == 'openai') {
|
||||||
this_max_context = oai_settings.openai_max_context - oai_settings.openai_max_tokens;
|
this_max_context = oai_settings.openai_max_context - (overrideResponseLength || oai_settings.openai_max_tokens);
|
||||||
}
|
}
|
||||||
return this_max_context;
|
return this_max_context;
|
||||||
}
|
}
|
||||||
|
@ -230,8 +230,8 @@ parser.addCommand('peek', peekCallback, [], '<span class="monospace">(message in
|
|||||||
parser.addCommand('delswipe', deleteSwipeCallback, ['swipedel'], '<span class="monospace">(optional 1-based id)</span> – deletes a swipe from the last chat message. If swipe id not provided - deletes the current swipe.', true, true);
|
parser.addCommand('delswipe', deleteSwipeCallback, ['swipedel'], '<span class="monospace">(optional 1-based id)</span> – deletes a swipe from the last chat message. If swipe id not provided - deletes the current swipe.', true, true);
|
||||||
parser.addCommand('echo', echoCallback, [], '<span class="monospace">(title=string severity=info/warning/error/success [text])</span> – echoes the text to toast message. Useful for pipes debugging.', true, true);
|
parser.addCommand('echo', echoCallback, [], '<span class="monospace">(title=string severity=info/warning/error/success [text])</span> – echoes the text to toast message. Useful for pipes debugging.', true, true);
|
||||||
//parser.addCommand('#', (_, value) => '', [], ' – a comment, does nothing, e.g. <tt>/# the next three commands switch variables a and b</tt>', true, true);
|
//parser.addCommand('#', (_, value) => '', [], ' – a comment, does nothing, e.g. <tt>/# the next three commands switch variables a and b</tt>', true, true);
|
||||||
parser.addCommand('gen', generateCallback, [], '<span class="monospace">(lock=on/off name="System" [prompt])</span> – generates text using the provided prompt and passes it to the next command through the pipe, optionally locking user input while generating and allowing to configure the in-prompt name for instruct mode (default = "System"). "as" argument controls the role of the output prompt: system (default) or char.', true, true);
|
parser.addCommand('gen', generateCallback, [], '<span class="monospace">(lock=on/off name="System" length=123 [prompt])</span> – generates text using the provided prompt and passes it to the next command through the pipe, optionally locking user input while generating and allowing to configure the in-prompt name for instruct mode (default = "System"). "as" argument controls the role of the output prompt: system (default) or char. If "length" argument is provided as a number in tokens, allows to temporarily override an API response length.', true, true);
|
||||||
parser.addCommand('genraw', generateRawCallback, [], '<span class="monospace">(lock=on/off instruct=on/off stop=[] as=system/char system="system prompt" [prompt])</span> – generates text using the provided prompt and passes it to the next command through the pipe, optionally locking user input while generating. Does not include chat history or character card. Use instruct=off to skip instruct formatting, e.g. <tt>/genraw instruct=off Why is the sky blue?</tt>. Use stop=... with a JSON-serialized array to add one-time custom stop strings, e.g. <tt>/genraw stop=["\\n"] Say hi</tt>. "as" argument controls the role of the output prompt: system (default) or char. "system" argument adds an (optional) system prompt at the start.', true, true);
|
parser.addCommand('genraw', generateRawCallback, [], '<span class="monospace">(lock=on/off instruct=on/off stop=[] as=system/char system="system prompt" length=123 [prompt])</span> – generates text using the provided prompt and passes it to the next command through the pipe, optionally locking user input while generating. Does not include chat history or character card. Use instruct=off to skip instruct formatting, e.g. <tt>/genraw instruct=off Why is the sky blue?</tt>. Use stop=... with a JSON-serialized array to add one-time custom stop strings, e.g. <tt>/genraw stop=["\\n"] Say hi</tt>. "as" argument controls the role of the output prompt: system (default) or char. "system" argument adds an (optional) system prompt at the start. If "length" argument is provided as a number in tokens, allows to temporarily override an API response length.', true, true);
|
||||||
parser.addCommand('addswipe', addSwipeCallback, ['swipeadd'], '<span class="monospace">(text)</span> – adds a swipe to the last chat message.', true, true);
|
parser.addCommand('addswipe', addSwipeCallback, ['swipeadd'], '<span class="monospace">(text)</span> – adds a swipe to the last chat message.', true, true);
|
||||||
parser.addCommand('abort', abortCallback, [], ' – aborts the slash command batch execution', true, true);
|
parser.addCommand('abort', abortCallback, [], ' – aborts the slash command batch execution', true, true);
|
||||||
parser.addCommand('fuzzy', fuzzyCallback, [], 'list=["a","b","c"] threshold=0.4 (text to search) – performs a fuzzy match of each items of list within the text to search. If any item matches then its name is returned. If no item list matches the text to search then no value is returned. The optional threshold (default is 0.4) allows some control over the matching. A low value (min 0.0) means the match is very strict. At 1.0 (max) the match is very loose and probably matches anything. The returned value passes to the next command through the pipe.', true, true); parser.addCommand('pass', (_, arg) => arg, ['return'], '<span class="monospace">(text)</span> – passes the text to the next command through the pipe.', true, true);
|
parser.addCommand('fuzzy', fuzzyCallback, [], 'list=["a","b","c"] threshold=0.4 (text to search) – performs a fuzzy match of each items of list within the text to search. If any item matches then its name is returned. If no item list matches the text to search then no value is returned. The optional threshold (default is 0.4) allows some control over the matching. A low value (min 0.0) means the match is very strict. At 1.0 (max) the match is very loose and probably matches anything. The returned value passes to the next command through the pipe.', true, true); parser.addCommand('pass', (_, arg) => arg, ['return'], '<span class="monospace">(text)</span> – passes the text to the next command through the pipe.', true, true);
|
||||||
@ -662,6 +662,7 @@ async function generateRawCallback(args, value) {
|
|||||||
const as = args?.as || 'system';
|
const as = args?.as || 'system';
|
||||||
const quietToLoud = as === 'char';
|
const quietToLoud = as === 'char';
|
||||||
const systemPrompt = resolveVariable(args?.system) || '';
|
const systemPrompt = resolveVariable(args?.system) || '';
|
||||||
|
const length = Number(resolveVariable(args?.length) ?? 0) || 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (lock) {
|
if (lock) {
|
||||||
@ -669,7 +670,7 @@ async function generateRawCallback(args, value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
setEphemeralStopStrings(resolveVariable(args?.stop));
|
setEphemeralStopStrings(resolveVariable(args?.stop));
|
||||||
const result = await generateRaw(value, '', isFalseBoolean(args?.instruct), quietToLoud, systemPrompt);
|
const result = await generateRaw(value, '', isFalseBoolean(args?.instruct), quietToLoud, systemPrompt, length);
|
||||||
return result;
|
return result;
|
||||||
} finally {
|
} finally {
|
||||||
if (lock) {
|
if (lock) {
|
||||||
@ -690,6 +691,7 @@ async function generateCallback(args, value) {
|
|||||||
const lock = isTrueBoolean(args?.lock);
|
const lock = isTrueBoolean(args?.lock);
|
||||||
const as = args?.as || 'system';
|
const as = args?.as || 'system';
|
||||||
const quietToLoud = as === 'char';
|
const quietToLoud = as === 'char';
|
||||||
|
const length = Number(resolveVariable(args?.length) ?? 0) || 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (lock) {
|
if (lock) {
|
||||||
@ -698,7 +700,7 @@ async function generateCallback(args, value) {
|
|||||||
|
|
||||||
setEphemeralStopStrings(resolveVariable(args?.stop));
|
setEphemeralStopStrings(resolveVariable(args?.stop));
|
||||||
const name = args?.name;
|
const name = args?.name;
|
||||||
const result = await generateQuietPrompt(value, quietToLoud, false, '', name);
|
const result = await generateQuietPrompt(value, quietToLoud, false, '', name, length);
|
||||||
return result;
|
return result;
|
||||||
} finally {
|
} finally {
|
||||||
if (lock) {
|
if (lock) {
|
||||||
|
Reference in New Issue
Block a user