Merge branch 'staging' into qr-rewrite

LenAnderson
2023-12-22 13:56:04 +00:00
42 changed files with 3098 additions and 1309 deletions

View File

@@ -4,6 +4,7 @@ import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParams }
import { getMessageTimeStamp } from '../../RossAscends-mods.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getMultimodalCaption } from '../shared.js';
import { textgen_types, textgenerationwebui_settings } from '../../textgen-settings.js';
export { MODULE_NAME };
const MODULE_NAME = 'caption';
@@ -134,7 +135,7 @@ async function doCaptionRequest(base64Img, fileData) {
case 'horde':
return await captionHorde(base64Img);
case 'multimodal':
return await captionMultimodal(extension_settings.caption.multimodal_api === 'google' ? base64Img : fileData);
return await captionMultimodal(fileData);
default:
throw new Error('Unknown caption source.');
}
@@ -216,7 +217,16 @@ async function captionHorde(base64Img) {
* @returns {Promise<{caption: string}>} Generated caption
*/
async function captionMultimodal(base64Img) {
const prompt = extension_settings.caption.prompt || PROMPT_DEFAULT;
let prompt = extension_settings.caption.prompt || PROMPT_DEFAULT;
if (extension_settings.caption.prompt_ask) {
const customPrompt = await callPopup('<h3>Enter a comment or question:</h3>', 'input', prompt, { rows: 2 });
if (!customPrompt) {
throw new Error('User aborted the caption sending.');
}
prompt = String(customPrompt).trim();
}
const caption = await getMultimodalCaption(base64Img, prompt);
return { caption };
}
@@ -271,9 +281,12 @@ jQuery(function () {
$(sendButton).on('click', () => {
const hasCaptionModule =
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && secret_state[SECRET_KEYS.OPENAI]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') ||
extension_settings.caption.source === 'local' ||
extension_settings.caption.source === 'horde';
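
The availability check above is one long boolean chain; read as a lookup, it reduces to the sketch below. Purely illustrative, same module scope assumed; the helper name and shape are not part of this change.

// Illustrative helper mirroring the boolean chain above.
function isCaptionAvailable() {
    const { source, multimodal_api, allow_reverse_proxy } = extension_settings.caption;
    if (source === 'local' || source === 'horde') return true;
    if (source === 'extras') return modules.includes('caption');
    if (source !== 'multimodal') return false;
    switch (multimodal_api) {
        case 'openai': return !!(secret_state[SECRET_KEYS.OPENAI] || allow_reverse_proxy);
        case 'openrouter': return !!secret_state[SECRET_KEYS.OPENROUTER];
        case 'google': return !!secret_state[SECRET_KEYS.MAKERSUITE];
        case 'ollama': return !!textgenerationwebui_settings.server_urls[textgen_types.OLLAMA];
        case 'llamacpp': return !!textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
        case 'custom': return true;
        default: return false;
    }
}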
@@ -329,7 +342,7 @@ jQuery(function () {
<label for="caption_source">Source</label>
<select id="caption_source" class="text_pole">
<option value="local">Local</option>
<option value="multimodal">Multimodal (OpenAI / OpenRouter / Google)</option>
<option value="multimodal">Multimodal (OpenAI / llama / Google)</option>
<option value="extras">Extras</option>
<option value="horde">Horde</option>
</select>
@@ -337,9 +350,12 @@ jQuery(function () {
<div class="flex1 flex-container flexFlowColumn flexNoGap">
<label for="caption_multimodal_api">API</label>
<select id="caption_multimodal_api" class="flex1 text_pole">
<option value="llamacpp">llama.cpp</option>
<option value="ollama">Ollama</option>
<option value="openai">OpenAI</option>
<option value="openrouter">OpenRouter</option>
<option value="google">Google</option>
<option value="google">Google MakerSuite</option>
<option value="custom">Custom (OpenAI-compatible)</option>
</select>
</div>
<div class="flex1 flex-container flexFlowColumn flexNoGap">
@@ -349,16 +365,28 @@ jQuery(function () {
<option data-type="google" value="gemini-pro-vision">gemini-pro-vision</option>
<option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
<option data-type="ollama" value="ollama_current">[Currently selected]</option>
<option data-type="ollama" value="bakllava:latest">bakllava:latest</option>
<option data-type="ollama" value="llava:latest">llava:latest</option>
<option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option>
<option data-type="custom" value="custom_current">[Currently selected]</option>
</select>
</div>
<label data-type="openai" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
<input id="caption_allow_reverse_proxy" type="checkbox" class="checkbox">
Allow reverse proxy
</label>
<div class="flexBasis100p m-b-1">
<small><b>Hint:</b> Set your API keys and endpoints in the 'API Connections' tab first.</small>
</div>
</div>
<div id="caption_prompt_block">
<label for="caption_prompt">Caption Prompt</label>
<textarea id="caption_prompt" class="text_pole" rows="1" placeholder="&lt; Use default &gt;">${PROMPT_DEFAULT}</textarea>
<label class="checkbox_label margin-bot-10px" for="caption_prompt_ask" title="Ask for a custom prompt every time an image is captioned.">
<input id="caption_prompt_ask" type="checkbox" class="checkbox">
Ask every time
</label>
</div>
<label for="caption_template">Message Template <small>(use <code>{{caption}}</code> macro)</small></label>
<textarea id="caption_template" class="text_pole" rows="2" placeholder="&lt; Use default &gt;">${TEMPLATE_DEFAULT}</textarea>
@@ -382,6 +410,7 @@ jQuery(function () {
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
$('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy));
$('#caption_prompt_ask').prop('checked', !!(extension_settings.caption.prompt_ask));
$('#caption_source').val(extension_settings.caption.source);
$('#caption_prompt').val(extension_settings.caption.prompt);
$('#caption_template').val(extension_settings.caption.template);
@@ -403,4 +432,8 @@ jQuery(function () {
extension_settings.caption.allow_reverse_proxy = $('#caption_allow_reverse_proxy').prop('checked');
saveSettingsDebounced();
});
$('#caption_prompt_ask').on('input', () => {
extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked');
saveSettingsDebounced();
});
});

View File

@@ -992,8 +992,7 @@ async function getExpressionsList() {
}
const result = await resolveExpressionsList();
result.push(...extension_settings.expressions.custom);
return result;
return [...result, ...extension_settings.expressions.custom];
}
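
Returning a spread copy here presumably avoids mutating whatever array resolveExpressionsList() hands back; if that list is cached, push() would make custom expressions accumulate across calls. A minimal sketch of the distinction, with a stand-in resolver:

// Illustrative only; assumes resolveExpressionsList() returns a cached array.
const cachedList = ['admiration', 'anger'];
const resolveExpressionsList = async () => cachedList;

async function getMergedList(customExpressions) {
    const result = await resolveExpressionsList();
    // result.push(...customExpressions) would grow cachedList on every call;
    // spreading into a new array leaves the cache untouched.
    return [...result, ...customExpressions];
}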
async function setExpression(character, expression, force) {

View File

@@ -5,6 +5,7 @@ import { is_group_generating, selected_group } from '../../group-chats.js';
import { registerSlashCommand } from '../../slash-commands.js';
import { loadMovingUIState } from '../../power-user.js';
import { dragElement } from '../../RossAscends-mods.js';
import { getTextTokens, tokenizers } from '../../tokenizers.js';
export { MODULE_NAME };
const MODULE_NAME = '1_memory';
@@ -42,26 +43,6 @@ const defaultPrompt = '[Pause your roleplay. Summarize the most important facts
const defaultTemplate = '[Summary: {{summary}}]';
const defaultSettings = {
minLongMemory: 16,
maxLongMemory: 1024,
longMemoryLength: 128,
shortMemoryLength: 512,
minShortMemory: 128,
maxShortMemory: 1024,
shortMemoryStep: 16,
longMemoryStep: 8,
repetitionPenaltyStep: 0.05,
repetitionPenalty: 1.2,
maxRepetitionPenalty: 2.0,
minRepetitionPenalty: 1.0,
temperature: 1.0,
minTemperature: 0.1,
maxTemperature: 2.0,
temperatureStep: 0.05,
lengthPenalty: 1,
minLengthPenalty: -4,
maxLengthPenalty: 4,
lengthPenaltyStep: 0.1,
memoryFrozen: false,
SkipWIAN: false,
source: summary_sources.extras,
@@ -95,11 +76,6 @@ function loadSettings() {
}
$('#summary_source').val(extension_settings.memory.source).trigger('change');
$('#memory_long_length').val(extension_settings.memory.longMemoryLength).trigger('input');
$('#memory_short_length').val(extension_settings.memory.shortMemoryLength).trigger('input');
$('#memory_repetition_penalty').val(extension_settings.memory.repetitionPenalty).trigger('input');
$('#memory_temperature').val(extension_settings.memory.temperature).trigger('input');
$('#memory_length_penalty').val(extension_settings.memory.lengthPenalty).trigger('input');
$('#memory_frozen').prop('checked', extension_settings.memory.memoryFrozen).trigger('input');
$('#memory_skipWIAN').prop('checked', extension_settings.memory.SkipWIAN).trigger('input');
$('#memory_prompt').val(extension_settings.memory.prompt).trigger('input');
@@ -126,51 +102,6 @@ function switchSourceControls(value) {
});
}
function onMemoryShortInput() {
const value = $(this).val();
extension_settings.memory.shortMemoryLength = Number(value);
$('#memory_short_length_tokens').text(value);
saveSettingsDebounced();
// Don't let long buffer be bigger than short
if (extension_settings.memory.longMemoryLength > extension_settings.memory.shortMemoryLength) {
$('#memory_long_length').val(extension_settings.memory.shortMemoryLength).trigger('input');
}
}
function onMemoryLongInput() {
const value = $(this).val();
extension_settings.memory.longMemoryLength = Number(value);
$('#memory_long_length_tokens').text(value);
saveSettingsDebounced();
// Don't let long buffer be bigger than short
if (extension_settings.memory.longMemoryLength > extension_settings.memory.shortMemoryLength) {
$('#memory_short_length').val(extension_settings.memory.longMemoryLength).trigger('input');
}
}
function onMemoryRepetitionPenaltyInput() {
const value = $(this).val();
extension_settings.memory.repetitionPenalty = Number(value);
$('#memory_repetition_penalty_value').text(extension_settings.memory.repetitionPenalty.toFixed(2));
saveSettingsDebounced();
}
function onMemoryTemperatureInput() {
const value = $(this).val();
extension_settings.memory.temperature = Number(value);
$('#memory_temperature_value').text(extension_settings.memory.temperature.toFixed(2));
saveSettingsDebounced();
}
function onMemoryLengthPenaltyInput() {
const value = $(this).val();
extension_settings.memory.lengthPenalty = Number(value);
$('#memory_length_penalty_value').text(extension_settings.memory.lengthPenalty.toFixed(2));
saveSettingsDebounced();
}
function onMemoryFrozenInput() {
const value = Boolean($(this).prop('checked'));
extension_settings.memory.memoryFrozen = value;
@@ -444,33 +375,36 @@ async function summarizeChatExtras(context) {
const longMemory = getLatestMemoryFromChat(chat);
const reversedChat = chat.slice().reverse();
reversedChat.shift();
let memoryBuffer = [];
const memoryBuffer = [];
const CONTEXT_SIZE = 1024 - 64;
for (let mes of reversedChat) {
for (const message of reversedChat) {
// we reached the point of latest memory
if (longMemory && mes.extra && mes.extra.memory == longMemory) {
if (longMemory && message.extra && message.extra.memory == longMemory) {
break;
}
// don't care about system
if (mes.is_system) {
if (message.is_system) {
continue;
}
// determine the sender's name
const name = mes.is_user ? (context.name1 ?? 'You') : (mes.force_avatar ? mes.name : context.name2);
const entry = `${name}:\n${mes['mes']}`;
const entry = `${message.name}:\n${message.mes}`;
memoryBuffer.push(entry);
// check if token limit was reached
if (context.getTokenCount(getMemoryString()) >= extension_settings.memory.shortMemoryLength) {
const tokens = getTextTokens(tokenizers.GPT2, getMemoryString()).length;
if (tokens >= CONTEXT_SIZE) {
break;
}
}
const resultingString = getMemoryString();
const resultingTokens = getTextTokens(tokenizers.GPT2, resultingString).length;
if (context.getTokenCount(resultingString) < extension_settings.memory.shortMemoryLength) {
if (!resultingString || resultingTokens < CONTEXT_SIZE) {
console.debug('Not enough context to summarize');
return;
}
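
The buffer check now counts tokens locally with the GPT-2 tokenizer against a fixed 960-token budget (1024 - 64) instead of the removed shortMemoryLength setting. A minimal sketch of the check in isolation, with placeholder text standing in for getMemoryString():

// Sketch of the new budget check; the sample text is a placeholder.
const CONTEXT_SIZE = 1024 - 64; // fixed 960-token budget, replacing the removed slider value
const memoryString = 'Alice:\nHello there.\nBob:\nGeneral Kenobi.';
const tokens = getTextTokens(tokenizers.GPT2, memoryString).length;
if (tokens >= CONTEXT_SIZE) {
    // the buffer is large enough - stop collecting older messages
}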
@@ -488,13 +422,7 @@ async function summarizeChatExtras(context) {
},
body: JSON.stringify({
text: resultingString,
params: {
min_length: extension_settings.memory.longMemoryLength * 0, // testing how it behaves 0 min length
max_length: extension_settings.memory.longMemoryLength,
repetition_penalty: extension_settings.memory.repetitionPenalty,
temperature: extension_settings.memory.temperature,
length_penalty: extension_settings.memory.lengthPenalty,
},
params: {},
}),
});
@@ -623,11 +551,6 @@ function setupListeners() {
//setup shared listeners for popout and regular ext menu
$('#memory_restore').off('click').on('click', onMemoryRestoreClick);
$('#memory_contents').off('click').on('input', onMemoryContentInput);
$('#memory_long_length').off('click').on('input', onMemoryLongInput);
$('#memory_short_length').off('click').on('input', onMemoryShortInput);
$('#memory_repetition_penalty').off('click').on('input', onMemoryRepetitionPenaltyInput);
$('#memory_temperature').off('click').on('input', onMemoryTemperatureInput);
$('#memory_length_penalty').off('click').on('input', onMemoryLengthPenaltyInput);
$('#memory_frozen').off('click').on('input', onMemoryFrozenInput);
$('#memory_skipWIAN').off('click').on('input', onMemorySkipWIANInput);
$('#summary_source').off('click').on('change', onSummarySourceChange);
@@ -720,18 +643,6 @@ jQuery(function () {
<input id="memory_prompt_words_force" type="range" value="${defaultSettings.promptForceWords}" min="${defaultSettings.promptMinForceWords}" max="${defaultSettings.promptMaxForceWords}" step="${defaultSettings.promptForceWordsStep}" />
<small>If both sliders are non-zero, then both will trigger summary updates at their respective intervals.</small>
</div>
<div data-source="extras">
<label for="memory_short_length">Chat to Summarize buffer length (<span id="memory_short_length_tokens"></span> tokens)</label>
<input id="memory_short_length" type="range" value="${defaultSettings.shortMemoryLength}" min="${defaultSettings.minShortMemory}" max="${defaultSettings.maxShortMemory}" step="${defaultSettings.shortMemoryStep}" />
<label for="memory_long_length">Summary output length (<span id="memory_long_length_tokens"></span> tokens)</label>
<input id="memory_long_length" type="range" value="${defaultSettings.longMemoryLength}" min="${defaultSettings.minLongMemory}" max="${defaultSettings.maxLongMemory}" step="${defaultSettings.longMemoryStep}" />
<label for="memory_temperature">Temperature (<span id="memory_temperature_value"></span>)</label>
<input id="memory_temperature" type="range" value="${defaultSettings.temperature}" min="${defaultSettings.minTemperature}" max="${defaultSettings.maxTemperature}" step="${defaultSettings.temperatureStep}" />
<label for="memory_repetition_penalty">Repetition penalty (<span id="memory_repetition_penalty_value"></span>)</label>
<input id="memory_repetition_penalty" type="range" value="${defaultSettings.repetitionPenalty}" min="${defaultSettings.minRepetitionPenalty}" max="${defaultSettings.maxRepetitionPenalty}" step="${defaultSettings.repetitionPenaltyStep}" />
<label for="memory_length_penalty">Length preference <small>[higher = longer summaries]</small> (<span id="memory_length_penalty_value"></span>)</label>
<input id="memory_length_penalty" type="range" value="${defaultSettings.lengthPenalty}" min="${defaultSettings.minLengthPenalty}" max="${defaultSettings.maxLengthPenalty}" step="${defaultSettings.lengthPenaltyStep}" />
</div>
</div>
</div>
</div>

View File

@@ -2,6 +2,7 @@ import { getRequestHeaders } from '../../script.js';
import { extension_settings } from '../extensions.js';
import { oai_settings } from '../openai.js';
import { SECRET_KEYS, secret_state } from '../secrets.js';
import { textgen_types, textgenerationwebui_settings } from '../textgen-settings.js';
import { createThumbnail, isValidUrl } from '../utils.js';
/**
@@ -11,20 +12,19 @@ import { createThumbnail, isValidUrl } from '../utils.js';
* @returns {Promise<string>} Generated caption
*/
export async function getMultimodalCaption(base64Img, prompt) {
if (extension_settings.caption.multimodal_api === 'openai' && !secret_state[SECRET_KEYS.OPENAI]) {
throw new Error('OpenAI API key is not set.');
}
throwIfInvalidModel();
if (extension_settings.caption.multimodal_api === 'openrouter' && !secret_state[SECRET_KEYS.OPENROUTER]) {
throw new Error('OpenRouter API key is not set.');
}
const noPrefix = ['google', 'ollama', 'llamacpp'].includes(extension_settings.caption.multimodal_api);
if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE]) {
throw new Error('MakerSuite API key is not set.');
if (noPrefix && base64Img.startsWith('data:image/')) {
base64Img = base64Img.split(',')[1];
}
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
const isGoogle = extension_settings.caption.multimodal_api === 'google';
const isOllama = extension_settings.caption.multimodal_api === 'ollama';
const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp';
const isCustom = extension_settings.caption.multimodal_api === 'custom';
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if (['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) {
@@ -45,27 +45,91 @@ export async function getMultimodalCaption(base64Img, prompt) {
const proxyUrl = useReverseProxy ? oai_settings.reverse_proxy : '';
const proxyPassword = useReverseProxy ? oai_settings.proxy_password : '';
const apiResult = await fetch(`/api/${isGoogle ? 'google' : 'openai'}/caption-image`, {
const requestBody = {
image: base64Img,
prompt: prompt,
};
if (!isGoogle) {
requestBody.api = extension_settings.caption.multimodal_api || 'openai';
requestBody.model = extension_settings.caption.multimodal_model || 'gpt-4-vision-preview';
requestBody.reverse_proxy = proxyUrl;
requestBody.proxy_password = proxyPassword;
}
if (isOllama) {
if (extension_settings.caption.multimodal_model === 'ollama_current') {
requestBody.model = textgenerationwebui_settings.ollama_model;
}
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.OLLAMA];
}
if (isLlamaCpp) {
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
}
if (isCustom) {
requestBody.server_url = oai_settings.custom_url;
requestBody.model = oai_settings.custom_model || 'gpt-4-vision-preview';
requestBody.custom_include_headers = oai_settings.custom_include_headers;
requestBody.custom_include_body = oai_settings.custom_include_body;
requestBody.custom_exclude_body = oai_settings.custom_exclude_body;
}
function getEndpointUrl() {
switch (extension_settings.caption.multimodal_api) {
case 'google':
return '/api/google/caption-image';
case 'llamacpp':
return '/api/backends/text-completions/llamacpp/caption-image';
case 'ollama':
return '/api/backends/text-completions/ollama/caption-image';
default:
return '/api/openai/caption-image';
}
}
const apiResult = await fetch(getEndpointUrl(), {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
image: base64Img,
prompt: prompt,
...(isGoogle
? {}
: {
api: extension_settings.caption.multimodal_api || 'openai',
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
reverse_proxy: proxyUrl,
proxy_password: proxyPassword,
}),
}),
body: JSON.stringify(requestBody),
});
if (!apiResult.ok) {
throw new Error('Failed to caption image via OpenAI.');
throw new Error('Failed to caption image via Multimodal API.');
}
const { caption } = await apiResult.json();
return caption;
return String(caption).trim();
}
function throwIfInvalidModel() {
if (extension_settings.caption.multimodal_api === 'openai' && !secret_state[SECRET_KEYS.OPENAI]) {
throw new Error('OpenAI API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'openrouter' && !secret_state[SECRET_KEYS.OPENROUTER]) {
throw new Error('OpenRouter API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE]) {
throw new Error('MakerSuite API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'ollama' && !textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) {
throw new Error('Ollama server URL is not set.');
}
if (extension_settings.caption.multimodal_api === 'ollama' && extension_settings.caption.multimodal_model === 'ollama_current' && !textgenerationwebui_settings.ollama_model) {
throw new Error('Ollama model is not set.');
}
if (extension_settings.caption.multimodal_api === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) {
throw new Error('LlamaCPP server URL is not set.');
}
if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) {
throw new Error('Custom API URL is not set.');
}
}
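
A caller-side sketch of the updated getMultimodalCaption(), assuming the caption source is set to 'multimodal' and the selected backend's key or server URL is configured; the data URL and prompt are placeholders:

// Caller-side sketch; values below are placeholders.
import { getMultimodalCaption } from '../shared.js'; // path as used by the caption extension

const base64Img = 'data:image/png;base64,iVBORw0KGgo...'; // truncated placeholder
try {
    // throwIfInvalidModel() runs first, so a missing key or server URL surfaces as a thrown Error.
    const caption = await getMultimodalCaption(base64Img, 'Describe this image in detail.');
    console.log(caption); // trimmed caption string from the selected backend
} catch (error) {
    console.error('Captioning failed:', error);
}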

View File

@@ -12,6 +12,7 @@ import {
} from '../../../script.js';
import { extension_settings, getContext } from '../../extensions.js';
import { secret_state, writeSecret } from '../../secrets.js';
import { splitRecursive } from '../../utils.js';
export const autoModeOptions = {
NONE: 'none',
@@ -315,6 +316,28 @@ async function translateProviderBing(text, lang) {
throw new Error(response.statusText);
}
/**
* Splits text into chunks and translates each chunk separately
* @param {string} text Text to translate
* @param {string} lang Target language code
* @param {(text: string, lang: string) => Promise<string>} translateFn Function to translate a single chunk (must return a Promise)
* @param {number} chunkSize Maximum chunk size
* @returns {Promise<string>} Translated text
*/
async function chunkedTranslate(text, lang, translateFn, chunkSize = 5000) {
if (text.length <= chunkSize) {
return await translateFn(text, lang);
}
const chunks = splitRecursive(text, chunkSize);
let result = '';
for (const chunk of chunks) {
result += await translateFn(chunk, lang);
}
return result;
}
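
A usage sketch for chunkedTranslate() with a stub provider (the stub exists only for illustration): long input is split by splitRecursive() and the translated pieces are concatenated in order.

// Stub provider, for illustration only.
const fakeProvider = async (text, lang) => `[${lang}] ${text}`;

// Short input: a single provider call.
await chunkedTranslate('Hello there.', 'de', fakeProvider, 5000);

// Long input: splitRecursive() breaks it into pieces no longer than the chunk size,
// each piece is translated in order, and the results are concatenated.
const longText = 'Lorem ipsum dolor sit amet. '.repeat(200);
await chunkedTranslate(longText, 'de', fakeProvider, 1000);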
/**
* Translates text using the selected translation provider
* @param {string} text Text to translate
@@ -331,15 +354,15 @@ async function translate(text, lang) {
case 'libre':
return await translateProviderLibre(text, lang);
case 'google':
return await translateProviderGoogle(text, lang);
return await chunkedTranslate(text, lang, translateProviderGoogle, 5000);
case 'deepl':
return await translateProviderDeepl(text, lang);
case 'deeplx':
return await translateProviderDeepLX(text, lang);
return await chunkedTranslate(text, lang, translateProviderDeepLX, 1500);
case 'oneringtranslator':
return await translateProviderOneRing(text, lang);
case 'bing':
return await translateProviderBing(text, lang);
return await chunkedTranslate(text, lang, translateProviderBing, 1000);
default:
console.error('Unknown translation provider', extension_settings.translate.provider);
return text;