diff --git a/default/content/presets/openai/Default.json b/default/content/presets/openai/Default.json index e2345c687..4e6076d5a 100644 --- a/default/content/presets/openai/Default.json +++ b/default/content/presets/openai/Default.json @@ -8,7 +8,7 @@ "openrouter_force_instruct": false, "openrouter_group_models": false, "openrouter_sort_models": "alphabetically", - "ai21_model": "j2-ultra", + "ai21_model": "jamba-1.5-large", "mistralai_model": "mistral-large-latest", "custom_model": "", "custom_url": "", @@ -19,7 +19,6 @@ "temperature": 1, "frequency_penalty": 0, "presence_penalty": 0, - "count_penalty": 0, "top_p": 1, "top_k": 0, "top_a": 0, @@ -233,8 +232,6 @@ "assistant_prefill": "", "assistant_impersonation": "", "human_sysprompt_message": "Let's get started. Please generate your response based on the information and instructions provided above.", - "use_ai21_tokenizer": false, - "use_google_tokenizer": false, "claude_use_sysprompt": false, "use_alt_scale": false, "squash_system_messages": false, diff --git a/default/content/settings.json b/default/content/settings.json index a8af5277c..d18229517 100644 --- a/default/content/settings.json +++ b/default/content/settings.json @@ -196,7 +196,15 @@ "enableLabMode": false, "enableZenSliders": false, "ui_mode": 1, - "forbid_external_media": true + "forbid_external_media": true, + "stscript": { + "parser": { + "flags": { + "1": true, + "2": true + } + } + } }, "extension_settings": { "apiUrl": "http://localhost:5100", @@ -452,7 +460,6 @@ "temp_openai": 1.0, "freq_pen_openai": 0, "pres_pen_openai": 0, - "count_pen": 0, "top_p_openai": 1, "top_k_openai": 0, "stream_openai": true, @@ -614,7 +621,7 @@ "wi_format": "{0}", "openai_model": "gpt-4-turbo", "claude_model": "claude-3-5-sonnet-20240620", - "ai21_model": "j2-ultra", + "ai21_model": "jamba-1.5-large", "windowai_model": "", "openrouter_model": "OR_Website", "jailbreak_system": true, @@ -625,7 +632,6 @@ "show_external_models": false, "proxy_password": "", "assistant_prefill": "", - "assistant_impersonation": "", - "use_ai21_tokenizer": false + "assistant_impersonation": "" } } diff --git a/public/index.html b/public/index.html index ee429bea6..7d91c7c2a 100644 --- a/public/index.html +++ b/public/index.html @@ -5,7 +5,7 @@ SillyTavern - + @@ -436,7 +436,7 @@ -
+
Frequency Penalty
@@ -449,7 +449,7 @@
-
+
Presence Penalty
@@ -462,20 +462,7 @@
-
-
- Count Penalty -
-
-
- -
-
- -
-
-
-
+
Top K
@@ -1291,6 +1278,28 @@
+ +
+

+ + +
+
+

+
+
+ Threshold + + +
+
+ Probability + + +
+
+
+

@@ -1484,11 +1493,11 @@

- Banned Tokens -
+ Banned Tokens/Strings +

- +
@@ -1791,22 +1800,6 @@

-
- -
- Use the appropriate tokenizer for Jurassic models, which is more efficient than GPT's. -
-
-
- -
- Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting. -
-
- + @@ -2791,10 +2784,12 @@

AI21 Model

@@ -2824,8 +2819,11 @@ + + + @@ -2955,6 +2953,8 @@ + + @@ -4012,7 +4012,7 @@ MovingUI  - +
@@ -4503,6 +4503,7 @@
- Also delete the current chat file - `, { + const result = await Popup.show.confirm(t`Start new chat?`, await renderTemplateAsync('newChatConfirm'), { onClose: () => deleteCurrentChat = !!$('#del_chat_checkbox').prop('checked'), }); if (!result) { @@ -10261,10 +10258,10 @@ jQuery(async function () { let deleteOnlySwipe = false; if (power_user.confirm_message_delete && fromSlashCommand !== true) { - const result = await callGenericPopup('Are you sure you want to delete this message?', POPUP_TYPE.CONFIRM, null, { - okButton: canDeleteSwipe ? 'Delete Swipe' : 'Delete Message', + const result = await callGenericPopup(t`Are you sure you want to delete this message?`, POPUP_TYPE.CONFIRM, null, { + okButton: canDeleteSwipe ? t`Delete Swipe` : t`Delete Message`, cancelButton: 'Cancel', - customButtons: canDeleteSwipe ? ['Delete Message'] : null, + customButtons: canDeleteSwipe ? [t`Delete Message`] : null, }); if (!result) { return; diff --git a/public/scripts/BulkEditOverlay.js b/public/scripts/BulkEditOverlay.js index 28517d32f..dce7e61ab 100644 --- a/public/scripts/BulkEditOverlay.js +++ b/public/scripts/BulkEditOverlay.js @@ -108,14 +108,12 @@ class CharacterContextMenu { * Delete one or more characters, * opens a popup. * - * @param {number} characterId + * @param {string|string[]} characterKey * @param {boolean} [deleteChats] * @returns {Promise} */ - static delete = async (characterId, deleteChats = false) => { - const character = CharacterContextMenu.#getCharacter(characterId); - - await deleteCharacter(character.avatar, { deleteChats: deleteChats }); + static delete = async (characterKey, deleteChats = false) => { + await deleteCharacter(characterKey, { deleteChats: deleteChats }); }; static #getCharacter = (characterId) => characters[characterId] ?? null; @@ -344,7 +342,7 @@ class BulkTagPopupHandler { const mutualTags = this.getMutualTags(); for (const characterId of this.characterIds) { - for(const tag of mutualTags) { + for (const tag of mutualTags) { removeTagFromMap(tag.id, characterId); } } @@ -599,8 +597,7 @@ class BulkEditOverlay { this.container.removeEventListener('mouseup', cancelHold); this.container.removeEventListener('touchend', cancelHold); - }, - BulkEditOverlay.longPressDelay); + }, BulkEditOverlay.longPressDelay); }; handleLongPressEnd = (event) => { @@ -847,11 +844,14 @@ class BulkEditOverlay { const deleteChats = document.getElementById('del_char_checkbox').checked ?? false; showLoader(); - toastr.info('We\'re deleting your characters, please wait...', 'Working on it'); - return Promise.allSettled(characterIds.map(async characterId => CharacterContextMenu.delete(characterId, deleteChats))) - .then(() => getCharacters()) + const toast = toastr.info('We\'re deleting your characters, please wait...', 'Working on it'); + const avatarList = characterIds.map(id => characters[id]?.avatar).filter(a => a); + return CharacterContextMenu.delete(avatarList, deleteChats) .then(() => this.browseState()) - .finally(() => hideLoader()); + .finally(() => { + toastr.clear(toast); + hideLoader(); + }); }); // At this moment the popup is already changed in the dom, but not yet closed/resolved. 
We build the avatar list here diff --git a/public/scripts/extensions/caption/settings.html b/public/scripts/extensions/caption/settings.html index b30db1f6c..afaa501e2 100644 --- a/public/scripts/extensions/caption/settings.html +++ b/public/scripts/extensions/caption/settings.html @@ -45,8 +45,11 @@ + + + diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index 9b4ce71ea..4d1affa91 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -421,7 +421,7 @@ function completeTtsJob() { async function tts(text, voiceId, char) { async function processResponse(response) { // RVC injection - if (extension_settings.rvc.enabled && typeof window['rvcVoiceConversion'] === 'function') + if (typeof window['rvcVoiceConversion'] === 'function' && extension_settings.rvc.enabled) response = await window['rvcVoiceConversion'](response, char, text); await addAudioJob(response, char); diff --git a/public/scripts/nai-settings.js b/public/scripts/nai-settings.js index edc69d70b..6b59033a8 100644 --- a/public/scripts/nai-settings.js +++ b/public/scripts/nai-settings.js @@ -78,6 +78,19 @@ export function getKayraMaxContextTokens() { return null; } +export function getKayraMaxResponseTokens() { + switch (novel_data?.tier) { + case 1: + return 100; + case 2: + return 100; + case 3: + return 150; + } + + return maximum_output_length; +} + export function getNovelTier() { return nai_tiers[novel_data?.tier] ?? 'no_connection'; } @@ -438,12 +451,14 @@ export function getNovelGenerationData(finalPrompt, settings, maxLength, isImper console.log(finalPrompt); } + const adjustedMaxLength = nai_settings.model_novel.includes('kayra') ? getKayraMaxResponseTokens() : maximum_output_length; + return { 'input': finalPrompt, 'model': nai_settings.model_novel, 'use_string': true, 'temperature': Number(nai_settings.temperature), - 'max_length': maxLength < maximum_output_length ? maxLength : maximum_output_length, + 'max_length': maxLength < adjustedMaxLength ? maxLength : adjustedMaxLength, 'min_length': Number(nai_settings.min_length), 'tail_free_sampling': Number(nai_settings.tail_free_sampling), 'repetition_penalty': Number(nai_settings.repetition_penalty), diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 0de095b64..913b9c06b 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -74,6 +74,7 @@ import { ARGUMENT_TYPE, SlashCommandArgument } from './slash-commands/SlashComma import { renderTemplateAsync } from './templates.js'; import { SlashCommandEnumValue } from './slash-commands/SlashCommandEnumValue.js'; import { Popup, POPUP_RESULT } from './popup.js'; +import { t } from './i18n.js'; export { openai_messages_count, @@ -132,13 +133,9 @@ const max_2mil = 2000 * 1000; const scale_max = 8191; const claude_max = 9000; // We have a proper tokenizer, so theoretically could be larger (up to 9k) const claude_100k_max = 99000; -let ai21_max = 9200; //can easily fit 9k gpt tokens because j2's tokenizer is efficient af -const unlocked_max = max_200k; +const unlocked_max = max_2mil; const oai_max_temp = 2.0; -const claude_max_temp = 1.0; //same as j2 -const j2_max_topk = 10.0; -const j2_max_freq = 5.0; -const j2_max_pres = 5.0; +const claude_max_temp = 1.0; const openrouter_website_model = 'OR_Website'; const openai_max_stop_strings = 4; @@ -218,25 +215,11 @@ const sensitiveFields = [ 'custom_include_headers', ]; -function getPrefixMap() { - return selected_group ? 
{ - assistant: '', - user: '', - system: 'OOC: ', - } - : { - assistant: '{{char}}:', - user: '{{user}}:', - system: '', - }; -} - const default_settings = { preset_settings_openai: 'Default', temp_openai: 1.0, freq_pen_openai: 0, pres_pen_openai: 0, - count_pen: 0.0, top_p_openai: 1.0, top_k_openai: 0, min_p_openai: 0, @@ -264,7 +247,7 @@ const default_settings = { openai_model: 'gpt-4-turbo', claude_model: 'claude-3-5-sonnet-20240620', google_model: 'gemini-1.5-pro', - ai21_model: 'j2-ultra', + ai21_model: 'jamba-1.5-large', mistralai_model: 'mistral-large-latest', cohere_model: 'command-r-plus', perplexity_model: 'llama-3.1-70b-instruct', @@ -294,8 +277,6 @@ const default_settings = { assistant_prefill: '', assistant_impersonation: '', human_sysprompt_message: default_claude_human_sysprompt_message, - use_ai21_tokenizer: false, - use_google_tokenizer: false, claude_use_sysprompt: false, use_makersuite_sysprompt: true, use_alt_scale: false, @@ -317,7 +298,6 @@ const oai_settings = { temp_openai: 1.0, freq_pen_openai: 0, pres_pen_openai: 0, - count_pen: 0.0, top_p_openai: 1.0, top_k_openai: 0, min_p_openai: 0, @@ -345,7 +325,7 @@ const oai_settings = { openai_model: 'gpt-4-turbo', claude_model: 'claude-3-5-sonnet-20240620', google_model: 'gemini-1.5-pro', - ai21_model: 'j2-ultra', + ai21_model: 'jamba-1.5-large', mistralai_model: 'mistral-large-latest', cohere_model: 'command-r-plus', perplexity_model: 'llama-3.1-70b-instruct', @@ -375,8 +355,6 @@ const oai_settings = { assistant_prefill: '', assistant_impersonation: '', human_sysprompt_message: default_claude_human_sysprompt_message, - use_ai21_tokenizer: false, - use_google_tokenizer: false, claude_use_sysprompt: false, use_makersuite_sysprompt: true, use_alt_scale: false, @@ -425,7 +403,7 @@ async function validateReverseProxy() { const rememberKey = `Proxy_SkipConfirm_${getStringHash(oai_settings.reverse_proxy)}`; const skipConfirm = localStorage.getItem(rememberKey) === 'true'; - const confirmation = skipConfirm || await Popup.show.confirm('Connecting To Proxy', `Are you sure you want to connect to the following proxy URL?${DOMPurify.sanitize(oai_settings.reverse_proxy)}`); + const confirmation = skipConfirm || await Popup.show.confirm(t`Connecting To Proxy`, await renderTemplateAsync('proxyConnectionWarning', { proxyURL: DOMPurify.sanitize(oai_settings.reverse_proxy) })); if (!confirmation) { toastr.error('Update or remove your reverse proxy settings.'); @@ -1148,7 +1126,6 @@ function preparePromptsForChatCompletion({ Scenario, charPersonality, name2, wor { role: 'system', content: charDescription, identifier: 'charDescription' }, { role: 'system', content: charPersonalityText, identifier: 'charPersonality' }, { role: 'system', content: scenarioText, identifier: 'scenario' }, - { role: 'system', content: personaDescription, identifier: 'personaDescription' }, // Unordered prompts without marker { role: 'system', content: impersonationPrompt, identifier: 'impersonate' }, { role: 'system', content: quietPrompt, identifier: 'quietPrompt' }, @@ -1395,6 +1372,11 @@ function tryParseStreamingError(response, decoded) { toastr.error(data.error.message || response.statusText, 'Chat Completion API'); throw new Error(data); } + + if (data.message) { + toastr.error(data.message, 'Chat Completion API'); + throw new Error(data); + } } catch { // No JSON. Do nothing. 
@@ -1802,7 +1784,6 @@ async function sendOpenAIRequest(type, messages, signal) { const isClaude = oai_settings.chat_completion_source == chat_completion_sources.CLAUDE; const isOpenRouter = oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER; const isScale = oai_settings.chat_completion_source == chat_completion_sources.SCALE; - const isAI21 = oai_settings.chat_completion_source == chat_completion_sources.AI21; const isGoogle = oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE; const isOAI = oai_settings.chat_completion_source == chat_completion_sources.OPENAI; const isMistral = oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI; @@ -1815,7 +1796,7 @@ async function sendOpenAIRequest(type, messages, signal) { const isQuiet = type === 'quiet'; const isImpersonate = type === 'impersonate'; const isContinue = type === 'continue'; - const stream = oai_settings.stream_openai && !isQuiet && !isScale && !isAI21 && !(isGoogle && oai_settings.google_model.includes('bison')); + const stream = oai_settings.stream_openai && !isQuiet && !isScale && !(isGoogle && oai_settings.google_model.includes('bison')); const useLogprobs = !!power_user.request_token_probabilities; const canMultiSwipe = oai_settings.n > 1 && !isContinue && !isImpersonate && !isQuiet && (isOAI || isCustom); @@ -1824,15 +1805,6 @@ async function sendOpenAIRequest(type, messages, signal) { replaceItemizedPromptText(messageId, messages); } - if (isAI21) { - const joinedMsgs = messages.reduce((acc, obj) => { - const prefix = getPrefixMap()[obj.role]; - return acc + (prefix ? (selected_group ? '\n' : prefix + ' ') : '') + obj.content + '\n'; - }, ''); - messages = substituteParams(joinedMsgs) + (isImpersonate ? `${name1}:` : `${name2}:`); - replaceItemizedPromptText(messageId, messages); - } - // If we're using the window.ai extension, use that instead // Doesn't support logit bias yet if (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI) { @@ -1931,12 +1903,6 @@ async function sendOpenAIRequest(type, messages, signal) { generate_data['use_makersuite_sysprompt'] = oai_settings.use_makersuite_sysprompt; } - if (isAI21) { - generate_data['top_k'] = Number(oai_settings.top_k_openai); - generate_data['count_pen'] = Number(oai_settings.count_pen); - generate_data['stop_tokens'] = [name1 + ':', substituteParams(oai_settings.new_chat_prompt), substituteParams(oai_settings.new_group_chat_prompt)]; - } - if (isMistral) { generate_data['safe_prompt'] = false; // already defaults to false, but just incase they change that in the future. } @@ -3008,7 +2974,6 @@ function loadOpenAISettings(data, settings) { oai_settings.temp_openai = settings.temp_openai ?? default_settings.temp_openai; oai_settings.freq_pen_openai = settings.freq_pen_openai ?? default_settings.freq_pen_openai; oai_settings.pres_pen_openai = settings.pres_pen_openai ?? default_settings.pres_pen_openai; - oai_settings.count_pen = settings.count_pen ?? default_settings.count_pen; oai_settings.top_p_openai = settings.top_p_openai ?? default_settings.top_p_openai; oai_settings.top_k_openai = settings.top_k_openai ?? default_settings.top_k_openai; oai_settings.top_a_openai = settings.top_a_openai ?? 
default_settings.top_a_openai; @@ -3080,10 +3045,12 @@ function loadOpenAISettings(data, settings) { oai_settings.names_behavior = character_names_behavior.COMPLETION; } + if (oai_settings.ai21_model.startsWith('j2-')) { + oai_settings.ai21_model = 'jamba-1.5-large'; + } + if (settings.wrap_in_quotes !== undefined) oai_settings.wrap_in_quotes = !!settings.wrap_in_quotes; if (settings.openai_model !== undefined) oai_settings.openai_model = settings.openai_model; - if (settings.use_ai21_tokenizer !== undefined) { oai_settings.use_ai21_tokenizer = !!settings.use_ai21_tokenizer; oai_settings.use_ai21_tokenizer ? ai21_max = 8191 : ai21_max = 9200; } - if (settings.use_google_tokenizer !== undefined) oai_settings.use_google_tokenizer = !!settings.use_google_tokenizer; if (settings.claude_use_sysprompt !== undefined) oai_settings.claude_use_sysprompt = !!settings.claude_use_sysprompt; if (settings.use_makersuite_sysprompt !== undefined) oai_settings.use_makersuite_sysprompt = !!settings.use_makersuite_sysprompt; if (settings.use_alt_scale !== undefined) { oai_settings.use_alt_scale = !!settings.use_alt_scale; updateScaleForm(); } @@ -3133,8 +3100,6 @@ function loadOpenAISettings(data, settings) { $('#jailbreak_system').prop('checked', oai_settings.jailbreak_system); $('#openai_show_external_models').prop('checked', oai_settings.show_external_models); $('#openai_external_category').toggle(oai_settings.show_external_models); - $('#use_ai21_tokenizer').prop('checked', oai_settings.use_ai21_tokenizer); - $('#use_google_tokenizer').prop('checked', oai_settings.use_google_tokenizer); $('#claude_use_sysprompt').prop('checked', oai_settings.claude_use_sysprompt); $('#use_makersuite_sysprompt').prop('checked', oai_settings.use_makersuite_sysprompt); $('#scale-alt').prop('checked', oai_settings.use_alt_scale); @@ -3170,9 +3135,6 @@ function loadOpenAISettings(data, settings) { $('#pres_pen_openai').val(oai_settings.pres_pen_openai); $('#pres_pen_counter_openai').val(Number(oai_settings.pres_pen_openai).toFixed(2)); - $('#count_pen').val(oai_settings.count_pen); - $('#count_pen_counter').val(Number(oai_settings.count_pen).toFixed(2)); - $('#top_p_openai').val(oai_settings.top_p_openai); $('#top_p_counter_openai').val(Number(oai_settings.top_p_openai).toFixed(2)); @@ -3392,7 +3354,6 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) { temperature: settings.temp_openai, frequency_penalty: settings.freq_pen_openai, presence_penalty: settings.pres_pen_openai, - count_penalty: settings.count_pen, top_p: settings.top_p_openai, top_k: settings.top_k_openai, top_a: settings.top_a_openai, @@ -3427,8 +3388,6 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) { assistant_prefill: settings.assistant_prefill, assistant_impersonation: settings.assistant_impersonation, human_sysprompt_message: settings.human_sysprompt_message, - use_ai21_tokenizer: settings.use_ai21_tokenizer, - use_google_tokenizer: settings.use_google_tokenizer, claude_use_sysprompt: settings.claude_use_sysprompt, use_makersuite_sysprompt: settings.use_makersuite_sysprompt, use_alt_scale: settings.use_alt_scale, @@ -3799,7 +3758,6 @@ function onSettingsPresetChange() { temperature: ['#temp_openai', 'temp_openai', false], frequency_penalty: ['#freq_pen_openai', 'freq_pen_openai', false], presence_penalty: ['#pres_pen_openai', 'pres_pen_openai', false], - count_penalty: ['#count_pen', 'count_pen', false], top_p: ['#top_p_openai', 'top_p_openai', false], top_k: ['#top_k_openai', 'top_k_openai', false], top_a: 
['#top_a_openai', 'top_a_openai', false], @@ -3856,8 +3814,6 @@ function onSettingsPresetChange() { assistant_prefill: ['#claude_assistant_prefill', 'assistant_prefill', false], assistant_impersonation: ['#claude_assistant_impersonation', 'assistant_impersonation', false], human_sysprompt_message: ['#claude_human_sysprompt_textarea', 'human_sysprompt_message', false], - use_ai21_tokenizer: ['#use_ai21_tokenizer', 'use_ai21_tokenizer', true], - use_google_tokenizer: ['#use_google_tokenizer', 'use_google_tokenizer', true], claude_use_sysprompt: ['#claude_use_sysprompt', 'claude_use_sysprompt', true], use_makersuite_sysprompt: ['#use_makersuite_sysprompt', 'use_makersuite_sysprompt', true], use_alt_scale: ['#use_alt_scale', 'use_alt_scale', true], @@ -4028,6 +3984,11 @@ async function onModelChange() { } if ($(this).is('#model_ai21_select')) { + if (value === '' || value.startsWith('j2-')) { + value = 'jamba-1.5-large'; + $('#model_ai21_select').val(value); + } + console.log('AI21 model changed to', value); oai_settings.ai21_model = value; } @@ -4230,7 +4191,7 @@ async function onModelChange() { else if (['command-light-nightly', 'command-nightly'].includes(oai_settings.cohere_model)) { $('#openai_max_context').attr('max', max_8k); } - else if (['command-r', 'command-r-plus'].includes(oai_settings.cohere_model)) { + else if (oai_settings.cohere_model.includes('command-r')) { $('#openai_max_context').attr('max', max_128k); } else if (['c4ai-aya-23'].includes(oai_settings.cohere_model)) { @@ -4305,33 +4266,13 @@ async function onModelChange() { if (oai_settings.chat_completion_source == chat_completion_sources.AI21) { if (oai_settings.max_context_unlocked) { $('#openai_max_context').attr('max', unlocked_max); - } else { - $('#openai_max_context').attr('max', ai21_max); + } else if (oai_settings.ai21_model.includes('jamba-1.5') || oai_settings.ai21_model.includes('jamba-instruct')) { + $('#openai_max_context').attr('max', max_256k); } - oai_settings.openai_max_context = Math.min(oai_settings.openai_max_context, Number($('#openai_max_context').attr('max'))); + oai_settings.openai_max_context = Math.min(Number($('#openai_max_context').attr('max')), oai_settings.openai_max_context); $('#openai_max_context').val(oai_settings.openai_max_context).trigger('input'); - - oai_settings.temp_openai = Math.min(claude_max_temp, oai_settings.temp_openai); - $('#temp_openai').attr('max', claude_max_temp).val(oai_settings.temp_openai).trigger('input'); - - oai_settings.freq_pen_openai = Math.min(j2_max_freq, oai_settings.freq_pen_openai < 0 ? 0 : oai_settings.freq_pen_openai); - $('#freq_pen_openai').attr('min', 0).attr('max', j2_max_freq).val(oai_settings.freq_pen_openai).trigger('input'); - - oai_settings.pres_pen_openai = Math.min(j2_max_pres, oai_settings.pres_pen_openai < 0 ? 
0 : oai_settings.pres_pen_openai); - $('#pres_pen_openai').attr('min', 0).attr('max', j2_max_pres).val(oai_settings.pres_pen_openai).trigger('input'); - - oai_settings.top_k_openai = Math.min(j2_max_topk, oai_settings.top_k_openai); - $('#top_k_openai').attr('max', j2_max_topk).val(oai_settings.top_k_openai).trigger('input'); - } else if (oai_settings.chat_completion_source != chat_completion_sources.AI21) { - oai_settings.freq_pen_openai = Math.min(2.0, oai_settings.freq_pen_openai); - $('#freq_pen_openai').attr('min', -2.0).attr('max', 2.0).val(oai_settings.freq_pen_openai).trigger('input'); - - oai_settings.pres_pen_openai = Math.min(2.0, oai_settings.pres_pen_openai); - $('#pres_pen_openai').attr('min', -2.0).attr('max', 2.0).val(oai_settings.pres_pen_openai).trigger('input'); - - oai_settings.top_k_openai = Math.min(200, oai_settings.top_k_openai); - $('#top_k_openai').attr('max', 200).val(oai_settings.top_k_openai).trigger('input'); + $('#temp_openai').attr('max', oai_max_temp).val(oai_settings.temp_openai).trigger('input'); } if (oai_settings.chat_completion_source == chat_completion_sources.CUSTOM) { @@ -4388,6 +4329,16 @@ async function onModelChange() { $('#temp_openai').attr('max', oai_max_temp).val(oai_settings.temp_openai).trigger('input'); } + if (oai_settings.chat_completion_source === chat_completion_sources.COHERE) { + oai_settings.pres_pen_openai = Math.min(Math.max(0, oai_settings.pres_pen_openai), 1); + $('#pres_pen_openai').attr('max', 1).attr('min', 0).val(oai_settings.pres_pen_openai).trigger('input'); + oai_settings.freq_pen_openai = Math.min(Math.max(0, oai_settings.freq_pen_openai), 1); + $('#freq_pen_openai').attr('max', 1).attr('min', 0).val(oai_settings.freq_pen_openai).trigger('input'); + } else { + $('#pres_pen_openai').attr('max', 2).attr('min', -2).val(oai_settings.pres_pen_openai).trigger('input'); + $('#freq_pen_openai').attr('max', 2).attr('min', -2).val(oai_settings.freq_pen_openai).trigger('input'); + } + $('#openai_max_context_counter').attr('max', Number($('#openai_max_context').attr('max'))); saveSettingsDebounced(); @@ -4673,17 +4624,17 @@ function toggleChatCompletionForms() { async function testApiConnection() { // Check if the previous request is still in progress if (is_send_press) { - toastr.info('Please wait for the previous request to complete.'); + toastr.info(t`Please wait for the previous request to complete.`); return; } try { const reply = await sendOpenAIRequest('quiet', [{ 'role': 'user', 'content': 'Hi' }]); console.log(reply); - toastr.success('API connection successful!'); + toastr.success(t`API connection successful!`); } catch (err) { - toastr.error('Could not get a reply from API. Check your connection settings / API key and try again.'); + toastr.error(t`Could not get a reply from API. 
Check your connection settings / API key and try again.`); } } @@ -4752,11 +4703,14 @@ export function isImageInliningSupported() { 'gemini-1.5-flash', 'gemini-1.5-flash-latest', 'gemini-1.5-flash-001', + 'gemini-1.5-flash-exp-0827', + 'gemini-1.5-flash-8b-exp-0827', 'gemini-1.0-pro-vision-latest', 'gemini-1.5-pro', 'gemini-1.5-pro-latest', 'gemini-1.5-pro-001', 'gemini-1.5-pro-exp-0801', + 'gemini-1.5-pro-exp-0827', 'gemini-pro-vision', 'claude-3', 'claude-3-5', @@ -4837,7 +4791,7 @@ function onProxyPresetChange() { if (selectedPreset) { setProxyPreset(selectedPreset.name, selectedPreset.url, selectedPreset.password); } else { - console.error(`Proxy preset "${value}" not found in proxies array.`); + console.error(t`Proxy preset '${value}' not found in proxies array.`); } saveSettingsDebounced(); } @@ -4849,7 +4803,7 @@ $('#save_proxy').on('click', async function () { setProxyPreset(presetName, reverseProxy, proxyPassword); saveSettingsDebounced(); - toastr.success('Proxy Saved'); + toastr.success(t`Proxy Saved`); if ($('#openai_proxy_preset').val() !== presetName) { const option = document.createElement('option'); option.text = presetName; @@ -4883,9 +4837,9 @@ $('#delete_proxy').on('click', async function () { saveSettingsDebounced(); $('#openai_proxy_preset').val(selected_proxy.name); - toastr.success('Proxy Deleted'); + toastr.success(t`Proxy Deleted`); } else { - toastr.error(`Could not find proxy with name "${presetName}"`); + toastr.error(t`Could not find proxy with name '${presetName}'`); } }); @@ -4953,12 +4907,6 @@ $(document).ready(async function () { saveSettingsDebounced(); }); - $('#count_pen').on('input', function () { - oai_settings.count_pen = Number($(this).val()); - $('#count_pen_counter').val(Number($(this).val()).toFixed(2)); - saveSettingsDebounced(); - }); - $('#top_p_openai').on('input', function () { oai_settings.top_p_openai = Number($(this).val()); $('#top_p_counter_openai').val(Number($(this).val()).toFixed(2)); @@ -5017,20 +4965,6 @@ $(document).ready(async function () { saveSettingsDebounced(); }); - $('#use_ai21_tokenizer').on('change', function () { - oai_settings.use_ai21_tokenizer = !!$('#use_ai21_tokenizer').prop('checked'); - oai_settings.use_ai21_tokenizer ? ai21_max = 8191 : ai21_max = 9200; - oai_settings.openai_max_context = Math.min(ai21_max, oai_settings.openai_max_context); - $('#openai_max_context').attr('max', ai21_max).val(oai_settings.openai_max_context).trigger('input'); - $('#openai_max_context_counter').attr('max', Number($('#openai_max_context').attr('max'))); - saveSettingsDebounced(); - }); - - $('#use_google_tokenizer').on('change', function () { - oai_settings.use_google_tokenizer = !!$('#use_google_tokenizer').prop('checked'); - saveSettingsDebounced(); - }); - $('#claude_use_sysprompt').on('change', function () { oai_settings.claude_use_sysprompt = !!$('#claude_use_sysprompt').prop('checked'); $('#claude_human_sysprompt_message_block').toggle(oai_settings.claude_use_sysprompt); diff --git a/public/scripts/power-user.js b/public/scripts/power-user.js index ac9476706..2fdb27958 100644 --- a/public/scripts/power-user.js +++ b/public/scripts/power-user.js @@ -107,6 +107,7 @@ export const persona_description_positions = { TOP_AN: 2, BOTTOM_AN: 3, AT_DEPTH: 4, + NONE: 9, }; let power_user = { diff --git a/public/scripts/templates/chatRename.html b/public/scripts/templates/chatRename.html new file mode 100644 index 000000000..2ffbd5654 --- /dev/null +++ b/public/scripts/templates/chatRename.html @@ -0,0 +1,6 @@ +

Enter the new name for the chat:

+ + !!Using an existing filename will produce an error!!
+ This will break the link between checkpoint chats.
+ No need to add '.jsonl' at the end.
+
\ No newline at end of file diff --git a/public/scripts/templates/newChatConfirm.html b/public/scripts/templates/newChatConfirm.html new file mode 100644 index 000000000..38436ba8f --- /dev/null +++ b/public/scripts/templates/newChatConfirm.html @@ -0,0 +1,4 @@ + \ No newline at end of file diff --git a/public/scripts/templates/proxyConnectionWarning.html b/public/scripts/templates/proxyConnectionWarning.html new file mode 100644 index 000000000..4218b5b98 --- /dev/null +++ b/public/scripts/templates/proxyConnectionWarning.html @@ -0,0 +1 @@ +Are you sure you want to connect to the following proxy URL?{{proxyURL}} \ No newline at end of file diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index 6b0b41fb4..161182381 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -188,6 +188,8 @@ const settings = { custom_model: '', bypass_status_check: false, openrouter_allow_fallbacks: true, + xtc_threshold: 0.1, + xtc_probability: 0, }; export let textgenerationwebui_banned_in_macros = []; @@ -263,6 +265,8 @@ export const setting_names = [ 'custom_model', 'bypass_status_check', 'openrouter_allow_fallbacks', + 'xtc_threshold', + 'xtc_probability', ]; const DYNATEMP_BLOCK = document.getElementById('dynatemp_block_ooba'); @@ -718,6 +722,8 @@ jQuery(function () { 'dry_multiplier_textgenerationwebui': 0, 'dry_base_textgenerationwebui': 1.75, 'dry_penalty_last_n_textgenerationwebui': 0, + 'xtc_threshold_textgenerationwebui': 0.1, + 'xtc_probability_textgenerationwebui': 0, }; for (const [id, value] of Object.entries(inputs)) { @@ -1156,6 +1162,8 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, 'api_server': getTextGenServer(), 'legacy_api': settings.legacy_api && (settings.type === OOBA || settings.type === APHRODITE), 'sampler_order': settings.type === textgen_types.KOBOLDCPP ? 
settings.sampler_order : undefined, + 'xtc_threshold': settings.xtc_threshold, + 'xtc_probability': settings.xtc_probability, }; const nonAphroditeParams = { 'rep_pen': settings.rep_pen, diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index e7a6dee7e..e06fab97b 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -27,6 +27,7 @@ export const tokenizers = { CLAUDE: 11, LLAMA3: 12, GEMMA: 13, + JAMBA: 14, BEST_MATCH: 99, }; @@ -36,6 +37,7 @@ export const SENTENCEPIECE_TOKENIZERS = [ tokenizers.YI, tokenizers.LLAMA3, tokenizers.GEMMA, + tokenizers.JAMBA, // uncomment when NovelAI releases Kayra and Clio weights, lol //tokenizers.NERD, //tokenizers.NERD2, @@ -98,6 +100,11 @@ const TOKENIZER_URLS = { decode: '/api/tokenizers/gemma/decode', count: '/api/tokenizers/gemma/encode', }, + [tokenizers.JAMBA]: { + encode: '/api/tokenizers/jamba/encode', + decode: '/api/tokenizers/jamba/decode', + count: '/api/tokenizers/jamba/encode', + }, [tokenizers.API_TEXTGENERATIONWEBUI]: { encode: '/api/tokenizers/remote/textgenerationwebui/encode', count: '/api/tokenizers/remote/textgenerationwebui/encode', @@ -164,7 +171,7 @@ export function getAvailableTokenizers() { tokenizerId: Number(tokenizerOption.value), tokenizerKey: Object.entries(tokenizers).find(([_, value]) => value === Number(tokenizerOption.value))[0].toLocaleLowerCase(), tokenizerName: tokenizerOption.text, - })) + })); } /** @@ -280,6 +287,12 @@ export function getTokenizerBestMatch(forApi) { if (model.includes('gemma')) { return tokenizers.GEMMA; } + if (model.includes('yi')) { + return tokenizers.YI; + } + if (model.includes('jamba')) { + return tokenizers.JAMBA; + } } return tokenizers.LLAMA; @@ -497,6 +510,7 @@ export function getTokenizerModel() { const mistralTokenizer = 'mistral'; const yiTokenizer = 'yi'; const gemmaTokenizer = 'gemma'; + const jambaTokenizer = 'jamba'; // Assuming no one would use it for different models.. right? if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) { @@ -562,12 +576,19 @@ export function getTokenizerModel() { else if (oai_settings.openrouter_model.includes('GPT-NeoXT')) { return gpt2Tokenizer; } + else if (oai_settings.openrouter_model.includes('jamba')) { + return jambaTokenizer; + } } if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) { return gemmaTokenizer; } + if (oai_settings.chat_completion_source == chat_completion_sources.AI21) { + return jambaTokenizer; + } + if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) { return claudeTokenizer; } @@ -626,16 +647,7 @@ export function getTokenizerModel() { * @deprecated Use countTokensOpenAIAsync instead. 
*/ export function countTokensOpenAI(messages, full = false) { - const shouldTokenizeAI21 = oai_settings.chat_completion_source === chat_completion_sources.AI21 && oai_settings.use_ai21_tokenizer; - const shouldTokenizeGoogle = oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE && oai_settings.use_google_tokenizer; - let tokenizerEndpoint = ''; - if (shouldTokenizeAI21) { - tokenizerEndpoint = '/api/tokenizers/ai21/count'; - } else if (shouldTokenizeGoogle) { - tokenizerEndpoint = `/api/tokenizers/google/count?model=${getTokenizerModel()}&reverse_proxy=${oai_settings.reverse_proxy}&proxy_password=${oai_settings.proxy_password}`; - } else { - tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`; - } + const tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`; const cacheObject = getTokenCacheObject(); if (!Array.isArray(messages)) { @@ -647,7 +659,7 @@ export function countTokensOpenAI(messages, full = false) { for (const message of messages) { const model = getTokenizerModel(); - if (model === 'claude' || shouldTokenizeAI21 || shouldTokenizeGoogle) { + if (model === 'claude') { full = true; } @@ -687,16 +699,7 @@ export function countTokensOpenAI(messages, full = false) { * @returns {Promise} Token count. */ export async function countTokensOpenAIAsync(messages, full = false) { - const shouldTokenizeAI21 = oai_settings.chat_completion_source === chat_completion_sources.AI21 && oai_settings.use_ai21_tokenizer; - const shouldTokenizeGoogle = oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE && oai_settings.use_google_tokenizer; - let tokenizerEndpoint = ''; - if (shouldTokenizeAI21) { - tokenizerEndpoint = '/api/tokenizers/ai21/count'; - } else if (shouldTokenizeGoogle) { - tokenizerEndpoint = `/api/tokenizers/google/count?model=${getTokenizerModel()}`; - } else { - tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`; - } + const tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`; const cacheObject = getTokenCacheObject(); if (!Array.isArray(messages)) { @@ -708,7 +711,7 @@ export async function countTokensOpenAIAsync(messages, full = false) { for (const message of messages) { const model = getTokenizerModel(); - if (model === 'claude' || shouldTokenizeAI21 || shouldTokenizeGoogle) { + if (model === 'claude') { full = true; } diff --git a/public/scripts/world-info.js b/public/scripts/world-info.js index 29cb62a1a..8fc8a19a2 100644 --- a/public/scripts/world-info.js +++ b/public/scripts/world-info.js @@ -4777,8 +4777,10 @@ jQuery(() => { world_info_min_activations = Number($(this).val()); $('#world_info_min_activations_counter').val(world_info_min_activations); - if (world_info_min_activations !== 0) { + if (world_info_min_activations !== 0 && world_info_max_recursion_steps !== 0) { $('#world_info_max_recursion_steps').val(0).trigger('input'); + flashHighlight($('#world_info_max_recursion_steps').parent()); // flash the other control to show it has changed + console.info('[WI] Max recursion steps set to 0, as min activations is set to', world_info_min_activations); } else { saveSettings(); } @@ -4840,8 +4842,10 @@ jQuery(() => { $('#world_info_max_recursion_steps').on('input', function () { world_info_max_recursion_steps = Number($(this).val()); $('#world_info_max_recursion_steps_counter').val(world_info_max_recursion_steps); - if (world_info_max_recursion_steps !== 0) { + if (world_info_max_recursion_steps !== 0 && 
world_info_min_activations !== 0) { $('#world_info_min_activations').val(0).trigger('input'); + flashHighlight($('#world_info_min_activations').parent()); // flash the other control to show it has changed + console.info('[WI] Min activations set to 0, as max recursion steps is set to', world_info_max_recursion_steps); } else { saveSettings(); } diff --git a/public/style.css b/public/style.css index f268b78bb..ee1c4d518 100644 --- a/public/style.css +++ b/public/style.css @@ -2421,7 +2421,6 @@ input[type="file"] { } #rm_print_characters_block .text_block { - height: 100%; width: 100%; opacity: 0.5; margin: 0 auto 1px auto; diff --git a/src/cohere-stream.js b/src/cohere-stream.js new file mode 100644 index 000000000..d59ecad5e --- /dev/null +++ b/src/cohere-stream.js @@ -0,0 +1,126 @@ +const DATA_PREFIX = 'data:'; + +/** + * Borrowed from Cohere SDK (MIT License) + * https://github.com/cohere-ai/cohere-typescript/blob/main/src/core/streaming-fetcher/Stream.ts + * Copyright (c) 2021 Cohere + */ +class CohereStream { + /** @type {ReadableStream} */ + stream; + /** @type {string} */ + prefix; + /** @type {string} */ + messageTerminator; + /** @type {string|undefined} */ + streamTerminator; + /** @type {AbortController} */ + controller = new AbortController(); + + constructor({ stream, eventShape }) { + this.stream = stream; + if (eventShape.type === 'sse') { + this.prefix = DATA_PREFIX; + this.messageTerminator = '\n'; + this.streamTerminator = eventShape.streamTerminator; + } else { + this.messageTerminator = eventShape.messageTerminator; + } + } + + async *iterMessages() { + const stream = readableStreamAsyncIterable(this.stream); + let buf = ''; + let prefixSeen = false; + let parsedAnyMessages = false; + for await (const chunk of stream) { + buf += this.decodeChunk(chunk); + + let terminatorIndex; + // Parse the chunk into as many messages as possible + while ((terminatorIndex = buf.indexOf(this.messageTerminator)) >= 0) { + // Extract the line from the buffer + let line = buf.slice(0, terminatorIndex + 1); + buf = buf.slice(terminatorIndex + 1); + + // Skip empty lines + if (line.length === 0) { + continue; + } + + // Skip the chunk until the prefix is found + if (!prefixSeen && this.prefix != null) { + const prefixIndex = line.indexOf(this.prefix); + if (prefixIndex === -1) { + continue; + } + prefixSeen = true; + line = line.slice(prefixIndex + this.prefix.length); + } + + // If the stream terminator is present, return + if (this.streamTerminator != null && line.includes(this.streamTerminator)) { + return; + } + + // Otherwise, yield message from the prefix to the terminator + const message = JSON.parse(line); + yield message; + prefixSeen = false; + parsedAnyMessages = true; + } + } + + if (!parsedAnyMessages && buf.length > 0) { + try { + yield JSON.parse(buf); + } catch (e) { + console.error('Error parsing message:', e); + } + } + } + + async *[Symbol.asyncIterator]() { + for await (const message of this.iterMessages()) { + yield message; + } + } + + decodeChunk(chunk) { + const decoder = new TextDecoder('utf8'); + return decoder.decode(chunk); + } +} + +function readableStreamAsyncIterable(stream) { + if (stream[Symbol.asyncIterator]) { + return stream; + } + + const reader = stream.getReader(); + return { + async next() { + try { + const result = await reader.read(); + if (result?.done) { + reader.releaseLock(); + } // release lock when stream becomes closed + return result; + } catch (e) { + reader.releaseLock(); // release lock when stream becomes errored + throw e; + } + }, + 
async return() { + const cancelPromise = reader.cancel(); + reader.releaseLock(); + await cancelPromise; + return { done: true, value: undefined }; + }, + [Symbol.asyncIterator]() { + return this; + }, + }; +} + +module.exports = CohereStream; diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 25d40d8a6..a7689fa1c 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -1,11 +1,11 @@ const express = require('express'); const fetch = require('node-fetch').default; -const Readable = require('stream').Readable; const { jsonParser } = require('../../express-common'); const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY, OPENROUTER_HEADERS } = require('../../constants'); const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util'); -const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt, convertCohereMessages, convertMistralMessages, convertCohereTools } = require('../../prompt-converters'); +const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt, convertCohereMessages, convertMistralMessages, convertCohereTools, convertAI21Messages } = require('../../prompt-converters'); +const CohereStream = require('../../cohere-stream'); const { readSecret, SECRET_KEYS } = require('../secrets'); const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers'); @@ -19,6 +19,7 @@ const API_GROQ = 'https://api.groq.com/openai/v1'; const API_MAKERSUITE = 'https://generativelanguage.googleapis.com'; const API_01AI = 'https://api.01.ai/v1'; const API_BLOCKENTROPY = 'https://api.blockentropy.ai/v1'; +const API_AI21 = 'https://api.ai21.com/studio/v1'; /** * Applies a post-processing step to the generated messages. @@ -40,52 +41,30 @@ function postProcessPrompt(messages, type, charName, userName) { /** * Ollama strikes back. Special boy #2's steaming routine. * Wrap this abomination into proper SSE stream, again. 
- * @param {import('node-fetch').Response} jsonStream JSON stream + * @param {Response} jsonStream JSON stream * @param {import('express').Request} request Express request * @param {import('express').Response} response Express response * @returns {Promise} Nothing valuable */ async function parseCohereStream(jsonStream, request, response) { try { - let partialData = ''; - jsonStream.body.on('data', (data) => { - const chunk = data.toString(); - partialData += chunk; - while (true) { - let json; - try { - json = JSON.parse(partialData); - } catch (e) { - break; - } - if (json.message) { - const message = json.message || 'Unknown error'; - const chunk = { error: { message: message } }; - response.write(`data: ${JSON.stringify(chunk)}\n\n`); - partialData = ''; - break; - } else if (json.event_type === 'text-generation') { - const text = json.text || ''; - const chunk = { choices: [{ text }] }; - response.write(`data: ${JSON.stringify(chunk)}\n\n`); - partialData = ''; - } else { - partialData = ''; - break; - } + const stream = new CohereStream({ stream: jsonStream.body, eventShape: { type: 'json', messageTerminator: '\n' } }); + + for await (const json of stream.iterMessages()) { + if (json.message) { + const message = json.message || 'Unknown error'; + const chunk = { error: { message: message } }; + response.write(`data: ${JSON.stringify(chunk)}\n\n`); + } else if (json.event_type === 'text-generation') { + const text = json.text || ''; + const chunk = { choices: [{ text }] }; + response.write(`data: ${JSON.stringify(chunk)}\n\n`); } - }); + } - request.socket.on('close', function () { - if (jsonStream.body instanceof Readable) jsonStream.body.destroy(); - response.end(); - }); - - jsonStream.body.on('end', () => { - console.log('Streaming request finished'); - response.write('data: [DONE]\n\n'); - response.end(); - }); + console.log('Streaming request finished'); + response.write('data: [DONE]\n\n'); + response.end(); } catch (error) { console.log('Error forwarding streaming response:', error); if (!response.headersSent) { @@ -233,7 +212,7 @@ async function sendScaleRequest(request, response) { if (!generateResponse.ok) { console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); + return response.status(500).send({ error: true }); } const generateResponseJson = await generateResponse.json(); @@ -413,6 +392,16 @@ async function sendAI21Request(request, response) { request.socket.on('close', function () { controller.abort(); }); + const convertedPrompt = convertAI21Messages(request.body.messages, request.body.char_name, request.body.user_name); + const body = { + messages: convertedPrompt, + model: request.body.model, + max_tokens: request.body.max_tokens, + temperature: request.body.temperature, + top_p: request.body.top_p, + stop: request.body.stop, + stream: request.body.stream, + }; const options = { method: 'POST', headers: { @@ -420,59 +409,35 @@ async function sendAI21Request(request, response) { 'content-type': 'application/json', Authorization: `Bearer ${readSecret(request.user.directories, SECRET_KEYS.AI21)}`, }, - body: JSON.stringify({ - numResults: 1, - maxTokens: request.body.max_tokens, - minTokens: 0, - temperature: request.body.temperature, - topP: request.body.top_p, - stopSequences: request.body.stop_tokens, - topKReturn: request.body.top_k, - frequencyPenalty: { - scale: request.body.frequency_penalty * 100, - 
applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - presencePenalty: { - scale: request.body.presence_penalty, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - countPenalty: { - scale: request.body.count_pen, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - prompt: request.body.messages, - }), + body: JSON.stringify(body), signal: controller.signal, }; - fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options) - .then(r => r.json()) - .then(r => { - if (r.completions === undefined) { - console.log(r); - } else { - console.log(r.completions[0].data.text); - } - const reply = { choices: [{ 'message': { 'content': r.completions?.[0]?.data?.text } }] }; - return response.send(reply); - }) - .catch(err => { - console.error(err); - return response.send({ error: true }); - }); + console.log('AI21 request:', body); + try { + const generateResponse = await fetch(API_AI21 + '/chat/completions', options); + if (request.body.stream) { + forwardFetchResponse(generateResponse, response); + } else { + if (!generateResponse.ok) { + const errorText = await generateResponse.text(); + console.log(`AI21 API returned error: ${generateResponse.status} ${generateResponse.statusText} ${errorText}`); + const errorJson = tryParse(errorText) ?? { error: true }; + return response.status(500).send(errorJson); + } + const generateResponseJson = await generateResponse.json(); + console.log('AI21 response:', generateResponseJson); + return response.send(generateResponseJson); + } + } catch (error) { + console.log('Error communicating with AI21 API: ', error); + if (!response.headersSent) { + response.send({ error: true }); + } else { + response.end(); + } + } } /** @@ -531,10 +496,10 @@ async function sendMistralAIRequest(request, response) { forwardFetchResponse(generateResponse, response); } else { if (!generateResponse.ok) { - console.log(`MistralAI API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - // a 401 unauthorized response breaks the frontend auth, so return a 500 instead. prob a better way of dealing with this. - // 401s are already handled by the streaming processor and dont pop up an error toast, that should probably be fixed too. - return response.status(generateResponse.status === 401 ? 500 : generateResponse.status).send({ error: true }); + const errorText = await generateResponse.text(); + console.log(`MistralAI API returned error: ${generateResponse.status} ${generateResponse.statusText} ${errorText}`); + const errorJson = tryParse(errorText) ?? 
{ error: true }; + return response.status(500).send(errorJson); } const generateResponseJson = await generateResponse.json(); console.log('MistralAI response:', generateResponseJson); @@ -607,6 +572,11 @@ async function sendCohereRequest(request, response) { search_queries_only: false, }; + const canDoSafetyMode = String(request.body.model).endsWith('08-2024'); + if (canDoSafetyMode) { + requestBody.safety_mode = 'NONE'; + } + console.log('Cohere request:', requestBody); const config = { @@ -623,15 +593,15 @@ async function sendCohereRequest(request, response) { const apiUrl = API_COHERE + '/chat'; if (request.body.stream) { - const stream = await fetch(apiUrl, config); + const stream = await global.fetch(apiUrl, config); parseCohereStream(stream, request, response); } else { const generateResponse = await fetch(apiUrl, config); if (!generateResponse.ok) { - console.log(`Cohere API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - // a 401 unauthorized response breaks the frontend auth, so return a 500 instead. prob a better way of dealing with this. - // 401s are already handled by the streaming processor and dont pop up an error toast, that should probably be fixed too. - return response.status(generateResponse.status === 401 ? 500 : generateResponse.status).send({ error: true }); + const errorText = await generateResponse.text(); + console.log(`Cohere API returned error: ${generateResponse.status} ${generateResponse.statusText} ${errorText}`); + const errorJson = tryParse(errorText) ?? { error: true }; + return response.status(500).send(errorJson); } const generateResponseJson = await generateResponse.json(); console.log('Cohere response:', generateResponseJson); diff --git a/src/endpoints/novelai.js b/src/endpoints/novelai.js index abbfe3ef6..d21602cc2 100644 --- a/src/endpoints/novelai.js +++ b/src/endpoints/novelai.js @@ -6,6 +6,7 @@ const { readAllChunks, extractFileFromZipBuffer, forwardFetchResponse } = requir const { jsonParser } = require('../express-common'); const API_NOVELAI = 'https://api.novelai.net'; +const TEXT_NOVELAI = 'https://text.novelai.net'; const IMAGE_NOVELAI = 'https://image.novelai.net'; // Ban bracket generation, plus defaults @@ -155,7 +156,7 @@ router.post('/generate', jsonParser, async function (req, res) { 'repetition_penalty_slope': req.body.repetition_penalty_slope, 'repetition_penalty_frequency': req.body.repetition_penalty_frequency, 'repetition_penalty_presence': req.body.repetition_penalty_presence, - 'repetition_penalty_whitelist': isNewModel ? repPenaltyAllowList : null, + 'repetition_penalty_whitelist': isNewModel ? repPenaltyAllowList.flat() : null, 'top_a': req.body.top_a, 'top_p': req.body.top_p, 'top_k': req.body.top_k, @@ -178,9 +179,7 @@ router.post('/generate', jsonParser, async function (req, res) { }; // Tells the model to stop generation at '>' - if ('theme_textadventure' === req.body.prefix && - (true === req.body.model.includes('clio') || - true === req.body.model.includes('kayra'))) { + if ('theme_textadventure' === req.body.prefix && isNewModel) { data.parameters.eos_token_id = 49405; } @@ -193,7 +192,8 @@ router.post('/generate', jsonParser, async function (req, res) { }; try { - const url = req.body.streaming ? `${API_NOVELAI}/ai/generate-stream` : `${API_NOVELAI}/ai/generate`; + const baseURL = req.body.model.includes('kayra') ? TEXT_NOVELAI : API_NOVELAI; + const url = req.body.streaming ? 
`${baseURL}/ai/generate-stream` : `${baseURL}/ai/generate`;
         const response = await fetch(url, { method: 'POST', timeout: 0, ...args });

         if (req.body.streaming) {
diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js
index 70befe3f7..99ae527a6 100644
--- a/src/endpoints/tokenizers.js
+++ b/src/endpoints/tokenizers.js
@@ -144,6 +144,7 @@ const spp_nerd_v2 = new SentencePieceTokenizer('src/tokenizers/nerdstash_v2.mode
 const spp_mistral = new SentencePieceTokenizer('src/tokenizers/mistral.model');
 const spp_yi = new SentencePieceTokenizer('src/tokenizers/yi.model');
 const spp_gemma = new SentencePieceTokenizer('src/tokenizers/gemma.model');
+const spp_jamba = new SentencePieceTokenizer('src/tokenizers/jamba.model');
 const claude_tokenizer = new WebTokenizer('src/tokenizers/claude.json');
 const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');

@@ -154,6 +155,7 @@ const sentencepieceTokenizers = [
     'mistral',
     'yi',
     'gemma',
+    'jamba',
 ];

 /**
@@ -186,6 +188,10 @@ function getSentencepiceTokenizer(model) {
         return spp_gemma;
     }

+    if (model.includes('jamba')) {
+        return spp_jamba;
+    }
+
     return null;
 }

@@ -322,6 +328,10 @@ function getTokenizerModel(requestModel) {
         return 'gemma';
     }

+    if (requestModel.includes('jamba')) {
+        return 'jamba';
+    }
+
     // default
     return 'gpt-3.5-turbo';
 }
@@ -537,59 +547,13 @@ function createWebTokenizerDecodingHandler(tokenizer) {

 const router = express.Router();

-router.post('/ai21/count', jsonParser, async function (req, res) {
-    if (!req.body) return res.sendStatus(400);
-    const key = readSecret(req.user.directories, SECRET_KEYS.AI21);
-    const options = {
-        method: 'POST',
-        headers: {
-            accept: 'application/json',
-            'content-type': 'application/json',
-            Authorization: `Bearer ${key}`,
-        },
-        body: JSON.stringify({ text: req.body[0].content }),
-    };
-
-    try {
-        const response = await fetch('https://api.ai21.com/studio/v1/tokenize', options);
-        const data = await response.json();
-        return res.send({ 'token_count': data?.tokens?.length || 0 });
-    } catch (err) {
-        console.error(err);
-        return res.send({ 'token_count': 0 });
-    }
-});
-
-router.post('/google/count', jsonParser, async function (req, res) {
-    if (!req.body) return res.sendStatus(400);
-    const options = {
-        method: 'POST',
-        headers: {
-            accept: 'application/json',
-            'content-type': 'application/json',
-        },
-        body: JSON.stringify({ contents: convertGooglePrompt(req.body, String(req.query.model)).contents }),
-    };
-    try {
-        const reverseProxy = req.query.reverse_proxy?.toString() || '';
-        const proxyPassword = req.query.proxy_password?.toString() || '';
-        const apiKey = reverseProxy ? proxyPassword : readSecret(req.user.directories, SECRET_KEYS.MAKERSUITE);
-        const apiUrl = new URL(reverseProxy || API_MAKERSUITE);
-        const response = await fetch(`${apiUrl.origin}/v1beta/models/${req.query.model}:countTokens?key=${apiKey}`, options);
-        const data = await response.json();
-        return res.send({ 'token_count': data?.totalTokens || 0 });
-    } catch (err) {
-        console.error(err);
-        return res.send({ 'token_count': 0 });
-    }
-});
-
 router.post('/llama/encode', jsonParser, createSentencepieceEncodingHandler(spp_llama));
 router.post('/nerdstash/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd));
 router.post('/nerdstash_v2/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2));
 router.post('/mistral/encode', jsonParser, createSentencepieceEncodingHandler(spp_mistral));
 router.post('/yi/encode', jsonParser, createSentencepieceEncodingHandler(spp_yi));
 router.post('/gemma/encode', jsonParser, createSentencepieceEncodingHandler(spp_gemma));
+router.post('/jamba/encode', jsonParser, createSentencepieceEncodingHandler(spp_jamba));
 router.post('/gpt2/encode', jsonParser, createTiktokenEncodingHandler('gpt2'));
 router.post('/claude/encode', jsonParser, createWebTokenizerEncodingHandler(claude_tokenizer));
 router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llama3_tokenizer));
@@ -599,6 +563,7 @@ router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandl
 router.post('/mistral/decode', jsonParser, createSentencepieceDecodingHandler(spp_mistral));
 router.post('/yi/decode', jsonParser, createSentencepieceDecodingHandler(spp_yi));
 router.post('/gemma/decode', jsonParser, createSentencepieceDecodingHandler(spp_gemma));
+router.post('/jamba/decode', jsonParser, createSentencepieceDecodingHandler(spp_jamba));
 router.post('/gpt2/decode', jsonParser, createTiktokenDecodingHandler('gpt2'));
 router.post('/claude/decode', jsonParser, createWebTokenizerDecodingHandler(claude_tokenizer));
 router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llama3_tokenizer));
@@ -637,6 +602,11 @@ router.post('/openai/encode', jsonParser, async function (req, res) {
             return handler(req, res);
         }

+        if (queryModel.includes('jamba')) {
+            const handler = createSentencepieceEncodingHandler(spp_jamba);
+            return handler(req, res);
+        }
+
         const model = getTokenizerModel(queryModel);
         const handler = createTiktokenEncodingHandler(model);
         return handler(req, res);
@@ -680,6 +650,11 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
             return handler(req, res);
         }

+        if (queryModel.includes('jamba')) {
+            const handler = createSentencepieceDecodingHandler(spp_jamba);
+            return handler(req, res);
+        }
+
         const model = getTokenizerModel(queryModel);
         const handler = createTiktokenDecodingHandler(model);
         return handler(req, res);
@@ -731,6 +706,11 @@ router.post('/openai/count', jsonParser, async function (req, res) {
             return res.send({ 'token_count': num_tokens });
         }

+        if (model === 'jamba') {
+            num_tokens = await countSentencepieceArrayTokens(spp_jamba, req.body);
+            return res.send({ 'token_count': num_tokens });
+        }
+
         const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
         const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
         const tokensPadding = 3;
diff --git a/src/endpoints/translate.js b/src/endpoints/translate.js
index 7260647d9..6b272e27e 100644
--- a/src/endpoints/translate.js
+++ b/src/endpoints/translate.js
@@ -110,27 +110,27 @@ router.post('/google', jsonParser, async (request, response) => {
 });

 router.post('/yandex', jsonParser, async (request, response) => {
-    const chunks = request.body.chunks;
-    const lang = request.body.lang;
-
-    if (!chunks || !lang) {
-        return response.sendStatus(400);
-    }
-
-    // reconstruct original text to log
-    let inputText = '';
-
-    const params = new URLSearchParams();
-    for (const chunk of chunks) {
-        params.append('text', chunk);
-        inputText += chunk;
-    }
-    params.append('lang', lang);
-    const ucid = uuidv4().replaceAll('-', '');
-
-    console.log('Input text: ' + inputText);
-    try {
+    const chunks = request.body.chunks;
+    const lang = request.body.lang;
+
+    if (!chunks || !lang) {
+        return response.sendStatus(400);
+    }
+
+    // reconstruct original text to log
+    let inputText = '';
+
+    const params = new URLSearchParams();
+    for (const chunk of chunks) {
+        params.append('text', chunk);
+        inputText += chunk;
+    }
+    params.append('lang', lang);
+    const ucid = uuidv4().replaceAll('-', '');
+
+    console.log('Input text: ' + inputText);
+
     const result = await fetch(`https://translate.yandex.net/api/v1/tr.json/translate?ucid=${ucid}&srv=android&format=text`, {
         method: 'POST',
         body: params,
diff --git a/src/prompt-converters.js b/src/prompt-converters.js
index d074228a9..2b87e8b93 100644
--- a/src/prompt-converters.js
+++ b/src/prompt-converters.js
@@ -267,10 +267,13 @@ function convertGooglePrompt(messages, model, useSysPrompt = false, charName = '
         'gemini-1.5-flash',
         'gemini-1.5-flash-latest',
         'gemini-1.5-flash-001',
+        'gemini-1.5-flash-exp-0827',
+        'gemini-1.5-flash-8b-exp-0827',
         'gemini-1.5-pro',
         'gemini-1.5-pro-latest',
         'gemini-1.5-pro-001',
         'gemini-1.5-pro-exp-0801',
+        'gemini-1.5-pro-exp-0827',
         'gemini-1.0-pro-vision-latest',
         'gemini-pro-vision',
     ];
@@ -367,6 +370,78 @@ function convertGooglePrompt(messages, model, useSysPrompt = false, charName = '
     return { contents: contents, system_instruction: system_instruction };
 }

+/**
+ * Convert AI21 prompt. Classic: system message squash, user/assistant message merge.
+ * @param {object[]} messages Array of messages
+ * @param {string} charName Character name
+ * @param {string} userName User name
+ */
+function convertAI21Messages(messages, charName = '', userName = '') {
+    if (!Array.isArray(messages)) {
+        return [];
+    }
+
+    // Collect all the system messages up until the first instance of a non-system message, and then remove them from the messages array.
+    let i = 0, systemPrompt = '';
+
+    for (i = 0; i < messages.length; i++) {
+        if (messages[i].role !== 'system') {
+            break;
+        }
+        // Append example names if not already done by the frontend (e.g. for group chats).
+        if (userName && messages[i].name === 'example_user') {
+            if (!messages[i].content.startsWith(`${userName}: `)) {
+                messages[i].content = `${userName}: ${messages[i].content}`;
+            }
+        }
+        if (charName && messages[i].name === 'example_assistant') {
+            if (!messages[i].content.startsWith(`${charName}: `)) {
+                messages[i].content = `${charName}: ${messages[i].content}`;
+            }
+        }
+        systemPrompt += `${messages[i].content}\n\n`;
+    }
+
+    messages.splice(0, i);
+
+    // Prevent erroring out if the messages array is empty.
+    if (messages.length === 0) {
+        messages.unshift({
+            role: 'user',
+            content: '[Start a new chat]',
+        });
+    }
+
+    if (systemPrompt) {
+        messages.unshift({
+            role: 'system',
+            content: systemPrompt.trim(),
+        });
+    }
+
+    // Doesn't support completion names, so prepend if not already done by the frontend (e.g. for group chats).
+    messages.forEach(msg => {
+        if ('name' in msg) {
+            if (msg.role !== 'system' && !msg.content.startsWith(`${msg.name}: `)) {
+                msg.content = `${msg.name}: ${msg.content}`;
+            }
+            delete msg.name;
+        }
+    });
+
+    // Since the messaging endpoint only supports alternating turns, we have to merge messages with the same role if they follow each other
+    let mergedMessages = [];
+    messages.forEach((message) => {
+        if (mergedMessages.length > 0 && mergedMessages[mergedMessages.length - 1].role === message.role) {
+            mergedMessages[mergedMessages.length - 1].content += '\n\n' + message.content;
+        } else {
+            mergedMessages.push(message);
+        }
+    });
+
+    return mergedMessages;
+}
+
 /**
  * Convert a prompt from the ChatML objects to the format used by MistralAI.
  * @param {object[]} messages Array of messages
@@ -520,4 +595,5 @@ module.exports = {
     convertCohereMessages,
     convertMistralMessages,
     convertCohereTools,
+    convertAI21Messages,
 };
diff --git a/src/tokenizers/jamba.model b/src/tokenizers/jamba.model
new file mode 100644
index 000000000..b32365c47
Binary files /dev/null and b/src/tokenizers/jamba.model differ
diff --git a/src/util.js b/src/util.js
index c1ff10069..609f8cecb 100644
--- a/src/util.js
+++ b/src/util.js
@@ -34,8 +34,9 @@ function getConfig() {
         CACHED_CONFIG = config;
         return config;
     } catch (error) {
-        console.warn('Failed to read config.yaml');
-        return {};
+        console.error(color.red('FATAL: Failed to read config.yaml. Please check the file for syntax errors.'));
+        console.error(error.message);
+        process.exit(1);
     }
 }

@@ -298,8 +299,8 @@ const color = {
  * @returns {string} A UUIDv4 string
  */
 function uuidv4() {
-    if ('randomUUID' in crypto) {
-        return crypto.randomUUID();
+    if ('crypto' in global && 'randomUUID' in global.crypto) {
+        return global.crypto.randomUUID();
     }
     return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
         const r = Math.random() * 16 | 0;