New AI21 Jamba + tokenizer

Cohee
2024-08-26 12:07:36 +03:00
parent ff834efde3
commit 5fc16a2474
10 changed files with 188 additions and 266 deletions

View File

@@ -8,7 +8,7 @@
 "openrouter_force_instruct": false,
 "openrouter_group_models": false,
 "openrouter_sort_models": "alphabetically",
-"ai21_model": "j2-ultra",
+"ai21_model": "jamba-1.5-mini",
 "mistralai_model": "mistral-large-latest",
 "custom_model": "",
 "custom_url": "",
@@ -19,7 +19,6 @@
 "temperature": 1,
 "frequency_penalty": 0,
 "presence_penalty": 0,
-"count_penalty": 0,
 "top_p": 1,
 "top_k": 0,
 "top_a": 0,
@@ -233,8 +232,6 @@
 "assistant_prefill": "",
 "assistant_impersonation": "",
 "human_sysprompt_message": "Let's get started. Please generate your response based on the information and instructions provided above.",
-"use_ai21_tokenizer": false,
-"use_google_tokenizer": false,
 "claude_use_sysprompt": false,
 "use_alt_scale": false,
 "squash_system_messages": false,

View File

@@ -452,7 +452,6 @@
 "temp_openai": 1.0,
 "freq_pen_openai": 0,
 "pres_pen_openai": 0,
-"count_pen": 0,
 "top_p_openai": 1,
 "top_k_openai": 0,
 "stream_openai": true,
@@ -614,7 +613,7 @@
 "wi_format": "{0}",
 "openai_model": "gpt-4-turbo",
 "claude_model": "claude-3-5-sonnet-20240620",
-"ai21_model": "j2-ultra",
+"ai21_model": "jamba-1.5-mini",
 "windowai_model": "",
 "openrouter_model": "OR_Website",
 "jailbreak_system": true,
@@ -625,7 +624,6 @@
 "show_external_models": false,
 "proxy_password": "",
 "assistant_prefill": "",
-"assistant_impersonation": "",
-"use_ai21_tokenizer": false
+"assistant_impersonation": ""
 }
 }

View File

@@ -436,7 +436,7 @@
 </div>
 </div>
 </div>
-<div data-newbie-hidden class="range-block" data-source="openai,openrouter,ai21,custom,cohere,perplexity,groq">
+<div data-newbie-hidden class="range-block" data-source="openai,openrouter,custom,cohere,perplexity,groq">
 <div class="range-block-title" data-i18n="Frequency Penalty">
 Frequency Penalty
 </div>
@@ -449,7 +449,7 @@
 </div>
 </div>
 </div>
-<div data-newbie-hidden class="range-block" data-source="openai,openrouter,ai21,custom,cohere,perplexity,groq">
+<div data-newbie-hidden class="range-block" data-source="openai,openrouter,custom,cohere,perplexity,groq">
 <div class="range-block-title" data-i18n="Presence Penalty">
 Presence Penalty
 </div>
@@ -462,20 +462,7 @@
 </div>
 </div>
 </div>
-<div data-newbie-hidden class="range-block" data-source="ai21">
-<div class="range-block-title" data-i18n="Count Penalty">
-Count Penalty
-</div>
-<div class="range-block-range-and-counter">
-<div class="range-block-range">
-<input type="range" id="count_pen" name="volume" min="0" max="1" step="0.01">
-</div>
-<div class="range-block-counter">
-<input type="number" min="0" max="1" step="0.01" data-for="count_pen" id="count_pen_counter">
-</div>
-</div>
-</div>
-<div data-newbie-hidden class="range-block" data-source="claude,openrouter,ai21,makersuite,cohere,perplexity">
+<div data-newbie-hidden class="range-block" data-source="claude,openrouter,makersuite,cohere,perplexity">
 <div class="range-block-title" data-i18n="Top K">
 Top K
 </div>
@@ -1791,22 +1778,6 @@
 </select>
 </div>
 </div>
-<div class="range-block" data-source="ai21">
-<label for="use_ai21_tokenizer" title="Use AI21 Tokenizer" data-i18n="[title]Use AI21 Tokenizer" class="checkbox_label widthFreeExpand">
-<input id="use_ai21_tokenizer" type="checkbox" /><span data-i18n="Use AI21 Tokenizer">Use AI21 Tokenizer</span>
-</label>
-<div class="toggle-description justifyLeft">
-<span data-i18n="Use the appropriate tokenizer for Jurassic models, which is more efficient than GPT's.">Use the appropriate tokenizer for Jurassic models, which is more efficient than GPT's.</span>
-</div>
-</div>
-<div class="range-block" data-source="makersuite">
-<label for="use_google_tokenizer" title="Use Google Tokenizer" data-i18n="[title]Use Google Tokenizer" class="checkbox_label widthFreeExpand">
-<input id="use_google_tokenizer" type="checkbox" /><span data-i18n="Use Google Tokenizer">Use Google Tokenizer</span>
-</label>
-<div class="toggle-description justifyLeft">
-<span data-i18n="Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.">Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.</span>
-</div>
-</div>
 <div class="range-block" data-source="makersuite">
 <label for="use_makersuite_sysprompt" class="checkbox_label widthFreeExpand">
 <input id="use_makersuite_sysprompt" type="checkbox" />
@@ -2791,10 +2762,12 @@
 <div>
 <h4 data-i18n="AI21 Model">AI21 Model</h4>
 <select id="model_ai21_select">
-<optgroup label="Latest">
-<option value="j2-ultra">j2-ultra</option>
-<option value="j2-mid">j2-mid</option>
-<option value="j2-light">j2-light</option>
+<optgroup label="Jamba 1.5">
+<option value="jamba-1.5-mini">jamba-1.5-mini</option>
+<option value="jamba-1.5-large">jamba-1.5-large</option>
+</optgroup>
+<optgroup label="Jamba-Instruct (Deprecated)">
+<option value="jamba-instruct-preview">jamba-instruct-preview</option>
 </optgroup>
 </select>
 </div>
@@ -3450,6 +3423,7 @@
 <option value="3">Llama 1/2</option>
 <option value="12">Llama 3</option>
 <option value="13">Gemma / Gemini</option>
+<option value="14">Jamba</option>
 <option value="4">NerdStash (NovelAI Clio)</option>
 <option value="5">NerdStash v2 (NovelAI Kayra)</option>
 <option value="7">Mistral</option>

View File

@@ -2826,7 +2826,7 @@ export function getCharacterCardFields() {
 }

 export function isStreamingEnabled() {
-const noStreamSources = [chat_completion_sources.SCALE, chat_completion_sources.AI21];
+const noStreamSources = [chat_completion_sources.SCALE];
 return ((main_api == 'openai' && oai_settings.stream_openai && !noStreamSources.includes(oai_settings.chat_completion_source) && !(oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE && oai_settings.google_model.includes('bison')))
 || (main_api == 'kobold' && kai_settings.streaming_kobold && kai_flags.can_use_streaming)
 || (main_api == 'novel' && nai_settings.streaming_novel)

View File

@@ -132,13 +132,9 @@ const max_2mil = 2000 * 1000;
 const scale_max = 8191;
 const claude_max = 9000; // We have a proper tokenizer, so theoretically could be larger (up to 9k)
 const claude_100k_max = 99000;
-let ai21_max = 9200; //can easily fit 9k gpt tokens because j2's tokenizer is efficient af
 const unlocked_max = max_200k;
 const oai_max_temp = 2.0;
-const claude_max_temp = 1.0; //same as j2
-const j2_max_topk = 10.0;
-const j2_max_freq = 5.0;
-const j2_max_pres = 5.0;
+const claude_max_temp = 1.0;
 const openrouter_website_model = 'OR_Website';
 const openai_max_stop_strings = 4;
@@ -218,25 +214,11 @@ const sensitiveFields = [
 'custom_include_headers',
 ];

-function getPrefixMap() {
-return selected_group ? {
-assistant: '',
-user: '',
-system: 'OOC: ',
-}
-: {
-assistant: '{{char}}:',
-user: '{{user}}:',
-system: '',
-};
-}
-
 const default_settings = {
 preset_settings_openai: 'Default',
 temp_openai: 1.0,
 freq_pen_openai: 0,
 pres_pen_openai: 0,
-count_pen: 0.0,
 top_p_openai: 1.0,
 top_k_openai: 0,
 min_p_openai: 0,
@@ -264,7 +246,7 @@ const default_settings = {
 openai_model: 'gpt-4-turbo',
 claude_model: 'claude-3-5-sonnet-20240620',
 google_model: 'gemini-1.5-pro',
-ai21_model: 'j2-ultra',
+ai21_model: 'jamba-1.5-mini',
 mistralai_model: 'mistral-large-latest',
 cohere_model: 'command-r-plus',
 perplexity_model: 'llama-3.1-70b-instruct',
@@ -294,8 +276,6 @@ const default_settings = {
 assistant_prefill: '',
 assistant_impersonation: '',
 human_sysprompt_message: default_claude_human_sysprompt_message,
-use_ai21_tokenizer: false,
-use_google_tokenizer: false,
 claude_use_sysprompt: false,
 use_makersuite_sysprompt: true,
 use_alt_scale: false,
@@ -317,7 +297,6 @@ const oai_settings = {
 temp_openai: 1.0,
 freq_pen_openai: 0,
 pres_pen_openai: 0,
-count_pen: 0.0,
 top_p_openai: 1.0,
 top_k_openai: 0,
 min_p_openai: 0,
@@ -345,7 +324,7 @@ const oai_settings = {
 openai_model: 'gpt-4-turbo',
 claude_model: 'claude-3-5-sonnet-20240620',
 google_model: 'gemini-1.5-pro',
-ai21_model: 'j2-ultra',
+ai21_model: 'jamba-1.5-mini',
 mistralai_model: 'mistral-large-latest',
 cohere_model: 'command-r-plus',
 perplexity_model: 'llama-3.1-70b-instruct',
@@ -375,8 +354,6 @@ const oai_settings = {
 assistant_prefill: '',
 assistant_impersonation: '',
 human_sysprompt_message: default_claude_human_sysprompt_message,
-use_ai21_tokenizer: false,
-use_google_tokenizer: false,
 claude_use_sysprompt: false,
 use_makersuite_sysprompt: true,
 use_alt_scale: false,
@@ -1802,7 +1779,6 @@ async function sendOpenAIRequest(type, messages, signal) {
 const isClaude = oai_settings.chat_completion_source == chat_completion_sources.CLAUDE;
 const isOpenRouter = oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER;
 const isScale = oai_settings.chat_completion_source == chat_completion_sources.SCALE;
-const isAI21 = oai_settings.chat_completion_source == chat_completion_sources.AI21;
 const isGoogle = oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE;
 const isOAI = oai_settings.chat_completion_source == chat_completion_sources.OPENAI;
 const isMistral = oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI;
@@ -1815,7 +1791,7 @@ async function sendOpenAIRequest(type, messages, signal) {
 const isQuiet = type === 'quiet';
 const isImpersonate = type === 'impersonate';
 const isContinue = type === 'continue';
-const stream = oai_settings.stream_openai && !isQuiet && !isScale && !isAI21 && !(isGoogle && oai_settings.google_model.includes('bison'));
+const stream = oai_settings.stream_openai && !isQuiet && !isScale && !(isGoogle && oai_settings.google_model.includes('bison'));
 const useLogprobs = !!power_user.request_token_probabilities;
 const canMultiSwipe = oai_settings.n > 1 && !isContinue && !isImpersonate && !isQuiet && (isOAI || isCustom);
@@ -1824,15 +1800,6 @@ async function sendOpenAIRequest(type, messages, signal) {
 replaceItemizedPromptText(messageId, messages);
 }

-if (isAI21) {
-const joinedMsgs = messages.reduce((acc, obj) => {
-const prefix = getPrefixMap()[obj.role];
-return acc + (prefix ? (selected_group ? '\n' : prefix + ' ') : '') + obj.content + '\n';
-}, '');
-messages = substituteParams(joinedMsgs) + (isImpersonate ? `${name1}:` : `${name2}:`);
-replaceItemizedPromptText(messageId, messages);
-}
-
 // If we're using the window.ai extension, use that instead
 // Doesn't support logit bias yet
 if (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI) {
@@ -1931,12 +1898,6 @@ async function sendOpenAIRequest(type, messages, signal) {
 generate_data['use_makersuite_sysprompt'] = oai_settings.use_makersuite_sysprompt;
 }

-if (isAI21) {
-generate_data['top_k'] = Number(oai_settings.top_k_openai);
-generate_data['count_pen'] = Number(oai_settings.count_pen);
-generate_data['stop_tokens'] = [name1 + ':', substituteParams(oai_settings.new_chat_prompt), substituteParams(oai_settings.new_group_chat_prompt)];
-}
-
 if (isMistral) {
 generate_data['safe_prompt'] = false; // already defaults to false, but just incase they change that in the future.
 }
@@ -3008,7 +2969,6 @@ function loadOpenAISettings(data, settings) {
 oai_settings.temp_openai = settings.temp_openai ?? default_settings.temp_openai;
 oai_settings.freq_pen_openai = settings.freq_pen_openai ?? default_settings.freq_pen_openai;
 oai_settings.pres_pen_openai = settings.pres_pen_openai ?? default_settings.pres_pen_openai;
-oai_settings.count_pen = settings.count_pen ?? default_settings.count_pen;
 oai_settings.top_p_openai = settings.top_p_openai ?? default_settings.top_p_openai;
 oai_settings.top_k_openai = settings.top_k_openai ?? default_settings.top_k_openai;
 oai_settings.top_a_openai = settings.top_a_openai ?? default_settings.top_a_openai;
@@ -3080,10 +3040,12 @@ function loadOpenAISettings(data, settings) {
 oai_settings.names_behavior = character_names_behavior.COMPLETION;
 }

+if (oai_settings.ai21_model.startsWith('j2-')) {
+oai_settings.ai21_model = 'jamba-1.5-mini';
+}
+
 if (settings.wrap_in_quotes !== undefined) oai_settings.wrap_in_quotes = !!settings.wrap_in_quotes;
 if (settings.openai_model !== undefined) oai_settings.openai_model = settings.openai_model;
-if (settings.use_ai21_tokenizer !== undefined) { oai_settings.use_ai21_tokenizer = !!settings.use_ai21_tokenizer; oai_settings.use_ai21_tokenizer ? ai21_max = 8191 : ai21_max = 9200; }
-if (settings.use_google_tokenizer !== undefined) oai_settings.use_google_tokenizer = !!settings.use_google_tokenizer;
 if (settings.claude_use_sysprompt !== undefined) oai_settings.claude_use_sysprompt = !!settings.claude_use_sysprompt;
 if (settings.use_makersuite_sysprompt !== undefined) oai_settings.use_makersuite_sysprompt = !!settings.use_makersuite_sysprompt;
 if (settings.use_alt_scale !== undefined) { oai_settings.use_alt_scale = !!settings.use_alt_scale; updateScaleForm(); }
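
The migration hunk above retires saved Jurassic-2 model IDs on load, so stale presets can no longer select a discontinued model. A minimal sketch of the effect, with names from the diff and the surrounding loader elided:

```js
// Sketch: any legacy 'j2-*' selection is rewritten to the new default on settings load.
const oai_settings = { ai21_model: 'j2-ultra' }; // value restored from an old save

if (oai_settings.ai21_model.startsWith('j2-')) {
    oai_settings.ai21_model = 'jamba-1.5-mini';
}

console.log(oai_settings.ai21_model); // 'jamba-1.5-mini'
```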
@@ -3133,8 +3095,6 @@ function loadOpenAISettings(data, settings) {
 $('#jailbreak_system').prop('checked', oai_settings.jailbreak_system);
 $('#openai_show_external_models').prop('checked', oai_settings.show_external_models);
 $('#openai_external_category').toggle(oai_settings.show_external_models);
-$('#use_ai21_tokenizer').prop('checked', oai_settings.use_ai21_tokenizer);
-$('#use_google_tokenizer').prop('checked', oai_settings.use_google_tokenizer);
 $('#claude_use_sysprompt').prop('checked', oai_settings.claude_use_sysprompt);
 $('#use_makersuite_sysprompt').prop('checked', oai_settings.use_makersuite_sysprompt);
 $('#scale-alt').prop('checked', oai_settings.use_alt_scale);
@@ -3170,9 +3130,6 @@ function loadOpenAISettings(data, settings) {
 $('#pres_pen_openai').val(oai_settings.pres_pen_openai);
 $('#pres_pen_counter_openai').val(Number(oai_settings.pres_pen_openai).toFixed(2));

-$('#count_pen').val(oai_settings.count_pen);
-$('#count_pen_counter').val(Number(oai_settings.count_pen).toFixed(2));
-
 $('#top_p_openai').val(oai_settings.top_p_openai);
 $('#top_p_counter_openai').val(Number(oai_settings.top_p_openai).toFixed(2));
@@ -3392,7 +3349,6 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) {
 temperature: settings.temp_openai,
 frequency_penalty: settings.freq_pen_openai,
 presence_penalty: settings.pres_pen_openai,
-count_penalty: settings.count_pen,
 top_p: settings.top_p_openai,
 top_k: settings.top_k_openai,
 top_a: settings.top_a_openai,
@@ -3427,8 +3383,6 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) {
 assistant_prefill: settings.assistant_prefill,
 assistant_impersonation: settings.assistant_impersonation,
 human_sysprompt_message: settings.human_sysprompt_message,
-use_ai21_tokenizer: settings.use_ai21_tokenizer,
-use_google_tokenizer: settings.use_google_tokenizer,
 claude_use_sysprompt: settings.claude_use_sysprompt,
 use_makersuite_sysprompt: settings.use_makersuite_sysprompt,
 use_alt_scale: settings.use_alt_scale,
@@ -3799,7 +3753,6 @@ function onSettingsPresetChange() {
 temperature: ['#temp_openai', 'temp_openai', false],
 frequency_penalty: ['#freq_pen_openai', 'freq_pen_openai', false],
 presence_penalty: ['#pres_pen_openai', 'pres_pen_openai', false],
-count_penalty: ['#count_pen', 'count_pen', false],
 top_p: ['#top_p_openai', 'top_p_openai', false],
 top_k: ['#top_k_openai', 'top_k_openai', false],
 top_a: ['#top_a_openai', 'top_a_openai', false],
@@ -3856,8 +3809,6 @@ function onSettingsPresetChange() {
 assistant_prefill: ['#claude_assistant_prefill', 'assistant_prefill', false],
 assistant_impersonation: ['#claude_assistant_impersonation', 'assistant_impersonation', false],
 human_sysprompt_message: ['#claude_human_sysprompt_textarea', 'human_sysprompt_message', false],
-use_ai21_tokenizer: ['#use_ai21_tokenizer', 'use_ai21_tokenizer', true],
-use_google_tokenizer: ['#use_google_tokenizer', 'use_google_tokenizer', true],
 claude_use_sysprompt: ['#claude_use_sysprompt', 'claude_use_sysprompt', true],
 use_makersuite_sysprompt: ['#use_makersuite_sysprompt', 'use_makersuite_sysprompt', true],
 use_alt_scale: ['#use_alt_scale', 'use_alt_scale', true],
@@ -4305,33 +4256,13 @@ async function onModelChange() {
 if (oai_settings.chat_completion_source == chat_completion_sources.AI21) {
 if (oai_settings.max_context_unlocked) {
 $('#openai_max_context').attr('max', unlocked_max);
-} else {
-$('#openai_max_context').attr('max', ai21_max);
+} else if (oai_settings.ai21_model.includes('jamba-1.5') || oai_settings.ai21_model.includes('jamba-instruct')) {
+$('#openai_max_context').attr('max', max_256k);
 }

-oai_settings.openai_max_context = Math.min(oai_settings.openai_max_context, Number($('#openai_max_context').attr('max')));
+oai_settings.openai_max_context = Math.min(Number($('#openai_max_context').attr('max')), oai_settings.openai_max_context);
 $('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');

-oai_settings.temp_openai = Math.min(claude_max_temp, oai_settings.temp_openai);
-$('#temp_openai').attr('max', claude_max_temp).val(oai_settings.temp_openai).trigger('input');
-
-oai_settings.freq_pen_openai = Math.min(j2_max_freq, oai_settings.freq_pen_openai < 0 ? 0 : oai_settings.freq_pen_openai);
-$('#freq_pen_openai').attr('min', 0).attr('max', j2_max_freq).val(oai_settings.freq_pen_openai).trigger('input');
-
-oai_settings.pres_pen_openai = Math.min(j2_max_pres, oai_settings.pres_pen_openai < 0 ? 0 : oai_settings.pres_pen_openai);
-$('#pres_pen_openai').attr('min', 0).attr('max', j2_max_pres).val(oai_settings.pres_pen_openai).trigger('input');
-
-oai_settings.top_k_openai = Math.min(j2_max_topk, oai_settings.top_k_openai);
-$('#top_k_openai').attr('max', j2_max_topk).val(oai_settings.top_k_openai).trigger('input');
-} else if (oai_settings.chat_completion_source != chat_completion_sources.AI21) {
-oai_settings.freq_pen_openai = Math.min(2.0, oai_settings.freq_pen_openai);
-$('#freq_pen_openai').attr('min', -2.0).attr('max', 2.0).val(oai_settings.freq_pen_openai).trigger('input');
-
-oai_settings.pres_pen_openai = Math.min(2.0, oai_settings.pres_pen_openai);
-$('#pres_pen_openai').attr('min', -2.0).attr('max', 2.0).val(oai_settings.pres_pen_openai).trigger('input');
-
-oai_settings.top_k_openai = Math.min(200, oai_settings.top_k_openai);
-$('#top_k_openai').attr('max', 200).val(oai_settings.top_k_openai).trigger('input');
+$('#temp_openai').attr('max', oai_max_temp).val(oai_settings.temp_openai).trigger('input');
 }

 if (oai_settings.chat_completion_source == chat_completion_sources.CUSTOM) {
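
The Jamba 1.5 and Jamba-Instruct models expose a far larger context window than Jurassic-2, so the model-change handler now raises the context slider to max_256k and clamps the saved value, instead of applying the old per-sampler J2 limits. A sketch of the clamp with the jQuery slider reduced to plain variables; the exact value of max_256k is an assumption based on the neighboring `n * 1000` constants:

```js
// Sketch of the new AI21 context clamp; max_256k is assumed to be defined
// like the other *_max constants in this file.
const max_256k = 256 * 1000;

let sliderMax = 200 * 1000;    // stand-in for $('#openai_max_context').attr('max')
let savedContext = 900 * 1000; // a saved value larger than the model allows

const model = 'jamba-1.5-mini';
if (model.includes('jamba-1.5') || model.includes('jamba-instruct')) {
    sliderMax = max_256k;
}

savedContext = Math.min(sliderMax, savedContext);
console.log(savedContext); // 256000
```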
@@ -4953,12 +4884,6 @@ $(document).ready(async function () {
 saveSettingsDebounced();
 });

-$('#count_pen').on('input', function () {
-oai_settings.count_pen = Number($(this).val());
-$('#count_pen_counter').val(Number($(this).val()).toFixed(2));
-saveSettingsDebounced();
-});
-
 $('#top_p_openai').on('input', function () {
 oai_settings.top_p_openai = Number($(this).val());
 $('#top_p_counter_openai').val(Number($(this).val()).toFixed(2));
@@ -5017,20 +4942,6 @@ $(document).ready(async function () {
 saveSettingsDebounced();
 });

-$('#use_ai21_tokenizer').on('change', function () {
-oai_settings.use_ai21_tokenizer = !!$('#use_ai21_tokenizer').prop('checked');
-oai_settings.use_ai21_tokenizer ? ai21_max = 8191 : ai21_max = 9200;
-oai_settings.openai_max_context = Math.min(ai21_max, oai_settings.openai_max_context);
-$('#openai_max_context').attr('max', ai21_max).val(oai_settings.openai_max_context).trigger('input');
-$('#openai_max_context_counter').attr('max', Number($('#openai_max_context').attr('max')));
-saveSettingsDebounced();
-});
-
-$('#use_google_tokenizer').on('change', function () {
-oai_settings.use_google_tokenizer = !!$('#use_google_tokenizer').prop('checked');
-saveSettingsDebounced();
-});
-
 $('#claude_use_sysprompt').on('change', function () {
 oai_settings.claude_use_sysprompt = !!$('#claude_use_sysprompt').prop('checked');
 $('#claude_human_sysprompt_message_block').toggle(oai_settings.claude_use_sysprompt);

View File

@@ -27,6 +27,7 @@ export const tokenizers = {
 CLAUDE: 11,
 LLAMA3: 12,
 GEMMA: 13,
+JAMBA: 14,
 BEST_MATCH: 99,
 };
@@ -36,6 +37,7 @@ export const SENTENCEPIECE_TOKENIZERS = [
 tokenizers.YI,
 tokenizers.LLAMA3,
 tokenizers.GEMMA,
+tokenizers.JAMBA,
 // uncomment when NovelAI releases Kayra and Clio weights, lol
 //tokenizers.NERD,
 //tokenizers.NERD2,
@@ -98,6 +100,11 @@ const TOKENIZER_URLS = {
 decode: '/api/tokenizers/gemma/decode',
 count: '/api/tokenizers/gemma/encode',
 },
+[tokenizers.JAMBA]: {
+encode: '/api/tokenizers/jamba/encode',
+decode: '/api/tokenizers/jamba/decode',
+count: '/api/tokenizers/jamba/encode',
+},
 [tokenizers.API_TEXTGENERATIONWEBUI]: {
 encode: '/api/tokenizers/remote/textgenerationwebui/encode',
 count: '/api/tokenizers/remote/textgenerationwebui/encode',
@@ -164,7 +171,7 @@ export function getAvailableTokenizers() {
 tokenizerId: Number(tokenizerOption.value),
 tokenizerKey: Object.entries(tokenizers).find(([_, value]) => value === Number(tokenizerOption.value))[0].toLocaleLowerCase(),
 tokenizerName: tokenizerOption.text,
-}))
+}));
 }

 /**
@@ -280,6 +287,12 @@ export function getTokenizerBestMatch(forApi) {
 if (model.includes('gemma')) {
 return tokenizers.GEMMA;
 }
+if (model.includes('yi')) {
+return tokenizers.YI;
+}
+if (model.includes('jamba')) {
+return tokenizers.JAMBA;
+}
 }

 return tokenizers.LLAMA;
@@ -497,6 +510,7 @@ export function getTokenizerModel() {
 const mistralTokenizer = 'mistral';
 const yiTokenizer = 'yi';
 const gemmaTokenizer = 'gemma';
+const jambaTokenizer = 'jamba';

 // Assuming no one would use it for different models.. right?
 if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
@@ -562,12 +576,19 @@ export function getTokenizerModel() {
 else if (oai_settings.openrouter_model.includes('GPT-NeoXT')) {
 return gpt2Tokenizer;
 }
+else if (oai_settings.openrouter_model.includes('jamba')) {
+return jambaTokenizer;
+}
 }

 if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
 return gemmaTokenizer;
 }

+if (oai_settings.chat_completion_source == chat_completion_sources.AI21) {
+return jambaTokenizer;
+}
+
 if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
 return claudeTokenizer;
 }
@@ -626,16 +647,7 @@ export function getTokenizerModel() {
 * @deprecated Use countTokensOpenAIAsync instead.
 */
 export function countTokensOpenAI(messages, full = false) {
-const shouldTokenizeAI21 = oai_settings.chat_completion_source === chat_completion_sources.AI21 && oai_settings.use_ai21_tokenizer;
-const shouldTokenizeGoogle = oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE && oai_settings.use_google_tokenizer;
-let tokenizerEndpoint = '';
-if (shouldTokenizeAI21) {
-tokenizerEndpoint = '/api/tokenizers/ai21/count';
-} else if (shouldTokenizeGoogle) {
-tokenizerEndpoint = `/api/tokenizers/google/count?model=${getTokenizerModel()}&reverse_proxy=${oai_settings.reverse_proxy}&proxy_password=${oai_settings.proxy_password}`;
-} else {
-tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`;
-}
+const tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`;
 const cacheObject = getTokenCacheObject();

 if (!Array.isArray(messages)) {
@@ -647,7 +659,7 @@ export function countTokensOpenAI(messages, full = false) {
 for (const message of messages) {
 const model = getTokenizerModel();

-if (model === 'claude' || shouldTokenizeAI21 || shouldTokenizeGoogle) {
+if (model === 'claude') {
 full = true;
 }
@@ -687,16 +699,7 @@ export function countTokensOpenAI(messages, full = false) {
 * @returns {Promise<number>} Token count.
 */
 export async function countTokensOpenAIAsync(messages, full = false) {
-const shouldTokenizeAI21 = oai_settings.chat_completion_source === chat_completion_sources.AI21 && oai_settings.use_ai21_tokenizer;
-const shouldTokenizeGoogle = oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE && oai_settings.use_google_tokenizer;
-let tokenizerEndpoint = '';
-if (shouldTokenizeAI21) {
-tokenizerEndpoint = '/api/tokenizers/ai21/count';
-} else if (shouldTokenizeGoogle) {
-tokenizerEndpoint = `/api/tokenizers/google/count?model=${getTokenizerModel()}`;
-} else {
-tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`;
-}
+const tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`;
 const cacheObject = getTokenCacheObject();

 if (!Array.isArray(messages)) {
@@ -708,7 +711,7 @@ export async function countTokensOpenAIAsync(messages, full = false) {
 for (const message of messages) {
 const model = getTokenizerModel();

-if (model === 'claude' || shouldTokenizeAI21 || shouldTokenizeGoogle) {
+if (model === 'claude') {
 full = true;
 }
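
Taken together, the frontend changes route Jamba token counting through the same plumbing as the other SentencePiece models: id 14 in the tokenizer dropdown, an entry in TOKENIZER_URLS, a best-match rule, and 'jamba' as the reported tokenizer model for the AI21 source. A small sketch of the resolution path; the object shapes are copied from the diff, and the lookup itself is illustrative:

```js
// Sketch: how a jamba-* model resolves to the new count endpoint.
const tokenizers = { JAMBA: 14 };
const TOKENIZER_URLS = {
    [tokenizers.JAMBA]: {
        encode: '/api/tokenizers/jamba/encode',
        decode: '/api/tokenizers/jamba/decode',
        count: '/api/tokenizers/jamba/encode', // counting reuses the encode route
    },
};

// getTokenizerBestMatch() now picks tokenizers.JAMBA for jamba-* models...
const model = 'jamba-1.5-mini';
const tokenizerId = model.includes('jamba') ? tokenizers.JAMBA : undefined;

// ...and the count URL falls out of the table:
console.log(TOKENIZER_URLS[tokenizerId].count); // '/api/tokenizers/jamba/encode'
```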

View File

@@ -5,7 +5,7 @@ const Readable = require('stream').Readable;
 const { jsonParser } = require('../../express-common');
 const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY, OPENROUTER_HEADERS } = require('../../constants');
 const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
-const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt, convertCohereMessages, convertMistralMessages, convertCohereTools } = require('../../prompt-converters');
+const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt, convertCohereMessages, convertMistralMessages, convertCohereTools, convertAI21Messages } = require('../../prompt-converters');
 const { readSecret, SECRET_KEYS } = require('../secrets');
 const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
@@ -19,6 +19,7 @@ const API_GROQ = 'https://api.groq.com/openai/v1';
 const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
 const API_01AI = 'https://api.01.ai/v1';
 const API_BLOCKENTROPY = 'https://api.blockentropy.ai/v1';
+const API_AI21 = 'https://api.ai21.com/studio/v1';

 /**
 * Applies a post-processing step to the generated messages.
@@ -413,6 +414,16 @@ async function sendAI21Request(request, response) {
 request.socket.on('close', function () {
 controller.abort();
 });
+const convertedPrompt = convertAI21Messages(request.body.messages, request.body.char_name, request.body.user_name);
+const body = {
+messages: convertedPrompt,
+model: request.body.model,
+max_tokens: request.body.max_tokens,
+temperature: request.body.temperature,
+top_p: request.body.top_p,
+stop: request.body.stop,
+stream: request.body.stream,
+};
 const options = {
 method: 'POST',
 headers: {
@@ -420,59 +431,33 @@ async function sendAI21Request(request, response) {
 'content-type': 'application/json',
 Authorization: `Bearer ${readSecret(request.user.directories, SECRET_KEYS.AI21)}`,
 },
-body: JSON.stringify({
-numResults: 1,
-maxTokens: request.body.max_tokens,
-minTokens: 0,
-temperature: request.body.temperature,
-topP: request.body.top_p,
-stopSequences: request.body.stop_tokens,
-topKReturn: request.body.top_k,
-frequencyPenalty: {
-scale: request.body.frequency_penalty * 100,
-applyToWhitespaces: false,
-applyToPunctuations: false,
-applyToNumbers: false,
-applyToStopwords: false,
-applyToEmojis: false,
-},
-presencePenalty: {
-scale: request.body.presence_penalty,
-applyToWhitespaces: false,
-applyToPunctuations: false,
-applyToNumbers: false,
-applyToStopwords: false,
-applyToEmojis: false,
-},
-countPenalty: {
-scale: request.body.count_pen,
-applyToWhitespaces: false,
-applyToPunctuations: false,
-applyToNumbers: false,
-applyToStopwords: false,
-applyToEmojis: false,
-},
-prompt: request.body.messages,
-}),
+body: JSON.stringify(body),
 signal: controller.signal,
 };

-fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options)
-.then(r => r.json())
-.then(r => {
-if (r.completions === undefined) {
-console.log(r);
-} else {
-console.log(r.completions[0].data.text);
-}
-const reply = { choices: [{ 'message': { 'content': r.completions?.[0]?.data?.text } }] };
-return response.send(reply);
-})
-.catch(err => {
-console.error(err);
-return response.send({ error: true });
-});
+console.log('AI21 request:', body);
+
+try {
+const generateResponse = await fetch(API_AI21 + '/chat/completions', options);
+if (request.body.stream) {
+forwardFetchResponse(generateResponse, response);
+} else {
+if (!generateResponse.ok) {
+console.log(`AI21 API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
+return response.status(500).send({ error: true });
+}
+const generateResponseJson = await generateResponse.json();
+console.log('AI21 response:', generateResponseJson);
+return response.send(generateResponseJson);
+}
+} catch (error) {
+console.log('Error communicating with AI21 API: ', error);
+if (!response.headersSent) {
+response.send({ error: true });
+} else {
+response.end();
+}
+}
 }
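
The rewritten sendAI21Request drops the Jurassic-2 per-model /complete route and its bespoke penalty objects in favor of AI21's OpenAI-compatible chat completions endpoint, which is also what makes streaming possible (forwarded via forwardFetchResponse). A standalone sketch of an equivalent non-streaming call; the model name, prompt, and key source are placeholders, and the response is assumed to follow the OpenAI schema the frontend already parses:

```js
// Sketch: the OpenAI-style request shape now sent to AI21 (endpoint and body
// fields taken from the diff above; values are illustrative).
async function main() {
    const response = await fetch('https://api.ai21.com/studio/v1/chat/completions', {
        method: 'POST',
        headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            Authorization: `Bearer ${process.env.AI21_API_KEY}`, // placeholder key source
        },
        body: JSON.stringify({
            model: 'jamba-1.5-mini',
            messages: [{ role: 'user', content: 'Hello, Jamba!' }],
            max_tokens: 128,
            temperature: 1,
            top_p: 1,
            stop: [],
            stream: false,
        }),
    });

    const json = await response.json();
    console.log(json.choices?.[0]?.message?.content);
}

main().catch(console.error);
```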

View File

@@ -144,6 +144,7 @@ const spp_nerd_v2 = new SentencePieceTokenizer('src/tokenizers/nerdstash_v2.model');
 const spp_mistral = new SentencePieceTokenizer('src/tokenizers/mistral.model');
 const spp_yi = new SentencePieceTokenizer('src/tokenizers/yi.model');
 const spp_gemma = new SentencePieceTokenizer('src/tokenizers/gemma.model');
+const spp_jamba = new SentencePieceTokenizer('src/tokenizers/jamba.model');
 const claude_tokenizer = new WebTokenizer('src/tokenizers/claude.json');
 const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');
@@ -154,6 +155,7 @@ const sentencepieceTokenizers = [
 'mistral',
 'yi',
 'gemma',
+'jamba',
 ];

 /**
@@ -186,6 +188,10 @@ function getSentencepiceTokenizer(model) {
 return spp_gemma;
 }

+if (model.includes('jamba')) {
+return spp_jamba;
+}
+
 return null;
 }
@@ -322,6 +328,10 @@ function getTokenizerModel(requestModel) {
 return 'gemma';
 }

+if (requestModel.includes('jamba')) {
+return 'jamba';
+}
+
 // default
 return 'gpt-3.5-turbo';
 }
@@ -537,59 +547,13 @@ function createWebTokenizerDecodingHandler(tokenizer) {
 const router = express.Router();

-router.post('/ai21/count', jsonParser, async function (req, res) {
-if (!req.body) return res.sendStatus(400);
-const key = readSecret(req.user.directories, SECRET_KEYS.AI21);
-const options = {
-method: 'POST',
-headers: {
-accept: 'application/json',
-'content-type': 'application/json',
-Authorization: `Bearer ${key}`,
-},
-body: JSON.stringify({ text: req.body[0].content }),
-};
-
-try {
-const response = await fetch('https://api.ai21.com/studio/v1/tokenize', options);
-const data = await response.json();
-return res.send({ 'token_count': data?.tokens?.length || 0 });
-} catch (err) {
-console.error(err);
-return res.send({ 'token_count': 0 });
-}
-});
-
-router.post('/google/count', jsonParser, async function (req, res) {
-if (!req.body) return res.sendStatus(400);
-const options = {
-method: 'POST',
-headers: {
-accept: 'application/json',
-'content-type': 'application/json',
-},
-body: JSON.stringify({ contents: convertGooglePrompt(req.body, String(req.query.model)).contents }),
-};
-
-try {
-const reverseProxy = req.query.reverse_proxy?.toString() || '';
-const proxyPassword = req.query.proxy_password?.toString() || '';
-const apiKey = reverseProxy ? proxyPassword : readSecret(req.user.directories, SECRET_KEYS.MAKERSUITE);
-const apiUrl = new URL(reverseProxy || API_MAKERSUITE);
-const response = await fetch(`${apiUrl.origin}/v1beta/models/${req.query.model}:countTokens?key=${apiKey}`, options);
-const data = await response.json();
-return res.send({ 'token_count': data?.totalTokens || 0 });
-} catch (err) {
-console.error(err);
-return res.send({ 'token_count': 0 });
-}
-});
-
 router.post('/llama/encode', jsonParser, createSentencepieceEncodingHandler(spp_llama));
 router.post('/nerdstash/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd));
 router.post('/nerdstash_v2/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2));
 router.post('/mistral/encode', jsonParser, createSentencepieceEncodingHandler(spp_mistral));
 router.post('/yi/encode', jsonParser, createSentencepieceEncodingHandler(spp_yi));
 router.post('/gemma/encode', jsonParser, createSentencepieceEncodingHandler(spp_gemma));
+router.post('/jamba/encode', jsonParser, createSentencepieceEncodingHandler(spp_jamba));
 router.post('/gpt2/encode', jsonParser, createTiktokenEncodingHandler('gpt2'));
 router.post('/claude/encode', jsonParser, createWebTokenizerEncodingHandler(claude_tokenizer));
 router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llama3_tokenizer));
@@ -599,6 +563,7 @@ router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
 router.post('/mistral/decode', jsonParser, createSentencepieceDecodingHandler(spp_mistral));
 router.post('/yi/decode', jsonParser, createSentencepieceDecodingHandler(spp_yi));
 router.post('/gemma/decode', jsonParser, createSentencepieceDecodingHandler(spp_gemma));
+router.post('/jamba/decode', jsonParser, createSentencepieceDecodingHandler(spp_jamba));
 router.post('/gpt2/decode', jsonParser, createTiktokenDecodingHandler('gpt2'));
 router.post('/claude/decode', jsonParser, createWebTokenizerDecodingHandler(claude_tokenizer));
 router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llama3_tokenizer));
@@ -637,6 +602,11 @@ router.post('/openai/encode', jsonParser, async function (req, res) {
 return handler(req, res);
 }

+if (queryModel.includes('jamba')) {
+const handler = createSentencepieceEncodingHandler(spp_jamba);
+return handler(req, res);
+}
+
 const model = getTokenizerModel(queryModel);
 const handler = createTiktokenEncodingHandler(model);
 return handler(req, res);
@@ -680,6 +650,11 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
 return handler(req, res);
 }

+if (queryModel.includes('jamba')) {
+const handler = createSentencepieceDecodingHandler(spp_jamba);
+return handler(req, res);
+}
+
 const model = getTokenizerModel(queryModel);
 const handler = createTiktokenDecodingHandler(model);
 return handler(req, res);
@@ -731,6 +706,11 @@ router.post('/openai/count', jsonParser, async function (req, res) {
 return res.send({ 'token_count': num_tokens });
 }

+if (model === 'jamba') {
+num_tokens = await countSentencepieceArrayTokens(spp_jamba, req.body);
+return res.send({ 'token_count': num_tokens });
+}
+
 const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
 const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
 const tokensPadding = 3;
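
With the jamba.model SentencePiece file registered, the new routes behave like the existing llama/mistral/gemma ones. A sketch of exercising the encode route from the frontend; the { text } request body and { ids, count } response fields mirror the other sentencepiece handlers and are assumptions here:

```js
// Sketch: counting Jamba tokens through the new endpoint (request/response
// shapes assumed to match the other sentencepiece handlers).
async function countJambaTokens(text) {
    const res = await fetch('/api/tokenizers/jamba/encode', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text }),
    });
    const { ids, count } = await res.json();
    console.log(count, ids.slice(0, 8));
    return count;
}

countJambaTokens('Hello, Jamba!').catch(console.error);
```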

View File

@@ -367,6 +367,79 @@ function convertGooglePrompt(messages, model, useSysPrompt = false, charName = '', userName = '') {
 return { contents: contents, system_instruction: system_instruction };
 }

+/**
+ * Convert a ChatML prompt to the format used by AI21: squash leading system messages, merge consecutive same-role turns.
+ * @param {object[]} messages Array of messages
+ * @param {string} charName Character name
+ * @param {string} userName User name
+ */
+function convertAI21Messages(messages, charName = '', userName = '') {
+if (!Array.isArray(messages)) {
+return [];
+}
+
+// Collect all the system messages up until the first instance of a non-system message, and then remove them from the messages array.
+let i = 0, systemPrompt = '';
+for (i = 0; i < messages.length; i++) {
+if (messages[i].role !== 'system') {
+break;
+}
+// Append example names if not already done by the frontend (e.g. for group chats).
+if (userName && messages[i].name === 'example_user') {
+if (!messages[i].content.startsWith(`${userName}: `)) {
+messages[i].content = `${userName}: ${messages[i].content}`;
+}
+}
+if (charName && messages[i].name === 'example_assistant') {
+if (!messages[i].content.startsWith(`${charName}: `)) {
+messages[i].content = `${charName}: ${messages[i].content}`;
+}
+}
+systemPrompt += `${messages[i].content}\n\n`;
+}
+
+messages.splice(0, i);
+
+// If the first remaining message is not a user message, interject a placeholder user turn.
+// Also prevents erroring out if the messages array is empty.
+if (messages.length === 0 || (messages.length > 0 && messages[0].role !== 'user')) {
+messages.unshift({
+role: 'user',
+content: '[Start a new chat]',
+});
+}
+
+if (systemPrompt) {
+messages.unshift({
+role: 'system',
+content: systemPrompt.trim(),
+});
+}
+
+// The endpoint doesn't support completion names, so prepend them if not already done by the frontend (e.g. for group chats).
+messages.forEach(msg => {
+if ('name' in msg) {
+if (msg.role !== 'system' && !msg.content.startsWith(`${msg.name}: `)) {
+msg.content = `${msg.name}: ${msg.content}`;
+}
+delete msg.name;
+}
+});
+
+// Since the messaging endpoint only supports alternating turns, we have to merge messages with the same role if they follow each other.
+let mergedMessages = [];
+messages.forEach((message) => {
+if (mergedMessages.length > 0 && mergedMessages[mergedMessages.length - 1].role === message.role) {
+mergedMessages[mergedMessages.length - 1].content += '\n\n' + message.content;
+} else {
+mergedMessages.push(message);
+}
+});
+
+return mergedMessages;
+}
 /**
 * Convert a prompt from the ChatML objects to the format used by MistralAI.
 * @param {object[]} messages Array of messages
@@ -520,4 +593,5 @@ module.exports = {
 convertCohereMessages,
 convertMistralMessages,
 convertCohereTools,
+convertAI21Messages,
 };
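
To make convertAI21Messages concrete, here is a hypothetical input and the output the function above produces; the message content is invented for illustration:

```js
const input = [
    { role: 'system', content: 'You are Seraphina.' },
    { role: 'system', content: 'Stay in character.' },
    { role: 'assistant', name: 'Seraphina', content: 'Greetings, traveler.' },
    { role: 'assistant', content: 'What brings you here?' },
    { role: 'user', name: 'User', content: 'Hello!' },
];

console.log(convertAI21Messages(input, 'Seraphina', 'User'));
// [
//   { role: 'system', content: 'You are Seraphina.\n\nStay in character.' },
//   { role: 'user', content: '[Start a new chat]' },
//   { role: 'assistant', content: 'Seraphina: Greetings, traveler.\n\nWhat brings you here?' },
//   { role: 'user', content: 'User: Hello!' },
// ]
```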

BIN src/tokenizers/jamba.model (new file)

Binary file not shown.