Bin Chen 2024-04-21 23:16:40 +08:00 committed by GitHub
commit 5ea125076b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 1832 additions and 56 deletions

package-lock.json (generated; 1284 changed lines; file diff suppressed because it is too large)


@ -36,7 +36,9 @@
"ws": "^8.13.0",
"yaml": "^2.3.4",
"yargs": "^17.7.1",
"yauzl": "^2.10.0"
"yauzl": "^2.10.0",
"@aws-sdk/client-bedrock-runtime": "^3.515.0",
"@aws-sdk/client-bedrock": "^3.515.0"
},
"overrides": {
"parse-bmfont-xml": {


@ -469,7 +469,7 @@
</span>
</div>
</div>
<div class="range-block" data-source="openai,claude,windowai,openrouter,ai21,scale,makersuite,mistralai,custom,cohere">
<div class="range-block" data-source="openai,claude,windowai,openrouter,ai21,scale,makersuite,mistralai,custom,cohere,bedrock">
<div class="range-block-title" data-i18n="Temperature">
Temperature
</div>
@ -521,7 +521,7 @@
</div>
</div>
</div>
<div data-newbie-hidden class="range-block" data-source="claude,openrouter,ai21,makersuite,cohere">
<div data-newbie-hidden class="range-block" data-source="claude,openrouter,ai21,makersuite,cohere,bedrock">
<div class="range-block-title" data-i18n="Top K">
Top K
</div>
@ -534,7 +534,7 @@
</div>
</div>
</div>
<div data-newbie-hidden class="range-block" data-source="openai,claude,openrouter,ai21,scale,makersuite,mistralai,custom,cohere">
<div data-newbie-hidden class="range-block" data-source="openai,claude,openrouter,ai21,scale,makersuite,mistralai,custom,cohere,bedrock">
<div class="range-block-title" data-i18n="Top-p">
Top P
</div>
@ -1773,7 +1773,7 @@
<span data-i18n="Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.">Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.</span>
</div>
</div>
<div data-newbie-hidden class="range-block" data-source="claude">
<div data-newbie-hidden class="range-block" data-source="claude,bedrock">
<div class="wide100p">
<span id="claude_assistant_prefill_text" data-i18n="Assistant Prefill">Assistant Prefill</span>
<textarea id="claude_assistant_prefill" class="text_pole textarea_compact" name="assistant_prefill autoSetHeight" rows="3" maxlength="10000" data-i18n="[placeholder]Start Claude's answer with..." placeholder="Start Claude's answer with..."></textarea>
@ -2288,6 +2288,7 @@
<optgroup>
<option value="ai21">AI21</option>
<option value="claude">Claude</option>
<option value="bedrock">Amazon Bedrock</option>
<option value="cohere">Cohere</option>
<option value="makersuite">Google MakerSuite</option>
<option value="mistralai">MistralAI</option>
@ -2446,6 +2447,53 @@
</label>
</div>
</form>
<form id="bedrock_form" data-source="bedrock" action="javascript:void(null);" method="post" enctype="multipart/form-data">
<div>
<h4>AWS Region</h4>
<select id="aws_region_select">
<option value="us-east-1">us-east-1</option>
<option value="us-west-2">us-west-2</option>
</select>
</div>
<h4>AWS Access Key / Secret Key (optional if you use an IAM Role or AWS CLI credentials)</h4>
<div class="flex-container">
<input id="api_key_bedrock_access" name="api_key_bedrock_access" placeholder="AWS Access key" title="AWS Access key" class="text_pole flex1" maxlength="500" value="" type="text" autocomplete="off">
<div title="Clear your Access key" data-i18n="[title]Clear your Access key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_bedrock_access"></div>
<input id="api_key_bedrock_secret" name="api_key_bedrock_secret" placeholder="AWS Secret key" title="AWS Secret key" class="text_pole flex1" maxlength="500" value="" type="text" autocomplete="off">
<div title="Clear your Secret key" data-i18n="[title]Clear your Secret key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_bedrock_secret"></div>
</div>
<div data-for="api_key_bedrock_access" class="neutral_warning">
For privacy reasons, your Access key will be hidden after you reload the page.
</div>
<div data-for="api_key_bedrock_secret" class="neutral_warning">
For privacy reasons, your Secret key will be hidden after you reload the page.
</div>
<div>
<h4 data-i18n="Amazon Bedrock Model">Amazon Bedrock Model</h4>
<select id="model_bedrock_select">
<optgroup label="Claude 3">
<option value="anthropic.claude-3-sonnet-20240229-v1:0">Claude 3 Sonnet</option>
<option value="anthropic.claude-3-haiku-20240307-v1:0">Claude 3 Haiku</option>
</optgroup>
<optgroup label="Claude 2">
<option value="anthropic.claude-v2">Claude 2.0</option>
<option value="anthropic.claude-v2:1">Claude 2.1</option>
</optgroup>
<optgroup label="Claude Instant">
<option value="anthropic.claude-instant-v1">Claude Instant v1</option>
</optgroup>
<optgroup label="Mistral">
<option value="mistral.mistral-large-2402-v1:0">Mistral Large (2402)</option>
<option value="mistral.mixtral-8x7b-instruct-v0:1">Mixtral 8x7B Instruct (v0.1)</option>
<option value="mistral.mistral-7b-instruct-v0:2">Mistral 7B Instruct (v0.2)</option>
</optgroup>
<!-- <optgroup label="LLaMA">
    <option value="meta.llama2-13b-chat-v1">llama2-13b-chat</option>
</optgroup> -->
</select>
</div>
</form>
<form id="claude_form" data-source="claude" action="javascript:void(null);" method="post" enctype="multipart/form-data">
<h4>Claude API Key</h4>
<div>


@ -62,7 +62,7 @@
"Temperature": "درجة الحرارة",
"Frequency Penalty": "عقوبة التكرار",
"Presence Penalty": "عقوبة الوجود",
"Top-p": "أعلى p",
"Top-p": "أعلى p",
"Display bot response text chunks as they are generated": "عرض النصوص لجظة بلحظة",
"Top A": "أعلى A",
"Typical Sampling": "عينة نموذجية",
@ -101,7 +101,7 @@
"Inserts jailbreak as a last system message.": "يدرج كسر الحظر كرسالة نظام أخيرة.",
"This tells the AI to ignore its usual content restrictions.": "هذا يخبر الذكاء الاصطناعي بتجاهل القيود المعتادة على المحتوى.",
"NSFW Encouraged": "NSFW مشجع",
"Tell the AI that NSFW is allowed.": "قل للذكاء الاصطناعي أنه يُسمح بـ NSFW",
"Tell the AI that NSFW is allowed.": "قل للذكاء الاصطناعي أنه يُسمح بـ NSFW",
"NSFW Prioritized": "الأولوية للمحتوى غير مناسب للعمل",
"NSFW prompt text goes first in the prompt to emphasize its effect.": "النص الغير مناسب للعمل يأتي أولاً في التعليمات لتأكيد تأثيره.",
"Streaming": "البث المباشر ل",
@ -141,7 +141,7 @@
"Influences bot behavior in its responses": "يؤثر على سلوك الروبوت في ردوده.",
"Connect": "الاتصال",
"Test Message": "رسالة اختبار",
"API": "واجهة برمجة التطبيقات (API)",
"API": "واجهة برمجة التطبيقات (API)",
"KoboldAI": "KoboldAI",
"Use Horde": "استخدام Horde",
"API url": "رابط API",
@ -206,7 +206,7 @@
"Scale API Key": "مفتاح API لـ Scale",
"Alt Method": "طريقة بديلة",
"AI21 API Key": "مفتاح API لـ AI21",
"AI21 Model": "نموذج AI21",
"AI21 Model": "نموذج AI21",
"View API Usage Metrics": "عرض مقاييس استخدام واجهة برمجة التطبيقات",
"Show External models (provided by API)": "عرض النماذج الخارجية (المقدمة من قبل واجهة برمجة التطبيقات)",
"Bot": "روبوت:",
@ -915,7 +915,7 @@
"Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.": "استخدم المحلل النحوي المناسب لنماذج Google عبر واجهة برمجة التطبيقات الخاصة بهم. معالجة الإشارات الأولية بطيئة، ولكنها تقدم عداد رمز دقيق جدًا.",
"Load koboldcpp order": "تحميل أمر koboldcpp",
"Use Google Tokenizer": "استخدم محلل النحوي من Google"
}
}


@ -916,5 +916,5 @@
"Use Google Tokenizer": "Usar Tokenizador de Google"
}


@ -205,7 +205,7 @@
"Scale API Key": "Clé API Scale",
"Alt Method": "Méthode alternative",
"AI21 API Key": "Clé API AI21",
"AI21 Model": "Modèle AI21",
"AI21 Model": "Modèle AI21",
"View API Usage Metrics": "Afficher les mesures d'utilisation de l'API",
"Show External models (provided by API)": "Afficher les modèles externes (fournis par l'API)",
"Bot": "Bot",
@ -914,5 +914,5 @@
"Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.": "Utilisez le tokenizer approprié pour les modèles Google via leur API. Traitement des invitations plus lent, mais offre un décompte de jetons beaucoup plus précis.",
"Load koboldcpp order": "Charger l'ordre koboldcpp",
"Use Google Tokenizer": "Utiliser le tokenizer Google"
}
}


@ -62,7 +62,7 @@
"Temperature": "Hitastig",
"Frequency Penalty": "Tíðnarefning",
"Presence Penalty": "Tilkoma refning",
"Top-p": "Topp-p",
"Top-p": "Topp-p",
"Display bot response text chunks as they are generated": "Birta bætir svarborðstextabrot þegar þau eru búnar til",
"Top A": "Topp A",
"Typical Sampling": "Venjuleg úrtaka",
@ -915,5 +915,5 @@
"Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.": "Notaðu rétta tokenizer fyrir Google módel með þeirra API. Hægri umhvörf fyrir hvöttavinnslu, en býður upp á miklu nákvæmari talningu á táknunum.",
"Load koboldcpp order": "Hlaðið inn færslu af koboldcpp",
"Use Google Tokenizer": "Notaðu Google Tokenizer"
}
}


@ -917,5 +917,5 @@
"Use Google Tokenizer": "Usa il Tokenizer di Google"
}
}


@ -140,7 +140,7 @@
"Influences bot behavior in its responses": "返信でボットの動作に影響を与えます",
"Connect": "接続",
"Test Message": "テストメッセージ",
"API": "API",
"API": "API",
"KoboldAI": "KoboldAI",
"Use Horde": "ホードを使用",
"API url": "API URL",
@ -914,5 +914,5 @@
"Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.": "Googleモデル用の適切なトークナイザーを使用します。 API経由で。 処理が遅くなりますが、トークンの数え上げがはるかに正確になります。",
"Load koboldcpp order": "koboldcppオーダーを読み込む",
"Use Google Tokenizer": "Googleトークナイザーを使用"
}
}


@ -916,5 +916,5 @@
"Load koboldcpp order": "코볼드 CPP 순서로 로드",
"Use Google Tokenizer": "Google 토크나이저 사용"
}
}


@ -1,16 +1,17 @@
[
{ "lang": "ar-sa", "display": "عربي (Arabic)" },
{ "lang": "zh-cn", "display": "简体中文 (Chinese) (Simplified)" },
{ "lang": "nl-nl", "display": "Nederlands (Dutch)" },
{ "lang": "de-de", "display": "Deutsch (German)" },
{ "lang": "fr-fr", "display": "Français (French)" },
{ "lang": "is-is", "display": "íslenska (Icelandic)" },
{ "lang": "it-it", "display": "Italiano (Italian)" },
{ "lang": "ja-jp", "display": "日本語 (Japanese)" },
{ "lang": "ko-kr", "display": "한국어 (Korean)" },
{ "lang": "pt-pt", "display": "Português (Portuguese brazil)" },
{ "lang": "ru-ru", "display": "Русский (Russian)" },
{ "lang": "es-es", "display": "Español (Spanish)" },
{ "lang": "uk-ua", "display": "Yкраїнська (Ukrainian)" },
{ "lang": "vi-vn", "display": "Tiếng Việt (Vietnamese)" }
]
[
{ "lang": "ar-sa", "display": "عربي (Arabic)" },
{ "lang": "zh-cn", "display": "简体中文(Chinese Simplified)" },
{ "lang": "nl-nl", "display": "Nederlands (Dutch)" },
{ "lang": "de-de", "display": "Deutsch (German)" },
{ "lang": "fr-fr", "display": "Français (French)" },
{ "lang": "is-is", "display": "íslenska (Icelandic)" },
{ "lang": "it-it", "display": "Italiano (Italian)" },
{ "lang": "ja-jp", "display": "日本語 (Japanese)" },
{ "lang": "ko-kr", "display": "한국어 (Korean)" },
{ "lang": "pt-pt", "display": "Português (Portuguese brazil)" },
{ "lang": "ru-ru", "display": "Русский (Russian)" },
{ "lang": "es-es", "display": "Español (Spanish)" },
{ "lang": "uk-ua", "display": "Yкраїнська (Ukrainian)" },
{ "lang": "vi-vn", "display": "Tiếng Việt (Vietnamese)" }
]


@ -917,5 +917,5 @@
"Use Google Tokenizer": "Google Tokenizer gebruiken"
}
}


@ -915,5 +915,5 @@
"Use Google Tokenizer": "Usar Tokenizer do Google"
}
}


@ -916,4 +916,4 @@
"Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.": "Используйте соответствующий токенизатор для моделей Google через их API. Медленная обработка подсказок, но предлагает намного более точный подсчет токенов.",
"Load koboldcpp order": "Загрузить порядок koboldcpp",
"Use Google Tokenizer": "Использовать токенизатор Google"
}
}


@ -62,7 +62,7 @@
"Temperature": "Nhiệt độ",
"Frequency Penalty": "Phạt Tần số",
"Presence Penalty": "Phạt Sự hiện",
"Top-p": "Top-p",
"Top-p": "Top-p",
"Display bot response text chunks as they are generated": "Hiển thị các phần văn bản phản hồi của bot khi chúng được tạo ra",
"Top A": "Top A",
"Typical Sampling": "Mẫu Đại diện",
@ -141,7 +141,7 @@
"Influences bot behavior in its responses": "Ảnh hưởng đến hành vi của bot trong các phản hồi của nó",
"Connect": "Kết nối",
"Test Message": "Tin nhắn kiểm tra",
"API": "Giao diện lập trình ứng dụng (API)",
"API": "Giao diện lập trình ứng dụng (API)",
"KoboldAI": "KoboldAI",
"Use Horde": "Sử dụng Horde",
"API url": "URL API",
@ -206,7 +206,7 @@
"Scale API Key": "Khóa API của Scale",
"Alt Method": "Phương pháp thay thế",
"AI21 API Key": "Khóa API của AI21",
"AI21 Model": "Mô hình AI21",
"AI21 Model": "Mô hình AI21",
"View API Usage Metrics": "Xem số liệu sử dụng API",
"Show External models (provided by API)": "Hiển thị các mô hình bên ngoài (do API cung cấp)",
"Bot": "Bot:",
@ -915,5 +915,5 @@
"Use the appropriate tokenizer for Google models via their API. Slower prompt processing, but offers much more accurate token counting.": "Sử dụng bộ mã hóa phù hợp cho các mô hình của Google thông qua API của họ. Xử lý lời mời chậm hơn, nhưng cung cấp đếm token chính xác hơn nhiều.",
"Load koboldcpp order": "Tải đơn hàng koboldcpp",
"Use Google Tokenizer": "Sử dụng bộ mã hóa của Google"
}
}


@ -171,6 +171,7 @@ export const chat_completion_sources = {
MAKERSUITE: 'makersuite',
MISTRALAI: 'mistralai',
CUSTOM: 'custom',
BEDROCK: 'bedrock',
COHERE: 'cohere',
};
@ -231,6 +232,8 @@ const default_settings = {
claude_model: 'claude-2.1',
google_model: 'gemini-pro',
ai21_model: 'j2-ultra',
bedrock_model: 'anthropic.claude-v2',
bedrock_region: 'us-east-1',
mistralai_model: 'mistral-medium-latest',
cohere_model: 'command-r',
custom_model: '',
@ -301,6 +304,8 @@ const oai_settings = {
claude_model: 'claude-2.1',
google_model: 'gemini-pro',
ai21_model: 'j2-ultra',
bedrock_model: 'anthropic.claude-v2',
bedrock_region: 'us-east-1',
mistralai_model: 'mistral-medium-latest',
cohere_model: 'command-r',
custom_model: '',
@ -1391,6 +1396,8 @@ function getChatCompletionModel() {
return oai_settings.mistralai_model;
case chat_completion_sources.CUSTOM:
return oai_settings.custom_model;
case chat_completion_sources.BEDROCK:
return oai_settings.bedrock_model;
case chat_completion_sources.COHERE:
return oai_settings.cohere_model;
default:
@ -1605,6 +1612,7 @@ async function sendOpenAIRequest(type, messages, signal) {
let logit_bias = {};
const messageId = getNextMessageId(type);
const isClaude = oai_settings.chat_completion_source == chat_completion_sources.CLAUDE;
const isBedrock = oai_settings.chat_completion_source == chat_completion_sources.BEDROCK;
const isOpenRouter = oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER;
const isScale = oai_settings.chat_completion_source == chat_completion_sources.SCALE;
const isAI21 = oai_settings.chat_completion_source == chat_completion_sources.AI21;
@ -1696,7 +1704,7 @@ async function sendOpenAIRequest(type, messages, signal) {
delete generate_data.logprobs;
}
if (isClaude) {
if (isClaude || (isBedrock && model.startsWith('anthropic.claude'))) {
generate_data['top_k'] = Number(oai_settings.top_k_openai);
generate_data['claude_use_sysprompt'] = oai_settings.claude_use_sysprompt;
generate_data['stop'] = getCustomStoppingStrings(); // Claude shouldn't have limits on stop strings.
@ -1706,6 +1714,18 @@ async function sendOpenAIRequest(type, messages, signal) {
generate_data['assistant_prefill'] = substituteParams(oai_settings.assistant_prefill);
}
}
if (isBedrock) {
generate_data['bedrock_region'] = oai_settings.bedrock_region;
// min value of top_k is 1, max is 200.
if (model.startsWith('mistral.')) {
    generate_data['top_k'] = Math.max(Number(oai_settings.top_k_openai), 1);
// Don't add a prefill on quiet gens (summarization)
if (!isQuiet) {
generate_data['assistant_prefill'] = substituteParams(oai_settings.assistant_prefill);
}
}
}
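For orientation, a minimal sketch of what this branch contributes to generate_data for a hypothetical Bedrock Mistral request (values illustrative; field names taken from the code above):

    // When the source is Bedrock and the model starts with 'mistral.':
    // {
    //     bedrock_region: 'us-east-1',
    //     top_k: 40,                 // clamped to a minimum of 1
    //     assistant_prefill: '...',  // omitted on quiet generations such as summarization
    // }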
if (isOpenRouter) {
generate_data['top_k'] = Number(oai_settings.top_k_openai);
@ -1825,6 +1845,12 @@ async function sendOpenAIRequest(type, messages, signal) {
function getStreamingReply(data) {
if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
return data?.delta?.text || '';
} else if (oai_settings.chat_completion_source == chat_completion_sources.BEDROCK) {
if (oai_settings.bedrock_model.startsWith('anthropic.')) {
return data?.delta?.text || '';
} else if (oai_settings.bedrock_model.startsWith('mistral.')) {
return data?.outputs?.[0]?.text || '';
}
} else if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
return data?.candidates?.[0]?.content?.parts?.[0]?.text || '';
} else {
@ -2625,6 +2651,8 @@ function loadOpenAISettings(data, settings) {
oai_settings.mistralai_model = settings.mistralai_model ?? default_settings.mistralai_model;
oai_settings.cohere_model = settings.cohere_model ?? default_settings.cohere_model;
oai_settings.custom_model = settings.custom_model ?? default_settings.custom_model;
oai_settings.bedrock_model = settings.bedrock_model ?? default_settings.bedrock_model;
oai_settings.bedrock_region = settings.bedrock_region ?? default_settings.bedrock_region;
oai_settings.custom_url = settings.custom_url ?? default_settings.custom_url;
oai_settings.custom_include_body = settings.custom_include_body ?? default_settings.custom_include_body;
oai_settings.custom_exclude_body = settings.custom_exclude_body ?? default_settings.custom_exclude_body;
@ -2673,6 +2701,10 @@ function loadOpenAISettings(data, settings) {
$('#openai_image_inlining').prop('checked', oai_settings.image_inlining);
$('#openai_bypass_status_check').prop('checked', oai_settings.bypass_status_check);
$('#aws_region_select').val(oai_settings.bedrock_region);
$('#model_bedrock_select').val(oai_settings.bedrock_model);
$(`#model_bedrock_select option[value="${oai_settings.bedrock_model}"]`).attr('selected', true);
$('#model_openai_select').val(oai_settings.openai_model);
$(`#model_openai_select option[value="${oai_settings.openai_model}"`).attr('selected', true);
$('#model_claude_select').val(oai_settings.claude_model);
@ -2856,6 +2888,10 @@ async function getStatusOpen() {
data.custom_include_headers = oai_settings.custom_include_headers;
}
if (oai_settings.chat_completion_source === chat_completion_sources.BEDROCK) {
data.bedrock_region = oai_settings.bedrock_region;
}
const canBypass = (oai_settings.chat_completion_source === chat_completion_sources.OPENAI && oai_settings.bypass_status_check) || oai_settings.chat_completion_source === chat_completion_sources.CUSTOM;
if (canBypass) {
setOnlineStatus('Status check bypassed');
@ -3541,6 +3577,11 @@ async function onModelChange() {
$('#custom_model_id').val(value).trigger('input');
}
if ($(this).is('#model_bedrock_select')) {
console.log('Bedrock model changed to', value);
oai_settings.bedrock_model = value;
}
if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
if (oai_settings.max_context_unlocked) {
$('#openai_max_context').attr('max', unlocked_max);
@ -3716,6 +3757,29 @@ async function onModelChange() {
$('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
}
if (oai_settings.chat_completion_source == chat_completion_sources.BEDROCK) {
oai_settings.bedrock_region = String($('#aws_region_select').val());
if (oai_settings.max_context_unlocked) {
    $('#openai_max_context').attr('max', max_200k);
}
else if (value.startsWith('anthropic.claude-3') || value === 'anthropic.claude-v2:1') {
    $('#openai_max_context').attr('max', max_200k);
}
else if (value.endsWith('100k') || value === 'anthropic.claude-instant-v1') {
    $('#openai_max_context').attr('max', claude_100k_max);
}
else {
    $('#openai_max_context').attr('max', claude_max);
}
oai_settings.openai_max_context = Math.min(oai_settings.openai_max_context, Number($('#openai_max_context').attr('max')));
$('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
oai_settings.temp_openai = Math.min(claude_max_temp, oai_settings.temp_openai);
$('#temp_openai').attr('max', claude_max_temp).val(oai_settings.temp_openai).trigger('input');
}
$('#openai_max_context_counter').attr('max', Number($('#openai_max_context').attr('max')));
saveSettingsDebounced();
@ -3884,6 +3948,24 @@ async function onConnectButtonClick(e) {
}
}
if (oai_settings.chat_completion_source == chat_completion_sources.BEDROCK) {
const access_key_aws = String($('#api_key_bedrock_access').val()).trim();
const secret_key_aws = String($('#api_key_bedrock_secret').val()).trim();
if (access_key_aws.length) {
await writeSecret(SECRET_KEYS.BEDROCK_ACCESS_KEY, access_key_aws);
}
if (!secret_state[SECRET_KEYS.BEDROCK_ACCESS_KEY]) {
console.log('No access key saved for Amazon Bedrock');
}
if (secret_key_aws.length) {
await writeSecret(SECRET_KEYS.BEDROCK_SECRET_KEY, secret_key_aws);
}
if (!secret_state[SECRET_KEYS.BEDROCK_SECRET_KEY]) {
console.log('No secret key saved for Amazon Bedrock');
}
}
startStatusLoading();
saveSettingsDebounced();
await getStatusOpen();
@ -3925,6 +4007,9 @@ function toggleChatCompletionForms() {
else if (oai_settings.chat_completion_source == chat_completion_sources.CUSTOM) {
$('#model_custom_select').trigger('change');
}
else if (oai_settings.chat_completion_source == chat_completion_sources.BEDROCK) {
$('#model_bedrock_select').trigger('change');
}
$('[data-source]').each(function () {
const validSources = $(this).data('source').split(',');
$(this).toggle(validSources.includes(oai_settings.chat_completion_source));
@ -3948,6 +4033,7 @@ async function testApiConnection() {
toastr.success('API connection successful!');
}
catch (err) {
console.error(err);
toastr.error('Could not get a reply from API. Check your connection settings / API key and try again.');
}
}
@ -4581,6 +4667,8 @@ $(document).ready(async function () {
$('#model_mistralai_select').on('change', onModelChange);
$('#model_cohere_select').on('change', onModelChange);
$('#model_custom_select').on('change', onModelChange);
$('#model_bedrock_select').on('change', onModelChange);
$('#aws_region_select').on('change', onModelChange);
$('#settings_preset_openai').on('change', onSettingsPresetChange);
$('#new_oai_preset').on('click', onNewPresetClick);
$('#delete_oai_preset').on('click', onDeletePresetClick);


@ -20,6 +20,8 @@ export const SECRET_KEYS = {
DREAMGEN: 'api_key_dreamgen',
CUSTOM: 'api_key_custom',
OOBA: 'api_key_ooba',
BEDROCK_ACCESS_KEY: 'api_key_bedrock_access',
BEDROCK_SECRET_KEY: 'api_key_bedrock_secret',
NOMICAI: 'api_key_nomicai',
KOBOLDCPP: 'api_key_koboldcpp',
LLAMACPP: 'api_key_llamacpp',
@ -43,6 +45,12 @@ const INPUT_MAP = {
[SECRET_KEYS.CUSTOM]: '#api_key_custom',
[SECRET_KEYS.TOGETHERAI]: '#api_key_togetherai',
[SECRET_KEYS.OOBA]: '#api_key_ooba',
[SECRET_KEYS.BEDROCK_ACCESS_KEY]: '#api_key_bedrock_access',
[SECRET_KEYS.BEDROCK_SECRET_KEY]: '#api_key_bedrock_secret',
[SECRET_KEYS.KOBOLDCPP]: '#api_key_koboldcpp',
[SECRET_KEYS.INFERMATICAI]: '#api_key_infermaticai',
[SECRET_KEYS.DREAMGEN]: '#api_key_dreamgen',
[SECRET_KEYS.NOMICAI]: '#api_key_nomicai',

src/bedrock.js (new file, 99 additions)

@ -0,0 +1,99 @@
const { BedrockRuntimeClient, InvokeModelCommand, InvokeModelWithResponseStreamCommand } = require('@aws-sdk/client-bedrock-runtime');
const { BedrockClient, ListFoundationModelsCommand } = require('@aws-sdk/client-bedrock');
const { readSecret, SECRET_KEYS } = require('./endpoints/secrets');
// Lazily create and cache one BedrockClient per region; rebuild the client
// when the stored access/secret key pair changes.
const getClient = (function () {
    const client = {};
    let aksk = '';
    return function (region_name) {
        const access_key = readSecret(SECRET_KEYS.BEDROCK_ACCESS_KEY) || '';
        const secret_key = readSecret(SECRET_KEYS.BEDROCK_SECRET_KEY) || '';
        const _aksk = access_key + secret_key;
        const refresh = _aksk != aksk;
        if (!client[region_name] || refresh) {
            aksk = _aksk;
            if (access_key && secret_key) {
                client[region_name] = new BedrockClient({
                    region: region_name,
                    credentials: {
                        accessKeyId: access_key,
                        secretAccessKey: secret_key,
                    },
                });
            } else {
                console.log('warn: secrets not found for bedrock, will fall back to the default credential provider.');
                client[region_name] = new BedrockClient({ region: region_name });
            }
        }
        return client[region_name];
    };
})();
// Same per-region caching for the BedrockRuntimeClient used for inference calls.
const getRuntimeClient = (function () {
    const client = {};
    let aksk = '';
    return function (region_name) {
        const access_key = readSecret(SECRET_KEYS.BEDROCK_ACCESS_KEY) || '';
        const secret_key = readSecret(SECRET_KEYS.BEDROCK_SECRET_KEY) || '';
        const _aksk = access_key + secret_key;
        const refresh = _aksk != aksk;
        if (!client[region_name] || refresh) {
            aksk = _aksk;
            if (access_key && secret_key) {
                client[region_name] = new BedrockRuntimeClient({
                    region: region_name,
                    credentials: {
                        accessKeyId: access_key,
                        secretAccessKey: secret_key,
                    },
                });
            } else {
                console.log('warn: secrets not found for bedrock, will fall back to the default credential provider.');
                client[region_name] = new BedrockRuntimeClient({ region: region_name });
            }
        }
        return client[region_name];
    };
})();
async function listTextModels(region_name) {
const command = new ListFoundationModelsCommand({ byOutputModality: 'TEXT' });
const data = await getClient(region_name).send(command);
return data;
}
async function invokeModel(region_name, params) {
    // Claude 3 and the older model families currently share the same invocation path.
    const command = new InvokeModelCommand(params);
    const data = await getRuntimeClient(region_name).send(command);
    return data;
}
async function invokeModelWithStreaming(region_name, params) {
const command = new InvokeModelWithResponseStreamCommand(params);
const data = await getRuntimeClient(region_name).send(command);
return data;
}
module.exports = {
getRuntimeClient,
getClient,
listTextModels,
invokeModel,
invokeModelWithStreaming,
};
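A minimal usage sketch of this module, assuming credentials come from the saved secrets or the default AWS provider chain; the region and model ID below are placeholders:

    const { listTextModels, invokeModel } = require('./bedrock');

    async function demo() {
        // List foundation models in the region that produce text output.
        const { modelSummaries } = await listTextModels('us-east-1');
        console.log(modelSummaries.map(m => m.modelId));

        // Invoke a model with a payload in its native request format.
        const resp = await invokeModel('us-east-1', {
            modelId: 'anthropic.claude-instant-v1',
            contentType: 'application/json',
            accept: 'application/json',
            body: JSON.stringify({
                prompt: '\n\nHuman: Hello\n\nAssistant:',
                max_tokens_to_sample: 64,
            }),
        });
        console.log(resp.body.transformToString());
    }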


@ -162,6 +162,7 @@ const CHAT_COMPLETION_SOURCES = {
MAKERSUITE: 'makersuite',
MISTRALAI: 'mistralai',
CUSTOM: 'custom',
BEDROCK: 'bedrock',
COHERE: 'cohere',
};


@ -4,12 +4,14 @@ const Readable = require('stream').Readable;
const { jsonParser } = require('../../express-common');
const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY, OPENROUTER_HEADERS } = require('../../constants');
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt, convertCohereMessages } = require('../../prompt-converters');
const { forwardFetchResponse, forwardBedrockStreamResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt, convertCohereMessages, convertMistralPrompt } = require('../../prompt-converters');
const { readSecret, SECRET_KEYS } = require('../secrets');
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
const { listTextModels, invokeModel, invokeModelWithStreaming } = require('../../bedrock');
const API_OPENAI = 'https://api.openai.com/v1';
const API_CLAUDE = 'https://api.anthropic.com/v1';
const API_MISTRAL = 'https://api.mistral.ai/v1';
@ -522,6 +524,130 @@ async function sendMistralAIRequest(request, response) {
}
}
/**
* Construct the Bedrock Claude inference request payload.
* @param {express.Request} request Express request
*/
function constructBedrockClaudePayload(request) {
let use_system_prompt = (request.body.model.startsWith('anthropic.claude-2') || request.body.model.startsWith('anthropic.claude-3')) && request.body.claude_use_sysprompt;
let converted_prompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, use_system_prompt, request.body.human_sysprompt_message, request.body.char_name, request.body.user_name);
// Add custom stop sequences
const stopSequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:'];
if (Array.isArray(request.body.stop)) {
stopSequences.push(...request.body.stop);
}
const modelRequestBody = {
messages: converted_prompt.messages,
max_tokens: request.body.max_tokens,
stop_sequences: stopSequences,
temperature: request.body.temperature,
top_p: request.body.top_p,
top_k: request.body.top_k,
anthropic_version: 'bedrock-2023-05-31',
};
if (use_system_prompt) {
modelRequestBody.system = converted_prompt.systemPrompt;
}
return modelRequestBody;
}
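For illustration, a payload this helper might produce for a one-turn chat with the system prompt enabled (all values hypothetical):

    // {
    //     "messages": [{ "role": "user", "content": "Hello" }],
    //     "max_tokens": 300,
    //     "stop_sequences": ["\n\nHuman:", "\n\nSystem:", "\n\nAssistant:"],
    //     "temperature": 0.7,
    //     "top_p": 0.95,
    //     "top_k": 40,
    //     "anthropic_version": "bedrock-2023-05-31",
    //     "system": "You are a helpful assistant."
    // }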
/**
* Construct the Bedrock Mistral inference request payload.
* format: <s>[INST] System Prompt + Instruction [/INST] Model answer</s>[INST] Follow-up instruction [/INST]
* @param {express.Request} request Express request
*/
function constructBedrockMistralPayload(request) {
let converted_prompt = convertMistralPrompt(request.body.messages, request.body.assistant_prefill);
const modelRequestBody = {
prompt: converted_prompt,
max_tokens: request.body.max_tokens,
stop: request.body.stop || [],
temperature: request.body.temperature,
top_p: request.body.top_p,
top_k: request.body.top_k,
};
return modelRequestBody;
}
/**
* Sends a request to Amazon Bedrock
* @param {express.Request} request Express request
* @param {express.Response} response Express response
*/
async function sendBedrockRequest(request, response) {
const divider = '-'.repeat(process.stdout.columns);
const bedrock_region = request.body.bedrock_region || 'us-east-1';
let modelRequestBody;
try {
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
if (request.body.model.startsWith('anthropic.')) {
    modelRequestBody = constructBedrockClaudePayload(request);
} else if (request.body.model.startsWith('mistral.')) {
    modelRequestBody = constructBedrockMistralPayload(request);
} else {
    console.log(color.red(`Unknown model family ${request.body.model}\n${divider}`));
    return response.status(400).send({ error: true });
}
const bedrockRequestBody = { // InvokeModelRequest
    body: JSON.stringify(modelRequestBody),
    contentType: 'application/json',
    accept: 'application/json',
    modelId: request.body.model, // required
};
console.log('Bedrock request:', JSON.stringify(bedrockRequestBody));
if (request.body.stream) {
const respBedrockStream = await invokeModelWithStreaming(bedrock_region, bedrockRequestBody);
// Pipe remote SSE stream to Express response
forwardBedrockStreamResponse(respBedrockStream, response);
} else {
const resp = await invokeModel(bedrock_region, bedrockRequestBody);
const statusCode = resp['$metadata']['httpStatusCode'];
const body = resp.body.transformToString();
if (statusCode !== 200) {
    console.log(color.red(`Bedrock API returned error: ${statusCode} ${body}\n${divider}`));
    return response.status(statusCode).send({ error: true });
}
console.log('Bedrock response:', body);
let content;
// Wrap it back to OAI format
if (request.body.model.startsWith('anthropic.')) {
    content = JSON.parse(body)['content'][0]['text'];
} else if (request.body.model.startsWith('mistral.')) {
    content = JSON.parse(body)['outputs'][0]['text'];
}
const reply = { choices: [{ 'message': { 'content': content } }] };
return response.send(reply);
}
} catch (error) {
console.log(color.red(`Error communicating with Amazon Bedrock: ${error}\n${divider}`));
if (!response.headersSent) {
return response.status(500).send({ error: true });
} else {
response.end();
}
}
}
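The non-streaming path above wraps the provider reply into a minimal OpenAI-style shape so the rest of the client code stays source-agnostic, e.g. (illustrative):

    // { choices: [{ message: { content: 'Hello! How can I help?' } }] }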
async function sendCohereRequest(request, response) {
const apiKey = readSecret(SECRET_KEYS.COHERE);
const controller = new AbortController();
@ -616,6 +742,7 @@ router.post('/status', jsonParser, async function (request, response_getstatus_o
let api_url;
let api_key_openai;
let headers;
let bedrock_region = 'us-east-1';
if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENAI) {
api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString();
@ -635,6 +762,8 @@ router.post('/status', jsonParser, async function (request, response_getstatus_o
api_key_openai = readSecret(SECRET_KEYS.CUSTOM);
headers = {};
mergeObjectWithYaml(headers, request.body.custom_include_headers);
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.BEDROCK) {
bedrock_region = request.body.bedrock_region || 'us-east-1';
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.COHERE) {
api_url = API_COHERE;
api_key_openai = readSecret(SECRET_KEYS.COHERE);
@ -644,6 +773,24 @@ router.post('/status', jsonParser, async function (request, response_getstatus_o
return response_getstatus_openai.status(400).send({ error: true });
}
if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.BEDROCK) {
try {
let resp = await listTextModels(bedrock_region);
let models = resp.modelSummaries;
response_getstatus_openai.send(models);
console.log('Available Bedrock Text models:', models);
} catch (e) {
console.error(e);
if (!response_getstatus_openai.headersSent) {
response_getstatus_openai.send({ error: true });
} else {
response_getstatus_openai.end();
}
}
return;
}
if (!api_key_openai && !request.body.reverse_proxy && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.CUSTOM) {
console.log('OpenAI API key is missing.');
return response_getstatus_openai.status(400).send({ error: true });
@ -791,6 +938,7 @@ router.post('/generate', jsonParser, function (request, response) {
case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response);
case CHAT_COMPLETION_SOURCES.MAKERSUITE: return sendMakerSuiteRequest(request, response);
case CHAT_COMPLETION_SOURCES.MISTRALAI: return sendMistralAIRequest(request, response);
case CHAT_COMPLETION_SOURCES.BEDROCK: return sendBedrockRequest(request, response);
case CHAT_COMPLETION_SOURCES.COHERE: return sendCohereRequest(request, response);
}


@ -30,6 +30,8 @@ const SECRET_KEYS = {
MISTRALAI: 'api_key_mistralai',
CUSTOM: 'api_key_custom',
OOBA: 'api_key_ooba',
BEDROCK_ACCESS_KEY: 'api_key_bedrock_access',
BEDROCK_SECRET_KEY: 'api_key_bedrock_secret',
INFERMATICAI: 'api_key_infermaticai',
DREAMGEN: 'api_key_dreamgen',
NOMICAI: 'api_key_nomicai',


@ -355,10 +355,73 @@ function convertTextCompletionPrompt(messages) {
return messageStrings.join('\n') + '\nassistant:';
}
/**
 * Convert a prompt from ChatML message objects to the prompt format used by Mistral.
 * @param {object[]} messages Array of messages
 * @param {string} assistantPrefill Assistant prefill appended at the end of the prompt
 * @returns {string} Prompt for Mistral
 */
function convertMistralPrompt(messages, assistantPrefill) {
    let systemPrompt = '';
    if (messages.length) {
        // Drop a trailing assistant message; the prefill below ends the prompt instead.
        if (messages[messages.length - 1].role === 'assistant') {
            messages.pop();
        }
        // Collect all the system messages up until the first instance of a
        // non-system message, and then remove them from the messages array.
        let i;
        for (i = 0; i < messages.length; i++) {
            if (messages[i].role !== 'system') {
                break;
            }
            systemPrompt += `${messages[i].content}\n`;
        }
        messages.splice(0, i);
    }

    // Merge the system prompt into the first user message, or inject it as the first user message.
    if (messages.length && messages[0].role === 'user') {
        messages[0].content = systemPrompt + messages[0].content;
    } else {
        messages.unshift({
            role: 'user',
            content: systemPrompt,
        });
    }

    // Convert messages to the prompt.
    let requestPrompt = messages.map((v, i) => {
        // Set prefix and suffix according to the role. The first user turn opens
        // the sequence with <s>; the system prompt is already merged into its content.
        let prefix = {
            'user': i === 0 ? '<s>[INST] ' : '[INST] ',
            'assistant': ' ',
        }[v.role] ?? '';
        let suffix = {
            'user': ' [/INST]',
            'assistant': '</s>',
        }[v.role] ?? '';
        if ((i % 2 === 0) !== (v.role === 'user')) {
            console.log('WARN: messages should alternate user/assistant/user/assistant.');
        }
        return `${prefix}${v.content}${suffix}`;
    }).join('');

    return requestPrompt + assistantPrefill;
}
}
module.exports = {
convertClaudePrompt,
convertClaudeMessages,
convertGooglePrompt,
convertTextCompletionPrompt,
convertCohereMessages,
convertMistralPrompt,
};
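A worked example of the Mistral conversion under the rules above (inputs hypothetical):

    // convertMistralPrompt([
    //     { role: 'system', content: 'Be terse.' },
    //     { role: 'user', content: 'Hi' },
    //     { role: 'assistant', content: 'Hello.' },
    //     { role: 'user', content: 'Bye' },
    // ], '')
    // => '<s>[INST] Be terse.\nHi [/INST] Hello.</s>[INST] Bye [/INST]'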


@ -411,6 +411,37 @@ function forwardFetchResponse(from, to) {
});
}
/**
 * Pipe an Amazon Bedrock response stream to an Express.js Response as server-sent events.
 * @param {object} from The InvokeModelWithResponseStream command output to pipe from.
 * @param {Express.Response} to The Express response to pipe to.
 */
async function forwardBedrockStreamResponse(from, to) {
    to.header('Content-Type', 'text/event-stream');
    to.header('Cache-Control', 'no-cache');
    to.header('Connection', 'keep-alive');
    to.flushHeaders(); // flush the headers to establish SSE with the client

    for await (const event of from.body) {
        if (event.chunk && event.chunk.bytes) {
            const chunk = Buffer.from(event.chunk.bytes).toString('utf-8');
            to.write(`data: ${chunk}\n\n`);
        } else if (
            event.internalServerException ||
            event.modelStreamErrorException ||
            event.throttlingException ||
            event.validationException
        ) {
            console.error(event);
            break;
        }
    }
    to.end();
}
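Each event yielded by the SDK's response stream carries a UTF-8 JSON chunk whose schema is model-specific; a rough sketch of the Anthropic case (illustrative, not an exhaustive schema):

    // { chunk: { bytes: <Uint8Array> } }
    // where the decoded bytes are JSON such as:
    // {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hel"}}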
/**
* Makes an HTTP/2 request to the specified endpoint.
*
@ -612,6 +643,7 @@ module.exports = {
removeOldBackups,
getImages,
forwardFetchResponse,
forwardBedrockStreamResponse,
getHexString,
mergeObjectWithYaml,
excludeKeysByYaml,