mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
bedrock claude 2 - non streaming mode
This commit is contained in:
1284
package-lock.json
generated
1284
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -35,7 +35,9 @@
|
||||
"ws": "^8.13.0",
|
||||
"yaml": "^2.3.4",
|
||||
"yargs": "^17.7.1",
|
||||
"yauzl": "^2.10.0"
|
||||
"yauzl": "^2.10.0",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.515.0",
|
||||
"@aws-sdk/client-bedrock": "^3.515.0"
|
||||
},
|
||||
"overrides": {
|
||||
"parse-bmfont-xml": {
|
||||
|
@@ -2052,6 +2052,7 @@
|
||||
<option value="windowai">Window AI</option>
|
||||
<option value="openrouter">OpenRouter</option>
|
||||
<option value="claude">Claude</option>
|
||||
<option value="bedrock">Amazon Bedrock</option>
|
||||
<option value="scale">Scale</option>
|
||||
<option value="ai21">AI21</option>
|
||||
<option value="makersuite">Google MakerSuite</option>
|
||||
@@ -2208,6 +2209,57 @@
|
||||
</label>
|
||||
</div>
|
||||
</form>
|
||||
<form id="bedrock_form" data-source="bedrock" action="javascript:void(null);" method="post" enctype="multipart/form-data">
|
||||
<div>
|
||||
<h4>AWS Region</h4>
|
||||
<select id="aws_region_select">
|
||||
<option value="us-east-1">us-east-1</option>
|
||||
<option value="us-west-2">us-west-2</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<h4>AWS Access Key / Secret Key (optional if you use an IAM Role)</h4>
|
||||
<!-- <div>
|
||||
<span data-i18n="Get your key from">Get your key from </span> <a target="_blank" href="https://console.anthropic.com/account/keys" data-i18n="Anthropic's developer console">Anthropic's developer console</a>.
|
||||
</div> -->
|
||||
<div class="flex-container">
|
||||
<input id="api_key_bedrock" name="api_key_bedrock" placeholder="Bedrock Key(AWS AK/SK)" class="text_pole flex1" maxlength="500" value="" type="text" autocomplete="off">
|
||||
<div title="Clear your API key" data-i18n="[title]Clear your Access key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_bedrock"></div>
|
||||
</div>
|
||||
<div class="flex-container">
|
||||
<input id="access_key_aws" name="access_key_aws" placeholder="AWS Access Key" class="text_pole flex1" maxlength="500" value="" type="text" autocomplete="off">
|
||||
<div title="Clear your Access key" data-i18n="[title]Clear your Access key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="access_key_aws"></div>
|
||||
</div>
|
||||
<div class="flex-container">
|
||||
<input id="secret_key_aws" name="secret_key_aws" placeholder="AWS Secret Key" class="text_pole flex1" maxlength="500" value="" type="text" autocomplete="off">
|
||||
<div title="Clear your Secret key" data-i18n="[title]Clear your Secret key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="secret_key_aws"></div>
|
||||
</div>
|
||||
<div data-for="access_key_aws" class="neutral_warning">
|
||||
For privacy reasons, your Access key will be hidden after you reload the page.
|
||||
</div>
|
||||
<div data-for="secret_key_aws" class="neutral_warning">
|
||||
For privacy reasons, your Secret key will be hidden after you reload the page.
|
||||
</div>
|
||||
<div>
|
||||
<h4 data-i18n="Amazon Bedrock Model">Amazon Bedrock Model</h4>
|
||||
<select id="model_bedrock_select">
|
||||
<optgroup label="Claude 3">
|
||||
<option value="anthropic.claude-3-sonnet-20240229-v1:0">claude-3-Sonnet</option>
|
||||
<option value="anthropic.claude-3-haiku-20240307-v1:0">claude-3-Haiku</option>
|
||||
</optgroup>
|
||||
<optgroup label="Claude 2">
|
||||
<option value="anthropic.claude-2">claude-2</option>
|
||||
<option value="anthropic.claude-v2:1">claude-2.1</option>
|
||||
</optgroup>
|
||||
<optgroup label="Claude Instant">
|
||||
<option value="anthropic.claude-instant-v1">claude-instant-v1</option>
|
||||
</optgroup>
|
||||
<!-- <optgroup label="LLaMA">
|
||||
<option value="anthropic.llama-13b-chat">llama-13b-chat</option>
|
||||
</optgroup> -->
|
||||
</select>
|
||||
</div>
|
||||
</form>
|
||||
<form id="claude_form" data-source="claude" action="javascript:void(null);" method="post" enctype="multipart/form-data">
|
||||
<h4>Claude API Key</h4>
|
||||
<div>
|
||||
|
@@ -8546,6 +8546,15 @@ jQuery(async function () {
|
||||
}
|
||||
});
|
||||
|
||||
// Keep the hidden combined Bedrock key field in sync with the AK/SK inputs.
// The pair is serialized as a JSON array so it can be stored as a single secret.
$('#access_key_aws, #secret_key_aws').on('change', async function (e) {
    e.stopPropagation();

    const accessKey = String($('#access_key_aws').val()).trim();
    const secretKey = String($('#secret_key_aws').val()).trim();

    $('#api_key_bedrock').val(JSON.stringify([accessKey, secretKey]));
});
|
||||
|
||||
$('#api_button_textgenerationwebui').on('click', async function (e) {
|
||||
const mancerKey = String($('#api_key_mancer').val()).trim();
|
||||
if (mancerKey.length) {
|
||||
@@ -8572,6 +8581,11 @@ jQuery(async function () {
|
||||
await writeSecret(SECRET_KEYS.OOBA, oobaKey);
|
||||
}
|
||||
|
||||
const bedrockKey = String($('#api_key_bedrock').val()).trim();
|
||||
if (bedrockKey.length) {
|
||||
await writeSecret(SECRET_KEYS.BEDROCK, bedrockKey);
|
||||
}
|
||||
|
||||
validateTextGenUrl();
|
||||
startStatusLoading();
|
||||
main_api = 'textgenerationwebui';
|
||||
|
@@ -169,6 +169,7 @@ export const chat_completion_sources = {
|
||||
MAKERSUITE: 'makersuite',
|
||||
MISTRALAI: 'mistralai',
|
||||
CUSTOM: 'custom',
|
||||
BEDROCK: 'bedrock',
|
||||
};
|
||||
|
||||
const prefixMap = selected_group ? {
|
||||
@@ -286,6 +287,8 @@ const oai_settings = {
|
||||
google_model: 'gemini-pro',
|
||||
ai21_model: 'j2-ultra',
|
||||
mistralai_model: 'mistral-medium',
|
||||
bedrock_model: 'anthropic.claude-instant-v1',
|
||||
bedrock_region: 'us-west-2',
|
||||
custom_model: '',
|
||||
custom_url: '',
|
||||
custom_include_body: '',
|
||||
@@ -1323,6 +1326,8 @@ function getChatCompletionModel() {
|
||||
return oai_settings.mistralai_model;
|
||||
case chat_completion_sources.CUSTOM:
|
||||
return oai_settings.custom_model;
|
||||
case chat_completion_sources.BEDROCK:
|
||||
return oai_settings.bedrock_model;
|
||||
default:
|
||||
throw new Error(`Unknown chat completion source: ${oai_settings.chat_completion_source}`);
|
||||
}
|
||||
@@ -1535,6 +1540,7 @@ async function sendOpenAIRequest(type, messages, signal) {
|
||||
let logit_bias = {};
|
||||
const messageId = getNextMessageId(type);
|
||||
const isClaude = oai_settings.chat_completion_source == chat_completion_sources.CLAUDE;
|
||||
const isBedrock = oai_settings.chat_completion_source == chat_completion_sources.BEDROCK;
|
||||
const isOpenRouter = oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER;
|
||||
const isScale = oai_settings.chat_completion_source == chat_completion_sources.SCALE;
|
||||
const isAI21 = oai_settings.chat_completion_source == chat_completion_sources.AI21;
|
||||
@@ -1635,6 +1641,19 @@ async function sendOpenAIRequest(type, messages, signal) {
|
||||
}
|
||||
}
|
||||
|
||||
if (isBedrock) {
|
||||
generate_data['top_k'] = Number(oai_settings.top_k_openai);
|
||||
generate_data['exclude_assistant'] = oai_settings.exclude_assistant;
|
||||
generate_data['claude_use_sysprompt'] = oai_settings.claude_use_sysprompt;
|
||||
generate_data['claude_exclude_prefixes'] = oai_settings.claude_exclude_prefixes;
|
||||
generate_data['stop'] = getCustomStoppingStrings(); // Claude shouldn't have limits on stop strings.
|
||||
generate_data['human_sysprompt_message'] = substituteParams(oai_settings.human_sysprompt_message);
|
||||
// Don't add a prefill on quiet gens (summarization)
|
||||
if (!isQuiet && !oai_settings.exclude_assistant) {
|
||||
generate_data['assistant_prefill'] = substituteParams(oai_settings.assistant_prefill);
|
||||
}
|
||||
}
|
||||
|
||||
if (isOpenRouter) {
|
||||
generate_data['top_k'] = Number(oai_settings.top_k_openai);
|
||||
generate_data['min_p'] = Number(oai_settings.min_p_openai);
|
||||
@@ -2702,6 +2721,10 @@ async function getStatusOpen() {
|
||||
data.custom_include_headers = oai_settings.custom_include_headers;
|
||||
}
|
||||
|
||||
if (oai_settings.chat_completion_source === chat_completion_sources.BEDROCK) {
|
||||
data.bedrock_region = oai_settings.bedrock_region;
|
||||
}
|
||||
|
||||
const canBypass = (oai_settings.chat_completion_source === chat_completion_sources.OPENAI && oai_settings.bypass_status_check) || oai_settings.chat_completion_source === chat_completion_sources.CUSTOM;
|
||||
if (canBypass) {
|
||||
setOnlineStatus('Status check bypassed');
|
||||
@@ -3360,6 +3383,11 @@ async function onModelChange() {
|
||||
$('#custom_model_id').val(value).trigger('input');
|
||||
}
|
||||
|
||||
if ($(this).is('#model_bedrock_select')) {
|
||||
console.log('Bedrock model changed to', value);
|
||||
oai_settings.bedrock_model = value;
|
||||
}
|
||||
|
||||
if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
|
||||
if (oai_settings.max_context_unlocked) {
|
||||
$('#openai_max_context').attr('max', unlocked_max);
|
||||
@@ -3513,6 +3541,29 @@ async function onModelChange() {
|
||||
$('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
|
||||
}
|
||||
|
||||
if (oai_settings.chat_completion_source == chat_completion_sources.BEDROCK) {
|
||||
oai_settings.bedrock_region = String($('#aws_region_select').val());
|
||||
|
||||
if (oai_settings.max_context_unlocked) {
|
||||
$('#openai_max_context').attr('max', max_200k);
|
||||
}
|
||||
else if (value == 'anthropic.claude-2:1') {
|
||||
$('#openai_max_context').attr('max', max_200k);
|
||||
}
|
||||
else if (value.endsWith('100k') || value === 'anthropic.claude-instant-v1') {
|
||||
$('#openai_max_context').attr('max', claude_100k_max);
|
||||
}
|
||||
else {
|
||||
$('#openai_max_context').attr('max', claude_max);
|
||||
}
|
||||
|
||||
oai_settings.openai_max_context = Math.min(oai_settings.openai_max_context, Number($('#openai_max_context').attr('max')));
|
||||
$('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
|
||||
|
||||
oai_settings.temp_openai = Math.min(claude_max_temp, oai_settings.temp_openai);
|
||||
$('#temp_openai').attr('max', claude_max_temp).val(oai_settings.temp_openai).trigger('input');
|
||||
}
|
||||
|
||||
$('#openai_max_context_counter').attr('max', Number($('#openai_max_context').attr('max')));
|
||||
|
||||
saveSettingsDebounced();
|
||||
@@ -3668,6 +3719,19 @@ async function onConnectButtonClick(e) {
|
||||
}
|
||||
}
|
||||
|
||||
if (oai_settings.chat_completion_source == chat_completion_sources.BEDROCK) {
|
||||
const access_key_aws = String($('#access_key_aws').val()).trim();
|
||||
const secret_key_aws = String($('#secret_key_aws').val()).trim();
|
||||
|
||||
if (access_key_aws.length && secret_key_aws.length) {
|
||||
await writeSecret(SECRET_KEYS.BEDROCK, [access_key_aws, secret_key_aws]);
|
||||
}
|
||||
|
||||
if (!secret_state[SECRET_KEYS.BEDROCK]) {
|
||||
console.log('No secret key saved for Amazon Bedrock');
|
||||
return;
|
||||
}
|
||||
}
|
||||
startStatusLoading();
|
||||
saveSettingsDebounced();
|
||||
await getStatusOpen();
|
||||
@@ -3706,6 +3770,9 @@ function toggleChatCompletionForms() {
|
||||
else if (oai_settings.chat_completion_source == chat_completion_sources.CUSTOM) {
|
||||
$('#model_custom_select').trigger('change');
|
||||
}
|
||||
else if (oai_settings.chat_completion_source == chat_completion_sources.BEDROCK) {
|
||||
$('#model_bedrock_select').trigger('change');
|
||||
}
|
||||
$('[data-source]').each(function () {
|
||||
const validSources = $(this).data('source').split(',');
|
||||
$(this).toggle(validSources.includes(oai_settings.chat_completion_source));
|
||||
@@ -3730,6 +3797,7 @@ async function testApiConnection() {
|
||||
toastr.success('API connection successful!');
|
||||
}
|
||||
catch (err) {
|
||||
console.log(err);
|
||||
toastr.error('Could not get a reply from API. Check your connection settings / API key and try again.');
|
||||
}
|
||||
}
|
||||
@@ -4320,6 +4388,8 @@ $(document).ready(async function () {
|
||||
$('#model_ai21_select').on('change', onModelChange);
|
||||
$('#model_mistralai_select').on('change', onModelChange);
|
||||
$('#model_custom_select').on('change', onModelChange);
|
||||
$('#model_bedrock_select').on('change', onModelChange);
|
||||
$('#aws_region_select').on('change', onModelChange);
|
||||
$('#settings_preset_openai').on('change', onSettingsPresetChange);
|
||||
$('#new_oai_preset').on('click', onNewPresetClick);
|
||||
$('#delete_oai_preset').on('click', onDeletePresetClick);
|
||||
|
@@ -18,6 +18,7 @@ export const SECRET_KEYS = {
|
||||
TOGETHERAI: 'api_key_togetherai',
|
||||
CUSTOM: 'api_key_custom',
|
||||
OOBA: 'api_key_ooba',
|
||||
BEDROCK: 'api_key_bedrock',
|
||||
};
|
||||
|
||||
const INPUT_MAP = {
|
||||
@@ -37,6 +38,7 @@ const INPUT_MAP = {
|
||||
[SECRET_KEYS.CUSTOM]: '#api_key_custom',
|
||||
[SECRET_KEYS.TOGETHERAI]: '#api_key_togetherai',
|
||||
[SECRET_KEYS.OOBA]: '#api_key_ooba',
|
||||
[SECRET_KEYS.BEDROCK]: '#api_key_bedrock',
|
||||
};
|
||||
|
||||
async function clearSecret() {
|
||||
@@ -45,6 +47,10 @@ async function clearSecret() {
|
||||
secret_state[key] = false;
|
||||
updateSecretDisplay();
|
||||
$(INPUT_MAP[key]).val('');
|
||||
if (key == SECRET_KEYS.BEDROCK) {
|
||||
$('#access_key_aws').val('');
|
||||
$('#secret_key_aws').val('');
|
||||
}
|
||||
$('#main_api').trigger('change');
|
||||
}
|
||||
|
||||
|
62
src/bedrock.js
Normal file
62
src/bedrock.js
Normal file
@@ -0,0 +1,62 @@
|
||||
// const fs = require('fs');
|
||||
const { BedrockRuntimeClient, InvokeModelCommand, InvokeModelWithResponseStreamCommand } = require("@aws-sdk/client-bedrock-runtime");
|
||||
const { BedrockClient, ListFoundationModelsCommand } = require("@aws-sdk/client-bedrock");
|
||||
|
||||
/**
 * Returns a cached Bedrock control-plane client for the given AWS region,
 * creating one lazily on first use. Credentials come from the default AWS
 * provider chain (environment / profile / IAM role).
 * @param {string} region_name AWS region, e.g. 'us-east-1'
 * @returns {BedrockClient} Memoized client for that region
 */
const getClient = (function() {
    const clientsByRegion = {};
    return function(region_name) {
        if (clientsByRegion[region_name] === undefined) {
            clientsByRegion[region_name] = new BedrockClient({ region: region_name });
        }
        return clientsByRegion[region_name];
    };
})();
|
||||
|
||||
/**
 * Returns a cached Bedrock runtime (inference) client for the given AWS region,
 * creating one lazily on first use. Credentials come from the default AWS
 * provider chain (environment / profile / IAM role).
 * @param {string} region_name AWS region, e.g. 'us-east-1'
 * @returns {BedrockRuntimeClient} Memoized runtime client for that region
 */
const getRuntimeClient = (function() {
    const clientsByRegion = {};
    return function(region_name) {
        if (clientsByRegion[region_name] === undefined) {
            clientsByRegion[region_name] = new BedrockRuntimeClient({ region: region_name });
        }
        return clientsByRegion[region_name];
    };
})();
|
||||
|
||||
/**
 * Lists the foundation models with TEXT output modality available in a region.
 * @param {string} region_name AWS region to query
 * @returns {Promise<object>} ListFoundationModels response (see `modelSummaries`)
 */
async function listTextModels(region_name) {
    const client = getClient(region_name);
    return await client.send(new ListFoundationModelsCommand({ byOutputModality: 'TEXT' }));
}
|
||||
|
||||
/**
 * Invokes a Bedrock model synchronously (non-streaming).
 * @param {string} region_name AWS region to route the request to
 * @param {object} params InvokeModelRequest (modelId, body, contentType, accept)
 * @returns {Promise<object>} Raw InvokeModelCommand response; `body` is a byte blob
 */
async function invokeModel(region_name, params) {
    // TODO: Claude 3 models use the Messages API request/response shape and will
    // need a different body format. The original claude-3 branch was byte-identical
    // to the default path, so the dead special case is collapsed until that
    // support is implemented.
    const command = new InvokeModelCommand(params);
    return await getRuntimeClient(region_name).send(command);
}
|
||||
|
||||
/**
 * Invokes a Bedrock model with a server-side response stream.
 * @param {string} region_name AWS region to route the request to
 * @param {object} params InvokeModelWithResponseStreamRequest
 * @returns {Promise<object>} Response whose `body` is an async-iterable event stream
 */
async function invokeModelWithStreaming(region_name, params) {
    const runtime = getRuntimeClient(region_name);
    return await runtime.send(new InvokeModelWithResponseStreamCommand(params));
}
|
||||
module.exports = {
|
||||
getRuntimeClient,
|
||||
getClient,
|
||||
listTextModels,
|
||||
invokeModel,
|
||||
invokeModelWithStreaming,
|
||||
};
|
@@ -162,6 +162,7 @@ const CHAT_COMPLETION_SOURCES = {
|
||||
MAKERSUITE: 'makersuite',
|
||||
MISTRALAI: 'mistralai',
|
||||
CUSTOM: 'custom',
|
||||
BEDROCK: 'bedrock',
|
||||
};
|
||||
|
||||
const UPLOADS_PATH = './uploads';
|
||||
|
@@ -4,15 +4,18 @@ const { Readable } = require('stream');
|
||||
|
||||
const { jsonParser } = require('../../express-common');
|
||||
const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY } = require('../../constants');
|
||||
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
|
||||
const { forwardFetchResponse, forwardBedrockStreamResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
|
||||
const { convertClaudePrompt, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
|
||||
|
||||
const { readSecret, SECRET_KEYS } = require('../secrets');
|
||||
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
|
||||
|
||||
const { listTextModels, invokeModel, invokeModelWithStreaming } = require('../../bedrock');
|
||||
|
||||
const API_OPENAI = 'https://api.openai.com/v1';
|
||||
const API_CLAUDE = 'https://api.anthropic.com/v1';
|
||||
const API_MISTRAL = 'https://api.mistral.ai/v1';
|
||||
|
||||
/**
|
||||
* Sends a request to Claude API.
|
||||
* @param {express.Request} request Express request
|
||||
@@ -497,6 +500,151 @@ async function sendMistralAIRequest(request, response) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sends a chat completion request to Amazon Bedrock (Anthropic Claude models)
 * and relays the result to the client, streamed or as a single OpenAI-shaped reply.
 * @param {express.Request} request Express request
 * @param {express.Response} response Express response
 */
async function sendBedrockRequest(request, response) {
    // The secret holds a JSON-serialized [access_key, secret_key] pair, but it is
    // only checked for presence here — the SDK clients resolve credentials via the
    // default AWS provider chain (env vars / profile / IAM role). NOTE(review):
    // confirm whether the stored AK/SK should be passed to the client constructors.
    const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.BEDROCK);
    const divider = '-'.repeat(process.stdout.columns);
    // Route to the region the UI selected; fall back to us-east-1 (the previous
    // hard-coded value) when the client did not send one.
    const region = request.body.bedrock_region || 'us-east-1';

    if (!apiKey) {
        console.log(color.red(`Bedrock API key is missing.\n${divider}`));
        return response.status(400).send({ error: true });
    }

    try {
        // TODO: the abort controller is not wired into the SDK send() calls yet,
        // so a client disconnect does not cancel the in-flight Bedrock request.
        const controller = new AbortController();
        request.socket.removeAllListeners('close');
        request.socket.on('close', function () {
            controller.abort();
        });

        // Claude 2.x supports a native system prompt. Match every claude-2 model id
        // spelling used by Bedrock ('anthropic.claude-2', 'anthropic.claude-2:1',
        // 'anthropic.claude-v2:1') — the previous exact comparison against
        // 'anthropic.claude-2.1' never matched any Bedrock model id.
        const isSysPromptSupported = request.body.model.startsWith('anthropic.claude-2') || request.body.model.startsWith('anthropic.claude-v2');
        const requestPrompt = convertClaudePrompt(request.body.messages, !request.body.exclude_assistant, request.body.assistant_prefill, isSysPromptSupported, request.body.claude_use_sysprompt, request.body.human_sysprompt_message, request.body.claude_exclude_prefixes);

        // Check Claude messages sequence and prefixes presence.
        let sequenceError = [];
        const sequence = requestPrompt.split('\n').filter(x => x.startsWith('Human:') || x.startsWith('Assistant:'));
        const humanFound = sequence.some(line => line.startsWith('Human:'));
        const assistantFound = sequence.some(line => line.startsWith('Assistant:'));
        let humanErrorCount = 0;
        let assistantErrorCount = 0;

        // Two consecutive lines with the same role prefix indicate a broken
        // Human/Assistant alternation.
        for (let i = 0; i < sequence.length - 1; i++) {
            if (sequence[i].startsWith(sequence[i + 1].split(':')[0])) {
                if (sequence[i].startsWith('Human:')) {
                    humanErrorCount++;
                } else if (sequence[i].startsWith('Assistant:')) {
                    assistantErrorCount++;
                }
            }
        }

        if (!humanFound) {
            sequenceError.push(`${divider}\nWarning: No 'Human:' prefix found in the prompt.\n${divider}`);
        }
        if (!assistantFound) {
            sequenceError.push(`${divider}\nWarning: No 'Assistant: ' prefix found in the prompt.\n${divider}`);
        }
        if (sequence[0] && !sequence[0].startsWith('Human:')) {
            sequenceError.push(`${divider}\nWarning: The messages sequence should start with 'Human:' prefix.\nMake sure you have '\\n\\nHuman:' prefix at the very beginning of the prompt, or after the system prompt.\n${divider}`);
        }
        if (humanErrorCount > 0 || assistantErrorCount > 0) {
            sequenceError.push(`${divider}\nWarning: Detected incorrect Prefix sequence(s).`);
            sequenceError.push(`Incorrect "Human:" prefix(es): ${humanErrorCount}.\nIncorrect "Assistant: " prefix(es): ${assistantErrorCount}.`);
            sequenceError.push('Check the prompt above and fix it in the SillyTavern.');
            sequenceError.push('\nThe correct sequence in the console should look like this:\n(System prompt msg) <-(for the sysprompt format only, else have \\n\\n above the first human\'s message.)');
            sequenceError.push(`\\n + <-----(Each message beginning with the "Assistant:/Human:" prefix must have \\n\\n before it.)\n\\n +\nHuman: \\n +\n\\n +\nAssistant: \\n +\n...\n\\n +\nHuman: \\n +\n\\n +\nAssistant: \n${divider}`);
        }

        // Add custom stop sequences
        const stopSequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:'];
        if (Array.isArray(request.body.stop)) {
            stopSequences.push(...request.body.stop);
        }

        const modelBody = {
            'prompt': requestPrompt,
            // Nullish defaults: 0 is a valid temperature/top_p/top_k and must not
            // be replaced (the previous `||` fallbacks clobbered falsy values).
            'temperature': request.body.temperature ?? 0.9,
            'top_p': request.body.top_p ?? 0.999,
            'top_k': request.body.top_k ?? 250,
            'max_tokens_to_sample': request.body.max_tokens,
            'stop_sequences': stopSequences,
        };
        const bedrockClaudeRequestBody = { // InvokeModelRequest
            body: JSON.stringify(modelBody), // required; a JSON string is accepted by the SDK
            contentType: 'application/json',
            accept: 'application/json',
            modelId: request.body.model, // required
        };

        console.log('Bedrock request:', bedrockClaudeRequestBody);

        sequenceError.forEach(warning => {
            console.log(color.red(warning));
        });

        if (request.body.stream) {
            const respBedrockStream = await invokeModelWithStreaming(region, bedrockClaudeRequestBody);

            // Pipe the Bedrock event stream to the Express response.
            await forwardBedrockStreamResponse(respBedrockStream, response);
        } else {
            const resp = await invokeModel(region, bedrockClaudeRequestBody);
            const statusCode = resp['$metadata']['httpStatusCode'];
            const body = resp.body.transformToString();

            if (statusCode !== 200) {
                console.log(color.red(`Bedrock API returned error: ${resp['$metadata']['httpStatusCode']} ${body}\n${divider}`));
                return response.status(statusCode).send({ error: true });
            }

            console.log('Bedrock response:', body);

            // Wrap it back to OAI format
            const reply = { choices: [{ 'message': { 'content': JSON.parse(body)['completion'] } }] };
            return response.send(reply);
        }
    } catch (error) {
        console.log(color.red(`Error communicating with Bedrock: ${error}\n${divider}`));
        if (!response.headersSent) {
            return response.status(500).send({ error: true });
        }
    }
}
|
||||
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
router.post('/status', jsonParser, async function (request, response_getstatus_openai) {
|
||||
@@ -524,11 +672,33 @@ router.post('/status', jsonParser, async function (request, response_getstatus_o
|
||||
api_key_openai = readSecret(SECRET_KEYS.CUSTOM);
|
||||
headers = {};
|
||||
mergeObjectWithYaml(headers, request.body.custom_include_headers);
|
||||
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.BEDROCK) {
|
||||
var bedrock_region = request.body.bedrock_region;
|
||||
// api_key_openai = readSecret(SECRET_KEYS.BEDROCK);
|
||||
} else {
|
||||
console.log('This chat completion source is not supported yet.');
|
||||
return response_getstatus_openai.status(400).send({ error: true });
|
||||
}
|
||||
|
||||
// todo: should check Key or IAM Role permission
|
||||
if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.BEDROCK) {
|
||||
try {
|
||||
let resp = await listTextModels(bedrock_region);
|
||||
let models = resp.modelSummaries;
|
||||
response_getstatus_openai.send(models);
|
||||
console.log('Available Bedrock Text models:', models);
|
||||
} catch(e) {
|
||||
console.error(e);
|
||||
|
||||
if (!response_getstatus_openai.headersSent) {
|
||||
response_getstatus_openai.send({ error: true });
|
||||
} else {
|
||||
response_getstatus_openai.end();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (!api_key_openai && !request.body.reverse_proxy && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.CUSTOM) {
|
||||
console.log('OpenAI API key is missing.');
|
||||
return response_getstatus_openai.status(400).send({ error: true });
|
||||
@@ -672,6 +842,7 @@ router.post('/generate', jsonParser, function (request, response) {
|
||||
case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response);
|
||||
case CHAT_COMPLETION_SOURCES.MAKERSUITE: return sendMakerSuiteRequest(request, response);
|
||||
case CHAT_COMPLETION_SOURCES.MISTRALAI: return sendMistralAIRequest(request, response);
|
||||
case CHAT_COMPLETION_SOURCES.BEDROCK: return sendBedrockRequest(request, response);
|
||||
}
|
||||
|
||||
let apiUrl;
|
||||
@@ -733,6 +904,11 @@ router.post('/generate', jsonParser, function (request, response) {
|
||||
bodyParams.logprobs = true;
|
||||
}
|
||||
|
||||
mergeObjectWithYaml(bodyParams, request.body.custom_include_body);
|
||||
mergeObjectWithYaml(headers, request.body.custom_include_headers);
|
||||
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.BEDROCK) {
|
||||
console.log(request);
|
||||
apiKey = readSecret(SECRET_KEYS.BEDROCK);
|
||||
mergeObjectWithYaml(bodyParams, request.body.custom_include_body);
|
||||
mergeObjectWithYaml(headers, request.body.custom_include_headers);
|
||||
} else {
|
||||
|
@@ -30,6 +30,7 @@ const SECRET_KEYS = {
|
||||
MISTRALAI: 'api_key_mistralai',
|
||||
CUSTOM: 'api_key_custom',
|
||||
OOBA: 'api_key_ooba',
|
||||
BEDROCK: 'api_key_bedrock',
|
||||
};
|
||||
|
||||
/**
|
||||
|
26
src/util.js
26
src/util.js
@@ -399,6 +399,31 @@ function forwardFetchResponse(from, to) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Pipes a Bedrock InvokeModelWithResponseStream response to an Express.js Response.
 * Each stream event carries a JSON payload in `event.chunk.bytes`; the payload is
 * decoded and forwarded to the client, and the response is ended when the stream
 * finishes or an error event arrives.
 * @param {object} from Bedrock streaming response whose `body` is an async-iterable of events.
 * @param {Express.Response} to The Express response to pipe to.
 */
async function forwardBedrockStreamResponse(from, to) {
    for await (const event of from.body) {
        if (event.chunk && event.chunk.bytes) {
            const chunk = JSON.parse(Buffer.from(event.chunk.bytes).toString('utf-8'));
            // res.write() throws on a plain object, so the chunk must be
            // re-serialized. NOTE(review): emitted as an SSE 'data:' line on the
            // assumption the frontend consumes an event stream like the other
            // streaming sources — confirm against the client's stream reader.
            to.write(`data: ${JSON.stringify(chunk)}\n\n`);
        } else if (
            event.internalServerException ||
            event.modelStreamErrorException ||
            event.throttlingException ||
            event.validationException
        ) {
            // Stop forwarding on the first server-reported stream error.
            console.error(event);
            break;
        }
    }

    to.end();
}
|
||||
|
||||
/**
|
||||
* Adds YAML-serialized object to the object.
|
||||
* @param {object} obj Object
|
||||
@@ -542,6 +567,7 @@ module.exports = {
|
||||
removeOldBackups,
|
||||
getImages,
|
||||
forwardFetchResponse,
|
||||
forwardBedrockStreamResponse,
|
||||
getHexString,
|
||||
mergeObjectWithYaml,
|
||||
excludeKeysByYaml,
|
||||
|
Reference in New Issue
Block a user