diff --git a/public/index.html b/public/index.html
index d19fc6fce..17dc73582 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1442,10 +1442,16 @@
OpenAI Model
diff --git a/public/scripts/openai.js b/public/scripts/openai.js
index 429b641f4..4667272bd 100644
--- a/public/scripts/openai.js
+++ b/public/scripts/openai.js
@@ -80,6 +80,7 @@ const default_bias_presets = {
};
const gpt3_max = 4095;
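+// 16,384-token window, stored as n - 1 to match gpt3_max and gpt4_max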
+const gpt3_16k_max = 16383;
const gpt4_max = 8191;
const gpt_neox_max = 2048;
const gpt4_32k_max = 32767;
@@ -1540,18 +1541,22 @@ function onModelChange() {
if (oai_settings.max_context_unlocked) {
$('#openai_max_context').attr('max', unlocked_max);
}
- else if (value == 'gpt-4' || value == 'gpt-4-0314') {
+ else if (value == 'gpt-4' || value == 'gpt-4-0314' || value == 'gpt-4-0613') {
$('#openai_max_context').attr('max', gpt4_max);
}
- else if (value == 'gpt-4-32k') {
+ else if (value == 'gpt-4-32k' || value == 'gpt-4-32k-0314' || value == 'gpt-4-32k-0613') {
$('#openai_max_context').attr('max', gpt4_32k_max);
}
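+ // 16k-context gpt-3.5 variants from the June 2023 (0613) model release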
+ else if (value == 'gpt-3.5-turbo-16k' || value == 'gpt-3.5-turbo-16k-0613') {
+ $('#openai_max_context').attr('max', gpt3_16k_max);
+ }
else {
$('#openai_max_context').attr('max', gpt3_max);
- oai_settings.openai_max_context = Math.min(oai_settings.openai_max_context, gpt3_max);
- $('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
}
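+ // Clamp the saved value to whichever max was just applied, then
+ // refresh the slider; this now runs for every model, not only gpt-3.5.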
+ oai_settings.openai_max_context = Math.min(oai_settings.openai_max_context, Number($('#openai_max_context').attr('max')));
+ $('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
+
$('#openai_reverse_proxy').attr('placeholder', 'https://api.openai.com/v1');
oai_settings.temp_openai = Math.min(oai_max_temp, oai_settings.temp_openai);
diff --git a/server.js b/server.js
index 78949d80e..ff067877e 100644
--- a/server.js
+++ b/server.js
@@ -158,6 +158,23 @@ async function countSentencepieceTokens(spp, text) {
const tokenizersCache = {};
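+// Reduce a concrete model name (e.g. 'gpt-4-32k-0613') to the base model
+// that tiktoken ships an encoding for. 'gpt-4-32k' must be tested before
+// 'gpt-4', since every 32k name also contains the shorter substring.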
+function getTokenizerModel(requestModel) {
+ if (requestModel.includes('gpt-4-32k')) {
+ return 'gpt-4-32k';
+ }
+
+ if (requestModel.includes('gpt-4')) {
+ return 'gpt-4';
+ }
+
+ if (requestModel.includes('gpt-3.5-turbo')) {
+ return 'gpt-3.5-turbo';
+ }
+
+ // default: unknown models fall back to the gpt-3.5-turbo encoding
+ return 'gpt-3.5-turbo';
+}
+
function getTiktokenTokenizer(model) {
if (tokenizersCache[model]) {
return tokenizersCache[model];
@@ -2787,7 +2804,8 @@ app.post("/openai_bias", jsonParser, async function (request, response) {
let result = {};
- const tokenizer = getTiktokenTokenizer(request.query.model === 'gpt-4-0314' ? 'gpt-4' : request.query.model);
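+ // Resolve dated variants (e.g. 'gpt-4-0314') through the shared helper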
+ const model = getTokenizerModel(String(request.query.model || ''));
+ const tokenizer = getTiktokenTokenizer(model);
for (const entry of request.body) {
if (!entry || !entry.text) {
@@ -3083,11 +3101,13 @@ app.post("/generate_openai", jsonParser, function (request, response_generate_op
app.post("/tokenize_openai", jsonParser, function (request, response_tokenize_openai = response) {
if (!request.body) return response_tokenize_openai.sendStatus(400);
- const tokensPerName = request.query.model.includes('gpt-4') ? 1 : -1;
- const tokensPerMessage = request.query.model.includes('gpt-4') ? 3 : 4;
+ const model = getTokenizerModel(String(request.query.model || ''));
+
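+ // Per-message overhead mirrors OpenAI's cookbook counting example:
+ // gpt-4 adds 3 tokens per message and 1 per name, while gpt-3.5-turbo
+ // adds 4 per message and subtracts 1 when a name is present.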
+ const tokensPerName = model.includes('gpt-4') ? 1 : -1;
+ const tokensPerMessage = model.includes('gpt-4') ? 3 : 4;
const tokensPadding = 3;
- const tokenizer = getTiktokenTokenizer(request.query.model === 'gpt-4-0314' ? 'gpt-4' : request.query.model);
+ const tokenizer = getTiktokenTokenizer(model);
let num_tokens = 0;
for (const msg of request.body) {