Send example dialogues as system messages with names

Author: SillyLossy
Date: 2023-04-15 20:05:58 +03:00
Parent: e3b9c24d8c
Commit: a563a45bf0

2 changed files with 13 additions and 9 deletions
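
In short: example dialogues are no longer pushed with the "user"/"assistant" roles. Each parsed example turn becomes a "system" message carrying a "name" of "example_user" or "example_assistant", and the /tokenize_openai endpoint is adjusted to charge for that extra "name" field. A minimal sketch of the message shape after this change (the content strings are illustrative, not from the commit):

    // before: { "role": "user", "content": "Hi!" }, { "role": "assistant", "content": "Hello!" }
    // after:
    [
        { "role": "system", "name": "example_user", "content": "Hi!" },
        { "role": "system", "name": "example_assistant", "content": "Hello!" },
    ]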

View File

@@ -209,7 +209,7 @@ function parseExampleIntoIndividual(messageExampleString) {
     let in_user = false;
     let in_bot = false;
     // DRY my cock and balls
-    function add_msg(name, role) {
+    function add_msg(name, role, system_name) {
         // join different newlines (we split them by \n and join by \n)
         // remove char name
         // strip to remove extra spaces
@@ -219,7 +219,7 @@
             parsed_msg = `${name}: ${parsed_msg}`;
         }
-        result.push({ "role": role, "content": parsed_msg });
+        result.push({ "role": role, "content": parsed_msg, "name": system_name });
         cur_msg_lines = [];
     }
     // skip first line as it'll always be "This is how {bot name} should talk"
@@ -231,14 +231,14 @@
             in_user = true;
             // we were in the bot mode previously, add the message
             if (in_bot) {
-                add_msg(name2, "assistant");
+                add_msg(name2, "system", "example_assistant");
             }
             in_bot = false;
         } else if (cur_str.indexOf(name2 + ":") === 0) {
             in_bot = true;
             // we were in the user mode previously, add the message
             if (in_user) {
-                add_msg(name1, "user");
+                add_msg(name1, "system", "example_user");
             }
             in_user = false;
         }
@@ -247,9 +247,9 @@
     }
     // Special case for last message in a block because we don't have a new message to trigger the switch
     if (in_user) {
-        add_msg(name1, "user");
+        add_msg(name1, "system", "example_user");
     } else if (in_bot) {
-        add_msg(name2, "assistant");
+        add_msg(name2, "system", "example_assistant");
     }
     return result;
 }
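
With this change, parseExampleIntoIndividual() stops emitting "user"/"assistant" turns and tags every example turn as a "system" message named "example_user" or "example_assistant". A sketch of the new output, assuming hypothetical names name1 = "User" and name2 = "Aqua", and assuming the name-prefix branch in the second hunk does not fire:

    // input example block (the first line is skipped by the parser):
    //   This is how Aqua should talk
    //   User: How are you?
    //   Aqua: Never better!
    //
    // parsed result:
    [
        { "role": "system", "content": "How are you?", "name": "example_user" },
        { "role": "system", "content": "Never better!", "name": "example_assistant" },
    ]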

View File

@@ -2140,19 +2140,23 @@ app.post("/generate_openai", jsonParser, function (request, response_generate_openai = response) {
 app.post("/tokenize_openai", jsonParser, function (request, response_tokenize_openai = response) {
     if (!request.body) return response_tokenize_openai.sendStatus(400);
+    const tokensPerName = request.query.model.includes('gpt-4') ? 1 : -1;
+    const tokensPerMessage = request.query.model.includes('gpt-4') ? 3 : 4;
+    const tokensPadding = 3;
     const tokenizer = tiktoken.encoding_for_model(request.query.model);
     let num_tokens = 0;
     for (const msg of request.body) {
-        num_tokens += 4;
+        num_tokens += tokensPerMessage;
         for (const [key, value] of Object.entries(msg)) {
             num_tokens += tokenizer.encode(value).length;
             if (key == "name") {
-                num_tokens += -1;
+                num_tokens += tokensPerName;
             }
         }
     }
-    num_tokens += 2;
+    num_tokens += tokensPadding;
     tokenizer.free();
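
The new constants follow the token-counting recipe OpenAI published for its chat models at the time: gpt-4-class models cost 3 tokens of overhead per message and +1 per "name" field, gpt-3.5-turbo costs 4 per message and -1 per "name" (the name replaces the role in the count), plus 3 padding tokens for the primed assistant reply. A standalone sketch of the same rule outside the Express handler (countPromptTokens and encode are hypothetical helpers, not part of this codebase):

    // messages: array of { role, content, name? } objects
    // encode: any tiktoken-style encoder returning an array of token ids
    function countPromptTokens(messages, model, encode) {
        const tokensPerName = model.includes('gpt-4') ? 1 : -1;
        const tokensPerMessage = model.includes('gpt-4') ? 3 : 4;
        let total = 3; // padding for the primed assistant reply
        for (const msg of messages) {
            total += tokensPerMessage;
            for (const value of Object.values(msg)) {
                total += encode(value).length;
            }
            if ('name' in msg) {
                total += tokensPerName;
            }
        }
        return total;
    }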