mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add Claude tokenizer
This commit is contained in:
6
package-lock.json
generated
6
package-lock.json
generated
@ -10,6 +10,7 @@
|
||||
"license": "AGPL-3.0",
|
||||
"dependencies": {
|
||||
"@dqbd/tiktoken": "^1.0.2",
|
||||
"@mlc-ai/web-tokenizers": "^0.1.0",
|
||||
"axios": "^1.4.0",
|
||||
"command-exists": "^1.2.9",
|
||||
"compression": "^1",
|
||||
@ -561,6 +562,11 @@
|
||||
"integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@mlc-ai/web-tokenizers": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@mlc-ai/web-tokenizers/-/web-tokenizers-0.1.0.tgz",
|
||||
"integrity": "sha512-whiQ+40ohtAFoFOGcje1Io7BMr434Wh3hM3nBCWlJMpXxL5Rlig/AH9wjyUPsytKwWTEe7RoYPyXSbFw5Vs6Tw=="
|
||||
},
|
||||
"node_modules/@nodelib/fs.scandir": {
|
||||
"version": "2.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
|
||||
|
@ -1,6 +1,7 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"@dqbd/tiktoken": "^1.0.2",
|
||||
"@mlc-ai/web-tokenizers": "^0.1.0",
|
||||
"axios": "^1.4.0",
|
||||
"command-exists": "^1.2.9",
|
||||
"compression": "^1",
|
||||
|
@ -1,73 +0,0 @@
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
width: 100%;
|
||||
background-color: rgb(36, 37, 37);
|
||||
background-repeat: no-repeat;
|
||||
background-attachment: fixed;
|
||||
background-size: cover;
|
||||
font-family: "Noto Sans", "Noto Color Emoji", sans-serif;
|
||||
font-size: 16px;
|
||||
/*1rem*/
|
||||
color: #999;
|
||||
box-sizing: border-box;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
/*z-index:0;*/
|
||||
}
|
||||
|
||||
#main {
|
||||
padding-top: 20px;
|
||||
/*z-index:1;*/
|
||||
}
|
||||
|
||||
#content {
|
||||
margin: 0 auto;
|
||||
max-width: 700px;
|
||||
border: 1px solid #333;
|
||||
padding: 20px;
|
||||
border-radius: 20px;
|
||||
background-color: rgba(0, 0, 0, 0.5);
|
||||
line-height: 1.5rem;
|
||||
box-shadow: 0 0 5px black;
|
||||
/*z-index: 2;*/
|
||||
}
|
||||
|
||||
code {
|
||||
border: 1px solid #999;
|
||||
background-color: rgba(0, 0, 0, 0.5);
|
||||
padding: 5px;
|
||||
border-radius: 5px;
|
||||
display: block;
|
||||
white-space: pre-line;
|
||||
}
|
||||
|
||||
a {
|
||||
color: orange;
|
||||
text-decoration: none;
|
||||
border-bottom: 1px dotted orange;
|
||||
}
|
||||
|
||||
h2,
|
||||
h3 {
|
||||
color: #ccc;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid #999;
|
||||
}
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
table,
|
||||
th,
|
||||
td {
|
||||
border: 1px solid;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
table img {
|
||||
max-width: 200px;
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<title>SillyTavern Documentation</title>
|
||||
<link rel="stylesheet" href="/css/notes.css">
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<link href="/webfonts/NotoSans/stylesheet.css" rel="stylesheet">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div id="main">
|
||||
<div id="content">
|
||||
<h2>You weren't supposed to be able to get here, you know.</h1>
|
||||
<h3>All help materials has been moved here:</h3>
|
||||
<h3><a href="https://docs.sillytavern.app/">SillyTavern Documentation</a></h3>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
@ -927,16 +927,15 @@ function getTokenizerModel() {
|
||||
return turboTokenizer;
|
||||
}
|
||||
else if (oai_settings.windowai_model.includes('claude')) {
|
||||
return turboTokenizer;
|
||||
return 'claude';
|
||||
}
|
||||
else if (oai_settings.windowai_model.includes('GPT-NeoXT')) {
|
||||
return 'gpt2';
|
||||
}
|
||||
}
|
||||
|
||||
// We don't have a Claude tokenizer for JS yet. Turbo 3.5 should be able to handle this.
|
||||
if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
|
||||
return turboTokenizer;
|
||||
return 'claude';
|
||||
}
|
||||
|
||||
// Default to Turbo 3.5
|
||||
|
65
server.js
65
server.js
@ -128,10 +128,13 @@ let response_getstatus;
|
||||
const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
|
||||
|
||||
const { SentencePieceProcessor, cleanText } = require("sentencepiece-js");
|
||||
const { Tokenizer } = require('@mlc-ai/web-tokenizers');
|
||||
const CHARS_PER_TOKEN = 3.35;
|
||||
|
||||
let spp_llama;
|
||||
let spp_nerd;
|
||||
let spp_nerd_v2;
|
||||
let claude_tokenizer;
|
||||
|
||||
async function loadSentencepieceTokenizer(modelPath) {
|
||||
try {
|
||||
@ -147,7 +150,7 @@ async function loadSentencepieceTokenizer(modelPath) {
|
||||
async function countSentencepieceTokens(spp, text) {
|
||||
// Fallback to strlen estimation
|
||||
if (!spp) {
|
||||
return Math.ceil(text.length / 3.35);
|
||||
return Math.ceil(text.length / CHARS_PER_TOKEN);
|
||||
}
|
||||
|
||||
let cleaned = cleanText(text);
|
||||
@ -156,9 +159,36 @@ async function countSentencepieceTokens(spp, text) {
|
||||
return ids.length;
|
||||
}
|
||||
|
||||
async function loadClaudeTokenizer(modelPath) {
|
||||
try {
|
||||
const arrayBuffer = fs.readFileSync(modelPath).buffer;
|
||||
const instance = await Tokenizer.fromJSON(arrayBuffer);
|
||||
return instance;
|
||||
} catch (error) {
|
||||
console.error("Claude tokenizer failed to load: " + modelPath, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function countClaudeTokens(tokenizer, messages) {
|
||||
const convertedPrompt = convertClaudePrompt(messages, false, false);
|
||||
|
||||
// Fallback to strlen estimation
|
||||
if (!tokenizer) {
|
||||
return Math.ceil(convertedPrompt.length / CHARS_PER_TOKEN);
|
||||
}
|
||||
|
||||
const count = tokenizer.encode(convertedPrompt).length;
|
||||
return count;
|
||||
}
|
||||
|
||||
const tokenizersCache = {};
|
||||
|
||||
function getTokenizerModel(requestModel) {
|
||||
if (requestModel.includes('claude')) {
|
||||
return 'claude';
|
||||
}
|
||||
|
||||
if (requestModel.includes('gpt-4-32k')) {
|
||||
return 'gpt-4-32k';
|
||||
}
|
||||
@ -2870,6 +2900,12 @@ app.post("/openai_bias", jsonParser, async function (request, response) {
|
||||
let result = {};
|
||||
|
||||
const model = getTokenizerModel(String(request.query.model || ''));
|
||||
|
||||
// no bias for claude
|
||||
if (model == 'claude') {
|
||||
return response.send(result);
|
||||
}
|
||||
|
||||
const tokenizer = getTiktokenTokenizer(model);
|
||||
|
||||
for (const entry of request.body) {
|
||||
@ -2942,7 +2978,7 @@ app.post("/deletepreset_openai", jsonParser, function (request, response) {
|
||||
});
|
||||
|
||||
// Prompt Conversion script taken from RisuAI by @kwaroran (GPLv3).
|
||||
function convertClaudePrompt(messages) {
|
||||
function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix) {
|
||||
// Claude doesn't support message names, so we'll just add them to the message content.
|
||||
for (const message of messages) {
|
||||
if (message.name && message.role !== "system") {
|
||||
@ -2972,7 +3008,16 @@ function convertClaudePrompt(messages) {
|
||||
break
|
||||
}
|
||||
return prefix + v.content;
|
||||
}).join('') + '\n\nAssistant: ';
|
||||
}).join('');
|
||||
|
||||
if (addHumanPrefix) {
|
||||
requestPrompt = "\n\nHuman: " + requestPrompt;
|
||||
}
|
||||
|
||||
if (addAssistantPostfix) {
|
||||
requestPrompt = requestPrompt + '\n\nAssistant: ';
|
||||
}
|
||||
|
||||
return requestPrompt;
|
||||
}
|
||||
|
||||
@ -2993,14 +3038,14 @@ async function sendClaudeRequest(request, response) {
|
||||
controller.abort();
|
||||
});
|
||||
|
||||
const requestPrompt = convertClaudePrompt(request.body.messages);
|
||||
const requestPrompt = convertClaudePrompt(request.body.messages, true, true);
|
||||
console.log('Claude request:', requestPrompt);
|
||||
|
||||
const generateResponse = await fetch(api_url + '/complete', {
|
||||
method: "POST",
|
||||
signal: controller.signal,
|
||||
body: JSON.stringify({
|
||||
prompt: "\n\nHuman: " + requestPrompt,
|
||||
prompt: requestPrompt,
|
||||
model: request.body.model,
|
||||
max_tokens_to_sample: request.body.max_tokens,
|
||||
stop_sequences: ["\n\nHuman:", "\n\nSystem:", "\n\nAssistant:"],
|
||||
@ -3166,15 +3211,20 @@ app.post("/generate_openai", jsonParser, function (request, response_generate_op
|
||||
app.post("/tokenize_openai", jsonParser, function (request, response_tokenize_openai = response) {
|
||||
if (!request.body) return response_tokenize_openai.sendStatus(400);
|
||||
|
||||
let num_tokens = 0;
|
||||
const model = getTokenizerModel(String(request.query.model || ''));
|
||||
|
||||
if (model == 'claude') {
|
||||
num_tokens = countClaudeTokens(claude_tokenizer, request.body);
|
||||
return response_tokenize_openai.send({ "token_count": num_tokens });
|
||||
}
|
||||
|
||||
const tokensPerName = model.includes('gpt-4') ? 1 : -1;
|
||||
const tokensPerMessage = model.includes('gpt-4') ? 3 : 4;
|
||||
const tokensPadding = 3;
|
||||
|
||||
const tokenizer = getTiktokenTokenizer(model);
|
||||
|
||||
let num_tokens = 0;
|
||||
for (const msg of request.body) {
|
||||
num_tokens += tokensPerMessage;
|
||||
for (const [key, value] of Object.entries(msg)) {
|
||||
@ -3282,10 +3332,11 @@ const setupTasks = async function () {
|
||||
// Colab users could run the embedded tool
|
||||
if (!is_colab) await convertWebp();
|
||||
|
||||
[spp_llama, spp_nerd, spp_nerd_v2] = await Promise.all([
|
||||
[spp_llama, spp_nerd, spp_nerd_v2, claude_tokenizer] = await Promise.all([
|
||||
loadSentencepieceTokenizer('src/sentencepiece/tokenizer.model'),
|
||||
loadSentencepieceTokenizer('src/sentencepiece/nerdstash.model'),
|
||||
loadSentencepieceTokenizer('src/sentencepiece/nerdstash_v2.model'),
|
||||
loadClaudeTokenizer('src/claude.json'),
|
||||
]);
|
||||
|
||||
console.log('Launching...');
|
||||
|
1
src/claude.json
Normal file
1
src/claude.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user