Rename tokenizer routes

They're all under tokenizers/ now, and there are "count", "encode", and
"decode" endpoints. This forms a clearer hierarchy.
This commit is contained in:
valadaptive 2023-12-03 09:42:54 -05:00
parent c073a89e2c
commit 57bc95133e
2 changed files with 37 additions and 37 deletions

View File

@ -161,17 +161,17 @@ function callTokenizer(type, str, padding) {
case tokenizers.NONE:
return guesstimate(str) + padding;
case tokenizers.GPT2:
return countTokensRemote('/api/tokenize/gpt2', str, padding);
return countTokensRemote('/api/tokenizers/gpt2/encode', str, padding);
case tokenizers.LLAMA:
return countTokensRemote('/api/tokenize/llama', str, padding);
return countTokensRemote('/api/tokenizers/llama/encode', str, padding);
case tokenizers.NERD:
return countTokensRemote('/api/tokenize/nerdstash', str, padding);
return countTokensRemote('/api/tokenizers/nerdstash/encode', str, padding);
case tokenizers.NERD2:
return countTokensRemote('/api/tokenize/nerdstash_v2', str, padding);
return countTokensRemote('/api/tokenizers/nerdstash_v2/encode', str, padding);
case tokenizers.MISTRAL:
return countTokensRemote('/api/tokenize/mistral', str, padding);
return countTokensRemote('/api/tokenizers/mistral/encode', str, padding);
case tokenizers.YI:
return countTokensRemote('/api/tokenize/yi', str, padding);
return countTokensRemote('/api/tokenizers/yi/encode', str, padding);
case tokenizers.API:
return countTokensRemote('/tokenize_via_api', str, padding);
default:
@ -349,7 +349,7 @@ export function countTokensOpenAI(messages, full = false) {
jQuery.ajax({
async: false,
type: 'POST', //
url: shouldTokenizeAI21 ? '/api/tokenize/ai21' : `/api/tokenize/openai?model=${model}`,
url: shouldTokenizeAI21 ? '/api/tokenizers/ai21/count' : `/api/tokenizers/openai/count?model=${model}`,
data: JSON.stringify([message]),
dataType: 'json',
contentType: 'application/json',
@ -509,20 +509,20 @@ function decodeTextTokensRemote(endpoint, ids, model = '') {
export function getTextTokens(tokenizerType, str) {
switch (tokenizerType) {
case tokenizers.GPT2:
return getTextTokensRemote('/api/tokenize/gpt2', str);
return getTextTokensRemote('/api/tokenizers/gpt2/encode', str);
case tokenizers.LLAMA:
return getTextTokensRemote('/api/tokenize/llama', str);
return getTextTokensRemote('/api/tokenizers/llama/encode', str);
case tokenizers.NERD:
return getTextTokensRemote('/api/tokenize/nerdstash', str);
return getTextTokensRemote('/api/tokenizers/nerdstash/encode', str);
case tokenizers.NERD2:
return getTextTokensRemote('/api/tokenize/nerdstash_v2', str);
return getTextTokensRemote('/api/tokenizers/nerdstash_v2/encode', str);
case tokenizers.MISTRAL:
return getTextTokensRemote('/api/tokenize/mistral', str);
return getTextTokensRemote('/api/tokenizers/mistral/encode', str);
case tokenizers.YI:
return getTextTokensRemote('/api/tokenize/yi', str);
return getTextTokensRemote('/api/tokenizers/yi/encode', str);
case tokenizers.OPENAI: {
const model = getTokenizerModel();
return getTextTokensRemote('/api/tokenize/openai-encode', str, model);
return getTextTokensRemote('/api/tokenizers/openai/encode', str, model);
}
case tokenizers.API:
return getTextTokensRemote('/tokenize_via_api', str);
@ -540,20 +540,20 @@ export function getTextTokens(tokenizerType, str) {
export function decodeTextTokens(tokenizerType, ids) {
switch (tokenizerType) {
case tokenizers.GPT2:
return decodeTextTokensRemote('/api/decode/gpt2', ids);
return decodeTextTokensRemote('/api/tokenizers/gpt2/decode', ids);
case tokenizers.LLAMA:
return decodeTextTokensRemote('/api/decode/llama', ids);
return decodeTextTokensRemote('/api/tokenizers/llama/decode', ids);
case tokenizers.NERD:
return decodeTextTokensRemote('/api/decode/nerdstash', ids);
return decodeTextTokensRemote('/api/tokenizers/nerdstash/decode', ids);
case tokenizers.NERD2:
return decodeTextTokensRemote('/api/decode/nerdstash_v2', ids);
return decodeTextTokensRemote('/api/tokenizers/nerdstash_v2/decode', ids);
case tokenizers.MISTRAL:
return decodeTextTokensRemote('/api/decode/mistral', ids);
return decodeTextTokensRemote('/api/tokenizers/mistral/decode', ids);
case tokenizers.YI:
return decodeTextTokensRemote('/api/decode/yi', ids);
return decodeTextTokensRemote('/api/tokenizers/yi/decode', ids);
case tokenizers.OPENAI: {
const model = getTokenizerModel();
return decodeTextTokensRemote('/api/decode/openai', ids, model);
return decodeTextTokensRemote('/api/tokenizers/openai/decode', ids, model);
}
default:
console.warn('Calling decodeTextTokens with unsupported tokenizer type', tokenizerType);

View File

@ -365,7 +365,7 @@ async function loadTokenizers() {
* @param {any} jsonParser JSON parser middleware
*/
function registerEndpoints(app, jsonParser) {
app.post('/api/tokenize/ai21', jsonParser, async function (req, res) {
app.post('/api/tokenizers/ai21/count', jsonParser, async function (req, res) {
if (!req.body) return res.sendStatus(400);
const options = {
method: 'POST',
@ -387,20 +387,20 @@ function registerEndpoints(app, jsonParser) {
}
});
app.post('/api/tokenize/llama', jsonParser, createSentencepieceEncodingHandler(spp_llama));
app.post('/api/tokenize/nerdstash', jsonParser, createSentencepieceEncodingHandler(spp_nerd));
app.post('/api/tokenize/nerdstash_v2', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2));
app.post('/api/tokenize/mistral', jsonParser, createSentencepieceEncodingHandler(spp_mistral));
app.post('/api/tokenize/yi', jsonParser, createSentencepieceEncodingHandler(spp_yi));
app.post('/api/tokenize/gpt2', jsonParser, createTiktokenEncodingHandler('gpt2'));
app.post('/api/decode/llama', jsonParser, createSentencepieceDecodingHandler(spp_llama));
app.post('/api/decode/nerdstash', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
app.post('/api/decode/nerdstash_v2', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
app.post('/api/decode/mistral', jsonParser, createSentencepieceDecodingHandler(spp_mistral));
app.post('/api/decode/yi', jsonParser, createSentencepieceDecodingHandler(spp_yi));
app.post('/api/decode/gpt2', jsonParser, createTiktokenDecodingHandler('gpt2'));
app.post('/api/tokenizers/llama/encode', jsonParser, createSentencepieceEncodingHandler(spp_llama));
app.post('/api/tokenizers/nerdstash/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd));
app.post('/api/tokenizers/nerdstash_v2/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2));
app.post('/api/tokenizers/mistral/encode', jsonParser, createSentencepieceEncodingHandler(spp_mistral));
app.post('/api/tokenizers/yi/encode', jsonParser, createSentencepieceEncodingHandler(spp_yi));
app.post('/api/tokenizers/gpt2/encode', jsonParser, createTiktokenEncodingHandler('gpt2'));
app.post('/api/tokenizers/llama/decode', jsonParser, createSentencepieceDecodingHandler(spp_llama));
app.post('/api/tokenizers/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
app.post('/api/tokenizers/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
app.post('/api/tokenizers/mistral/decode', jsonParser, createSentencepieceDecodingHandler(spp_mistral));
app.post('/api/tokenizers/yi/decode', jsonParser, createSentencepieceDecodingHandler(spp_yi));
app.post('/api/tokenizers/gpt2/decode', jsonParser, createTiktokenDecodingHandler('gpt2'));
app.post('/api/tokenize/openai-encode', jsonParser, async function (req, res) {
app.post('/api/tokenizers/openai/encode', jsonParser, async function (req, res) {
try {
const queryModel = String(req.query.model || '');
@ -435,7 +435,7 @@ function registerEndpoints(app, jsonParser) {
}
});
app.post('/api/decode/openai', jsonParser, async function (req, res) {
app.post('/api/tokenizers/openai/decode', jsonParser, async function (req, res) {
try {
const queryModel = String(req.query.model || '');
@ -469,7 +469,7 @@ function registerEndpoints(app, jsonParser) {
}
});
app.post('/api/tokenize/openai', jsonParser, async function (req, res) {
app.post('/api/tokenizers/openai/count', jsonParser, async function (req, res) {
try {
if (!req.body) return res.sendStatus(400);