Rename tokenizer routes
The renamed tokenizer routes now all live under `/api/tokenizers/<name>/`, with "count", "encode", and "decode" endpoints beneath each tokenizer as applicable. This forms a clearer hierarchy.
This commit is contained in:
parent
c073a89e2c
commit
57bc95133e
|
@ -161,17 +161,17 @@ function callTokenizer(type, str, padding) {
|
|||
case tokenizers.NONE:
|
||||
return guesstimate(str) + padding;
|
||||
case tokenizers.GPT2:
|
||||
return countTokensRemote('/api/tokenize/gpt2', str, padding);
|
||||
return countTokensRemote('/api/tokenizers/gpt2/encode', str, padding);
|
||||
case tokenizers.LLAMA:
|
||||
return countTokensRemote('/api/tokenize/llama', str, padding);
|
||||
return countTokensRemote('/api/tokenizers/llama/encode', str, padding);
|
||||
case tokenizers.NERD:
|
||||
return countTokensRemote('/api/tokenize/nerdstash', str, padding);
|
||||
return countTokensRemote('/api/tokenizers/nerdstash/encode', str, padding);
|
||||
case tokenizers.NERD2:
|
||||
return countTokensRemote('/api/tokenize/nerdstash_v2', str, padding);
|
||||
return countTokensRemote('/api/tokenizers/nerdstash_v2/encode', str, padding);
|
||||
case tokenizers.MISTRAL:
|
||||
return countTokensRemote('/api/tokenize/mistral', str, padding);
|
||||
return countTokensRemote('/api/tokenizers/mistral/encode', str, padding);
|
||||
case tokenizers.YI:
|
||||
return countTokensRemote('/api/tokenize/yi', str, padding);
|
||||
return countTokensRemote('/api/tokenizers/yi/encode', str, padding);
|
||||
case tokenizers.API:
|
||||
return countTokensRemote('/tokenize_via_api', str, padding);
|
||||
default:
|
||||
|
@ -349,7 +349,7 @@ export function countTokensOpenAI(messages, full = false) {
|
|||
jQuery.ajax({
|
||||
async: false,
|
||||
type: 'POST', //
|
||||
url: shouldTokenizeAI21 ? '/api/tokenize/ai21' : `/api/tokenize/openai?model=${model}`,
|
||||
url: shouldTokenizeAI21 ? '/api/tokenizers/ai21/count' : `/api/tokenizers/openai/count?model=${model}`,
|
||||
data: JSON.stringify([message]),
|
||||
dataType: 'json',
|
||||
contentType: 'application/json',
|
||||
|
@ -509,20 +509,20 @@ function decodeTextTokensRemote(endpoint, ids, model = '') {
|
|||
export function getTextTokens(tokenizerType, str) {
|
||||
switch (tokenizerType) {
|
||||
case tokenizers.GPT2:
|
||||
return getTextTokensRemote('/api/tokenize/gpt2', str);
|
||||
return getTextTokensRemote('/api/tokenizers/gpt2/encode', str);
|
||||
case tokenizers.LLAMA:
|
||||
return getTextTokensRemote('/api/tokenize/llama', str);
|
||||
return getTextTokensRemote('/api/tokenizers/llama/encode', str);
|
||||
case tokenizers.NERD:
|
||||
return getTextTokensRemote('/api/tokenize/nerdstash', str);
|
||||
return getTextTokensRemote('/api/tokenizers/nerdstash/encode', str);
|
||||
case tokenizers.NERD2:
|
||||
return getTextTokensRemote('/api/tokenize/nerdstash_v2', str);
|
||||
return getTextTokensRemote('/api/tokenizers/nerdstash_v2/encode', str);
|
||||
case tokenizers.MISTRAL:
|
||||
return getTextTokensRemote('/api/tokenize/mistral', str);
|
||||
return getTextTokensRemote('/api/tokenizers/mistral/encode', str);
|
||||
case tokenizers.YI:
|
||||
return getTextTokensRemote('/api/tokenize/yi', str);
|
||||
return getTextTokensRemote('/api/tokenizers/yi/encode', str);
|
||||
case tokenizers.OPENAI: {
|
||||
const model = getTokenizerModel();
|
||||
return getTextTokensRemote('/api/tokenize/openai-encode', str, model);
|
||||
return getTextTokensRemote('/api/tokenizers/openai/encode', str, model);
|
||||
}
|
||||
case tokenizers.API:
|
||||
return getTextTokensRemote('/tokenize_via_api', str);
|
||||
|
@ -540,20 +540,20 @@ export function getTextTokens(tokenizerType, str) {
|
|||
export function decodeTextTokens(tokenizerType, ids) {
|
||||
switch (tokenizerType) {
|
||||
case tokenizers.GPT2:
|
||||
return decodeTextTokensRemote('/api/decode/gpt2', ids);
|
||||
return decodeTextTokensRemote('/api/tokenizers/gpt2/decode', ids);
|
||||
case tokenizers.LLAMA:
|
||||
return decodeTextTokensRemote('/api/decode/llama', ids);
|
||||
return decodeTextTokensRemote('/api/tokenizers/llama/decode', ids);
|
||||
case tokenizers.NERD:
|
||||
return decodeTextTokensRemote('/api/decode/nerdstash', ids);
|
||||
return decodeTextTokensRemote('/api/tokenizers/nerdstash/decode', ids);
|
||||
case tokenizers.NERD2:
|
||||
return decodeTextTokensRemote('/api/decode/nerdstash_v2', ids);
|
||||
return decodeTextTokensRemote('/api/tokenizers/nerdstash_v2/decode', ids);
|
||||
case tokenizers.MISTRAL:
|
||||
return decodeTextTokensRemote('/api/decode/mistral', ids);
|
||||
return decodeTextTokensRemote('/api/tokenizers/mistral/decode', ids);
|
||||
case tokenizers.YI:
|
||||
return decodeTextTokensRemote('/api/decode/yi', ids);
|
||||
return decodeTextTokensRemote('/api/tokenizers/yi/decode', ids);
|
||||
case tokenizers.OPENAI: {
|
||||
const model = getTokenizerModel();
|
||||
return decodeTextTokensRemote('/api/decode/openai', ids, model);
|
||||
return decodeTextTokensRemote('/api/tokenizers/openai/decode', ids, model);
|
||||
}
|
||||
default:
|
||||
console.warn('Calling decodeTextTokens with unsupported tokenizer type', tokenizerType);
|
||||
|
|
|
@ -365,7 +365,7 @@ async function loadTokenizers() {
|
|||
* @param {any} jsonParser JSON parser middleware
|
||||
*/
|
||||
function registerEndpoints(app, jsonParser) {
|
||||
app.post('/api/tokenize/ai21', jsonParser, async function (req, res) {
|
||||
app.post('/api/tokenizers/ai21/count', jsonParser, async function (req, res) {
|
||||
if (!req.body) return res.sendStatus(400);
|
||||
const options = {
|
||||
method: 'POST',
|
||||
|
@ -387,20 +387,20 @@ function registerEndpoints(app, jsonParser) {
|
|||
}
|
||||
});
|
||||
|
||||
app.post('/api/tokenize/llama', jsonParser, createSentencepieceEncodingHandler(spp_llama));
|
||||
app.post('/api/tokenize/nerdstash', jsonParser, createSentencepieceEncodingHandler(spp_nerd));
|
||||
app.post('/api/tokenize/nerdstash_v2', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2));
|
||||
app.post('/api/tokenize/mistral', jsonParser, createSentencepieceEncodingHandler(spp_mistral));
|
||||
app.post('/api/tokenize/yi', jsonParser, createSentencepieceEncodingHandler(spp_yi));
|
||||
app.post('/api/tokenize/gpt2', jsonParser, createTiktokenEncodingHandler('gpt2'));
|
||||
app.post('/api/decode/llama', jsonParser, createSentencepieceDecodingHandler(spp_llama));
|
||||
app.post('/api/decode/nerdstash', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
|
||||
app.post('/api/decode/nerdstash_v2', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
|
||||
app.post('/api/decode/mistral', jsonParser, createSentencepieceDecodingHandler(spp_mistral));
|
||||
app.post('/api/decode/yi', jsonParser, createSentencepieceDecodingHandler(spp_yi));
|
||||
app.post('/api/decode/gpt2', jsonParser, createTiktokenDecodingHandler('gpt2'));
|
||||
app.post('/api/tokenizers/llama/encode', jsonParser, createSentencepieceEncodingHandler(spp_llama));
|
||||
app.post('/api/tokenizers/nerdstash/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd));
|
||||
app.post('/api/tokenizers/nerdstash_v2/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2));
|
||||
app.post('/api/tokenizers/mistral/encode', jsonParser, createSentencepieceEncodingHandler(spp_mistral));
|
||||
app.post('/api/tokenizers/yi/encode', jsonParser, createSentencepieceEncodingHandler(spp_yi));
|
||||
app.post('/api/tokenizers/gpt2/encode', jsonParser, createTiktokenEncodingHandler('gpt2'));
|
||||
app.post('/api/tokenizers/llama/decode', jsonParser, createSentencepieceDecodingHandler(spp_llama));
|
||||
app.post('/api/tokenizers/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
|
||||
app.post('/api/tokenizers/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
|
||||
app.post('/api/tokenizers/mistral/decode', jsonParser, createSentencepieceDecodingHandler(spp_mistral));
|
||||
app.post('/api/tokenizers/yi/decode', jsonParser, createSentencepieceDecodingHandler(spp_yi));
|
||||
app.post('/api/tokenizers/gpt2/decode', jsonParser, createTiktokenDecodingHandler('gpt2'));
|
||||
|
||||
app.post('/api/tokenize/openai-encode', jsonParser, async function (req, res) {
|
||||
app.post('/api/tokenizers/openai/encode', jsonParser, async function (req, res) {
|
||||
try {
|
||||
const queryModel = String(req.query.model || '');
|
||||
|
||||
|
@ -435,7 +435,7 @@ function registerEndpoints(app, jsonParser) {
|
|||
}
|
||||
});
|
||||
|
||||
app.post('/api/decode/openai', jsonParser, async function (req, res) {
|
||||
app.post('/api/tokenizers/openai/decode', jsonParser, async function (req, res) {
|
||||
try {
|
||||
const queryModel = String(req.query.model || '');
|
||||
|
||||
|
@ -469,7 +469,7 @@ function registerEndpoints(app, jsonParser) {
|
|||
}
|
||||
});
|
||||
|
||||
app.post('/api/tokenize/openai', jsonParser, async function (req, res) {
|
||||
app.post('/api/tokenizers/openai/count', jsonParser, async function (req, res) {
|
||||
try {
|
||||
if (!req.body) return res.sendStatus(400);
|
||||
|
||||
|
|
Loading…
Reference in New Issue