Use a proper tokenizer for GPT-4o

Cohee 2024-05-14 15:30:11 +03:00
parent 49cb8daf7d
commit 3113109f0a
3 changed files with 13 additions and 8 deletions

package-lock.json (generated)

@@ -12,7 +12,6 @@
       "dependencies": {
         "@agnai/sentencepiece-js": "^1.1.1",
         "@agnai/web-tokenizers": "^0.1.3",
-        "@dqbd/tiktoken": "^1.0.13",
         "@zeldafan0225/ai_horde": "^4.0.1",
         "archiver": "^7.0.1",
         "bing-translate-api": "^2.9.1",
@@ -46,6 +45,7 @@
         "sanitize-filename": "^1.6.3",
         "sillytavern-transformers": "^2.14.6",
         "simple-git": "^3.19.1",
+        "tiktoken": "^1.0.15",
         "vectra": "^0.2.2",
         "wavefile": "^11.0.0",
         "write-file-atomic": "^5.0.1",
@@ -82,10 +82,6 @@
       "version": "0.1.3",
       "license": "Apache-2.0"
     },
-    "node_modules/@dqbd/tiktoken": {
-      "version": "1.0.13",
-      "license": "MIT"
-    },
     "node_modules/@eslint-community/eslint-utils": {
       "version": "4.4.0",
       "dev": true,
@@ -4403,6 +4399,11 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/tiktoken": {
+      "version": "1.0.15",
+      "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.15.tgz",
+      "integrity": "sha512-sCsrq/vMWUSEW29CJLNmPvWxlVp7yh2tlkAjpJltIKqp5CKf98ZNpdeHRmAlPVFlGEbswDc6SmI8vz64W/qErw=="
+    },
     "node_modules/timm": {
       "version": "1.7.1",
       "license": "MIT"

package.json

@@ -2,7 +2,6 @@
     "dependencies": {
         "@agnai/sentencepiece-js": "^1.1.1",
         "@agnai/web-tokenizers": "^0.1.3",
-        "@dqbd/tiktoken": "^1.0.13",
         "@zeldafan0225/ai_horde": "^4.0.1",
         "archiver": "^7.0.1",
         "bing-translate-api": "^2.9.1",
@@ -36,6 +35,7 @@
     "sanitize-filename": "^1.6.3",
     "sillytavern-transformers": "^2.14.6",
     "simple-git": "^3.19.1",
+    "tiktoken": "^1.0.15",
     "vectra": "^0.2.2",
     "wavefile": "^11.0.0",
     "write-file-atomic": "^5.0.1",

src/endpoints/tokenizers.js

@@ -2,7 +2,7 @@ const fs = require('fs');
 const path = require('path');
 const express = require('express');
 const { SentencePieceProcessor } = require('@agnai/sentencepiece-js');
-const tiktoken = require('@dqbd/tiktoken');
+const tiktoken = require('tiktoken');
 const { Tokenizer } = require('@agnai/web-tokenizers');
 const { convertClaudePrompt, convertGooglePrompt } = require('../prompt-converters');
 const { readSecret, SECRET_KEYS } = require('./secrets');
@@ -15,7 +15,7 @@ const { setAdditionalHeaders } = require('../additional-headers');
  */
 /**
- * @type {{[key: string]: import("@dqbd/tiktoken").Tiktoken}} Tokenizers cache
+ * @type {{[key: string]: import('tiktoken').Tiktoken}} Tokenizers cache
 */
 const tokenizersCache = {};
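
The cache typed above exists to avoid rebuilding WASM-backed Tiktoken instances on every request. A rough sketch of how such a cache is typically used with the new tiktoken package (illustrative only; the helper name and structure are assumptions, not lines from this commit):

const tiktoken = require('tiktoken');
const tokenizersCache = {};

// Hypothetical helper: return a cached Tiktoken instance for a model,
// building it once via encoding_for_model and reusing it afterwards.
function getCachedTokenizer(model) {
    if (!tokenizersCache[model]) {
        tokenizersCache[model] = tiktoken.encoding_for_model(model);
    }
    return tokenizersCache[model];
}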
@@ -262,6 +262,10 @@ function getWebTokenizersChunks(tokenizer, ids) {
  * @returns {string} Tokenizer model to use
  */
 function getTokenizerModel(requestModel) {
+    if (requestModel.includes('gpt-4o')) {
+        return 'gpt-4o';
+    }
+
     if (requestModel.includes('gpt-4-32k')) {
         return 'gpt-4-32k';
     }
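
With the new branch, getTokenizerModel maps any gpt-4o variant to 'gpt-4o', which the swapped-in tiktoken 1.0.15 can resolve to the o200k_base encoding that the older @dqbd/tiktoken 1.0.13 lacked. A minimal token-counting sketch against the new package (illustrative, not the endpoint's exact code):

const { encoding_for_model } = require('tiktoken');

// tiktoken >= 1.0.15 recognizes gpt-4o and returns its o200k_base encoding.
const encoder = encoding_for_model('gpt-4o');
const ids = encoder.encode('Hello, world!');
console.log(ids.length); // number of tokens in the prompt
encoder.free(); // the WASM-backed encoder should be freed when no longer needed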