Install sentencepiece tokenizer from npm

This commit is contained in:
SillyLossy
2023-05-23 22:27:37 +03:00
parent f813d5c225
commit 4feebd0ba1
4 changed files with 44 additions and 17 deletions

17
package-lock.json generated
View File

@ -35,6 +35,7 @@
"png-chunks-extract": "^1.0.0", "png-chunks-extract": "^1.0.0",
"rimraf": "^3.0.2", "rimraf": "^3.0.2",
"sanitize-filename": "^1.6.3", "sanitize-filename": "^1.6.3",
"sentencepiece-js": "^1.1.0",
"uniqolor": "^1.1.0", "uniqolor": "^1.1.0",
"webp-converter": "2.3.2", "webp-converter": "2.3.2",
"ws": "^8.13.0", "ws": "^8.13.0",
@ -634,6 +635,14 @@
"version": "1.1.0", "version": "1.1.0",
"license": "MIT" "license": "MIT"
}, },
"node_modules/app-root-path": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/app-root-path/-/app-root-path-3.1.0.tgz",
"integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==",
"engines": {
"node": ">= 6.0.0"
}
},
"node_modules/append-field": { "node_modules/append-field": {
"version": "1.0.0", "version": "1.0.0",
"license": "MIT" "license": "MIT"
@ -2781,6 +2790,14 @@
"version": "2.1.3", "version": "2.1.3",
"license": "MIT" "license": "MIT"
}, },
"node_modules/sentencepiece-js": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/sentencepiece-js/-/sentencepiece-js-1.1.0.tgz",
"integrity": "sha512-HN6teKCRO9tz37zbaNI3i+vMZ/JRWDt6kmZ7OVpzQv1jZHyYNmf5tE7CFpIYN86+y9TLB0cuscMdA3OHhT/MhQ==",
"dependencies": {
"app-root-path": "^3.1.0"
}
},
"node_modules/serve-static": { "node_modules/serve-static": {
"version": "1.15.0", "version": "1.15.0",
"license": "MIT", "license": "MIT",

View File

@ -26,6 +26,7 @@
"png-chunks-extract": "^1.0.0", "png-chunks-extract": "^1.0.0",
"rimraf": "^3.0.2", "rimraf": "^3.0.2",
"sanitize-filename": "^1.6.3", "sanitize-filename": "^1.6.3",
"sentencepiece-js": "^1.1.0",
"uniqolor": "^1.1.0", "uniqolor": "^1.1.0",
"webp-converter": "2.3.2", "webp-converter": "2.3.2",
"ws": "^8.13.0", "ws": "^8.13.0",

View File

@ -133,10 +133,27 @@ let response_getstatus_openai;
const delay = ms => new Promise(resolve => setTimeout(resolve, ms)) const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
const { SentencePieceProcessor, cleanText } = require("./src/sentencepiece/sentencepiece.min.js"); const { SentencePieceProcessor, cleanText } = require("sentencepiece-js");
let spp = new SentencePieceProcessor();
let spp;
/**
 * Creates a SentencePiece processor and loads the bundled tokenizer model
 * into it.
 *
 * A load failure is treated as non-fatal: the error is logged and `null` is
 * returned instead of rejecting, so the caller can keep running without a
 * tokenizer.
 *
 * @returns {Promise<SentencePieceProcessor|null>} The loaded processor, or
 *     `null` if construction or model loading threw.
 */
async function loadSentencepieceTokenizer() {
    try {
        // Named differently from the module-level `spp` to avoid shadowing it.
        const tokenizer = new SentencePieceProcessor();
        await tokenizer.load("src/sentencepiece/tokenizer.model");
        return tokenizer;
    } catch (error) {
        // Pass the caught error along so the cause of the failure (missing
        // model file, corrupt model, etc.) is visible in the log.
        console.error("Sentencepiece tokenizer failed to load.", error);
        return null;
    }
}
async function countTokensLlama(text) { async function countTokensLlama(text) {
// Fallback to strlen estimation
if (!spp) {
return Math.ceil(v.length / 3.35);
}
let cleaned = cleanText(text); let cleaned = cleanText(text);
let ids = spp.encodeIds(cleaned); let ids = spp.encodeIds(cleaned);
@ -2676,7 +2693,7 @@ const setupTasks = async function () {
// Colab users could run the embedded tool // Colab users could run the embedded tool
if (!is_colab) await convertWebp(); if (!is_colab) await convertWebp();
await spp.load(`./src/sentencepiece/tokenizer.model`); spp = await loadSentencepieceTokenizer();
console.log('Launching...'); console.log('Launching...');
@ -2685,12 +2702,12 @@ const setupTasks = async function () {
} }
if (listen && !config.whitelistMode && !config.basicAuthMode) { if (listen && !config.whitelistMode && !config.basicAuthMode) {
if (config.securityOverride) if (config.securityOverride)
console.warn("Security has been override. If it's not a trusted network, change the settings."); console.warn("Security has been override. If it's not a trusted network, change the settings.");
else { else {
console.error('Your SillyTavern is currently unsecurely open to the public. Enable whitelisting or basic authentication.'); console.error('Your SillyTavern is currently unsecurely open to the public. Enable whitelisting or basic authentication.');
process.exit(1); process.exit(1);
} }
} }
if (true === cliArguments.ssl) if (true === cliArguments.ssl)

File diff suppressed because one or more lines are too long