mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Install sentencepiece tokenizer from npm
This commit is contained in:
17
package-lock.json
generated
17
package-lock.json
generated
@ -35,6 +35,7 @@
|
||||
"png-chunks-extract": "^1.0.0",
|
||||
"rimraf": "^3.0.2",
|
||||
"sanitize-filename": "^1.6.3",
|
||||
"sentencepiece-js": "^1.1.0",
|
||||
"uniqolor": "^1.1.0",
|
||||
"webp-converter": "2.3.2",
|
||||
"ws": "^8.13.0",
|
||||
@ -634,6 +635,14 @@
|
||||
"version": "1.1.0",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/app-root-path": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/app-root-path/-/app-root-path-3.1.0.tgz",
|
||||
"integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==",
|
||||
"engines": {
|
||||
"node": ">= 6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/append-field": {
|
||||
"version": "1.0.0",
|
||||
"license": "MIT"
|
||||
@ -2781,6 +2790,14 @@
|
||||
"version": "2.1.3",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/sentencepiece-js": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/sentencepiece-js/-/sentencepiece-js-1.1.0.tgz",
|
||||
"integrity": "sha512-HN6teKCRO9tz37zbaNI3i+vMZ/JRWDt6kmZ7OVpzQv1jZHyYNmf5tE7CFpIYN86+y9TLB0cuscMdA3OHhT/MhQ==",
|
||||
"dependencies": {
|
||||
"app-root-path": "^3.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/serve-static": {
|
||||
"version": "1.15.0",
|
||||
"license": "MIT",
|
||||
|
@ -26,6 +26,7 @@
|
||||
"png-chunks-extract": "^1.0.0",
|
||||
"rimraf": "^3.0.2",
|
||||
"sanitize-filename": "^1.6.3",
|
||||
"sentencepiece-js": "^1.1.0",
|
||||
"uniqolor": "^1.1.0",
|
||||
"webp-converter": "2.3.2",
|
||||
"ws": "^8.13.0",
|
||||
|
23
server.js
23
server.js
@ -133,10 +133,27 @@ let response_getstatus_openai;
|
||||
|
||||
/**
 * Returns a promise that resolves after the given number of milliseconds.
 * @param {number} ms - Delay duration in milliseconds.
 * @returns {Promise<void>} Resolves once the timeout elapses.
 */
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
||||
|
||||
const { SentencePieceProcessor, cleanText } = require("./src/sentencepiece/sentencepiece.min.js");
|
||||
let spp = new SentencePieceProcessor();
|
||||
const { SentencePieceProcessor, cleanText } = require("sentencepiece-js");
|
||||
|
||||
let spp;
|
||||
|
||||
/**
 * Loads the SentencePiece tokenizer model from disk.
 *
 * Best-effort: on any failure (missing model file, corrupt model, etc.) the
 * error is logged and `null` is returned so callers can fall back to a
 * character-length token estimate instead of crashing startup.
 *
 * @returns {Promise<SentencePieceProcessor|null>} Loaded processor, or null on failure.
 */
async function loadSentencepieceTokenizer() {
    try {
        const spp = new SentencePieceProcessor();
        await spp.load("src/sentencepiece/tokenizer.model");
        return spp;
    } catch (error) {
        // Include the underlying error so the failure cause is diagnosable,
        // instead of swallowing it behind a fixed message.
        console.error("Sentencepiece tokenizer failed to load.", error);
        return null;
    }
}
|
||||
|
||||
async function countTokensLlama(text) {
|
||||
// Fallback to strlen estimation
|
||||
if (!spp) {
|
||||
return Math.ceil(v.length / 3.35);
|
||||
}
|
||||
|
||||
let cleaned = cleanText(text);
|
||||
|
||||
let ids = spp.encodeIds(cleaned);
|
||||
@ -2676,7 +2693,7 @@ const setupTasks = async function () {
|
||||
// Colab users could run the embedded tool
|
||||
if (!is_colab) await convertWebp();
|
||||
|
||||
await spp.load(`./src/sentencepiece/tokenizer.model`);
|
||||
spp = await loadSentencepieceTokenizer();
|
||||
|
||||
console.log('Launching...');
|
||||
|
||||
|
8
src/sentencepiece/sentencepiece.min.js
vendored
8
src/sentencepiece/sentencepiece.min.js
vendored
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user