mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Install sentencepiece tokenizer from npm
This commit is contained in:
17
package-lock.json
generated
17
package-lock.json
generated
@ -35,6 +35,7 @@
|
|||||||
"png-chunks-extract": "^1.0.0",
|
"png-chunks-extract": "^1.0.0",
|
||||||
"rimraf": "^3.0.2",
|
"rimraf": "^3.0.2",
|
||||||
"sanitize-filename": "^1.6.3",
|
"sanitize-filename": "^1.6.3",
|
||||||
|
"sentencepiece-js": "^1.1.0",
|
||||||
"uniqolor": "^1.1.0",
|
"uniqolor": "^1.1.0",
|
||||||
"webp-converter": "2.3.2",
|
"webp-converter": "2.3.2",
|
||||||
"ws": "^8.13.0",
|
"ws": "^8.13.0",
|
||||||
@ -634,6 +635,14 @@
|
|||||||
"version": "1.1.0",
|
"version": "1.1.0",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/app-root-path": {
|
||||||
|
"version": "3.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/app-root-path/-/app-root-path-3.1.0.tgz",
|
||||||
|
"integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 6.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/append-field": {
|
"node_modules/append-field": {
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
@ -2781,6 +2790,14 @@
|
|||||||
"version": "2.1.3",
|
"version": "2.1.3",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/sentencepiece-js": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/sentencepiece-js/-/sentencepiece-js-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-HN6teKCRO9tz37zbaNI3i+vMZ/JRWDt6kmZ7OVpzQv1jZHyYNmf5tE7CFpIYN86+y9TLB0cuscMdA3OHhT/MhQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"app-root-path": "^3.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/serve-static": {
|
"node_modules/serve-static": {
|
||||||
"version": "1.15.0",
|
"version": "1.15.0",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
"png-chunks-extract": "^1.0.0",
|
"png-chunks-extract": "^1.0.0",
|
||||||
"rimraf": "^3.0.2",
|
"rimraf": "^3.0.2",
|
||||||
"sanitize-filename": "^1.6.3",
|
"sanitize-filename": "^1.6.3",
|
||||||
|
"sentencepiece-js": "^1.1.0",
|
||||||
"uniqolor": "^1.1.0",
|
"uniqolor": "^1.1.0",
|
||||||
"webp-converter": "2.3.2",
|
"webp-converter": "2.3.2",
|
||||||
"ws": "^8.13.0",
|
"ws": "^8.13.0",
|
||||||
|
35
server.js
35
server.js
@ -133,10 +133,27 @@ let response_getstatus_openai;
|
|||||||
|
|
||||||
const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
|
const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
|
||||||
|
|
||||||
const { SentencePieceProcessor, cleanText } = require("./src/sentencepiece/sentencepiece.min.js");
|
const { SentencePieceProcessor, cleanText } = require("sentencepiece-js");
|
||||||
let spp = new SentencePieceProcessor();
|
|
||||||
|
let spp;
|
||||||
|
|
||||||
|
/**
 * Creates a SentencePiece processor and loads the bundled tokenizer model.
 *
 * Failure is deliberately non-fatal: the error is logged and `null` is
 * returned, so the caller can detect the missing tokenizer and fall back to
 * another way of counting tokens.
 *
 * @returns {Promise<SentencePieceProcessor|null>} The loaded processor, or
 *   `null` if constructing the processor or loading the model threw.
 */
async function loadSentencepieceTokenizer() {
    try {
        // Named differently from the module-level `spp` so the local does not shadow it.
        const processor = new SentencePieceProcessor();
        await processor.load("src/sentencepiece/tokenizer.model");
        return processor;
    } catch (error) {
        // Pass the caught error along so the cause of the failure is visible in the log.
        console.error("Sentencepiece tokenizer failed to load.", error);
        return null;
    }
}
|
||||||
|
|
||||||
async function countTokensLlama(text) {
|
async function countTokensLlama(text) {
|
||||||
|
// Fallback to strlen estimation
|
||||||
|
if (!spp) {
|
||||||
|
return Math.ceil(text.length / 3.35);
|
||||||
|
}
|
||||||
|
|
||||||
let cleaned = cleanText(text);
|
let cleaned = cleanText(text);
|
||||||
|
|
||||||
let ids = spp.encodeIds(cleaned);
|
let ids = spp.encodeIds(cleaned);
|
||||||
@ -2676,7 +2693,7 @@ const setupTasks = async function () {
|
|||||||
// Colab users could run the embedded tool
|
// Colab users could run the embedded tool
|
||||||
if (!is_colab) await convertWebp();
|
if (!is_colab) await convertWebp();
|
||||||
|
|
||||||
await spp.load(`./src/sentencepiece/tokenizer.model`);
|
spp = await loadSentencepieceTokenizer();
|
||||||
|
|
||||||
console.log('Launching...');
|
console.log('Launching...');
|
||||||
|
|
||||||
@ -2685,12 +2702,12 @@ const setupTasks = async function () {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (listen && !config.whitelistMode && !config.basicAuthMode) {
|
if (listen && !config.whitelistMode && !config.basicAuthMode) {
|
||||||
if (config.securityOverride)
|
if (config.securityOverride)
|
||||||
console.warn("Security has been override. If it's not a trusted network, change the settings.");
|
console.warn("Security has been override. If it's not a trusted network, change the settings.");
|
||||||
else {
|
else {
|
||||||
console.error('Your SillyTavern is currently unsecurely open to the public. Enable whitelisting or basic authentication.');
|
console.error('Your SillyTavern is currently unsecurely open to the public. Enable whitelisting or basic authentication.');
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (true === cliArguments.ssl)
|
if (true === cliArguments.ssl)
|
||||||
|
8
src/sentencepiece/sentencepiece.min.js
vendored
8
src/sentencepiece/sentencepiece.min.js
vendored
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user