diff --git a/.gitignore b/.gitignore index a3e39cb09..582031246 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ content.log cloudflared.exe public/assets/ access.log +/vectors/ diff --git a/package-lock.json b/package-lock.json index 9bde269ce..ee683edaa 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,8 @@ "@agnai/sentencepiece-js": "^1.1.1", "@agnai/web-tokenizers": "^0.1.3", "@dqbd/tiktoken": "^1.0.2", + "@tensorflow-models/universal-sentence-encoder": "^1.3.3", + "@tensorflow/tfjs": "^4.10.0", "command-exists": "^1.2.9", "compression": "^1", "cookie-parser": "^1.4.6", @@ -40,6 +42,7 @@ "sanitize-filename": "^1.6.3", "simple-git": "^3.19.1", "uniqolor": "^1.1.0", + "vectra": "^0.2.2", "webp-converter": "2.3.2", "write-file-atomic": "^5.0.1", "ws": "^8.13.0", @@ -642,16 +645,274 @@ "node": ">= 8" } }, + "node_modules/@tensorflow-models/universal-sentence-encoder": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/@tensorflow-models/universal-sentence-encoder/-/universal-sentence-encoder-1.3.3.tgz", + "integrity": "sha512-mipL7ad0CW6uQ68FUkNgkNj/zgA4qgBnNcnMMkNTdL9MUMnzCxu3AE8pWnx2ReKHwdqEG4e8IpaYKfH4B8bojg==", + "peerDependencies": { + "@tensorflow/tfjs-converter": "^3.6.0", + "@tensorflow/tfjs-core": "^3.6.0" + } + }, + "node_modules/@tensorflow/tfjs": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs/-/tfjs-4.10.0.tgz", + "integrity": "sha512-16q6CcGEoxfg3kimBzNBrZg069GRd4Du49uyQHUMsEvpHbmU0ZpWa2zhjjZ71GlURpbD1LZIZNp6cd2Q1Eqjow==", + "dependencies": { + "@tensorflow/tfjs-backend-cpu": "4.10.0", + "@tensorflow/tfjs-backend-webgl": "4.10.0", + "@tensorflow/tfjs-converter": "4.10.0", + "@tensorflow/tfjs-core": "4.10.0", + "@tensorflow/tfjs-data": "4.10.0", + "@tensorflow/tfjs-layers": "4.10.0", + "argparse": "^1.0.10", + "chalk": "^4.1.0", + "core-js": "3.29.1", + "regenerator-runtime": "^0.13.5", + "yargs": "^16.0.3" + }, + "bin": { + "tfjs-custom-module": "dist/tools/custom_module/cli.js" + } + }, + "node_modules/@tensorflow/tfjs-converter": { + "version": "3.21.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-converter/-/tfjs-converter-3.21.0.tgz", + "integrity": "sha512-12Y4zVDq3yW+wSjSDpSv4HnpL2sDZrNiGSg8XNiDE4HQBdjdA+a+Q3sZF/8NV9y2yoBhL5L7V4mMLDdbZBd9/Q==", + "peer": true, + "peerDependencies": { + "@tensorflow/tfjs-core": "3.21.0" + } + }, + "node_modules/@tensorflow/tfjs-core": { + "version": "3.21.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-core/-/tfjs-core-3.21.0.tgz", + "integrity": "sha512-YSfsswOqWfd+M4bXIhT3hwtAb+IV8+ODwIxwdFR/7jTAPZP1wMVnSlpKnXHAN64HFOiP+Tm3HmKusEZ0+09A0w==", + "peer": true, + "dependencies": { + "@types/long": "^4.0.1", + "@types/offscreencanvas": "~2019.3.0", + "@types/seedrandom": "^2.4.28", + "@types/webgl-ext": "0.0.30", + "@webgpu/types": "0.1.16", + "long": "4.0.0", + "node-fetch": "~2.6.1", + "seedrandom": "^3.0.5" + }, + "engines": { + "yarn": ">= 1.3.2" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/@tensorflow/tfjs-backend-cpu": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-backend-cpu/-/tfjs-backend-cpu-4.10.0.tgz", + "integrity": "sha512-w3f0ORR1smSpkW7om2yQVunRYMjyqWcEbWCPirR1DQ6ImgW+VWqmM2oVPQXRsFYpwg1g6bk2Jp5COafpPA+krw==", + "dependencies": { + "@types/seedrandom": "^2.4.28", + "seedrandom": "^3.0.5" + }, + "engines": { + "yarn": ">= 1.3.2" + }, + "peerDependencies": { + "@tensorflow/tfjs-core": "4.10.0" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/@tensorflow/tfjs-backend-webgl": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-backend-webgl/-/tfjs-backend-webgl-4.10.0.tgz", + "integrity": "sha512-Vzl/pyXHa9TgFaRJGspExjZVDKgkKvLxOkPaH+psE2LPnQkiH/IOPO7HKO0U3+hZql977BIiZdHc6HNprFS3/A==", + "dependencies": { + "@tensorflow/tfjs-backend-cpu": "4.10.0", + "@types/offscreencanvas": "~2019.3.0", + "@types/seedrandom": "^2.4.28", + "seedrandom": "^3.0.5" + }, + "engines": { + "yarn": ">= 1.3.2" + }, + "peerDependencies": { + "@tensorflow/tfjs-core": "4.10.0" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/@tensorflow/tfjs-converter": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-converter/-/tfjs-converter-4.10.0.tgz", + "integrity": "sha512-ffLpK+ismdmiDcoTID2aidP3/uJYyQPjmKdRZ3hBUkrczy7pQIcCW8blIR9Gk20htB4OLQMf74ZxbpfdQ9nYeQ==", + "peerDependencies": { + "@tensorflow/tfjs-core": "4.10.0" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/@tensorflow/tfjs-core": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-core/-/tfjs-core-4.10.0.tgz", + "integrity": "sha512-klc6lUTYRbHQLEFnVKtTICNK+WUlduUcrlXsDs6ixKAOJzLAlIR2JnmJICt2AT2Rxwu0Zj2bAYojSxgcIcUUxA==", + "dependencies": { + "@types/long": "^4.0.1", + "@types/offscreencanvas": "~2019.7.0", + "@types/seedrandom": "^2.4.28", + "@webgpu/types": "0.1.30", + "long": "4.0.0", + "node-fetch": "~2.6.1", + "seedrandom": "^3.0.5" + }, + "engines": { + "yarn": ">= 1.3.2" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/@tensorflow/tfjs-core/node_modules/@types/offscreencanvas": { + "version": "2019.7.1", + "resolved": "https://registry.npmjs.org/@types/offscreencanvas/-/offscreencanvas-2019.7.1.tgz", + "integrity": "sha512-+HSrJgjBW77ALieQdMJvXhRZUIRN1597L+BKvsyeiIlHHERnqjcuOLyodK3auJ3Y3zRezNKtKAhuQWYJfEgFHQ==" + }, + "node_modules/@tensorflow/tfjs/node_modules/@tensorflow/tfjs-data": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-data/-/tfjs-data-4.10.0.tgz", + "integrity": "sha512-71rQ6xSipXdClKja705jrWZkH9ostAYuVZlf7nW2AJXUCzhrGsJAkcHag4m568mDFoAqfQQTBy4Gk26h0/Y+Pg==", + "dependencies": { + "@types/node-fetch": "^2.1.2", + "node-fetch": "~2.6.1", + "string_decoder": "^1.3.0" + }, + "peerDependencies": { + "@tensorflow/tfjs-core": "4.10.0", + "seedrandom": "^3.0.5" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/@tensorflow/tfjs-layers": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-layers/-/tfjs-layers-4.10.0.tgz", + "integrity": "sha512-SLZWnuDF98WmmJQ5NhsXJFlJPwoKxfLowvAHTlLz+Q1Po4juZVZ+BkatRsqrI2sA2B0IIu2TJp4VEAFWMqzTTg==", + "peerDependencies": { + "@tensorflow/tfjs-core": "4.10.0" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/@webgpu/types": { + "version": "0.1.30", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.30.tgz", + "integrity": "sha512-9AXJSmL3MzY8ZL//JjudA//q+2kBRGhLBFpkdGksWIuxrMy81nFrCzj2Am+mbh8WoU6rXmv7cY5E3rdlyru2Qg==" + }, + "node_modules/@tensorflow/tfjs/node_modules/cliui": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", + "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0", + "wrap-ansi": "^7.0.0" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/@tensorflow/tfjs/node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/yargs": { + "version": "16.2.0", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", + "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "dependencies": { + "cliui": "^7.0.2", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^20.2.2" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/@tensorflow/tfjs/node_modules/yargs-parser": { + "version": "20.2.9", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "engines": { + "node": ">=10" + } + }, "node_modules/@tokenizer/token": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz", "integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==" }, + "node_modules/@types/long": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz", + "integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==" + }, "node_modules/@types/node": { "version": "16.9.1", "resolved": "https://registry.npmjs.org/@types/node/-/node-16.9.1.tgz", "integrity": "sha512-QpLcX9ZSsq3YYUUnD3nFDY8H7wctAhQj/TFKL8Ya8v5fMm3CFXxo8zStsLAl780ltoYoo1WvKUVGBQK+1ifr7g==" }, + "node_modules/@types/node-fetch": { + "version": "2.6.4", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.4.tgz", + "integrity": "sha512-1ZX9fcN4Rvkvgv4E6PAY5WXUFWFcRWxZa3EW83UjycOB9ljJCedb2CupIP4RZMEwF/M3eTcCihbBRgwtGbg5Rg==", + "dependencies": { + "@types/node": "*", + "form-data": "^3.0.0" + } + }, + "node_modules/@types/node-fetch/node_modules/form-data": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-3.0.1.tgz", + "integrity": "sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/@types/offscreencanvas": { + "version": "2019.3.0", + "resolved": "https://registry.npmjs.org/@types/offscreencanvas/-/offscreencanvas-2019.3.0.tgz", + "integrity": "sha512-esIJx9bQg+QYF0ra8GnvfianIY8qWB0GBx54PK5Eps6m+xTj86KLavHv6qDhzKcu5UUOgNfJ2pWaIIV7TRUd9Q==" + }, + "node_modules/@types/seedrandom": { + "version": "2.4.30", + "resolved": "https://registry.npmjs.org/@types/seedrandom/-/seedrandom-2.4.30.tgz", + "integrity": "sha512-AnxLHewubLVzoF/A4qdxBGHCKifw8cY32iro3DQX9TPcetE95zBeVt3jnsvtvAUf1vwzMfwzp4t/L2yqPlnjkQ==" + }, + "node_modules/@types/webgl-ext": { + "version": "0.0.30", + "resolved": "https://registry.npmjs.org/@types/webgl-ext/-/webgl-ext-0.0.30.tgz", + "integrity": "sha512-LKVgNmBxN0BbljJrVUwkxwRYqzsAEPcZOe6S2T6ZaBDIrFp0qu4FNlpc5sM1tGbXUYFgdVQIoeLk1Y1UoblyEg==", + "peer": true + }, + "node_modules/@webgpu/types": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.16.tgz", + "integrity": "sha512-9E61voMP4+Rze02jlTXud++Htpjyyk8vw5Hyw9FGRrmhHQg2GqbuOfwf5Klrb8vTxc2XWI3EfO7RUHMpxTj26A==", + "peer": true + }, "node_modules/@xmldom/xmldom": { "version": "0.8.9", "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.9.tgz", @@ -748,6 +1009,14 @@ "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", "integrity": "sha512-klpgFSWLW1ZEs8svjfb7g4qWY0YS5imI82dTg+QahUvJ8YqAY0P10Uk8tTyh9ZGuYEZEMaeJYCF5BFuX552hsw==" }, + "node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, "node_modules/array-flatten": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", @@ -767,6 +1036,11 @@ "node": ">=8" } }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, "node_modules/at-least-node": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/at-least-node/-/at-least-node-1.0.0.tgz", @@ -776,6 +1050,16 @@ "node": ">= 4.0.0" } }, + "node_modules/axios": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.5.0.tgz", + "integrity": "sha512-D4DdjDo5CY50Qms0qGQTTw6Q44jl7zRwY7bthds06pUGfChBCTcQs+N743eFWGEd6pRTMd6A+I87aWyFV5wiZQ==", + "dependencies": { + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -856,6 +1140,11 @@ "node": ">= 0.8" } }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" + }, "node_modules/braces": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", @@ -947,7 +1236,6 @@ "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", - "dev": true, "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -959,6 +1247,42 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/cheerio": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz", + "integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "htmlparser2": "^8.0.1", + "parse5": "^7.0.0", + "parse5-htmlparser2-tree-adapter": "^7.0.0" + }, + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/chownr": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", @@ -994,6 +1318,17 @@ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/command-exists": { "version": "1.2.9", "resolved": "https://registry.npmjs.org/command-exists/-/command-exists-1.2.9.tgz", @@ -1104,6 +1439,16 @@ "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==" }, + "node_modules/core-js": { + "version": "3.29.1", + "resolved": "https://registry.npmjs.org/core-js/-/core-js-3.29.1.tgz", + "integrity": "sha512-+jwgnhg6cQxKYIIjGtAHq2nwUOolo9eoFZ4sHfUH09BLXBgxnH4gA0zEd+t+BO2cNB8idaBtZFcFTRjQJRJmAw==", + "hasInstallScript": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/core-js" + } + }, "node_modules/core-util-is": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", @@ -1137,6 +1482,32 @@ "http-errors": "^2.0.0" } }, + "node_modules/css-select": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", + "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/debug": { "version": "2.6.9", "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", @@ -1177,6 +1548,14 @@ "node": ">=8" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/depd": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", @@ -1223,11 +1602,70 @@ "node": ">=8" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, "node_modules/dom-walk": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/dom-walk/-/dom-walk-0.1.2.tgz", "integrity": "sha512-6QvTW9mrGeIegrFXdtQi9pk7O/nSK6lSdXW2eqUspN5LWD7UTji2Fqw5V2YLjBpHEoU9Xl/eUWNpDeZvoyOv2w==" }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/dotenv": { + "version": "8.6.0", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-8.6.0.tgz", + "integrity": "sha512-IrPdXQsk2BbzvCBGBOTmmSH5SodmqZNt4ERAZDmW4CT+tL8VtvinqywuANaFu4bOMWki16nqf0e4oC0QIaDr/g==", + "engines": { + "node": ">=10" + } + }, "node_modules/ee-first": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", @@ -1255,6 +1693,17 @@ "once": "^1.4.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/escalade": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", @@ -1268,6 +1717,14 @@ "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==" }, + "node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "engines": { + "node": ">=0.8.0" + } + }, "node_modules/etag": { "version": "1.8.1", "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", @@ -1445,6 +1902,38 @@ "node": ">= 0.8" } }, + "node_modules/follow-redirects": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", + "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -1580,6 +2069,11 @@ "resolved": "https://registry.npmjs.org/google-translate-api-browser/-/google-translate-api-browser-3.0.1.tgz", "integrity": "sha512-KTLodkyGBWMK9IW6QIeJ2zCuju4Z0CLpbkADKo+yLhbSTD4l+CXXpQ/xaynGVAzeBezzJG6qn8MLeqOq3SmW0A==" }, + "node_modules/gpt-3-encoder": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/gpt-3-encoder/-/gpt-3-encoder-1.1.4.tgz", + "integrity": "sha512-fSQRePV+HUAhCn7+7HL7lNIXNm6eaFWFbNLOOGtmSJ0qJycyQvj60OvRlH7mee8xAMjBDNRdMXlMwjAbMTDjkg==" + }, "node_modules/gpt3-tokenizer": { "version": "1.1.5", "resolved": "https://registry.npmjs.org/gpt3-tokenizer/-/gpt3-tokenizer-1.1.5.tgz", @@ -1612,7 +2106,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true, "engines": { "node": ">=8" } @@ -1639,6 +2132,24 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, "node_modules/http-errors": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", @@ -1912,6 +2423,71 @@ "node": ">=4" } }, + "node_modules/json-colorizer": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/json-colorizer/-/json-colorizer-2.2.2.tgz", + "integrity": "sha512-56oZtwV1piXrQnRNTtJeqRv+B9Y/dXAYLqBBaYl/COcUdoZxgLBLAO88+CnkbT6MxNs0c5E9mPBIb2sFcNz3vw==", + "dependencies": { + "chalk": "^2.4.1", + "lodash.get": "^4.4.2" + } + }, + "node_modules/json-colorizer/node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dependencies": { + "color-convert": "^1.9.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/json-colorizer/node_modules/chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/json-colorizer/node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/json-colorizer/node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" + }, + "node_modules/json-colorizer/node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "engines": { + "node": ">=4" + } + }, + "node_modules/json-colorizer/node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/json5": { "version": "2.2.3", "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", @@ -1955,6 +2531,16 @@ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" }, + "node_modules/lodash.get": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz", + "integrity": "sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==" + }, + "node_modules/long": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz", + "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==" + }, "node_modules/lru-cache": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", @@ -2204,6 +2790,17 @@ } } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -2269,6 +2866,23 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/openai": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-3.3.0.tgz", + "integrity": "sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==", + "dependencies": { + "axios": "^0.26.0", + "form-data": "^4.0.0" + } + }, + "node_modules/openai/node_modules/axios": { + "version": "0.26.1", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz", + "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", + "dependencies": { + "follow-redirects": "^1.14.8" + } + }, "node_modules/p-is-promise": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/p-is-promise/-/p-is-promise-3.0.0.tgz", @@ -2319,6 +2933,29 @@ "resolved": "https://registry.npmjs.org/parse-headers/-/parse-headers-2.0.5.tgz", "integrity": "sha512-ft3iAoLOB/MlwbNXgzy43SWGP6sQki2jQvAyBg/zDFAgr9bfNWZIUj42Kw2eJIl8kEi4PbgE6U1Zau/HwI75HA==" }, + "node_modules/parse5": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz", + "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==", + "dependencies": { + "entities": "^4.4.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz", + "integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==", + "dependencies": { + "domhandler": "^5.0.2", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -2650,6 +3287,11 @@ "node": ">= 0.10" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "node_modules/pump": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", @@ -2899,6 +3541,11 @@ "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==" }, + "node_modules/seedrandom": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/seedrandom/-/seedrandom-3.0.5.tgz", + "integrity": "sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg==" + }, "node_modules/semver": { "version": "7.5.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", @@ -3079,6 +3726,11 @@ "resolved": "https://registry.npmjs.org/sliced/-/sliced-1.0.1.tgz", "integrity": "sha512-VZBmZP8WU3sMOZm1bdgTadsQbcscK0UM8oKxKVBs4XAhUo2Xxzm/OFMGBkPusxw9xL3Uy8LrzEqGqJhclsr0yA==" }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==" + }, "node_modules/statuses": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", @@ -3165,7 +3817,6 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, "dependencies": { "has-flag": "^4.0.0" }, @@ -3372,6 +4023,14 @@ "node": ">= 0.4.0" } }, + "node_modules/uuid": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", + "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/vary": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", @@ -3380,6 +4039,24 @@ "node": ">= 0.8" } }, + "node_modules/vectra": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/vectra/-/vectra-0.2.2.tgz", + "integrity": "sha512-Z1d29Zil+dlA9/Qz4VJB3zcWjIARY8QH5xQEnHmGgmTUP58cPRV8NK7P0QH91IRs0RvAWrYiQf9Y5JqQo0vJlQ==", + "dependencies": { + "axios": "^1.3.4", + "cheerio": "^1.0.0-rc.12", + "dotenv": "^8.2.0", + "gpt-3-encoder": "1.1.4", + "json-colorizer": "^2.2.2", + "openai": "^3.2.1", + "uuid": "^9.0.0", + "yargs": "^17.7.2" + }, + "bin": { + "vectra": "bin/vectra.js" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", diff --git a/package.json b/package.json index 0dc6193d3..aa74e21a5 100644 --- a/package.json +++ b/package.json @@ -3,6 +3,8 @@ "@agnai/sentencepiece-js": "^1.1.1", "@agnai/web-tokenizers": "^0.1.3", "@dqbd/tiktoken": "^1.0.2", + "@tensorflow-models/universal-sentence-encoder": "^1.3.3", + "@tensorflow/tfjs": "^4.10.0", "command-exists": "^1.2.9", "compression": "^1", "cookie-parser": "^1.4.6", @@ -31,6 +33,7 @@ "sanitize-filename": "^1.6.3", "simple-git": "^3.19.1", "uniqolor": "^1.1.0", + "vectra": "^0.2.2", "webp-converter": "2.3.2", "write-file-atomic": "^5.0.1", "ws": "^8.13.0", diff --git a/server.js b/server.js index 4c0497669..18134a04b 100644 --- a/server.js +++ b/server.js @@ -5825,3 +5825,6 @@ app.post('/get_character_assets_list', jsonParser, async (request, response) => return response.sendStatus(500); } }); + +// Vector storage DB +require('./src/vectors').registerEndpoints(app, jsonParser); diff --git a/src/vectors.js b/src/vectors.js new file mode 100644 index 000000000..f09444ec9 --- /dev/null +++ b/src/vectors.js @@ -0,0 +1,192 @@ +const express = require('express'); +const vectra = require('vectra'); +const path = require('path'); +const sanitize = require('sanitize-filename'); +require('@tensorflow/tfjs'); +const encoder = require('@tensorflow-models/universal-sentence-encoder'); + +/** + * Lazy loading class for the embedding model. + */ +class EmbeddingModel { + /** + * @type {encoder.UniversalSentenceEncoder} - The embedding model + */ + #model; + + async get() { + if (!this.#model) { + this.model = await encoder.load(); + } + + return this.#model; + } +} + +/** + * Hard limit on the number of results to return from the vector search. + */ +const TOP_K = 100; +const model = new EmbeddingModel(); + +/** + * Gets the index for the vector collection + * @param {string} collectionId - The collection ID + * @returns {Promise<vectra.LocalIndex>} - The index for the collection + */ +async function getIndex(collectionId) { + const index = new vectra.LocalIndex(path.join(process.cwd(), 'vectors', sanitize(collectionId))); + + if (!await index.isIndexCreated()) { + await index.createIndex(); + } + + return index; +} + +/** + * Inserts items into the vector collection + * @param {string} collectionId - The collection ID + * @param {{ hash: number; text: string; }[]} items - The items to insert + */ +async function insertVectorItems(collectionId, items) { + const index = await getIndex(collectionId); + const use = await model.get(); + + await index.beginUpdate(); + + for (const item of items) { + const text = item.text; + const hash = item.hash; + const tensor = await use.embed(text); + const vector = Array.from(await tensor.data()); + await index.upsertItem({ vector: vector, metadata: { hash, text } }); + } + + await index.endUpdate(); +} + +/** + * Gets the hashes of the items in the vector collection + * @param {string} collectionId - The collection ID + * @returns {Promise<number[]>} - The hashes of the items in the collection + */ +async function getSavedHashes(collectionId) { + const index = await getIndex(collectionId); + + const items = await index.listItems(); + const hashes = items.map(x => Number(x.metadata.hash)); + + return hashes; +} + +/** + * Deletes items from the vector collection by hash + * @param {string} collectionId - The collection ID + * @param {number[]} hashes - The hashes of the items to delete + */ +async function deleteVectorItems(collectionId, hashes) { + const index = await getIndex(collectionId); + const items = await index.listItemsByMetadata({ hash: { '$in': hashes } }); + + await index.beginUpdate(); + + for (const item of items) { + await index.deleteItem(item.id); + } + + await index.endUpdate(); +} + +/** + * Gets the hashes of the items in the vector collection that match the search text + * @param {string} collectionId + * @param {string} searchText + * @returns {Promise<number[]>} - The hashes of the items that match the search text + */ +async function queryCollection(collectionId, searchText) { + const index = await getIndex(collectionId); + const use = await model.get(); + const tensor = await use.embed(searchText); + const vector = Array.from(await tensor.data()); + + const result = await index.queryItems(vector, TOP_K); + const hashes = result.map(x => Number(x.item.metadata.hash)); + return hashes; +} + +/** + * Registers the endpoints for the vector API + * @param {express.Express} app - Express app + * @param {any} jsonParser - Express JSON parser + */ +async function registerEndpoints(app, jsonParser) { + app.post('/api/vector/query', jsonParser, async (req, res) => { + try { + if (!req.body.collectionId || !req.body.searchText) { + return res.sendStatus(400); + } + + const collectionId = String(req.body.collectionId); + const searchText = String(req.body.searchText); + + const results = await queryCollection(collectionId, searchText); + return res.json(results); + } catch (error) { + console.error(error); + return res.sendStatus(500); + } + }); + + app.post('/api/vector/insert', jsonParser, async (req, res) => { + try { + if (!Array.isArray(req.body.items) || !req.body.collectionId) { + return res.sendStatus(400); + } + + const collectionId = String(req.body.collectionId); + const items = req.body.items.map(x => ({ hash: x.hash, text: x.text })); + + await insertVectorItems(collectionId, items); + return res.sendStatus(200); + } catch (error) { + console.error(error); + return res.sendStatus(500); + } + }); + + app.post('/api/vector/list', jsonParser, async (req, res) => { + try { + if (!req.body.collectionId) { + return res.sendStatus(400); + } + + const collectionId = String(req.body.collectionId); + + const hashes = await getSavedHashes(collectionId); + return res.json(hashes); + } catch (error) { + console.error(error); + return res.sendStatus(500); + } + }); + + app.post('/api/vector/delete', jsonParser, async (req, res) => { + try { + if (!Array.isArray(req.body.hashes) || !req.body.collectionId) { + return res.sendStatus(400); + } + + const collectionId = String(req.body.collectionId); + const hashes = req.body.hashes.map(x => Number(x)); + + await deleteVectorItems(collectionId, hashes); + return res.sendStatus(200); + } catch (error) { + console.error(error); + return res.sendStatus(500); + } + }); +} + +module.exports = { registerEndpoints };