Refactor transformers.js usage

2025-06-05 21:59:27 +02:00 · 2023-09-14 23:12:33 +03:00
parent cb8d9ac71b
commit 0cc048cb64
7 changed files with 167 additions and 171 deletions
--- a/default/config.conf
+++ b/default/config.conf
@@ -21,6 +21,8 @@ const extras = {
    classificationModel: 'Cohee/distilbert-base-uncased-go-emotions-onnx',
    // Image captioning model. HuggingFace ID of a model in ONNX format.
    captioningModel: 'Xenova/vit-gpt2-image-captioning',
    // Feature extraction model. HuggingFace ID of a model in ONNX format.
    embeddingModel: 'Xenova/all-mpnet-base-v2',
 };
 // Request overrides for additional headers
--- a/server.js
+++ b/server.js
@@ -5250,21 +5250,18 @@ app.post('/get_character_assets_list', jsonParser, async (request, response) =>
 // Stable Diffusion generation
 require('./src/stable-diffusion').registerEndpoints(app, jsonParser);
 // LLM and SD Horde generation
 require('./src/horde').registerEndpoints(app, jsonParser);
 // Vector storage DB
 require('./src/vectors').registerEndpoints(app, jsonParser);
 // Chat translation
 require('./src/translate').registerEndpoints(app, jsonParser);
 // Emotion classification
-import('./src/classify.mjs').then(module => {
-    module.default.registerEndpoints(app, jsonParser);
-}).catch(err => {
-    console.error(err);
-});
+require('./src/classify').registerEndpoints(app, jsonParser);
+
 // Image captioning
-import('./src/caption.mjs').then(module => {
-    module.default.registerEndpoints(app, jsonParser);
-}).catch(err => {
-    console.error(err);
-});
+require('./src/caption').registerEndpoints(app, jsonParser);
--- a/src/caption.js
+++ b/src/caption.js
@@ -0,0 +1,29 @@
 // transformers.js task name used to look up the captioning pipeline
 const TASK = 'image-to-text';
 /**
 * Registers the image captioning endpoint on the Express app.
 *
 * POST /api/extra/caption reads `image` from the JSON body, converts it to a raw image,
 * runs the captioning pipeline on it and answers with `{ caption }`.
 * Any failure is logged and reported to the client as HTTP 500.
 * @param {import("express").Express} app
 * @param {any} jsonParser Middleware passed to the route ahead of the handler
 */
 function registerEndpoints(app, jsonParser) {
    app.post('/api/extra/caption', jsonParser, async (req, res) => {
        try {
            const { image } = req.body;
            // The helper is an ES module, so it is loaded with a dynamic import
            const module = await import('./transformers.mjs');
            // getRawImage is async: without `await` the pipeline would receive a Promise
            const rawImage = await module.default.getRawImage(image);
            const pipe = await module.default.getPipeline(TASK);
            const result = await pipe(rawImage);
            const text = result[0].generated_text;
            console.log('Image caption:', text);
            return res.json({ caption: text });
        } catch (error) {
            console.error(error);
            return res.sendStatus(500);
        }
    });
 }
 // CommonJS export: exposes the route registration function of this module
 module.exports = {
    registerEndpoints,
 };
--- a/src/caption.mjs
+++ b/src/caption.mjs
@@ -1,72 +0,0 @@
 import { pipeline, env, RawImage } from 'sillytavern-transformers';
 import path from 'path';
 import { getConfig } from './util.js';
 // Configure the ONNX WASM backend in one step:
 //  - a single thread, to avoid issues on Android
 //  - WASM binaries served from the local "dist" folder, to avoid CDN connections
 Object.assign(env.backends.onnx.wasm, {
    numThreads: 1,
    wasmPaths: `${path.join(process.cwd(), 'dist')}${path.sep}`,
 });
 /**
 * Lazily creates the image captioning pipeline and keeps it for later calls.
 */
 class PipelineAccessor {
    /**
     * Pipeline instance; unset until the first successful `get()`.
     * @type {import("sillytavern-transformers").ImageToTextPipeline}
     */
    pipe;
    /**
     * Returns the stored pipeline, creating it on the first call.
     * @returns {Promise<import("sillytavern-transformers").ImageToTextPipeline>}
     */
    async get() {
        if (this.pipe) {
            return this.pipe;
        }
        const options = { cache_dir: path.join(process.cwd(), 'cache'), quantized: true };
        const modelId = this.getCaptioningModel();
        this.pipe = await pipeline('image-to-text', modelId, options);
        return this.pipe;
    }
    /**
     * Resolves the captioning model ID from `extras.captioningModel` of the config,
     * falling back to the built-in default when it is empty or the config cannot be read.
     * @returns {string}
     */
    getCaptioningModel() {
        const fallbackModel = 'Xenova/vit-gpt2-image-captioning';
        let configuredModel;
        try {
            configuredModel = getConfig()?.extras?.captioningModel;
        } catch (error) {
            console.warn('Failed to read config.conf, using default captioning model.');
            return fallbackModel;
        }
        return configuredModel ? configuredModel : fallbackModel;
    }
 }
 /**
 * Hooks the image captioning route into the Express app.
 *
 * POST /api/extra/caption takes `{ image }` (a base64 string) and replies with `{ caption }`;
 * errors are logged and answered with HTTP 500.
 * @param {import("express").Express} app
 * @param {any} jsonParser
 */
 function registerEndpoints(app, jsonParser) {
    const accessor = new PipelineAccessor();
    const handleCaption = async (req, res) => {
        try {
            const { image } = req.body;
            // Decode the base64 payload into a Blob that RawImage can read
            const decoded = new Uint8Array(Buffer.from(image, 'base64'));
            const rawImage = await RawImage.fromBlob(new Blob([decoded]));
            const captioner = await accessor.get();
            const output = await captioner(rawImage);
            const caption = output[0].generated_text;
            console.log('Image caption:', caption);
            return res.json({ caption });
        } catch (error) {
            console.error(error);
            return res.sendStatus(500);
        }
    };
    app.post('/api/extra/caption', jsonParser, handleCaption);
 }
 // Default export: exposes the route registration function of this module
 export default {
    registerEndpoints,
 };
--- a/src/classify.js
+++ b/src/classify.js
@@ -0,0 +1,53 @@
 // transformers.js task name used to look up the classification pipeline
 const TASK = 'text-classification';
 /**
 * Registers the emotion classification endpoints on the Express app.
 *
 * - POST /api/extra/classify/labels answers with `{ labels }`, the label names known to the model.
 * - POST /api/extra/classify reads `text` from the JSON body and answers with
 *   `{ classification }`, the pipeline results sorted by descending score.
 *
 * Any failure is logged and reported to the client as HTTP 500.
 * @param {import("express").Express} app
 * @param {any} jsonParser Middleware passed to each route ahead of the handler
 */
 function registerEndpoints(app, jsonParser) {
    // Results are memoized per input text. A Map is used instead of a plain object because the
    // keys come from the request: a text such as "hasOwnProperty" or "__proto__" must not be
    // able to shadow object internals and break later lookups.
    const cache = new Map();

    /**
     * Classifies the text, reusing a previously computed result when available.
     * @param {string} text Text to classify
     * @returns {Promise<any[]>} Pipeline results sorted by descending score
     */
    async function getResult(text) {
        if (cache.has(text)) {
            return cache.get(text);
        }
        // The helper is an ES module, so it is loaded with a dynamic import
        const module = await import('./transformers.mjs');
        const pipe = await module.default.getPipeline(TASK);
        const result = await pipe(text, { topk: 5 });
        result.sort((a, b) => b.score - a.score);
        cache.set(text, result);
        return result;
    }

    app.post('/api/extra/classify/labels', jsonParser, async (req, res) => {
        try {
            const module = await import('./transformers.mjs');
            const pipe = await module.default.getPipeline(TASK);
            const result = Object.keys(pipe.model.config.label2id);
            return res.json({ labels: result });
        } catch (error) {
            console.error(error);
            return res.sendStatus(500);
        }
    });
    app.post('/api/extra/classify', jsonParser, async (req, res) => {
        try {
            const { text } = req.body;
            console.log('Classify input:', text);
            const result = await getResult(text);
            console.log('Classify output:', result);
            return res.json({ classification: result });
        } catch (error) {
            console.error(error);
            return res.sendStatus(500);
        }
    });
 }
 // CommonJS export: exposes the route registration function of this module
 module.exports = {
    registerEndpoints,
 };
--- a/src/classify.mjs
+++ b/src/classify.mjs
@@ -1,89 +0,0 @@
 import { pipeline, env } from 'sillytavern-transformers';
 import path from 'path';
 import { getConfig } from './util.js';
 // Configure the ONNX WASM backend in one step:
 //  - a single thread, to avoid issues on Android
 //  - WASM binaries served from the local "dist" folder, to avoid CDN connections
 Object.assign(env.backends.onnx.wasm, {
    numThreads: 1,
    wasmPaths: `${path.join(process.cwd(), 'dist')}${path.sep}`,
 });
 /**
 * Lazily creates the text classification pipeline and keeps it for later calls.
 */
 class PipelineAccessor {
    /**
     * Pipeline instance; unset until the first successful `get()`.
     * @type {import("sillytavern-transformers").TextClassificationPipeline}
     */
    pipe;
    /**
     * Returns the stored pipeline, creating it on the first call.
     * @returns {Promise<import("sillytavern-transformers").TextClassificationPipeline>}
     */
    async get() {
        if (this.pipe) {
            return this.pipe;
        }
        const options = { cache_dir: path.join(process.cwd(), 'cache'), quantized: true };
        const modelId = this.getClassificationModel();
        this.pipe = await pipeline('text-classification', modelId, options);
        return this.pipe;
    }
    /**
     * Resolves the classification model ID from `extras.classificationModel` of the config,
     * falling back to the built-in default when it is empty or the config cannot be read.
     * @returns {string}
     */
    getClassificationModel() {
        const fallbackModel = 'Cohee/distilbert-base-uncased-go-emotions-onnx';
        let configuredModel;
        try {
            configuredModel = getConfig()?.extras?.classificationModel;
        } catch (error) {
            console.warn('Failed to read config.conf, using default classification model.');
            return fallbackModel;
        }
        return configuredModel ? configuredModel : fallbackModel;
    }
 }
 /**
 * Registers the emotion classification endpoints on the Express app.
 *
 * - POST /api/extra/classify/labels answers with `{ labels }`, the label names known to the model.
 * - POST /api/extra/classify reads `text` from the JSON body and answers with
 *   `{ classification }`, the pipeline results sorted by descending score.
 *
 * Any failure is logged and reported to the client as HTTP 500.
 * @param {import("express").Express} app
 * @param {any} jsonParser Middleware passed to each route ahead of the handler
 */
 function registerEndpoints(app, jsonParser) {
    // Results are memoized per input text. A Map is used instead of a plain object because the
    // keys come from the request: a text such as "hasOwnProperty" or "__proto__" must not be
    // able to shadow object internals and break later lookups.
    const cache = new Map();
    const pipelineAccessor = new PipelineAccessor();

    /**
     * Classifies the text, reusing a previously computed result when available.
     * @param {string} text Text to classify
     * @returns {Promise<any[]>} Pipeline results sorted by descending score
     */
    async function getResult(text) {
        if (cache.has(text)) {
            return cache.get(text);
        }
        const pipe = await pipelineAccessor.get();
        const result = await pipe(text, { topk: 5 });
        result.sort((a, b) => b.score - a.score);
        cache.set(text, result);
        return result;
    }

    app.post('/api/extra/classify/labels', jsonParser, async (req, res) => {
        try {
            const pipe = await pipelineAccessor.get();
            const result = Object.keys(pipe.model.config.label2id);
            return res.json({ labels: result });
        } catch (error) {
            console.error(error);
            return res.sendStatus(500);
        }
    });
    app.post('/api/extra/classify', jsonParser, async (req, res) => {
        try {
            const { text } = req.body;
            console.log('Classify input:', text);
            const result = await getResult(text);
            console.log('Classify output:', result);
            return res.json({ classification: result });
        } catch (error) {
            console.error(error);
            return res.sendStatus(500);
        }
    });
 }
 // Default export: exposes the route registration function of this module
 export default {
    registerEndpoints,
 };
--- a/src/transformers.mjs
+++ b/src/transformers.mjs
@@ -0,0 +1,76 @@
 import { pipeline, env, RawImage } from 'sillytavern-transformers';
 import { getConfig } from './util.js';
 import path from 'path';
 import _ from 'lodash';
 // Apply the ONNX backend settings as soon as this module is evaluated
 configureTransformers();
 /**
 * Adjusts the ONNX WASM backend settings on the `env` object imported from the library.
 */
 function configureTransformers() {
    const wasmSettings = env.backends.onnx.wasm;
    // A single thread avoids issues on Android
    wasmSettings.numThreads = 1;
    // Point at the local "dist" folder (with trailing separator) to avoid CDN connections
    wasmSettings.wasmPaths = `${path.join(process.cwd(), 'dist')}${path.sep}`;
 }
 /**
 * Registry of the supported transformers.js tasks, keyed by task name.
 * Each entry holds:
 *  - defaultModel: model ID used when the config supplies no override
 *  - pipeline: pipeline stored for reuse, `null` until it has been created
 *  - configField: dot-separated path of the model override inside the config
 */
 const tasks = {
    'text-classification': {
        defaultModel: 'Cohee/distilbert-base-uncased-go-emotions-onnx',
        pipeline: null,
        configField: 'extras.classificationModel',
    },
    'image-to-text': {
        defaultModel: 'Xenova/vit-gpt2-image-captioning',
        pipeline: null,
        configField: 'extras.captioningModel',
    },
    'feature-extraction': {
        defaultModel: 'Xenova/all-mpnet-base-v2',
        pipeline: null,
        configField: 'extras.embeddingModel',
    },
 };
 /**
 * Converts a base64-encoded image into a RawImage.
 * @param {string} image Base64 string holding the image bytes
 * @returns {Promise<RawImage>} Result of `RawImage.fromBlob` for the decoded bytes
 */
 async function getRawImage(image) {
    // base64 string -> bytes -> Blob, the input RawImage.fromBlob is given
    const decoded = new Uint8Array(Buffer.from(image, 'base64'));
    const imageBlob = new Blob([decoded]);
    return await RawImage.fromBlob(imageBlob);
 }
 /**
 * Resolves the model ID to use for a task.
 *
 * The override is looked up in the config under the task's `configField`; when it is
 * missing or empty, or the config cannot be read, the task's `defaultModel` is used.
 * @param {string} task Key of the `tasks` registry
 * @returns {string} Model ID
 */
 function getModelForTask(task) {
    const { defaultModel, configField } = tasks[task];
    try {
        const config = getConfig();
        const model = _.get(config, configField, null);
        return model || defaultModel;
    } catch (error) {
        // Name the actual task: this helper serves every task, not only classification
        console.warn(`Failed to read config.conf, using default model for task ${task}.`);
        return defaultModel;
    }
 }
 /**
 * Placeholder progress handler passed to `pipeline()` as `progress_callback`.
 * Currently does nothing.
 */
 function progressCallback() {
    // TODO: Implement progress callback
    // console.log(arguments);
 }
 /**
 * Returns the pipeline for a task, creating it on first use.
 *
 * The in-flight creation promise is stored in the registry right away, so callers that
 * arrive while the pipeline is still being created share that one load instead of each
 * starting their own. If creation fails, the slot is cleared so a later call can retry.
 * @param {string} task Key of the `tasks` registry
 * @returns {Promise<any>} Pipeline instance for the task
 */
 async function getPipeline(task) {
    const entry = tasks[task];
    if (entry.pipeline) {
        // Either the finished instance or the pending load; both resolve to the instance
        return entry.pipeline;
    }
    const cache_dir = path.join(process.cwd(), 'cache');
    const model = getModelForTask(task);
    console.log('Initializing transformers.js pipeline for task', task, 'with model', model);
    const pending = pipeline(task, model, { cache_dir, quantized: true, progress_callback: progressCallback });
    entry.pipeline = pending;
    try {
        const instance = await pending;
        entry.pipeline = instance;
        return instance;
    } catch (error) {
        // Do not keep a rejected promise around: allow the next call to try again
        entry.pipeline = null;
        throw error;
    }
 }
 // Default export: the helpers that callers reach through `module.default`
 export default {
    getPipeline,
    getRawImage,
 }