2023-09-14 23:40:13 +03:00
|
|
|
// Task identifier handed to getPipeline() when requesting the embedding pipeline.
const TASK = 'feature-extraction';
|
|
|
|
|
|
|
|
/**
 * Computes an embedding vector for a single piece of text.
 *
 * The helper module is loaded with a dynamic import() at call time, its default
 * export is asked for the pipeline registered under TASK, and the pipeline is
 * invoked with mean pooling and normalization requested.
 *
 * @param {string} text - The text to vectorize
 * @returns {Promise<number[]>} - The pipeline output's `data`, copied into a plain array of numbers
 */
async function getTransformersVector(text) {
    const { default: transformers } = await import('./transformers.mjs');
    const extractor = await transformers.getPipeline(TASK);
    const { data } = await extractor(text, { pooling: 'mean', normalize: true });
    // Copy the pipeline's `data` into a regular Array before returning it.
    return Array.from(data);
}
|
|
|
|
|
2024-01-24 13:56:13 +02:00
|
|
|
/**
 * Computes embedding vectors for a list of texts.
 *
 * Texts are handled strictly one after another: each vector is awaited before
 * the next text is submitted, so the output order matches the input order.
 * If any single vectorization rejects, the rejection propagates and the
 * remaining texts are not processed.
 *
 * @param {string[]} texts - The texts to vectorize
 * @returns {Promise<number[][]>} - One vector (array of numbers) per input text, in input order
 */
async function getTransformersBatchVector(texts) {
    const vectors = [];

    for (const entry of texts) {
        const vector = await getTransformersVector(entry);
        vectors.push(vector);
    }

    return vectors;
}
|
|
|
|
|
2023-09-14 23:40:13 +03:00
|
|
|
// Public API of this module: single-text and batch vectorization helpers.
module.exports = { getTransformersVector, getTransformersBatchVector };
|