Add support for Office plugin
This commit is contained in:
parent
ca89be8930
commit
4264d170e2
|
@ -32,6 +32,7 @@ import {
|
|||
getStringHash,
|
||||
humanFileSize,
|
||||
saveBase64AsFile,
|
||||
extractTextFromOffice,
|
||||
} from './utils.js';
|
||||
import { extension_settings, renderExtensionTemplateAsync, saveMetadataDebounced } from './extensions.js';
|
||||
import { POPUP_RESULT, POPUP_TYPE, callGenericPopup } from './popup.js';
|
||||
|
@ -46,6 +47,12 @@ import { ScraperManager } from './scrapers.js';
|
|||
* @property {string} [text] File text
|
||||
*/
|
||||
|
||||
/**
 * @callback ConverterFunction
 * @param {File} file File object
 * @returns {Promise<string>} Converted file text
 */
|
||||
|
||||
/** File size limit in bytes (10 MB). */
const fileSizeLimit = 10 * 1024 * 1024;
|
||||
const ATTACHMENT_SOURCE = {
|
||||
GLOBAL: 'global',
|
||||
|
@ -53,20 +60,60 @@ const ATTACHMENT_SOURCE = {
|
|||
CHARACTER: 'character',
|
||||
};
|
||||
|
||||
/**
 * Lookup table of file converters, keyed by MIME type.
 * Each value takes a file and resolves to its text content.
 * @type {Record<string, ConverterFunction>}
 */
const converters = {
    'application/pdf': extractTextFromPDF,
    'text/html': extractTextFromHTML,
    'text/markdown': extractTextFromMarkdown,
    'application/epub+zip': extractTextFromEpub,
    // All Office Open XML and OpenDocument types share a single extractor.
    ...Object.fromEntries([
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        'application/vnd.oasis.opendocument.text',
        'application/vnd.oasis.opendocument.presentation',
        'application/vnd.oasis.opendocument.spreadsheet',
    ].map((officeMimeType) => [officeMimeType, extractTextFromOffice])),
};
|
||||
|
||||
/**
 * Finds the key in the converters object that applies to a MIME type.
 *
 * An exact match always takes precedence. Otherwise the first registered
 * wildcard key (a key ending in `*`, e.g. `text/*`) whose prefix matches
 * the type is returned.
 * @param {string} type MIME type
 * @returns {string|undefined} Matching key, or undefined if no converter applies
 */
function findConverterKey(type) {
    // A non-string type can never match a key; bail out instead of throwing in startsWith.
    if (typeof type !== 'string') {
        return undefined;
    }

    const keys = Object.keys(converters);

    // Exact matches win regardless of the order in which converters were registered,
    // so a broad wildcard added earlier cannot shadow a more specific converter.
    if (keys.includes(type)) {
        return type;
    }

    // Fall back to wildcard keys: everything before the trailing '*' is a prefix.
    return keys.find((key) => key.endsWith('*') && type.startsWith(key.slice(0, -1)));
}
|
||||
|
||||
/**
 * Determines if the file type has a converter function.
 *
 * Delegates to findConverterKey so that the answer always agrees with
 * getConverter, including for wildcard registrations.
 * @param {string} type MIME type
 * @returns {boolean} True if the file type is convertible, false otherwise.
 */
function isConvertible(type) {
    return Boolean(findConverterKey(type));
}
|
||||
|
||||
/**
 * Looks up the converter function registered for a file type.
 * @param {string} type MIME type
 * @returns {ConverterFunction} Converter function; a falsy value (the failed
 * key lookup result) is returned when no converter matches.
 */
function getConverter(type) {
    const matchedKey = findConverterKey(type);

    // No usable key: hand back the falsy lookup result unchanged.
    if (!matchedKey) {
        return matchedKey;
    }

    return converters[matchedKey];
}
|
||||
|
||||
/**
|
||||
|
@ -152,7 +199,7 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
|
|||
|
||||
if (isConvertible(file.type)) {
|
||||
try {
|
||||
const converter = converters[file.type];
|
||||
const converter = getConverter(file.type);
|
||||
const fileText = await converter(file);
|
||||
base64Data = window.btoa(unescape(encodeURIComponent(fileText)));
|
||||
} catch (error) {
|
||||
|
@ -748,7 +795,7 @@ async function openAttachmentManager() {
|
|||
}
|
||||
|
||||
async function renderAttachments() {
|
||||
/** @type {FileAttachment[]} */
|
||||
/** @type {FileAttachment[]} */
|
||||
const globalAttachments = extension_settings.attachments ?? [];
|
||||
/** @type {FileAttachment[]} */
|
||||
const chatAttachments = chat_metadata.attachments ?? [];
|
||||
|
@ -855,7 +902,7 @@ export async function uploadFileAttachmentToServer(file, target) {
|
|||
|
||||
if (isConvertible(file.type)) {
|
||||
try {
|
||||
const converter = converters[file.type];
|
||||
const converter = getConverter(file.type);
|
||||
const fileText = await converter(file);
|
||||
base64Data = window.btoa(unescape(encodeURIComponent(fileText)));
|
||||
} catch (error) {
|
||||
|
@ -950,6 +997,26 @@ export function getDataBankAttachmentsForSource(source) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Adds a converter function for a MIME type to the converters table.
 * Invalid arguments and already-registered MIME types are rejected with a
 * console error, leaving the table unchanged.
 * @param {string} mimeType MIME type
 * @param {ConverterFunction} converter Function to convert file
 * @returns {void}
 */
export function registerFileConverter(mimeType, converter) {
    const hasValidArguments = typeof mimeType === 'string' && typeof converter === 'function';

    if (!hasValidArguments) {
        console.error('Invalid converter registration');
        return;
    }

    // Existing registrations are never overwritten.
    const isAlreadyRegistered = Object.keys(converters).some((key) => key === mimeType);

    if (isAlreadyRegistered) {
        console.error('Converter already registered');
        return;
    }

    converters[mimeType] = converter;
}
|
||||
|
||||
jQuery(function () {
|
||||
$(document).on('click', '.mes_hide', async function () {
|
||||
const messageBlock = $(this).closest('.mes');
|
||||
|
|
|
@ -179,7 +179,7 @@ class FileScraper {
|
|||
return new Promise(resolve => {
|
||||
const fileInput = document.createElement('input');
|
||||
fileInput.type = 'file';
|
||||
fileInput.accept = '.txt, .md, .pdf, .html, .htm, .epub';
|
||||
fileInput.accept = '*/*';
|
||||
fileInput.multiple = true;
|
||||
fileInput.onchange = () => resolve(Array.from(fileInput.files));
|
||||
fileInput.click();
|
||||
|
|
|
@ -1355,6 +1355,47 @@ export async function extractTextFromEpub(blob) {
|
|||
return postProcessText(text.join('\n'), false);
|
||||
}
|
||||
|
||||
/**
 * Extracts text from an Office document using the server plugin.
 *
 * The plugin's probe endpoint is queried first; if it does not answer with a
 * successful status, the function fails with a message pointing at the missing
 * plugin. Otherwise the file is sent to the parse endpoint as base64 and the
 * response text is post-processed.
 * @param {File} blob File to extract text from
 * @returns {Promise<string>} A promise that resolves to the extracted text.
 * @throws {Error} If the plugin is unavailable or the parse request fails.
 */
export async function extractTextFromOffice(blob) {
    // Any failure while probing is treated the same as "plugin not available".
    let pluginIsReachable = false;
    try {
        const probeResponse = await fetch('/api/plugins/office/probe', {
            method: 'POST',
            headers: getRequestHeaders(),
        });
        pluginIsReachable = probeResponse.ok;
    } catch {
        pluginIsReachable = false;
    }

    if (!pluginIsReachable) {
        throw new Error('Importing Office documents requires a server plugin. Please refer to the documentation for more information.');
    }

    const encodedFile = await getBase64Async(blob);
    const parseResponse = await fetch('/api/plugins/office/parse', {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({ data: encodedFile }),
    });

    if (!parseResponse.ok) {
        throw new Error('Failed to parse the Office document');
    }

    const extractedText = await parseResponse.text();
    return postProcessText(extractedText, false);
}
|
||||
|
||||
/**
|
||||
* Sets a value in an object by a path.
|
||||
* @param {object} obj Object to set value in
|
||||
|
|
Loading…
Reference in New Issue