From 4264d170e2e8b05132e2cbd1bdd088ddfc9c8cdc Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 21 Apr 2024 16:27:44 +0300 Subject: [PATCH] Add support for Office plugin --- public/scripts/chats.js | 75 ++++++++++++++++++++++++++++++++++++-- public/scripts/scrapers.js | 2 +- public/scripts/utils.js | 41 +++++++++++++++++++++ 3 files changed, 113 insertions(+), 5 deletions(-) diff --git a/public/scripts/chats.js b/public/scripts/chats.js index 71949e829..f08a66e62 100644 --- a/public/scripts/chats.js +++ b/public/scripts/chats.js @@ -32,6 +32,7 @@ import { getStringHash, humanFileSize, saveBase64AsFile, + extractTextFromOffice, } from './utils.js'; import { extension_settings, renderExtensionTemplateAsync, saveMetadataDebounced } from './extensions.js'; import { POPUP_RESULT, POPUP_TYPE, callGenericPopup } from './popup.js'; @@ -46,6 +47,12 @@ import { ScraperManager } from './scrapers.js'; * @property {string} [text] File text */ +/** + * @typedef {function} ConverterFunction + * @param {File} file File object + * @returns {Promise<string>} Converted file text + */ + const fileSizeLimit = 1024 * 1024 * 10; // 10 MB const ATTACHMENT_SOURCE = { GLOBAL: 'global', @@ -53,20 +60,60 @@ const ATTACHMENT_SOURCE = { CHARACTER: 'character', }; +/** + * @type {Record<string, ConverterFunction>} File converters + */ const converters = { 'application/pdf': extractTextFromPDF, 'text/html': extractTextFromHTML, 'text/markdown': extractTextFromMarkdown, 'application/epub+zip': extractTextFromEpub, + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': extractTextFromOffice, + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': extractTextFromOffice, + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': extractTextFromOffice, + 'application/vnd.oasis.opendocument.text': extractTextFromOffice, + 'application/vnd.oasis.opendocument.presentation': extractTextFromOffice, + 
'application/vnd.oasis.opendocument.spreadsheet': extractTextFromOffice, }; +/** + * Finds a matching key in the converters object. + * @param {string} type MIME type + * @returns {string|undefined} Matching key, or undefined if no converter key matches + */ +function findConverterKey(type) { + return Object.keys(converters).find((key) => { + // Match exact type + if (type === key) { + return true; + } + + // Match wildcards + if (key.endsWith('*')) { + return type.startsWith(key.substring(0, key.length - 1)); + } + + return false; + }); +} + /** * Determines if the file type has a converter function. * @param {string} type MIME type * @returns {boolean} True if the file type is convertible, false otherwise. */ function isConvertible(type) { - return Object.keys(converters).includes(type); + return Boolean(findConverterKey(type)); +} + +/** + * Gets the converter function for a file type. + * @param {string} type MIME type + * @returns {ConverterFunction|undefined} Converter function, or undefined if no converter matches the type + */ +function getConverter(type) { + const key = findConverterKey(type); + return key && converters[key]; } /** @@ -152,7 +199,7 @@ export async function populateFileAttachment(message, inputId = 'file_form_input if (isConvertible(file.type)) { try { - const converter = converters[file.type]; + const converter = getConverter(file.type); const fileText = await converter(file); base64Data = window.btoa(unescape(encodeURIComponent(fileText))); } catch (error) { @@ -748,7 +795,7 @@ async function openAttachmentManager() { } async function renderAttachments() { - /** @type {FileAttachment[]} */ + /** @type {FileAttachment[]} */ const globalAttachments = extension_settings.attachments ?? []; /** @type {FileAttachment[]} */ const chatAttachments = chat_metadata.attachments ?? 
[]; @@ -855,7 +902,7 @@ export async function uploadFileAttachmentToServer(file, target) { if (isConvertible(file.type)) { try { - const converter = converters[file.type]; + const converter = getConverter(file.type); const fileText = await converter(file); base64Data = window.btoa(unescape(encodeURIComponent(fileText))); } catch (error) { @@ -950,6 +997,26 @@ export function getDataBankAttachmentsForSource(source) { } } +/** + * Registers a file converter function. + * @param {string} mimeType MIME type; a key ending in '*' matches any type that starts with the preceding prefix + * @param {ConverterFunction} converter Function to convert file + * @returns {void} + */ +export function registerFileConverter(mimeType, converter) { + if (typeof mimeType !== 'string' || typeof converter !== 'function') { + console.error('Invalid converter registration'); + return; + } + + if (Object.keys(converters).includes(mimeType)) { + console.error('Converter already registered'); + return; + } + + converters[mimeType] = converter; +} + jQuery(function () { $(document).on('click', '.mes_hide', async function () { const messageBlock = $(this).closest('.mes'); diff --git a/public/scripts/scrapers.js b/public/scripts/scrapers.js index 7d6457ed2..8a7bf1212 100644 --- a/public/scripts/scrapers.js +++ b/public/scripts/scrapers.js @@ -179,7 +179,7 @@ class FileScraper { return new Promise(resolve => { const fileInput = document.createElement('input'); fileInput.type = 'file'; - fileInput.accept = '.txt, .md, .pdf, .html, .htm, .epub'; + fileInput.accept = '*/*'; fileInput.multiple = true; fileInput.onchange = () => resolve(Array.from(fileInput.files)); fileInput.click(); diff --git a/public/scripts/utils.js b/public/scripts/utils.js index 4102d21c9..37f40ad66 100644 --- a/public/scripts/utils.js +++ b/public/scripts/utils.js @@ -1355,6 +1355,47 @@ export async function extractTextFromEpub(blob) { return postProcessText(text.join('\n'), false); } +/** + * Extracts text from an Office document using the server plugin. 
+ * @param {File} blob File to extract text from + * @returns {Promise<string>} A promise that resolves to the extracted text; rejects if the plugin probe fails or the parse request is not OK. + */ +export async function extractTextFromOffice(blob) { + async function checkPluginAvailability() { + try { + const result = await fetch('/api/plugins/office/probe', { + method: 'POST', + headers: getRequestHeaders(), + }); + + return result.ok; + } catch (error) { + return false; + } + } + + const isPluginAvailable = await checkPluginAvailability(); + + if (!isPluginAvailable) { + throw new Error('Importing Office documents requires a server plugin. Please refer to the documentation for more information.'); + } + + const base64 = await getBase64Async(blob); + + const response = await fetch('/api/plugins/office/parse', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ data: base64 }), + }); + + if (!response.ok) { + throw new Error('Failed to parse the Office document'); + } + + const data = await response.text(); + return postProcessText(data, false); +} + /** * Sets a value in an object by a path. * @param {object} obj Object to set value in