[FEATURE_REQUEST] Sending PDF/HTML files? #1414

This commit is contained in:
Cohee
2023-11-29 17:51:30 +02:00
parent 1ce009b84e
commit e0bf2b8e3e
10 changed files with 74770 additions and 19 deletions

View File

@ -22,7 +22,8 @@
"droll", "droll",
"handlebars", "handlebars",
"highlight.js", "highlight.js",
"localforage" "localforage",
"pdfjs-dist"
] ]
} }
} }

17398
public/lib/pdf.mjs Normal file

File diff suppressed because it is too large Load Diff

1
public/lib/pdf.mjs.map Normal file

File diff suppressed because one or more lines are too long

57124
public/lib/pdf.worker.mjs Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -195,7 +195,7 @@ import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
import { hideLoader, showLoader } from "./scripts/loader.js"; import { hideLoader, showLoader } from "./scripts/loader.js";
import { CharacterContextMenu, BulkEditOverlay } from "./scripts/BulkEditOverlay.js"; import { CharacterContextMenu, BulkEditOverlay } from "./scripts/BulkEditOverlay.js";
import { loadMancerModels } from "./scripts/mancer-settings.js"; import { loadMancerModels } from "./scripts/mancer-settings.js";
import { hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js"; import { getFileAttachment, hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
import { replaceVariableMacros } from "./scripts/variables.js"; import { replaceVariableMacros } from "./scripts/variables.js";
//exporting functions and vars for mods //exporting functions and vars for mods
@ -3019,22 +3019,27 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
coreChat.pop(); coreChat.pop();
} }
coreChat = coreChat.map(chatItem => { coreChat = await Promise.all(coreChat.map(async (chatItem) => {
let message = chatItem.mes; let message = chatItem.mes;
let regexType = chatItem.is_user ? regex_placement.USER_INPUT : regex_placement.AI_OUTPUT; let regexType = chatItem.is_user ? regex_placement.USER_INPUT : regex_placement.AI_OUTPUT;
let options = { isPrompt: true }; let options = { isPrompt: true };
let regexedMessage = getRegexedString(message, regexType, options); let regexedMessage = getRegexedString(message, regexType, options);
if (chatItem.extra?.file?.text) { if (chatItem.extra?.file) {
regexedMessage += `\n\n${chatItem.extra.file.text}`; const fileText = chatItem.extra.file.text || (await getFileAttachment(chatItem.extra.file.url));
if (fileText) {
chatItem.extra.fileStart = regexedMessage.length;
regexedMessage += `\n\n${fileText}`;
}
} }
return { return {
...chatItem, ...chatItem,
mes: regexedMessage, mes: regexedMessage,
}; };
}); }));
// Determine token limit // Determine token limit
let this_max_context = getMaxContextSize(); let this_max_context = getMaxContextSize();

View File

@ -8,14 +8,33 @@ import {
eventSource, eventSource,
event_types, event_types,
getCurrentChatId, getCurrentChatId,
getRequestHeaders,
hideSwipeButtons, hideSwipeButtons,
name2, name2,
saveChatDebounced, saveChatDebounced,
showSwipeButtons, showSwipeButtons,
} from "../script.js"; } from "../script.js";
import { getBase64Async, humanFileSize, saveBase64AsFile } from "./utils.js"; import {
extractTextFromHTML,
extractTextFromMarkdown,
extractTextFromPDF,
getBase64Async,
getStringHash,
humanFileSize,
saveBase64AsFile,
} from "./utils.js";
const fileSizeLimit = 1024 * 1024 * 1; // 1 MB const fileSizeLimit = 1024 * 1024 * 10; // 10 MB
/**
 * Maps a file MIME type to the function that converts a file of that type into plain text.
 * Each converter is called with the selected file and resolves to the extracted text.
 */
const converters = {
    'application/pdf': extractTextFromPDF,
    'text/html': extractTextFromHTML,
    'text/markdown': extractTextFromMarkdown,
};
/**
 * Tells whether a text converter is registered for the given MIME type.
 * @param {string} type MIME type of the file
 * @returns {boolean} True if the converters table has an own entry for the type
 */
function isConvertible(type) {
    // Own-property lookup: inherited keys such as 'toString' must not count as converters
    const hasConverter = Object.prototype.hasOwnProperty.call(converters, type);
    return hasConverter;
}
/** /**
* Mark message as hidden (system message). * Mark message as hidden (system message).
@ -70,7 +89,7 @@ export async function unhideChatMessage(messageId, messageBlock) {
/** /**
* Adds a file attachment to the message. * Adds a file attachment to the message.
* @param {object} message Message object * @param {object} message Message object
* @returns {Promise<void>} * @returns {Promise<void>} A promise that resolves when file is uploaded.
*/ */
export async function populateFileAttachment(message, inputId = 'file_form_input') { export async function populateFileAttachment(message, inputId = 'file_form_input') {
try { try {
@ -81,18 +100,38 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
const file = fileInput.files[0]; const file = fileInput.files[0];
if (!file) return; if (!file) return;
const fileBase64 = await getBase64Async(file);
let base64Data = fileBase64.split(',')[1];
// If file is image // If file is image
if (file.type.startsWith('image/')) { if (file.type.startsWith('image/')) {
const base64Img = await getBase64Async(file);
const base64ImgData = base64Img.split(',')[1];
const extension = file.type.split('/')[1]; const extension = file.type.split('/')[1];
const imageUrl = await saveBase64AsFile(base64ImgData, name2, file.name, extension); const imageUrl = await saveBase64AsFile(base64Data, name2, file.name, extension);
message.extra.image = imageUrl; message.extra.image = imageUrl;
message.extra.inline_image = true; message.extra.inline_image = true;
} else { } else {
const fileText = await file.text(); const slug = getStringHash(file.name);
const uniqueFileName = `${Date.now()}_${slug}.txt`;
if (isConvertible(file.type)) {
try {
const converter = converters[file.type];
const fileText = await converter(file);
base64Data = window.btoa(unescape(encodeURIComponent(fileText)));
} catch (error) {
toastr.error(error, 'Could not convert file');
console.error('Could not convert file', error);
}
}
const fileUrl = await uploadFileAttachment(uniqueFileName, base64Data);
if (!fileUrl) {
return;
}
message.extra.file = { message.extra.file = {
text: fileText, url: fileUrl,
size: file.size, size: file.size,
name: file.name, name: file.name,
}; };
@ -105,6 +144,62 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
} }
} }
/**
 * Sends a base64-encoded file to the server upload endpoint.
 * @param {string} fileName Name to store the file under
 * @param {string} base64Data File contents encoded as base64
 * @returns {Promise<string>} Path of the stored file with backslashes converted to forward slashes.
 * Resolves to undefined if the upload fails (the error is shown as a toast and logged).
 */
export async function uploadFileAttachment(fileName, base64Data) {
    try {
        const requestOptions = {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                name: fileName,
                data: base64Data,
            }),
        };
        const response = await fetch('/api/file/upload', requestOptions);

        if (response.ok) {
            const payload = await response.json();
            // The server may answer with OS-specific separators; URLs need forward slashes
            return payload.path.replace(/\\/g, '/');
        }

        // Non-2xx answer: surface the response body as the error message
        const reason = await response.text();
        throw new Error(reason);
    } catch (error) {
        toastr.error(error, 'Could not upload file');
        console.error('Could not upload file', error);
    }
}
/**
 * Fetches the text of a previously uploaded file.
 * @param {string} url File URL
 * @returns {Promise<string>} Text body of the response.
 * Resolves to undefined if the download fails (the error is shown as a toast and logged).
 */
export async function getFileAttachment(url) {
    try {
        const requestOptions = {
            method: 'GET',
            cache: 'force-cache',
            headers: getRequestHeaders(),
        };
        const response = await fetch(url, requestOptions);

        if (response.ok) {
            // Awaited inside the try block so a failed body read is reported like any other error
            return await response.text();
        }

        // Non-2xx answer: surface the response body as the error message
        const reason = await response.text();
        throw new Error(reason);
    } catch (error) {
        toastr.error(error, 'Could not download file');
        console.error('Could not download file', error);
    }
}
/** /**
* Validates file to make sure it is not binary or not image. * Validates file to make sure it is not binary or not image.
* @param {File} file File object * @param {File} file File object
@ -121,7 +216,7 @@ async function validateFile(file) {
} }
// If file is binary // If file is binary
if (isBinary && !isImage) { if (isBinary && !isImage && !isConvertible(file.type)) {
toastr.error('Binary files are not supported. Select a text file or image.'); toastr.error('Binary files are not supported. Select a text file or image.');
return false; return false;
} }
@ -193,22 +288,23 @@ async function deleteMessageFile(messageId) {
* @param {number} messageId Message ID * @param {number} messageId Message ID
*/ */
async function viewMessageFile(messageId) { async function viewMessageFile(messageId) {
const messageText = chat[messageId]?.extra?.file?.text; const messageFile = chat[messageId]?.extra?.file;
if (!messageText) { if (!messageFile) {
console.debug('Message has no file or it is empty'); console.debug('Message has no file or it is empty');
return; return;
} }
const fileText = messageFile.text || (await getFileAttachment(messageFile.url));
const modalTemplate = $('<div><pre><code></code></pre></div>'); const modalTemplate = $('<div><pre><code></code></pre></div>');
modalTemplate.find('code').addClass('txt').text(messageText); modalTemplate.find('code').addClass('txt').text(fileText);
modalTemplate.addClass('file_modal'); modalTemplate.addClass('file_modal');
addCopyToCodeBlocks(modalTemplate); addCopyToCodeBlocks(modalTemplate);
callPopup(modalTemplate, 'text'); callPopup(modalTemplate, 'text');
} }
/** /**
* Inserts a file embed into the message. * Inserts a file embed into the message.
* @param {number} messageId * @param {number} messageId

View File

@ -1,6 +1,7 @@
import { getContext } from "./extensions.js"; import { getContext } from "./extensions.js";
import { getRequestHeaders } from "../script.js"; import { getRequestHeaders } from "../script.js";
import { isMobile } from "./RossAscends-mods.js"; import { isMobile } from "./RossAscends-mods.js";
import { collapseNewlines } from "./power-user.js";
/** /**
* Pagination status string template. * Pagination status string template.
@ -1066,3 +1067,99 @@ export function uuidv4() {
return v.toString(16); return v.toString(16);
}); });
} }
/**
 * Cleans up text extracted from a document: normalizes line endings, drops empty lines,
 * trims every line and collapses runs of spaces.
 * @param {string} text Raw extracted text
 * @returns {string} Cleaned text with LF line endings
 */
function postProcessText(text) {
    // Normalize CRLF and lone CR line endings to LF *before* splitting into lines.
    // Stripping CRs after the split would glue CR-delimited lines together ("a\rb" -> "ab").
    const normalized = text.replace(/\r\n?/g, '\n');
    // Collapse multiple newlines into one
    const collapsed = collapseNewlines(normalized);
    // Trim leading and trailing whitespace of every line, and remove empty lines
    const lines = collapsed.split('\n').map(line => line.trim()).filter(Boolean);
    return lines
        .join('\n')
        // Normalize non-breaking spaces to regular spaces
        .replace(/\u00A0/g, ' ')
        // Collapse multiple spaces into one (newlines are not affected)
        .replace(/ {2,}/g, ' ')
        // Remove leading and trailing whitespace of the whole text
        .trim();
}
/**
 * Use pdf.js to load and parse text from PDF pages.
 * The pdf.js library and worker scripts are injected into the page on first use,
 * i.e. only when `window.pdfjsLib` is not present yet.
 * @param {Blob} blob PDF file blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromPDF(blob) {
    /**
     * Appends a module script tag to the document head.
     * @param {string} src Script URL
     * @returns {Promise<Event>} Resolves on the script's load event, rejects on its error event.
     */
    function loadModuleScript(src) {
        return new Promise((resolve, reject) => {
            const script = document.createElement('script');
            script.type = 'module';
            script.async = true;
            script.src = src;
            script.onload = resolve;
            script.onerror = reject;
            document.head.appendChild(script);
        });
    }

    if (!('pdfjsLib' in window)) {
        // Worker and library are independent downloads, so fetch them in parallel
        await Promise.all([
            loadModuleScript('lib/pdf.worker.mjs'),
            loadModuleScript('lib/pdf.mjs'),
        ]);
    }

    const buffer = await getFileBuffer(blob);
    const pdf = await pdfjsLib.getDocument(buffer).promise;

    try {
        const pages = [];
        // pdf.js page numbers are 1-based
        for (let i = 1; i <= pdf.numPages; i++) {
            const page = await pdf.getPage(i);
            const textContent = await page.getTextContent();
            const text = textContent.items.map(item => item.str).join(' ');
            pages.push(text);
        }
        return postProcessText(pages.join('\n'));
    } finally {
        // Release the document's resources even if a page failed to parse
        await pdf.destroy();
    }
}
/**
 * Extracts the plain text of an HTML file: the markup is sanitized with DOMPurify,
 * parsed with DOMParser, and the body text is post-processed.
 * @param {Blob} blob HTML content blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromHTML(blob) {
    const rawHtml = await blob.text();
    const parser = new DOMParser();
    const safeHtml = DOMPurify.sanitize(rawHtml);
    // Named "parsedDoc" so the page's global `document` is not shadowed
    const parsedDoc = parser.parseFromString(safeHtml, 'text/html');
    return postProcessText(parsedDoc.body.textContent);
}
/**
 * Extracts the plain text of a Markdown file: the Markdown is rendered to HTML with showdown,
 * sanitized with DOMPurify, parsed with DOMParser, and the body text is post-processed.
 * @param {Blob} blob Markdown content blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromMarkdown(blob) {
    const source = await blob.text();
    const renderedHtml = new showdown.Converter().makeHtml(source);
    const parser = new DOMParser();
    const safeHtml = DOMPurify.sanitize(renderedHtml);
    // Named "parsedDoc" so the page's global `document` is not shadowed
    const parsedDoc = parser.parseFromString(safeHtml, 'text/html');
    return postProcessText(parsedDoc.body.textContent);
}

View File

@ -3,6 +3,7 @@ const fs = require('fs');
const sanitize = require('sanitize-filename'); const sanitize = require('sanitize-filename');
const fetch = require('node-fetch').default; const fetch = require('node-fetch').default;
const { finished } = require('stream/promises'); const { finished } = require('stream/promises');
const writeFileSyncAtomic = require('write-file-atomic').sync;
const { DIRECTORIES, UNSAFE_EXTENSIONS } = require('./constants'); const { DIRECTORIES, UNSAFE_EXTENSIONS } = require('./constants');
const VALID_CATEGORIES = ["bgm", "ambient", "blip", "live2d"]; const VALID_CATEGORIES = ["bgm", "ambient", "blip", "live2d"];
@ -297,6 +298,32 @@ function registerEndpoints(app, jsonParser) {
return response.sendStatus(500); return response.sendStatus(500);
} }
}); });
// Stores an uploaded file attachment.
// Expects a JSON body with `name` (target file name) and `data` (base64 file contents);
// responds with `{ path }`, the stored file's path with the leading "public" segment removed.
app.post('/api/file/upload', jsonParser, async (request, response) => {
    try {
        const { name, data } = request.body;

        if (!name) {
            return response.status(400).send("No upload name specified");
        }

        if (!data) {
            return response.status(400).send("No upload data specified");
        }

        // Reject names that fail the asset file name check
        const safeName = checkAssetFileName(name);
        if (!safeName) {
            return response.status(400).send("Invalid upload name");
        }

        const targetPath = path.join(DIRECTORIES.files, safeName);
        // The payload is base64 text; the encoding argument makes it land on disk as decoded bytes
        writeFileSyncAtomic(targetPath, data, 'base64');

        // Strip the public directory prefix to get the path the client requests the file by
        const publicPrefix = 'public' + path.sep;
        const relativeUrl = path.normalize(targetPath.replace(publicPrefix, ''));
        return response.send({ path: relativeUrl });
    } catch (error) {
        console.log(error);
        return response.sendStatus(500);
    }
});
} }
module.exports = { module.exports = {

View File

@ -24,6 +24,7 @@ const DIRECTORIES = {
quickreplies: 'public/QuickReplies', quickreplies: 'public/QuickReplies',
assets: 'public/assets', assets: 'public/assets',
comfyWorkflows: 'public/user/workflows', comfyWorkflows: 'public/user/workflows',
files: 'public/user/files',
}; };
const UNSAFE_EXTENSIONS = [ const UNSAFE_EXTENSIONS = [