[FEATURE_REQUEST] Sending PDF/HTML files? #1414
This commit is contained in:
parent
1ce009b84e
commit
e0bf2b8e3e
|
@ -22,7 +22,8 @@
|
|||
"droll",
|
||||
"handlebars",
|
||||
"highlight.js",
|
||||
"localforage"
|
||||
"localforage",
|
||||
"pdfjs-dist"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -195,7 +195,7 @@ import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
|
|||
import { hideLoader, showLoader } from "./scripts/loader.js";
|
||||
import { CharacterContextMenu, BulkEditOverlay } from "./scripts/BulkEditOverlay.js";
|
||||
import { loadMancerModels } from "./scripts/mancer-settings.js";
|
||||
import { hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
|
||||
import { getFileAttachment, hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
|
||||
import { replaceVariableMacros } from "./scripts/variables.js";
|
||||
|
||||
//exporting functions and vars for mods
|
||||
|
@ -3019,22 +3019,27 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
|||
coreChat.pop();
|
||||
}
|
||||
|
||||
coreChat = coreChat.map(chatItem => {
|
||||
coreChat = await Promise.all(coreChat.map(async (chatItem) => {
|
||||
let message = chatItem.mes;
|
||||
let regexType = chatItem.is_user ? regex_placement.USER_INPUT : regex_placement.AI_OUTPUT;
|
||||
let options = { isPrompt: true };
|
||||
|
||||
let regexedMessage = getRegexedString(message, regexType, options);
|
||||
|
||||
if (chatItem.extra?.file?.text) {
|
||||
regexedMessage += `\n\n${chatItem.extra.file.text}`;
|
||||
if (chatItem.extra?.file) {
|
||||
const fileText = chatItem.extra.file.text || (await getFileAttachment(chatItem.extra.file.url));
|
||||
|
||||
if (fileText) {
|
||||
chatItem.extra.fileStart = regexedMessage.length;
|
||||
regexedMessage += `\n\n${fileText}`;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
...chatItem,
|
||||
mes: regexedMessage,
|
||||
};
|
||||
});
|
||||
}));
|
||||
|
||||
// Determine token limit
|
||||
let this_max_context = getMaxContextSize();
|
||||
|
|
|
@ -8,14 +8,33 @@ import {
|
|||
eventSource,
|
||||
event_types,
|
||||
getCurrentChatId,
|
||||
getRequestHeaders,
|
||||
hideSwipeButtons,
|
||||
name2,
|
||||
saveChatDebounced,
|
||||
showSwipeButtons,
|
||||
} from "../script.js";
|
||||
import { getBase64Async, humanFileSize, saveBase64AsFile } from "./utils.js";
|
||||
import {
|
||||
extractTextFromHTML,
|
||||
extractTextFromMarkdown,
|
||||
extractTextFromPDF,
|
||||
getBase64Async,
|
||||
getStringHash,
|
||||
humanFileSize,
|
||||
saveBase64AsFile,
|
||||
} from "./utils.js";
|
||||
|
||||
const fileSizeLimit = 1024 * 1024 * 1; // 1 MB
|
||||
const fileSizeLimit = 1024 * 1024 * 10; // 10 MB
|
||||
|
||||
// Lookup table: file MIME type -> function that extracts plain text from a file of that type.
const converters = {
    'application/pdf': extractTextFromPDF,
    'text/html': extractTextFromHTML,
    'text/markdown': extractTextFromMarkdown,
};

/**
 * Tells whether a text converter is registered for the given MIME type.
 * @param {string} type MIME type to look up
 * @returns {boolean} True if `converters` has an entry for the type
 */
function isConvertible(type) {
    const supportedTypes = Object.keys(converters);
    return supportedTypes.some((mimeType) => mimeType === type);
}
|
||||
|
||||
/**
|
||||
* Mark message as hidden (system message).
|
||||
|
@ -70,7 +89,7 @@ export async function unhideChatMessage(messageId, messageBlock) {
|
|||
/**
|
||||
* Adds a file attachment to the message.
|
||||
* @param {object} message Message object
|
||||
* @returns {Promise<void>}
|
||||
* @returns {Promise<void>} A promise that resolves when file is uploaded.
|
||||
*/
|
||||
export async function populateFileAttachment(message, inputId = 'file_form_input') {
|
||||
try {
|
||||
|
@ -81,18 +100,38 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
|
|||
const file = fileInput.files[0];
|
||||
if (!file) return;
|
||||
|
||||
const fileBase64 = await getBase64Async(file);
|
||||
let base64Data = fileBase64.split(',')[1];
|
||||
|
||||
// If file is image
|
||||
if (file.type.startsWith('image/')) {
|
||||
const base64Img = await getBase64Async(file);
|
||||
const base64ImgData = base64Img.split(',')[1];
|
||||
const extension = file.type.split('/')[1];
|
||||
const imageUrl = await saveBase64AsFile(base64ImgData, name2, file.name, extension);
|
||||
const imageUrl = await saveBase64AsFile(base64Data, name2, file.name, extension);
|
||||
message.extra.image = imageUrl;
|
||||
message.extra.inline_image = true;
|
||||
} else {
|
||||
const fileText = await file.text();
|
||||
const slug = getStringHash(file.name);
|
||||
const uniqueFileName = `${Date.now()}_${slug}.txt`;
|
||||
|
||||
if (isConvertible(file.type)) {
|
||||
try {
|
||||
const converter = converters[file.type];
|
||||
const fileText = await converter(file);
|
||||
base64Data = window.btoa(unescape(encodeURIComponent(fileText)));
|
||||
} catch (error) {
|
||||
toastr.error(error, 'Could not convert file');
|
||||
console.error('Could not convert file', error);
|
||||
}
|
||||
}
|
||||
|
||||
const fileUrl = await uploadFileAttachment(uniqueFileName, base64Data);
|
||||
|
||||
if (!fileUrl) {
|
||||
return;
|
||||
}
|
||||
|
||||
message.extra.file = {
|
||||
text: fileText,
|
||||
url: fileUrl,
|
||||
size: file.size,
|
||||
name: file.name,
|
||||
};
|
||||
|
@ -105,6 +144,62 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Posts a base64-encoded file to the server's upload endpoint.
 * @param {string} fileName Name the file should be stored under
 * @param {string} base64Data File contents encoded as base64
 * @returns {Promise<string>} Path reported by the server, with backslashes turned into forward slashes.
 * Resolves to undefined when the request fails; the error is shown in a toast and logged.
 */
export async function uploadFileAttachment(fileName, base64Data) {
    try {
        const response = await fetch('/api/file/upload', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ name: fileName, data: base64Data }),
        });

        if (response.ok) {
            const { path } = await response.json();
            // The server may answer with OS-specific separators; URLs need forward slashes
            return path.replace(/\\/g, '/');
        }

        // Non-2xx answer: use the response body as the error message
        throw new Error(await response.text());
    } catch (error) {
        toastr.error(error, 'Could not upload file');
        console.error('Could not upload file', error);
    }
}
|
||||
|
||||
/**
 * Fetches a previously uploaded file and returns its contents as text.
 * @param {string} url File URL
 * @returns {Promise<string>} Response body as text.
 * Resolves to undefined when the request fails; the error is shown in a toast and logged.
 */
export async function getFileAttachment(url) {
    try {
        const response = await fetch(url, {
            method: 'GET',
            cache: 'force-cache',
            headers: getRequestHeaders(),
        });

        // The body is either the file text (success) or the error message (failure)
        const body = await response.text();

        if (!response.ok) {
            throw new Error(body);
        }

        return body;
    } catch (error) {
        toastr.error(error, 'Could not download file');
        console.error('Could not download file', error);
    }
}
|
||||
|
||||
/**
|
||||
* Validates file to make sure it is not binary or not image.
|
||||
* @param {File} file File object
|
||||
|
@ -121,7 +216,7 @@ async function validateFile(file) {
|
|||
}
|
||||
|
||||
// If file is binary
|
||||
if (isBinary && !isImage) {
|
||||
if (isBinary && !isImage && !isConvertible(file.type)) {
|
||||
toastr.error('Binary files are not supported. Select a text file or image.');
|
||||
return false;
|
||||
}
|
||||
|
@ -193,22 +288,23 @@ async function deleteMessageFile(messageId) {
|
|||
* @param {number} messageId Message ID
|
||||
*/
|
||||
async function viewMessageFile(messageId) {
|
||||
const messageText = chat[messageId]?.extra?.file?.text;
|
||||
const messageFile = chat[messageId]?.extra?.file;
|
||||
|
||||
if (!messageText) {
|
||||
if (!messageFile) {
|
||||
console.debug('Message has no file or it is empty');
|
||||
return;
|
||||
}
|
||||
|
||||
const fileText = messageFile.text || (await getFileAttachment(messageFile.url));
|
||||
|
||||
const modalTemplate = $('<div><pre><code></code></pre></div>');
|
||||
modalTemplate.find('code').addClass('txt').text(messageText);
|
||||
modalTemplate.find('code').addClass('txt').text(fileText);
|
||||
modalTemplate.addClass('file_modal');
|
||||
addCopyToCodeBlocks(modalTemplate);
|
||||
|
||||
callPopup(modalTemplate, 'text');
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Inserts a file embed into the message.
|
||||
* @param {number} messageId
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import { getContext } from "./extensions.js";
|
||||
import { getRequestHeaders } from "../script.js";
|
||||
import { isMobile } from "./RossAscends-mods.js";
|
||||
import { collapseNewlines } from "./power-user.js";
|
||||
|
||||
/**
|
||||
* Pagination status string template.
|
||||
|
@ -1066,3 +1067,99 @@ export function uuidv4() {
|
|||
return v.toString(16);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Normalizes text extracted from a document: unifies line endings, drops
 * blank lines, trims every line and squeezes runs of spaces.
 * @param {string} text Raw extracted text
 * @returns {string} Cleaned-up text
 */
function postProcessText(text) {
    // Turn CRLF and lone CR line endings into LF before anything else.
    // Stripping CRs only after splitting on LF would glue together lines
    // that are separated by a bare CR ("a\rb" would become "ab").
    let result = text.replace(/\r\n?/g, '\n');
    // Collapse multiple newlines into one
    result = collapseNewlines(result);
    // Trim leading and trailing whitespace of every line, and remove empty lines
    result = result
        .split('\n')
        .map((line) => line.trim())
        .filter(Boolean)
        .join('\n');
    // Normalize non-breaking spaces to regular spaces
    result = result.replace(/\u00A0/g, ' ');
    // Collapse multiple spaces into one (newlines are left alone)
    result = result.replace(/ {2,}/g, ' ');
    // Remove leading and trailing whitespace of the whole text
    return result.trim();
}
|
||||
|
||||
/**
 * Use pdf.js to load and parse text from PDF pages.
 * The pdf.js scripts are injected into the page on first use, i.e. when
 * `pdfjsLib` is not yet present on `window`.
 * @param {Blob} blob PDF file blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromPDF(blob) {
    /**
     * Appends an async module script to the document head.
     * @param {string} src Script URL
     * @returns {Promise<Event>} Settles with the script's load (resolve) or error (reject) event
     */
    function loadModuleScript(src) {
        return new Promise((resolve, reject) => {
            const script = document.createElement('script');
            script.type = 'module';
            script.async = true;
            script.src = src;
            script.onload = resolve;
            script.onerror = reject;
            document.head.appendChild(script);
        });
    }

    /**
     * Loads the pdf.js worker and library scripts in parallel.
     */
    async function initPdfJs() {
        return Promise.all([
            loadModuleScript('lib/pdf.worker.mjs'),
            loadModuleScript('lib/pdf.mjs'),
        ]);
    }

    if (!('pdfjsLib' in window)) {
        await initPdfJs();
    }

    const buffer = await getFileBuffer(blob);
    const loadingTask = pdfjsLib.getDocument(buffer);

    try {
        const pdf = await loadingTask.promise;
        const pages = [];
        // pdf.js page numbers are 1-based
        for (let i = 1; i <= pdf.numPages; i++) {
            const page = await pdf.getPage(i);
            const textContent = await page.getTextContent();
            const text = textContent.items.map(item => item.str).join(' ');
            pages.push(text);
        }
        return postProcessText(pages.join('\n'));
    } finally {
        // Release the document and its worker resources whether parsing
        // succeeded or failed; otherwise they pile up with every conversion.
        await loadingTask.destroy();
    }
}
|
||||
|
||||
/**
 * Extracts plain text from an HTML blob: the markup is sanitized with
 * DOMPurify, parsed with DOMParser, and the body's text content is cleaned up.
 * @param {Blob} blob HTML content blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromHTML(blob) {
    const rawHtml = await blob.text();
    const parser = new DOMParser();
    const safeHtml = DOMPurify.sanitize(rawHtml);
    // Named parsedDoc so the page's global `document` is not shadowed
    const parsedDoc = parser.parseFromString(safeHtml, 'text/html');
    return postProcessText(parsedDoc.body.textContent);
}
|
||||
|
||||
/**
 * Extracts plain text from a Markdown blob: the Markdown is rendered to HTML
 * with showdown, sanitized with DOMPurify, parsed with DOMParser, and the
 * body's text content is cleaned up.
 * @param {Blob} blob Markdown content blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromMarkdown(blob) {
    const source = await blob.text();
    const renderedHtml = new showdown.Converter().makeHtml(source);
    const parser = new DOMParser();
    const safeHtml = DOMPurify.sanitize(renderedHtml);
    // Named parsedDoc so the page's global `document` is not shadowed
    const parsedDoc = parser.parseFromString(safeHtml, 'text/html');
    return postProcessText(parsedDoc.body.textContent);
}
|
||||
|
|
|
@ -3,6 +3,7 @@ const fs = require('fs');
|
|||
const sanitize = require('sanitize-filename');
|
||||
const fetch = require('node-fetch').default;
|
||||
const { finished } = require('stream/promises');
|
||||
const writeFileSyncAtomic = require('write-file-atomic').sync;
|
||||
const { DIRECTORIES, UNSAFE_EXTENSIONS } = require('./constants');
|
||||
|
||||
const VALID_CATEGORIES = ["bgm", "ambient", "blip", "live2d"];
|
||||
|
@ -297,6 +298,32 @@ function registerEndpoints(app, jsonParser) {
|
|||
return response.sendStatus(500);
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Stores an uploaded file in the files directory.
 * Expects a JSON body with `name` (file name) and `data` (file contents; written
 * with the 'base64' option, so presumably base64-encoded - confirm against
 * write-file-atomic). Responds with `{ path }`, the stored file's path with the
 * leading 'public' folder stripped.
 * Responds 400 when the name or data is missing or the name is rejected by
 * checkAssetFileName, and 500 on any unexpected error.
 */
app.post('/api/file/upload', jsonParser, async (request, response) => {
    try {
        const { name, data } = request.body;

        if (!name) {
            return response.status(400).send("No upload name specified");
        }

        if (!data) {
            return response.status(400).send("No upload data specified");
        }

        // checkAssetFileName yields a falsy value for names it does not accept
        const safeInput = checkAssetFileName(name);

        if (!safeInput) {
            return response.status(400).send("Invalid upload name");
        }

        const pathToUpload = path.join(DIRECTORIES.files, safeInput);
        writeFileSyncAtomic(pathToUpload, data, 'base64');

        // Report the location without the 'public' prefix
        const publicPrefix = 'public' + path.sep;
        const url = path.normalize(pathToUpload.replace(publicPrefix, ''));
        return response.send({ path: url });
    } catch (error) {
        console.log(error);
        return response.sendStatus(500);
    }
});
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
|
|
@ -24,6 +24,7 @@ const DIRECTORIES = {
|
|||
quickreplies: 'public/QuickReplies',
|
||||
assets: 'public/assets',
|
||||
comfyWorkflows: 'public/user/workflows',
|
||||
files: 'public/user/files',
|
||||
};
|
||||
|
||||
const UNSAFE_EXTENSIONS = [
|
||||
|
|
Loading…
Reference in New Issue