mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
[FEATURE_REQUEST] Sending PDF/HTML files? #1414
This commit is contained in:
@ -22,7 +22,8 @@
|
||||
"droll",
|
||||
"handlebars",
|
||||
"highlight.js",
|
||||
"localforage"
|
||||
"localforage",
|
||||
"pdfjs-dist"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
17398
public/lib/pdf.mjs
Normal file
17398
public/lib/pdf.mjs
Normal file
File diff suppressed because it is too large
Load Diff
1
public/lib/pdf.mjs.map
Normal file
1
public/lib/pdf.mjs.map
Normal file
File diff suppressed because one or more lines are too long
57124
public/lib/pdf.worker.mjs
Normal file
57124
public/lib/pdf.worker.mjs
Normal file
File diff suppressed because one or more lines are too long
1
public/lib/pdf.worker.mjs.map
Normal file
1
public/lib/pdf.worker.mjs.map
Normal file
File diff suppressed because one or more lines are too long
@ -195,7 +195,7 @@ import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
|
||||
import { hideLoader, showLoader } from "./scripts/loader.js";
|
||||
import { CharacterContextMenu, BulkEditOverlay } from "./scripts/BulkEditOverlay.js";
|
||||
import { loadMancerModels } from "./scripts/mancer-settings.js";
|
||||
import { hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
|
||||
import { getFileAttachment, hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
|
||||
import { replaceVariableMacros } from "./scripts/variables.js";
|
||||
|
||||
//exporting functions and vars for mods
|
||||
@ -3019,22 +3019,27 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
||||
coreChat.pop();
|
||||
}
|
||||
|
||||
coreChat = coreChat.map(chatItem => {
|
||||
coreChat = await Promise.all(coreChat.map(async (chatItem) => {
|
||||
let message = chatItem.mes;
|
||||
let regexType = chatItem.is_user ? regex_placement.USER_INPUT : regex_placement.AI_OUTPUT;
|
||||
let options = { isPrompt: true };
|
||||
|
||||
let regexedMessage = getRegexedString(message, regexType, options);
|
||||
|
||||
if (chatItem.extra?.file?.text) {
|
||||
regexedMessage += `\n\n${chatItem.extra.file.text}`;
|
||||
if (chatItem.extra?.file) {
|
||||
const fileText = chatItem.extra.file.text || (await getFileAttachment(chatItem.extra.file.url));
|
||||
|
||||
if (fileText) {
|
||||
chatItem.extra.fileStart = regexedMessage.length;
|
||||
regexedMessage += `\n\n${fileText}`;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
...chatItem,
|
||||
mes: regexedMessage,
|
||||
};
|
||||
});
|
||||
}));
|
||||
|
||||
// Determine token limit
|
||||
let this_max_context = getMaxContextSize();
|
||||
|
@ -8,14 +8,33 @@ import {
|
||||
eventSource,
|
||||
event_types,
|
||||
getCurrentChatId,
|
||||
getRequestHeaders,
|
||||
hideSwipeButtons,
|
||||
name2,
|
||||
saveChatDebounced,
|
||||
showSwipeButtons,
|
||||
} from "../script.js";
|
||||
import { getBase64Async, humanFileSize, saveBase64AsFile } from "./utils.js";
|
||||
import {
|
||||
extractTextFromHTML,
|
||||
extractTextFromMarkdown,
|
||||
extractTextFromPDF,
|
||||
getBase64Async,
|
||||
getStringHash,
|
||||
humanFileSize,
|
||||
saveBase64AsFile,
|
||||
} from "./utils.js";
|
||||
|
||||
const fileSizeLimit = 1024 * 1024 * 1; // 1 MB
|
||||
const fileSizeLimit = 1024 * 1024 * 10; // 10 MB
|
||||
|
||||
/**
 * Text extractors for uploads that are not plain text, keyed by MIME type.
 * Each entry is called with the uploaded file and resolves to its text.
 */
const converters = {
    'application/pdf': extractTextFromPDF,
    'text/html': extractTextFromHTML,
    'text/markdown': extractTextFromMarkdown,
};

/**
 * Tells whether a text converter is registered for the given MIME type.
 * @param {string} type MIME type of the file
 * @returns {boolean} True if the type has an entry in the converters table
 */
function isConvertible(type) {
    const supportedTypes = Object.keys(converters);
    return supportedTypes.some((mimeType) => mimeType === type);
}
|
||||
|
||||
/**
|
||||
* Mark message as hidden (system message).
|
||||
@ -70,7 +89,7 @@ export async function unhideChatMessage(messageId, messageBlock) {
|
||||
/**
|
||||
* Adds a file attachment to the message.
|
||||
* @param {object} message Message object
|
||||
* @returns {Promise<void>}
|
||||
* @returns {Promise<void>} A promise that resolves when file is uploaded.
|
||||
*/
|
||||
export async function populateFileAttachment(message, inputId = 'file_form_input') {
|
||||
try {
|
||||
@ -81,18 +100,38 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
|
||||
const file = fileInput.files[0];
|
||||
if (!file) return;
|
||||
|
||||
const fileBase64 = await getBase64Async(file);
|
||||
let base64Data = fileBase64.split(',')[1];
|
||||
|
||||
// If file is image
|
||||
if (file.type.startsWith('image/')) {
|
||||
const base64Img = await getBase64Async(file);
|
||||
const base64ImgData = base64Img.split(',')[1];
|
||||
const extension = file.type.split('/')[1];
|
||||
const imageUrl = await saveBase64AsFile(base64ImgData, name2, file.name, extension);
|
||||
const imageUrl = await saveBase64AsFile(base64Data, name2, file.name, extension);
|
||||
message.extra.image = imageUrl;
|
||||
message.extra.inline_image = true;
|
||||
} else {
|
||||
const fileText = await file.text();
|
||||
const slug = getStringHash(file.name);
|
||||
const uniqueFileName = `${Date.now()}_${slug}.txt`;
|
||||
|
||||
if (isConvertible(file.type)) {
|
||||
try {
|
||||
const converter = converters[file.type];
|
||||
const fileText = await converter(file);
|
||||
base64Data = window.btoa(unescape(encodeURIComponent(fileText)));
|
||||
} catch (error) {
|
||||
toastr.error(error, 'Could not convert file');
|
||||
console.error('Could not convert file', error);
|
||||
}
|
||||
}
|
||||
|
||||
const fileUrl = await uploadFileAttachment(uniqueFileName, base64Data);
|
||||
|
||||
if (!fileUrl) {
|
||||
return;
|
||||
}
|
||||
|
||||
message.extra.file = {
|
||||
text: fileText,
|
||||
url: fileUrl,
|
||||
size: file.size,
|
||||
name: file.name,
|
||||
};
|
||||
@ -105,6 +144,62 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Uploads file to the server.
 * @param {string} fileName Name the file should be stored under
 * @param {string} base64Data Base64-encoded file contents
 * @returns {Promise<string|undefined>} File URL (with forward slashes), or undefined if the upload failed.
 * Failures are reported to the user via a toast and logged; they are not rethrown.
 */
export async function uploadFileAttachment(fileName, base64Data) {
    try {
        const result = await fetch('/api/file/upload', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                name: fileName,
                data: base64Data,
            }),
        });

        if (!result.ok) {
            // The response body carries the server's error text
            const error = await result.text();
            throw new Error(error);
        }

        const responseData = await result.json();
        // Turn backslash path separators into forward slashes so the path works as a URL
        return responseData.path.replace(/\\/g, '/');
    } catch (error) {
        // toastr expects a string message; handing it the Error object itself
        // either renders nothing or throws inside toastr's HTML escaping.
        const message = error instanceof Error ? error.message : String(error);
        toastr.error(message, 'Could not upload file');
        console.error('Could not upload file', error);
    }
}
|
||||
|
||||
/**
 * Downloads file from the server.
 * @param {string} url File URL
 * @returns {Promise<string|undefined>} File text, or undefined if the download failed.
 * Failures are reported to the user via a toast and logged; they are not rethrown.
 */
export async function getFileAttachment(url) {
    try {
        const result = await fetch(url, {
            method: 'GET',
            // Serve from the HTTP cache whenever a cached copy exists
            cache: 'force-cache',
            headers: getRequestHeaders(),
        });

        if (!result.ok) {
            // The response body carries the server's error text
            const error = await result.text();
            throw new Error(error);
        }

        return await result.text();
    } catch (error) {
        // toastr expects a string message; handing it the Error object itself
        // either renders nothing or throws inside toastr's HTML escaping.
        const message = error instanceof Error ? error.message : String(error);
        toastr.error(message, 'Could not download file');
        console.error('Could not download file', error);
    }
}
|
||||
|
||||
/**
|
||||
* Validates file to make sure it is not binary or not image.
|
||||
* @param {File} file File object
|
||||
@ -121,7 +216,7 @@ async function validateFile(file) {
|
||||
}
|
||||
|
||||
// If file is binary
|
||||
if (isBinary && !isImage) {
|
||||
if (isBinary && !isImage && !isConvertible(file.type)) {
|
||||
toastr.error('Binary files are not supported. Select a text file or image.');
|
||||
return false;
|
||||
}
|
||||
@ -193,22 +288,23 @@ async function deleteMessageFile(messageId) {
|
||||
* @param {number} messageId Message ID
|
||||
*/
|
||||
async function viewMessageFile(messageId) {
    const messageFile = chat[messageId]?.extra?.file;

    if (!messageFile) {
        console.debug('Message has no file or it is empty');
        return;
    }

    // Use the text stored on the message when present; otherwise fetch it by URL
    const fileText = messageFile.text || (await getFileAttachment(messageFile.url));

    // getFileAttachment resolves to undefined after reporting a failed download.
    // jQuery treats .text(undefined) as a getter, so bail out instead of opening an empty modal.
    if (fileText === undefined) {
        return;
    }

    const modalTemplate = $('<div><pre><code></code></pre></div>');
    modalTemplate.find('code').addClass('txt').text(fileText);
    modalTemplate.addClass('file_modal');
    addCopyToCodeBlocks(modalTemplate);

    callPopup(modalTemplate, 'text');
}
|
||||
|
||||
|
||||
/**
|
||||
* Inserts a file embed into the message.
|
||||
* @param {number} messageId
|
||||
|
@ -1,6 +1,7 @@
|
||||
import { getContext } from "./extensions.js";
|
||||
import { getRequestHeaders } from "../script.js";
|
||||
import { isMobile } from "./RossAscends-mods.js";
|
||||
import { collapseNewlines } from "./power-user.js";
|
||||
|
||||
/**
|
||||
* Pagination status string template.
|
||||
@ -1066,3 +1067,99 @@ export function uuidv4() {
|
||||
return v.toString(16);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Normalizes whitespace in text extracted from a document.
 * @param {string} text Raw extracted text
 * @returns {string} Text with blank lines removed, lines trimmed and runs of spaces squeezed
 */
function postProcessText(text) {
    // Collapse multiple newlines into one (delegated to the imported helper)
    const collapsed = collapseNewlines(text);

    // Trim every line and keep only the ones that still have content
    const keptLines = [];
    for (const line of collapsed.split('\n')) {
        const trimmedLine = line.trim();
        if (trimmedLine) {
            keptLines.push(trimmedLine);
        }
    }

    return keptLines
        .join('\n')
        // Remove carriage returns
        .replace(/\r/g, '')
        // Turn non-breaking spaces into regular spaces
        .replace(/\u00A0/g, ' ')
        // Squeeze runs of spaces into one (newlines are left alone)
        .replace(/ {2,}/g, ' ')
        // Remove leading and trailing whitespace
        .trim();
}
|
||||
|
||||
/**
 * Use pdf.js to load and parse text from PDF pages.
 * pdf.js is loaded lazily: its scripts are injected the first time this is
 * called while `window.pdfjsLib` is not defined.
 * @param {Blob} blob PDF file blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromPDF(blob) {
    /**
     * Appends an ES module script tag to the document head.
     * @param {string} src Script URL
     * @returns {Promise<Event>} Resolves when the script has loaded, rejects if it fails to load
     */
    function loadModuleScript(src) {
        return new Promise((resolve, reject) => {
            const script = document.createElement('script');
            script.type = 'module';
            script.async = true;
            script.src = src;
            script.onload = resolve;
            script.onerror = reject;
            document.head.appendChild(script);
        });
    }

    if (!('pdfjsLib' in window)) {
        // Worker script is injected first, then the library; both load in parallel
        await Promise.all([
            loadModuleScript('lib/pdf.worker.mjs'),
            loadModuleScript('lib/pdf.mjs'),
        ]);
    }

    const buffer = await getFileBuffer(blob);
    const loadingTask = pdfjsLib.getDocument(buffer);

    try {
        const pdf = await loadingTask.promise;
        const pages = [];
        // pdf.js page numbers are 1-based
        for (let i = 1; i <= pdf.numPages; i++) {
            const page = await pdf.getPage(i);
            const textContent = await page.getTextContent();
            const text = textContent.items.map(item => item.str).join(' ');
            pages.push(text);
        }
        return postProcessText(pages.join('\n'));
    } finally {
        // Release the parsed document and its worker resources even when
        // loading or page extraction fails; otherwise every conversion leaks them.
        await loadingTask.destroy();
    }
}
|
||||
|
||||
/**
 * Extracts the plain text of an HTML file via DOMParser.
 * The markup is passed through DOMPurify before it is parsed.
 * @param {Blob} blob HTML content blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromHTML(blob) {
    const rawHtml = await blob.text();
    const parser = new DOMParser();
    const sanitizedHtml = DOMPurify.sanitize(rawHtml);
    const parsedDocument = parser.parseFromString(sanitizedHtml, 'text/html');
    // Only the text of the body is kept, then whitespace is normalized
    return postProcessText(parsedDocument.body.textContent);
}
|
||||
|
||||
/**
 * Extracts the plain text of a Markdown file.
 * The Markdown is rendered to HTML with showdown, sanitized with DOMPurify,
 * parsed with DOMParser, and the body text is then normalized.
 * @param {Blob} blob Markdown content blob
 * @returns {Promise<string>} A promise that resolves to the parsed text.
 */
export async function extractTextFromMarkdown(blob) {
    const source = await blob.text();
    const renderedHtml = new showdown.Converter().makeHtml(source);
    const parser = new DOMParser();
    const sanitizedHtml = DOMPurify.sanitize(renderedHtml);
    const parsedDocument = parser.parseFromString(sanitizedHtml, 'text/html');
    return postProcessText(parsedDocument.body.textContent);
}
|
||||
|
@ -3,6 +3,7 @@ const fs = require('fs');
|
||||
const sanitize = require('sanitize-filename');
|
||||
const fetch = require('node-fetch').default;
|
||||
const { finished } = require('stream/promises');
|
||||
const writeFileSyncAtomic = require('write-file-atomic').sync;
|
||||
const { DIRECTORIES, UNSAFE_EXTENSIONS } = require('./constants');
|
||||
|
||||
const VALID_CATEGORIES = ["bgm", "ambient", "blip", "live2d"];
|
||||
@ -297,6 +298,32 @@ function registerEndpoints(app, jsonParser) {
|
||||
return response.sendStatus(500);
|
||||
}
|
||||
});
|
||||
|
||||
// Stores a base64-encoded upload under DIRECTORIES.files and replies with
// the stored file's path relative to the public directory.
app.post('/api/file/upload', jsonParser, async (request, response) => {
    try {
        const { name, data } = request.body;

        if (!name) {
            return response.status(400).send("No upload name specified");
        }

        if (!data) {
            return response.status(400).send("No upload data specified");
        }

        // A falsy result means the requested name was rejected
        const safeFileName = checkAssetFileName(name);
        if (!safeFileName) {
            return response.status(400).send("Invalid upload name");
        }

        const destination = path.join(DIRECTORIES.files, safeFileName);
        writeFileSyncAtomic(destination, data, 'base64');

        // Strip the leading "public" segment from the path sent to the client
        const publicRelativePath = path.normalize(destination.replace('public' + path.sep, ''));
        return response.send({ path: publicRelativePath });
    } catch (error) {
        console.log(error);
        return response.sendStatus(500);
    }
});
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
@ -24,6 +24,7 @@ const DIRECTORIES = {
|
||||
quickreplies: 'public/QuickReplies',
|
||||
assets: 'public/assets',
|
||||
comfyWorkflows: 'public/user/workflows',
|
||||
files: 'public/user/files',
|
||||
};
|
||||
|
||||
const UNSAFE_EXTENSIONS = [
|
||||
|
Reference in New Issue
Block a user