mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
[FEATURE_REQUEST] Sending PDF/HTML files? #1414
This commit is contained in:
@ -22,7 +22,8 @@
|
|||||||
"droll",
|
"droll",
|
||||||
"handlebars",
|
"handlebars",
|
||||||
"highlight.js",
|
"highlight.js",
|
||||||
"localforage"
|
"localforage",
|
||||||
|
"pdfjs-dist"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
17398
public/lib/pdf.mjs
Normal file
17398
public/lib/pdf.mjs
Normal file
File diff suppressed because it is too large
Load Diff
1
public/lib/pdf.mjs.map
Normal file
1
public/lib/pdf.mjs.map
Normal file
File diff suppressed because one or more lines are too long
57124
public/lib/pdf.worker.mjs
Normal file
57124
public/lib/pdf.worker.mjs
Normal file
File diff suppressed because one or more lines are too long
1
public/lib/pdf.worker.mjs.map
Normal file
1
public/lib/pdf.worker.mjs.map
Normal file
File diff suppressed because one or more lines are too long
@ -195,7 +195,7 @@ import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
|
|||||||
import { hideLoader, showLoader } from "./scripts/loader.js";
|
import { hideLoader, showLoader } from "./scripts/loader.js";
|
||||||
import { CharacterContextMenu, BulkEditOverlay } from "./scripts/BulkEditOverlay.js";
|
import { CharacterContextMenu, BulkEditOverlay } from "./scripts/BulkEditOverlay.js";
|
||||||
import { loadMancerModels } from "./scripts/mancer-settings.js";
|
import { loadMancerModels } from "./scripts/mancer-settings.js";
|
||||||
import { hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
|
import { getFileAttachment, hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
|
||||||
import { replaceVariableMacros } from "./scripts/variables.js";
|
import { replaceVariableMacros } from "./scripts/variables.js";
|
||||||
|
|
||||||
//exporting functions and vars for mods
|
//exporting functions and vars for mods
|
||||||
@ -3019,22 +3019,27 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
|
|||||||
coreChat.pop();
|
coreChat.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
coreChat = coreChat.map(chatItem => {
|
coreChat = await Promise.all(coreChat.map(async (chatItem) => {
|
||||||
let message = chatItem.mes;
|
let message = chatItem.mes;
|
||||||
let regexType = chatItem.is_user ? regex_placement.USER_INPUT : regex_placement.AI_OUTPUT;
|
let regexType = chatItem.is_user ? regex_placement.USER_INPUT : regex_placement.AI_OUTPUT;
|
||||||
let options = { isPrompt: true };
|
let options = { isPrompt: true };
|
||||||
|
|
||||||
let regexedMessage = getRegexedString(message, regexType, options);
|
let regexedMessage = getRegexedString(message, regexType, options);
|
||||||
|
|
||||||
if (chatItem.extra?.file?.text) {
|
if (chatItem.extra?.file) {
|
||||||
regexedMessage += `\n\n${chatItem.extra.file.text}`;
|
const fileText = chatItem.extra.file.text || (await getFileAttachment(chatItem.extra.file.url));
|
||||||
|
|
||||||
|
if (fileText) {
|
||||||
|
chatItem.extra.fileStart = regexedMessage.length;
|
||||||
|
regexedMessage += `\n\n${fileText}`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...chatItem,
|
...chatItem,
|
||||||
mes: regexedMessage,
|
mes: regexedMessage,
|
||||||
};
|
};
|
||||||
});
|
}));
|
||||||
|
|
||||||
// Determine token limit
|
// Determine token limit
|
||||||
let this_max_context = getMaxContextSize();
|
let this_max_context = getMaxContextSize();
|
||||||
|
@ -8,14 +8,33 @@ import {
|
|||||||
eventSource,
|
eventSource,
|
||||||
event_types,
|
event_types,
|
||||||
getCurrentChatId,
|
getCurrentChatId,
|
||||||
|
getRequestHeaders,
|
||||||
hideSwipeButtons,
|
hideSwipeButtons,
|
||||||
name2,
|
name2,
|
||||||
saveChatDebounced,
|
saveChatDebounced,
|
||||||
showSwipeButtons,
|
showSwipeButtons,
|
||||||
} from "../script.js";
|
} from "../script.js";
|
||||||
import { getBase64Async, humanFileSize, saveBase64AsFile } from "./utils.js";
|
import {
|
||||||
|
extractTextFromHTML,
|
||||||
|
extractTextFromMarkdown,
|
||||||
|
extractTextFromPDF,
|
||||||
|
getBase64Async,
|
||||||
|
getStringHash,
|
||||||
|
humanFileSize,
|
||||||
|
saveBase64AsFile,
|
||||||
|
} from "./utils.js";
|
||||||
|
|
||||||
const fileSizeLimit = 1024 * 1024 * 1; // 1 MB
|
const fileSizeLimit = 1024 * 1024 * 10; // 10 MB
|
||||||
|
|
||||||
|
const converters = {
|
||||||
|
'application/pdf': extractTextFromPDF,
|
||||||
|
'text/html': extractTextFromHTML,
|
||||||
|
'text/markdown': extractTextFromMarkdown,
|
||||||
|
}
|
||||||
|
|
||||||
|
function isConvertible(type) {
|
||||||
|
return Object.keys(converters).includes(type);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mark message as hidden (system message).
|
* Mark message as hidden (system message).
|
||||||
@ -70,7 +89,7 @@ export async function unhideChatMessage(messageId, messageBlock) {
|
|||||||
/**
|
/**
|
||||||
* Adds a file attachment to the message.
|
* Adds a file attachment to the message.
|
||||||
* @param {object} message Message object
|
* @param {object} message Message object
|
||||||
* @returns {Promise<void>}
|
* @returns {Promise<void>} A promise that resolves when file is uploaded.
|
||||||
*/
|
*/
|
||||||
export async function populateFileAttachment(message, inputId = 'file_form_input') {
|
export async function populateFileAttachment(message, inputId = 'file_form_input') {
|
||||||
try {
|
try {
|
||||||
@ -81,18 +100,38 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
|
|||||||
const file = fileInput.files[0];
|
const file = fileInput.files[0];
|
||||||
if (!file) return;
|
if (!file) return;
|
||||||
|
|
||||||
|
const fileBase64 = await getBase64Async(file);
|
||||||
|
let base64Data = fileBase64.split(',')[1];
|
||||||
|
|
||||||
// If file is image
|
// If file is image
|
||||||
if (file.type.startsWith('image/')) {
|
if (file.type.startsWith('image/')) {
|
||||||
const base64Img = await getBase64Async(file);
|
|
||||||
const base64ImgData = base64Img.split(',')[1];
|
|
||||||
const extension = file.type.split('/')[1];
|
const extension = file.type.split('/')[1];
|
||||||
const imageUrl = await saveBase64AsFile(base64ImgData, name2, file.name, extension);
|
const imageUrl = await saveBase64AsFile(base64Data, name2, file.name, extension);
|
||||||
message.extra.image = imageUrl;
|
message.extra.image = imageUrl;
|
||||||
message.extra.inline_image = true;
|
message.extra.inline_image = true;
|
||||||
} else {
|
} else {
|
||||||
const fileText = await file.text();
|
const slug = getStringHash(file.name);
|
||||||
|
const uniqueFileName = `${Date.now()}_${slug}.txt`;
|
||||||
|
|
||||||
|
if (isConvertible(file.type)) {
|
||||||
|
try {
|
||||||
|
const converter = converters[file.type];
|
||||||
|
const fileText = await converter(file);
|
||||||
|
base64Data = window.btoa(unescape(encodeURIComponent(fileText)));
|
||||||
|
} catch (error) {
|
||||||
|
toastr.error(error, 'Could not convert file');
|
||||||
|
console.error('Could not convert file', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const fileUrl = await uploadFileAttachment(uniqueFileName, base64Data);
|
||||||
|
|
||||||
|
if (!fileUrl) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
message.extra.file = {
|
message.extra.file = {
|
||||||
text: fileText,
|
url: fileUrl,
|
||||||
size: file.size,
|
size: file.size,
|
||||||
name: file.name,
|
name: file.name,
|
||||||
};
|
};
|
||||||
@ -105,6 +144,62 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uploads file to the server.
|
||||||
|
* @param {string} fileName
|
||||||
|
* @param {string} base64Data
|
||||||
|
* @returns {Promise<string>} File URL
|
||||||
|
*/
|
||||||
|
export async function uploadFileAttachment(fileName, base64Data) {
|
||||||
|
try {
|
||||||
|
const result = await fetch('/api/file/upload', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: getRequestHeaders(),
|
||||||
|
body: JSON.stringify({
|
||||||
|
name: fileName,
|
||||||
|
data: base64Data,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!result.ok) {
|
||||||
|
const error = await result.text();
|
||||||
|
throw new Error(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
const responseData = await result.json();
|
||||||
|
return responseData.path.replace(/\\/g, '/');
|
||||||
|
} catch (error) {
|
||||||
|
toastr.error(error, 'Could not upload file');
|
||||||
|
console.error('Could not upload file', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Downloads file from the server.
|
||||||
|
* @param {string} url File URL
|
||||||
|
* @returns {Promise<string>} File text
|
||||||
|
*/
|
||||||
|
export async function getFileAttachment(url) {
|
||||||
|
try {
|
||||||
|
const result = await fetch(url, {
|
||||||
|
method: 'GET',
|
||||||
|
cache: 'force-cache',
|
||||||
|
headers: getRequestHeaders(),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!result.ok) {
|
||||||
|
const error = await result.text();
|
||||||
|
throw new Error(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
const text = await result.text();
|
||||||
|
return text;
|
||||||
|
} catch (error) {
|
||||||
|
toastr.error(error, 'Could not download file');
|
||||||
|
console.error('Could not download file', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates file to make sure it is not binary or not image.
|
* Validates file to make sure it is not binary or not image.
|
||||||
* @param {File} file File object
|
* @param {File} file File object
|
||||||
@ -121,7 +216,7 @@ async function validateFile(file) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If file is binary
|
// If file is binary
|
||||||
if (isBinary && !isImage) {
|
if (isBinary && !isImage && !isConvertible(file.type)) {
|
||||||
toastr.error('Binary files are not supported. Select a text file or image.');
|
toastr.error('Binary files are not supported. Select a text file or image.');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -193,22 +288,23 @@ async function deleteMessageFile(messageId) {
|
|||||||
* @param {number} messageId Message ID
|
* @param {number} messageId Message ID
|
||||||
*/
|
*/
|
||||||
async function viewMessageFile(messageId) {
|
async function viewMessageFile(messageId) {
|
||||||
const messageText = chat[messageId]?.extra?.file?.text;
|
const messageFile = chat[messageId]?.extra?.file;
|
||||||
|
|
||||||
if (!messageText) {
|
if (!messageFile) {
|
||||||
console.debug('Message has no file or it is empty');
|
console.debug('Message has no file or it is empty');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const fileText = messageFile.text || (await getFileAttachment(messageFile.url));
|
||||||
|
|
||||||
const modalTemplate = $('<div><pre><code></code></pre></div>');
|
const modalTemplate = $('<div><pre><code></code></pre></div>');
|
||||||
modalTemplate.find('code').addClass('txt').text(messageText);
|
modalTemplate.find('code').addClass('txt').text(fileText);
|
||||||
modalTemplate.addClass('file_modal');
|
modalTemplate.addClass('file_modal');
|
||||||
addCopyToCodeBlocks(modalTemplate);
|
addCopyToCodeBlocks(modalTemplate);
|
||||||
|
|
||||||
callPopup(modalTemplate, 'text');
|
callPopup(modalTemplate, 'text');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inserts a file embed into the message.
|
* Inserts a file embed into the message.
|
||||||
* @param {number} messageId
|
* @param {number} messageId
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import { getContext } from "./extensions.js";
|
import { getContext } from "./extensions.js";
|
||||||
import { getRequestHeaders } from "../script.js";
|
import { getRequestHeaders } from "../script.js";
|
||||||
import { isMobile } from "./RossAscends-mods.js";
|
import { isMobile } from "./RossAscends-mods.js";
|
||||||
|
import { collapseNewlines } from "./power-user.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pagination status string template.
|
* Pagination status string template.
|
||||||
@ -1066,3 +1067,99 @@ export function uuidv4() {
|
|||||||
return v.toString(16);
|
return v.toString(16);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function postProcessText(text) {
|
||||||
|
// Collapse multiple newlines into one
|
||||||
|
text = collapseNewlines(text);
|
||||||
|
// Trim leading and trailing whitespace, and remove empty lines
|
||||||
|
text = text.split('\n').map(l => l.trim()).filter(Boolean).join('\n');
|
||||||
|
// Remove carriage returns
|
||||||
|
text = text.replace(/\r/g, '');
|
||||||
|
// Normalize unicode spaces
|
||||||
|
text = text.replace(/\u00A0/g, ' ');
|
||||||
|
// Collapse multiple spaces into one (except for newlines)
|
||||||
|
text = text.replace(/ {2,}/g, ' ');
|
||||||
|
// Remove leading and trailing spaces
|
||||||
|
text = text.trim();
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use pdf.js to load and parse text from PDF pages
|
||||||
|
* @param {Blob} blob PDF file blob
|
||||||
|
* @returns {Promise<string>} A promise that resolves to the parsed text.
|
||||||
|
*/
|
||||||
|
export async function extractTextFromPDF(blob) {
|
||||||
|
async function initPdfJs() {
|
||||||
|
const promises = [];
|
||||||
|
|
||||||
|
const workerPromise = new Promise((resolve, reject) => {
|
||||||
|
const workerScript = document.createElement('script');
|
||||||
|
workerScript.type = 'module';
|
||||||
|
workerScript.async = true;
|
||||||
|
workerScript.src = 'lib/pdf.worker.mjs';
|
||||||
|
workerScript.onload = resolve;
|
||||||
|
workerScript.onerror = reject;
|
||||||
|
document.head.appendChild(workerScript);
|
||||||
|
});
|
||||||
|
|
||||||
|
promises.push(workerPromise);
|
||||||
|
|
||||||
|
const pdfjsPromise = new Promise((resolve, reject) => {
|
||||||
|
const pdfjsScript = document.createElement('script');
|
||||||
|
pdfjsScript.type = 'module';
|
||||||
|
pdfjsScript.async = true;
|
||||||
|
pdfjsScript.src = 'lib/pdf.mjs';
|
||||||
|
pdfjsScript.onload = resolve;
|
||||||
|
pdfjsScript.onerror = reject;
|
||||||
|
document.head.appendChild(pdfjsScript);
|
||||||
|
});
|
||||||
|
|
||||||
|
promises.push(pdfjsPromise);
|
||||||
|
|
||||||
|
return Promise.all(promises);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!('pdfjsLib' in window)) {
|
||||||
|
await initPdfJs();
|
||||||
|
}
|
||||||
|
|
||||||
|
const buffer = await getFileBuffer(blob);
|
||||||
|
const pdf = await pdfjsLib.getDocument(buffer).promise;
|
||||||
|
const pages = [];
|
||||||
|
for (let i = 1; i <= pdf.numPages; i++) {
|
||||||
|
const page = await pdf.getPage(i);
|
||||||
|
const textContent = await page.getTextContent();
|
||||||
|
const text = textContent.items.map(item => item.str).join(' ');
|
||||||
|
pages.push(text);
|
||||||
|
}
|
||||||
|
return postProcessText(pages.join('\n'));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use DOMParser to load and parse text from HTML
|
||||||
|
* @param {Blob} blob HTML content blob
|
||||||
|
* @returns {Promise<string>} A promise that resolves to the parsed text.
|
||||||
|
*/
|
||||||
|
export async function extractTextFromHTML(blob) {
|
||||||
|
const html = await blob.text();
|
||||||
|
const domParser = new DOMParser();
|
||||||
|
const document = domParser.parseFromString(DOMPurify.sanitize(html), 'text/html');
|
||||||
|
const text = postProcessText(document.body.textContent);
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use showdown to load and parse text from Markdown
|
||||||
|
* @param {Blob} blob Markdown content blob
|
||||||
|
* @returns {Promise<string>} A promise that resolves to the parsed text.
|
||||||
|
*/
|
||||||
|
export async function extractTextFromMarkdown(blob) {
|
||||||
|
const markdown = await blob.text();
|
||||||
|
const converter = new showdown.Converter();
|
||||||
|
const html = converter.makeHtml(markdown);
|
||||||
|
const domParser = new DOMParser();
|
||||||
|
const document = domParser.parseFromString(DOMPurify.sanitize(html), 'text/html');
|
||||||
|
const text = postProcessText(document.body.textContent);
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
@ -3,6 +3,7 @@ const fs = require('fs');
|
|||||||
const sanitize = require('sanitize-filename');
|
const sanitize = require('sanitize-filename');
|
||||||
const fetch = require('node-fetch').default;
|
const fetch = require('node-fetch').default;
|
||||||
const { finished } = require('stream/promises');
|
const { finished } = require('stream/promises');
|
||||||
|
const writeFileSyncAtomic = require('write-file-atomic').sync;
|
||||||
const { DIRECTORIES, UNSAFE_EXTENSIONS } = require('./constants');
|
const { DIRECTORIES, UNSAFE_EXTENSIONS } = require('./constants');
|
||||||
|
|
||||||
const VALID_CATEGORIES = ["bgm", "ambient", "blip", "live2d"];
|
const VALID_CATEGORIES = ["bgm", "ambient", "blip", "live2d"];
|
||||||
@ -297,6 +298,32 @@ function registerEndpoints(app, jsonParser) {
|
|||||||
return response.sendStatus(500);
|
return response.sendStatus(500);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
app.post('/api/file/upload', jsonParser, async (request, response) => {
|
||||||
|
try {
|
||||||
|
if (!request.body.name) {
|
||||||
|
return response.status(400).send("No upload name specified");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!request.body.data) {
|
||||||
|
return response.status(400).send("No upload data specified");
|
||||||
|
}
|
||||||
|
|
||||||
|
const safeInput = checkAssetFileName(request.body.name);
|
||||||
|
|
||||||
|
if (!safeInput) {
|
||||||
|
return response.status(400).send("Invalid upload name");
|
||||||
|
}
|
||||||
|
|
||||||
|
const pathToUpload = path.join(DIRECTORIES.files, safeInput);
|
||||||
|
writeFileSyncAtomic(pathToUpload, request.body.data, 'base64');
|
||||||
|
const url = path.normalize(pathToUpload.replace('public' + path.sep, ''));
|
||||||
|
return response.send({ path: url });
|
||||||
|
} catch (error) {
|
||||||
|
console.log(error);
|
||||||
|
return response.sendStatus(500);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
@ -24,6 +24,7 @@ const DIRECTORIES = {
|
|||||||
quickreplies: 'public/QuickReplies',
|
quickreplies: 'public/QuickReplies',
|
||||||
assets: 'public/assets',
|
assets: 'public/assets',
|
||||||
comfyWorkflows: 'public/user/workflows',
|
comfyWorkflows: 'public/user/workflows',
|
||||||
|
files: 'public/user/files',
|
||||||
};
|
};
|
||||||
|
|
||||||
const UNSAFE_EXTENSIONS = [
|
const UNSAFE_EXTENSIONS = [
|
||||||
|
Reference in New Issue
Block a user