mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add EPUB import for data bank
This commit is contained in:
1
public/lib/epub.min.js
vendored
Normal file
1
public/lib/epub.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
13
public/lib/jszip.min.js
vendored
Normal file
13
public/lib/jszip.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
@ -27,6 +27,7 @@ import {
|
|||||||
extractTextFromHTML,
|
extractTextFromHTML,
|
||||||
extractTextFromMarkdown,
|
extractTextFromMarkdown,
|
||||||
extractTextFromPDF,
|
extractTextFromPDF,
|
||||||
|
extractTextFromEpub,
|
||||||
getBase64Async,
|
getBase64Async,
|
||||||
getStringHash,
|
getStringHash,
|
||||||
humanFileSize,
|
humanFileSize,
|
||||||
@ -56,6 +57,7 @@ const converters = {
|
|||||||
'application/pdf': extractTextFromPDF,
|
'application/pdf': extractTextFromPDF,
|
||||||
'text/html': extractTextFromHTML,
|
'text/html': extractTextFromHTML,
|
||||||
'text/markdown': extractTextFromMarkdown,
|
'text/markdown': extractTextFromMarkdown,
|
||||||
|
'application/epub+zip': extractTextFromEpub,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
<div data-i18n="These files will be available for extensions that support attachments (e.g. Vector Storage).">
|
<div data-i18n="These files will be available for extensions that support attachments (e.g. Vector Storage).">
|
||||||
These files will be available for extensions that support attachments (e.g. Vector Storage).
|
These files will be available for extensions that support attachments (e.g. Vector Storage).
|
||||||
</div>
|
</div>
|
||||||
<div data-i18n="Supported file types: Plain Text, PDF, Markdown, HTML." class="marginTopBot5">
|
<div data-i18n="Supported file types: Plain Text, PDF, Markdown, HTML, EPUB." class="marginTopBot5">
|
||||||
Supported file types: Plain Text, PDF, Markdown, HTML.
|
Supported file types: Plain Text, PDF, Markdown, HTML, EPUB.
|
||||||
</div>
|
</div>
|
||||||
<div class="flex-container marginTopBot5">
|
<div class="flex-container marginTopBot5">
|
||||||
<input type="search" id="attachmentSearch" class="attachmentSearch text_pole margin0 flex1" placeholder="Search...">
|
<input type="search" id="attachmentSearch" class="attachmentSearch text_pole margin0 flex1" placeholder="Search...">
|
||||||
|
@ -179,7 +179,7 @@ class FileScraper {
|
|||||||
return new Promise(resolve => {
|
return new Promise(resolve => {
|
||||||
const fileInput = document.createElement('input');
|
const fileInput = document.createElement('input');
|
||||||
fileInput.type = 'file';
|
fileInput.type = 'file';
|
||||||
fileInput.accept = '.txt, .md, .pdf, .html, .htm';
|
fileInput.accept = '.txt, .md, .pdf, .html, .htm, .epub';
|
||||||
fileInput.multiple = true;
|
fileInput.multiple = true;
|
||||||
fileInput.onchange = () => resolve(Array.from(fileInput.files));
|
fileInput.onchange = () => resolve(Array.from(fileInput.files));
|
||||||
fileInput.click();
|
fileInput.click();
|
||||||
|
@ -695,7 +695,7 @@ export function splitRecursive(input, length, delimiters = ['\n\n', '\n', ' ', '
|
|||||||
|
|
||||||
const flatParts = parts.flatMap(p => {
|
const flatParts = parts.flatMap(p => {
|
||||||
if (p.length < length) return p;
|
if (p.length < length) return p;
|
||||||
return splitRecursive(input, length, delimiters.slice(1));
|
return splitRecursive(p, length, delimiters.slice(1));
|
||||||
});
|
});
|
||||||
|
|
||||||
// Merge short chunks
|
// Merge short chunks
|
||||||
@ -1300,6 +1300,54 @@ export async function extractTextFromMarkdown(blob) {
|
|||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts the text content of an EPUB book.
 *
 * If the global `ePub` factory is not present yet, the vendored JSZip and
 * epub.js scripts are injected into the page first. Every spine section of
 * the book is then loaded, the trimmed `textContent` of its `<body>` is
 * collected, empty sections are dropped and the rest is joined with newlines
 * before being handed to `postProcessText`.
 *
 * @param {Blob} blob EPUB file contents (passed as-is to `ePub()`)
 * @returns {Promise<string>} Result of `postProcessText` for the joined section texts
 *  (presumably a string, like the other `extractTextFrom*` converters - confirm)
 */
export async function extractTextFromEpub(blob) {
    /**
     * Appends a `<script>` tag to the document head and waits for it to load.
     * @param {string} src Script URL
     * @returns {Promise<Event>} Resolves with the load event, rejects with the error event
     */
    function loadScript(src) {
        return new Promise((resolve, reject) => {
            const script = document.createElement('script');
            script.async = true;
            script.src = src;
            script.onload = resolve;
            script.onerror = reject;
            document.head.appendChild(script);
        });
    }

    /**
     * Loads the libraries needed for EPUB parsing.
     * @returns {Promise<void>}
     */
    async function initEpubJs() {
        // epub.js documents that JSZip must be included *before* epub.js when
        // working with archived .epub files. Injecting both scripts at once with
        // `async` makes their execution order depend on download timing, so the
        // scripts are loaded one after another instead.
        if (!('JSZip' in window)) {
            await loadScript('lib/jszip.min.js');
        }

        await loadScript('lib/epub.min.js');
    }

    if (!('ePub' in window)) {
        await initEpubJs();
    }

    const book = ePub(blob);
    await book.ready;

    // Kick off loading of all spine sections; Promise.all keeps spine order.
    const sectionPromises = [];

    book.spine.each((section) => {
        const sectionPromise = (async () => {
            const chapter = await book.load(section.href);

            // Only parsed documents with a non-empty body contribute text.
            if (!(chapter instanceof Document) || !chapter.body?.textContent) {
                return '';
            }

            return chapter.body.textContent.trim();
        })();

        sectionPromises.push(sectionPromise);
    });

    const sections = await Promise.all(sectionPromises);
    const nonEmptySections = sections.filter(Boolean);
    return postProcessText(nonEmptySections.join('\n'), false);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets a value in an object by a path.
|
* Sets a value in an object by a path.
|
||||||
* @param {object} obj Object to set value in
|
* @param {object} obj Object to set value in
|
||||||
|
Reference in New Issue
Block a user