mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-02-27 01:17:41 +01:00
Add EPUB import for data bank
This commit is contained in:
parent
3ff5884112
commit
78d1d48ea9
1
public/lib/epub.min.js
vendored
Normal file
1
public/lib/epub.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
13
public/lib/jszip.min.js
vendored
Normal file
13
public/lib/jszip.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
@ -27,6 +27,7 @@ import {
|
||||
extractTextFromHTML,
|
||||
extractTextFromMarkdown,
|
||||
extractTextFromPDF,
|
||||
extractTextFromEpub,
|
||||
getBase64Async,
|
||||
getStringHash,
|
||||
humanFileSize,
|
||||
@ -56,6 +57,7 @@ const converters = {
|
||||
'application/pdf': extractTextFromPDF,
|
||||
'text/html': extractTextFromHTML,
|
||||
'text/markdown': extractTextFromMarkdown,
|
||||
'application/epub+zip': extractTextFromEpub,
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -7,8 +7,8 @@
|
||||
<div data-i18n="These files will be available for extensions that support attachments (e.g. Vector Storage).">
|
||||
These files will be available for extensions that support attachments (e.g. Vector Storage).
|
||||
</div>
|
||||
<div data-i18n="Supported file types: Plain Text, PDF, Markdown, HTML." class="marginTopBot5">
|
||||
Supported file types: Plain Text, PDF, Markdown, HTML.
|
||||
<div data-i18n="Supported file types: Plain Text, PDF, Markdown, HTML, EPUB." class="marginTopBot5">
|
||||
Supported file types: Plain Text, PDF, Markdown, HTML, EPUB.
|
||||
</div>
|
||||
<div class="flex-container marginTopBot5">
|
||||
<input type="search" id="attachmentSearch" class="attachmentSearch text_pole margin0 flex1" placeholder="Search...">
|
||||
|
@ -179,7 +179,7 @@ class FileScraper {
|
||||
return new Promise(resolve => {
|
||||
const fileInput = document.createElement('input');
|
||||
fileInput.type = 'file';
|
||||
fileInput.accept = '.txt, .md, .pdf, .html, .htm';
|
||||
fileInput.accept = '.txt, .md, .pdf, .html, .htm, .epub';
|
||||
fileInput.multiple = true;
|
||||
fileInput.onchange = () => resolve(Array.from(fileInput.files));
|
||||
fileInput.click();
|
||||
|
@ -695,7 +695,7 @@ export function splitRecursive(input, length, delimiters = ['\n\n', '\n', ' ', '
|
||||
|
||||
const flatParts = parts.flatMap(p => {
|
||||
if (p.length < length) return p;
|
||||
return splitRecursive(input, length, delimiters.slice(1));
|
||||
return splitRecursive(p, length, delimiters.slice(1));
|
||||
});
|
||||
|
||||
// Merge short chunks
|
||||
@ -1300,6 +1300,54 @@ export async function extractTextFromMarkdown(blob) {
|
||||
return text;
|
||||
}
|
||||
|
||||
/**
 * Extracts the text content of an EPUB file.
 *
 * On first use (when no global `ePub` exists yet) the vendored JSZip and epub.js
 * scripts are injected into the page. The book is then opened, every spine section
 * is loaded, and the trimmed text content of each section body is joined with
 * newlines before being handed to `postProcessText`.
 * @param {Blob} blob EPUB file contents; passed to `ePub()` as-is
 * @returns {Promise<string>} Result of `postProcessText` for the joined section text (presumably a string)
 */
export async function extractTextFromEpub(blob) {
    /**
     * Injects a script tag into the document head.
     * @param {string} src Script URL
     * @returns {Promise<Event>} Resolves on the script's load event, rejects on its error event
     */
    function loadScript(src) {
        return new Promise((resolve, reject) => {
            const script = document.createElement('script');
            script.async = true;
            script.src = src;
            script.onload = resolve;
            script.onerror = reject;
            document.head.appendChild(script);
        });
    }

    /**
     * Loads the libraries needed for EPUB parsing, one after another.
     * @returns {Promise<void>}
     */
    async function initEpubJs() {
        // Order matters: the epub.js browser bundle picks up the global JSZip when it is
        // evaluated, so JSZip has to be fully loaded before epub.js is injected.
        // Loading both in parallel lets epub.js win the race and end up without JSZip.
        await loadScript('lib/jszip.min.js');
        await loadScript('lib/epub.min.js');
    }

    if (!('ePub' in window)) {
        await initEpubJs();
    }

    const book = ePub(blob);
    await book.ready;

    // Kick off all section loads up front; Promise.all keeps the results in spine order.
    const sectionPromises = [];
    book.spine.each((section) => {
        sectionPromises.push((async () => {
            const chapter = await book.load(section.href);
            // Sections that did not load as a document, or that have no body text, contribute nothing.
            if (!(chapter instanceof Document) || !chapter.body?.textContent) {
                return '';
            }
            return chapter.body.textContent.trim();
        })());
    });

    const sections = await Promise.all(sectionPromises);
    const nonEmptySections = sections.filter(sectionText => sectionText);
    return postProcessText(nonEmptySections.join('\n'), false);
}
|
||||
|
||||
/**
|
||||
* Sets a value in an object by a path.
|
||||
* @param {object} obj Object to set value in
|
||||
|
Loading…
x
Reference in New Issue
Block a user