mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Implement generic interface for adding Data Bank scrapers
This commit is contained in:
@@ -215,6 +215,7 @@ import { evaluateMacros } from './scripts/macros.js';
|
|||||||
import { currentUser, setUserControls } from './scripts/user.js';
|
import { currentUser, setUserControls } from './scripts/user.js';
|
||||||
import { callGenericPopup } from './scripts/popup.js';
|
import { callGenericPopup } from './scripts/popup.js';
|
||||||
import { renderTemplate, renderTemplateAsync } from './scripts/templates.js';
|
import { renderTemplate, renderTemplateAsync } from './scripts/templates.js';
|
||||||
|
import { ScraperManager } from './scripts/scrapers.js';
|
||||||
|
|
||||||
//exporting functions and vars for mods
|
//exporting functions and vars for mods
|
||||||
export {
|
export {
|
||||||
@@ -7756,6 +7757,7 @@ window['SillyTavern'].getContext = function () {
|
|||||||
*/
|
*/
|
||||||
renderExtensionTemplate: renderExtensionTemplate,
|
renderExtensionTemplate: renderExtensionTemplate,
|
||||||
renderExtensionTemplateAsync: renderExtensionTemplateAsync,
|
renderExtensionTemplateAsync: renderExtensionTemplateAsync,
|
||||||
|
registerDataBankScraper: ScraperManager.registerDataBankScraper,
|
||||||
callPopup: callPopup,
|
callPopup: callPopup,
|
||||||
callGenericPopup: callGenericPopup,
|
callGenericPopup: callGenericPopup,
|
||||||
mainApi: main_api,
|
mainApi: main_api,
|
||||||
|
@@ -31,10 +31,10 @@ import {
|
|||||||
getStringHash,
|
getStringHash,
|
||||||
humanFileSize,
|
humanFileSize,
|
||||||
saveBase64AsFile,
|
saveBase64AsFile,
|
||||||
isValidUrl,
|
|
||||||
} from './utils.js';
|
} from './utils.js';
|
||||||
import { extension_settings, renderExtensionTemplateAsync, saveMetadataDebounced, writeExtensionField } from './extensions.js';
|
import { extension_settings, renderExtensionTemplateAsync, saveMetadataDebounced, writeExtensionField } from './extensions.js';
|
||||||
import { POPUP_RESULT, POPUP_TYPE, callGenericPopup } from './popup.js';
|
import { POPUP_RESULT, POPUP_TYPE, callGenericPopup } from './popup.js';
|
||||||
|
import { ScraperManager } from './scrapers.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @typedef {Object} FileAttachment
|
* @typedef {Object} FileAttachment
|
||||||
@@ -677,18 +677,67 @@ async function openAttachmentManager() {
|
|||||||
|
|
||||||
/**
 * Builds the shared "Add" action menu of the attachment manager.
 *
 * One menu entry is created per scraper reported by the ScraperManager, and the menu is
 * anchored (via Popper) to the "Add" button found under each attachment section title.
 * The section whose button was clicked is stored on the menu element as
 * `data-attachment-manager-target` and is read back when an entry is clicked.
 * @returns {() => void} Cleanup function: destroys the poppers, removes the body click listeners and removes the menu element
 */
function renderButtons() {
    const sources = {
        [ATTACHMENT_SOURCE.GLOBAL]: '.globalAttachmentsTitle',
        [ATTACHMENT_SOURCE.CHARACTER]: '.characterAttachmentsTitle',
        [ATTACHMENT_SOURCE.CHAT]: '.chatAttachmentsTitle',
    };

    const modal = template.find('.actionButtonsModal').hide();

    // One menu entry per registered scraper.
    ScraperManager.getDataBankScrapers().forEach((scraper) => {
        const entry = template.find('.actionButtonTemplate .actionButton').clone();
        entry.find('.actionButtonIcon').addClass(scraper.iconClass);
        entry.find('.actionButtonText').text(scraper.name);
        entry.attr('title', scraper.description);
        entry.on('click', () => {
            // The target is whichever section's "Add" button opened the menu last.
            const target = modal.attr('data-attachment-manager-target');
            runScraper(scraper.id, target, renderAttachments);
        });
        modal.append(entry);
    });

    /** Pairs of [popper instance, body click listener] that need to be torn down later. */
    const modalButtonData = [];

    for (const [source, selector] of Object.entries(sources)) {
        const button = template.find(selector).find('.openActionModalButton').get(0);

        // Sections without an "Add" button get no popper and no listener.
        if (!button) {
            continue;
        }

        const bodyListener = (e) => {
            const clickedOpener = $(e.target).closest('.openActionModalButton').length > 0;

            if (clickedOpener) {
                // Replay a click if the modal was already open by another button
                if (!modal.is(':visible')) {
                    modal.show();
                }
            } else if (modal.is(':visible')) {
                // Any click outside of an "Add" button dismisses the menu.
                modal.hide();
            }
        };
        document.body.addEventListener('click', bodyListener);

        const popper = Popper.createPopper(button, modal.get(0), { placement: 'bottom-end' });
        button.addEventListener('click', () => {
            modal.attr('data-attachment-manager-target', source);
            modal.toggle();
            popper.update();
        });

        modalButtonData.push([popper, bodyListener]);
    }

    return () => {
        for (const [popper, bodyListener] of modalButtonData) {
            popper.destroy();
            document.body.removeEventListener('click', bodyListener);
        }
        modal.remove();
    };
}
|
||||||
|
|
||||||
async function renderAttachments() {
|
async function renderAttachments() {
|
||||||
@@ -718,20 +767,9 @@ async function openAttachmentManager() {
|
|||||||
let sortField = localStorage.getItem('DataBank_sortField') || 'created';
|
let sortField = localStorage.getItem('DataBank_sortField') || 'created';
|
||||||
let sortOrder = localStorage.getItem('DataBank_sortOrder') || 'desc';
|
let sortOrder = localStorage.getItem('DataBank_sortOrder') || 'desc';
|
||||||
let filterString = '';
|
let filterString = '';
|
||||||
const hasFandomPlugin = await isFandomPluginAvailable();
|
|
||||||
const template = $(await renderExtensionTemplateAsync('attachments', 'manager', {}));
|
const template = $(await renderExtensionTemplateAsync('attachments', 'manager', {}));
|
||||||
renderButtons(ATTACHMENT_SOURCE.GLOBAL);
|
|
||||||
renderButtons(ATTACHMENT_SOURCE.CHARACTER);
|
|
||||||
renderButtons(ATTACHMENT_SOURCE.CHAT);
|
|
||||||
template.find('.scrapeWebpageButton').on('click', function () {
|
|
||||||
openWebpageScraper(String($(this).data('attachment-manager-target')), renderAttachments);
|
|
||||||
});
|
|
||||||
template.find('.scrapeFandomButton').toggle(hasFandomPlugin).on('click', function () {
|
|
||||||
openFandomScraper(String($(this).data('attachment-manager-target')), renderAttachments);
|
|
||||||
});
|
|
||||||
template.find('.uploadFileButton').on('click', function () {
|
|
||||||
openFileUploader(String($(this).data('attachment-manager-target')), renderAttachments);
|
|
||||||
});
|
|
||||||
template.find('.attachmentSearch').on('input', function () {
|
template.find('.attachmentSearch').on('input', function () {
|
||||||
filterString = String($(this).val());
|
filterString = String($(this).val());
|
||||||
renderAttachments();
|
renderAttachments();
|
||||||
@@ -747,167 +785,50 @@ async function openAttachmentManager() {
|
|||||||
localStorage.setItem('DataBank_sortOrder', sortOrder);
|
localStorage.setItem('DataBank_sortOrder', sortOrder);
|
||||||
renderAttachments();
|
renderAttachments();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const cleanupFn = renderButtons();
|
||||||
await renderAttachments();
|
await renderAttachments();
|
||||||
callGenericPopup(template, POPUP_TYPE.TEXT, '', { wide: true, large: true, okButton: 'Close' });
|
await callGenericPopup(template, POPUP_TYPE.TEXT, '', { wide: true, large: true, okButton: 'Close' });
|
||||||
|
|
||||||
|
cleanupFn();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Executes a registered Data Bank scraper and uploads every file it yields as an attachment.
 * Failures are logged to the console and reported with an error toast instead of being rethrown.
 * @param {string} scraperId Id of the scraper
 * @param {string} target Target for the attachment
 * @param {function} callback Callback function, invoked once after all files were uploaded
 * @returns {Promise<void>} A promise that resolves when the source is scraped.
 */
async function runScraper(scraperId, target, callback) {
    try {
        console.log(`Running scraper ${scraperId} for ${target}`);
        const scrapedFiles = await ScraperManager.runDataBankScraper(scraperId);

        // A non-array result means there is nothing to upload (logged only, no toast).
        if (!Array.isArray(scrapedFiles)) {
            console.warn('Scraping returned nothing');
            return;
        }

        if (scrapedFiles.length === 0) {
            console.warn('Scraping returned no files');
            toastr.info('No files were scraped.', 'Data Bank');
            return;
        }

        // Uploads run one after another, in the order the scraper returned the files.
        for (let index = 0; index < scrapedFiles.length; index++) {
            await uploadFileAttachmentToServer(scrapedFiles[index], target);
        }

        toastr.success(`Scraped ${scrapedFiles.length} files from ${scraperId} to ${target}.`, 'Data Bank');
        callback();
    } catch (error) {
        console.error('Scraping failed', error);
        toastr.error('Check browser console for details.', 'Scraping failed');
    }
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param {Blob} blob Blob of the HTML file
|
|
||||||
* @returns {Promise<string>} Title of the HTML file
|
|
||||||
*/
|
|
||||||
async function getTitleFromHtmlBlob(blob) {
|
|
||||||
const text = await blob.text();
|
|
||||||
const titleMatch = text.match(/<title>(.*?)<\/title>/i);
|
|
||||||
return titleMatch ? titleMatch[1] : '';
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Scrapes a Fandom page for attachments.
|
|
||||||
* @param {string} target Target for the attachment
|
|
||||||
* @param {function} callback Callback function
|
|
||||||
*/
|
|
||||||
async function openFandomScraper(target, callback) {
|
|
||||||
if (!await isFandomPluginAvailable()) {
|
|
||||||
toastr.error('Fandom scraper plugin is not available');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let fandom = '';
|
|
||||||
let filter = '';
|
|
||||||
let output = 'single';
|
|
||||||
|
|
||||||
const template = $(await renderExtensionTemplateAsync('attachments', 'fandom-scrape', {}));
|
|
||||||
template.find('input[name="fandomScrapeInput"]').on('input', function () {
|
|
||||||
fandom = String($(this).val());
|
|
||||||
});
|
|
||||||
template.find('input[name="fandomScrapeFilter"]').on('input', function () {
|
|
||||||
filter = String($(this).val());
|
|
||||||
});
|
|
||||||
template.find('input[name="fandomScrapeOutput"]').on('input', function () {
|
|
||||||
output = String($(this).val());
|
|
||||||
});
|
|
||||||
|
|
||||||
const confirm = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel' });
|
|
||||||
|
|
||||||
if (confirm !== POPUP_RESULT.AFFIRMATIVE) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!fandom) {
|
|
||||||
toastr.error('Fandom name is required');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const result = await fetch('/api/plugins/fandom/scrape', {
|
|
||||||
method: 'POST',
|
|
||||||
headers: getRequestHeaders(),
|
|
||||||
body: JSON.stringify({ fandom, filter }),
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!result.ok) {
|
|
||||||
const error = await result.text();
|
|
||||||
throw new Error(error);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get domain name part if it's a URL
|
|
||||||
try {
|
|
||||||
const url = new URL(fandom);
|
|
||||||
const fandomId = url.hostname.split('.')[0] || fandom;
|
|
||||||
fandom = fandomId;
|
|
||||||
} catch {
|
|
||||||
// Ignore
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = await result.json();
|
|
||||||
let numberOfAttachments;
|
|
||||||
|
|
||||||
if (output === 'multi') {
|
|
||||||
numberOfAttachments = data.length;
|
|
||||||
for (const attachment of data) {
|
|
||||||
const file = new File([String(attachment.content).trim()], `${String(attachment.title).trim()}.txt`, { type: 'text/plain' });
|
|
||||||
await uploadFileAttachmentToServer(file, target);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (output === 'single') {
|
|
||||||
numberOfAttachments = 1;
|
|
||||||
const combinedContent = data.map((a) => String(a.title).trim() + '\n\n' + String(a.content).trim()).join('\n\n\n\n');
|
|
||||||
const file = new File([combinedContent], `${fandom}.txt`, { type: 'text/plain' });
|
|
||||||
await uploadFileAttachmentToServer(file, target);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numberOfAttachments) {
|
|
||||||
toastr.success(`Scraped ${numberOfAttachments} attachments from ${fandom}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
callback();
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Fandom scraping failed', error);
|
|
||||||
toastr.error('Check browser console for details.', 'Fandom scraping failed');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Uploads a file attachment.
|
|
||||||
* @param {string} target File upload target
|
|
||||||
* @param {function} callback Callback function
|
|
||||||
*/
|
|
||||||
async function openFileUploader(target, callback) {
|
|
||||||
const fileInput = document.createElement('input');
|
|
||||||
fileInput.type = 'file';
|
|
||||||
fileInput.accept = '.txt, .md, .pdf, .html, .htm';
|
|
||||||
fileInput.onchange = async function () {
|
|
||||||
const file = fileInput.files[0];
|
|
||||||
if (!file) return;
|
|
||||||
|
|
||||||
await uploadFileAttachmentToServer(file, target);
|
|
||||||
|
|
||||||
callback();
|
|
||||||
};
|
|
||||||
|
|
||||||
fileInput.click();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Uploads a file attachment to the server.
|
* Uploads a file attachment to the server.
|
||||||
* @param {File} file File to upload
|
* @param {File} file File to upload
|
||||||
@@ -1008,24 +929,6 @@ export function getDataBankAttachments() {
|
|||||||
return [...globalAttachments, ...chatAttachments, ...characterAttachments];
|
return [...globalAttachments, ...chatAttachments, ...characterAttachments];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Probes the server to check if the Fandom plugin is available.
|
|
||||||
* @returns {Promise<boolean>} True if the plugin is available, false otherwise.
|
|
||||||
*/
|
|
||||||
async function isFandomPluginAvailable() {
|
|
||||||
try {
|
|
||||||
const result = await fetch('/api/plugins/fandom/probe', {
|
|
||||||
method: 'POST',
|
|
||||||
headers: getRequestHeaders(),
|
|
||||||
});
|
|
||||||
|
|
||||||
return result.ok;
|
|
||||||
} catch (error) {
|
|
||||||
console.debug('Could not probe Fandom plugin', error);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
jQuery(function () {
|
jQuery(function () {
|
||||||
$(document).on('click', '.mes_hide', async function () {
|
$(document).on('click', '.mes_hide', async function () {
|
||||||
const messageBlock = $(this).closest('.mes');
|
const messageBlock = $(this).closest('.mes');
|
||||||
|
@@ -39,6 +39,10 @@
|
|||||||
<span data-i18n="Global Attachments">
|
<span data-i18n="Global Attachments">
|
||||||
Global Attachments
|
Global Attachments
|
||||||
</span>
|
</span>
|
||||||
|
<div class="openActionModalButton menu_button menu_button_icon">
|
||||||
|
<i class="fa-solid fa-plus"></i>
|
||||||
|
<span data-i18n="Add">Add</span>
|
||||||
|
</div>
|
||||||
</h3>
|
</h3>
|
||||||
<small data-i18n="These files are available for all characters in all chats.">
|
<small data-i18n="These files are available for all characters in all chats.">
|
||||||
These files are available for all characters in all chats.
|
These files are available for all characters in all chats.
|
||||||
@@ -51,6 +55,10 @@
|
|||||||
<span data-i18n="Character Attachments">
|
<span data-i18n="Character Attachments">
|
||||||
Character Attachments
|
Character Attachments
|
||||||
</span>
|
</span>
|
||||||
|
<div class="openActionModalButton menu_button menu_button_icon">
|
||||||
|
<i class="fa-solid fa-plus"></i>
|
||||||
|
<span data-i18n="Add">Add</span>
|
||||||
|
</div>
|
||||||
</h3>
|
</h3>
|
||||||
<div class="flex-container flexFlowColumn">
|
<div class="flex-container flexFlowColumn">
|
||||||
<strong><small class="characterAttachmentsName"></small></strong>
|
<strong><small class="characterAttachmentsName"></small></strong>
|
||||||
@@ -66,6 +74,10 @@
|
|||||||
<span data-i18n="Chat Attachments">
|
<span data-i18n="Chat Attachments">
|
||||||
Chat Attachments
|
Chat Attachments
|
||||||
</span>
|
</span>
|
||||||
|
<div class="openActionModalButton menu_button menu_button_icon">
|
||||||
|
<i class="fa-solid fa-plus"></i>
|
||||||
|
<span data-i18n="Add">Add</span>
|
||||||
|
</div>
|
||||||
</h3>
|
</h3>
|
||||||
<div class="flex-container flexFlowColumn">
|
<div class="flex-container flexFlowColumn">
|
||||||
<strong><small class="chatAttachmentsName"></small></strong>
|
<strong><small class="chatAttachmentsName"></small></strong>
|
||||||
@@ -87,26 +99,12 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="actionButtonsTemplate template_element">
|
<div class="actionButtonTemplate">
|
||||||
<div class="actionButtons flex-container flexGap10">
|
<div class="actionButton list-group-item flex-container flexGap5" title="">
|
||||||
<div class="scrapeWebpageButton menu_button_icon menu_button" data-attachment-manager-target="" title="Download a page from the web.">
|
<i class="actionButtonIcon"></i>
|
||||||
<i class="fa-fw fa-solid fa-globe"></i>
|
<span class="actionButtonText"></span>
|
||||||
<span data-i18n="From Web">
|
|
||||||
From Web
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
<div class="scrapeFandomButton menu_button_icon menu_button" data-attachment-manager-target="" title="Download a page from the Fandom wiki.">
|
|
||||||
<i class="fa-fw fa-solid fa-fire"></i>
|
|
||||||
<span data-i18n="From Fandom">
|
|
||||||
From Fandom
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
<div class="uploadFileButton menu_button_icon menu_button" data-attachment-manager-target="" title="Upload a file from your computer.">
|
|
||||||
<i class="fa-fw fa-solid fa-upload"></i>
|
|
||||||
<span data-i18n="From File">
|
|
||||||
From File
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="actionButtonsModal popper-modal options-content list-group"></div>
|
||||||
</div>
|
</div>
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
<div data-i18n="Enter a web address to scrape:">
|
<div data-i18n="Enter web URLs to scrape (one per line):">
|
||||||
Enter a web address to scrape:
|
Enter web URLs to scrape (one per line):
|
||||||
</div>
|
</div>
|
||||||
|
280
public/scripts/scrapers.js
Normal file
280
public/scripts/scrapers.js
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
import { getRequestHeaders } from '../script.js';
|
||||||
|
import { renderExtensionTemplateAsync } from './extensions.js';
|
||||||
|
import { POPUP_RESULT, POPUP_TYPE, callGenericPopup } from './popup.js';
|
||||||
|
import { isValidUrl } from './utils.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @typedef {Object} Scraper
|
||||||
|
* @property {string} id
|
||||||
|
* @property {string} name
|
||||||
|
* @property {string} description
|
||||||
|
* @property {string} iconClass
|
||||||
|
* @property {() => Promise<boolean>} isAvailable
|
||||||
|
* @property {() => Promise<File[]>} scrape
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @typedef {Object} ScraperInfo
|
||||||
|
* @property {string} id
|
||||||
|
* @property {string} name
|
||||||
|
* @property {string} description
|
||||||
|
* @property {string} iconClass
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Registry of the scrapers that can add files to the Data Bank.
 *
 * Every method addresses the class by name instead of through `this`, so the static
 * methods keep working when they are passed around as detached function references.
 */
export class ScraperManager {
    /**
     * Registered scrapers, in registration order.
     * @type {Scraper[]}
     */
    static #scrapers = [];

    /**
     * Register a scraper to be used by the Data Bank.
     * Invalid objects (nullish, or without a `scrape` function) and duplicate IDs are
     * ignored with a console warning instead of throwing.
     * @param {Scraper} scraper Instance of a scraper to register
     */
    static registerDataBankScraper(scraper) {
        if (!scraper || typeof scraper.scrape !== 'function') {
            console.warn('Ignoring invalid Data Bank scraper registration', scraper);
            return;
        }

        if (ScraperManager.#scrapers.some(s => s.id === scraper.id)) {
            console.warn(`Scraper with ID ${scraper.id} already registered`);
            return;
        }

        ScraperManager.#scrapers.push(scraper);
    }

    /**
     * Gets a list of scrapers available for the Data Bank.
     * Only the descriptive fields are copied out; the scraper instances themselves are not exposed.
     * @returns {ScraperInfo[]} List of scrapers available for the Data Bank
     */
    static getDataBankScrapers() {
        return ScraperManager.#scrapers.map(({ id, name, description, iconClass }) => ({ id, name, description, iconClass }));
    }

    /**
     * Run a scraper to scrape data into the Data Bank.
     * Always returns a promise: it resolves to `undefined` when no scraper has the given ID,
     * and an exception thrown synchronously by the scraper surfaces as a rejection.
     * @param {string} scraperId ID of the scraper to run
     * @returns {Promise<File[]>} List of files scraped by the scraper
     */
    static async runDataBankScraper(scraperId) {
        const scraper = ScraperManager.#scrapers.find(s => s.id === scraperId);
        if (!scraper) {
            console.warn(`Scraper with ID ${scraperId} not found`);
            return;
        }
        return scraper.scrape();
    }
}
|
||||||
|
|
||||||
|
/**
 * Scrape data from a webpage.
 * Prompts for a list of URLs and downloads each of them through the server's
 * `/api/serpapi/visit` endpoint.
 * @implements {Scraper}
 */
class WebScraper {
    constructor() {
        this.id = 'web';
        this.name = 'Web';
        this.description = 'Download a page from the web.';
        this.iconClass = 'fa-solid fa-globe';
    }

    /**
     * Check if the scraper is available.
     * @returns {Promise<boolean>} Always resolves to true
     */
    isAvailable() {
        return Promise.resolve(true);
    }

    /**
     * Parse the title of an HTML file from a Blob.
     * Matches a `<title>` element with or without attributes, spanning any number of lines;
     * whitespace runs (including line breaks) are collapsed so the result is usable in a file name.
     * @param {Blob} blob Blob of the HTML file
     * @returns {Promise<string>} Title of the HTML file, or an empty string if there is none
     */
    async getTitleFromHtmlBlob(blob) {
        const text = await blob.text();
        const titleMatch = text.match(/<title(?:\s[^>]*)?>([\s\S]*?)<\/title>/i);
        return titleMatch ? titleMatch[1].replace(/\s+/g, ' ').trim() : '';
    }

    /**
     * Scrape file attachments from a webpage.
     * @returns {Promise<File[]>} File attachments scraped from the webpage;
     * resolves to `undefined` if the prompt was dismissed or contained no valid URL
     * @throws {Error} If the server responds with a non-OK status for any of the URLs
     */
    async scrape() {
        const template = $(await renderExtensionTemplateAsync('attachments', 'web-scrape', {}));
        const linksString = await callGenericPopup(template, POPUP_TYPE.INPUT, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel', rows: 4 });

        if (!linksString) {
            return;
        }

        // One URL per line; blank and malformed lines are dropped.
        const links = String(linksString).split('\n').map(l => l.trim()).filter(l => l && isValidUrl(l));

        if (links.length === 0) {
            toastr.error('Invalid URL');
            return;
        }

        const toast = toastr.info('Working, please wait...');

        try {
            const files = [];

            for (const link of links) {
                const result = await fetch('/api/serpapi/visit', {
                    method: 'POST',
                    headers: getRequestHeaders(),
                    body: JSON.stringify({ url: link }),
                });

                // Without this check the body of an error response would be stored as if it were the page.
                if (!result.ok) {
                    throw new Error(`Could not download ${link}: ${result.status} ${result.statusText}`);
                }

                const blob = await result.blob();
                const domain = new URL(link).hostname;
                const timestamp = Date.now();
                const title = await this.getTitleFromHtmlBlob(blob) || 'webpage';
                const file = new File([blob], `${title} - ${domain} - ${timestamp}.html`, { type: 'text/html' });
                files.push(file);
            }

            return files;
        } finally {
            // Dismiss the progress toast on success and on failure alike.
            toastr.clear(toast);
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Scrape data from a file selection.
 * Opens the browser's file picker and hands the chosen files over unchanged.
 * @implements {Scraper}
 */
class FileScraper {
    constructor() {
        this.id = 'file';
        this.name = 'File';
        this.description = 'Upload a file from your computer.';
        this.iconClass = 'fa-solid fa-upload';
    }

    /**
     * Check if the scraper is available.
     * @returns {Promise<boolean>} Always resolves to true
     */
    isAvailable() {
        return Promise.resolve(true);
    }

    /**
     * Scrape file attachments from a file.
     * @returns {Promise<File[]>} File attachments scraped from the files;
     * resolves to `undefined` when the picker is dismissed without a selection
     */
    async scrape() {
        return new Promise(resolve => {
            const fileInput = document.createElement('input');
            fileInput.type = 'file';
            fileInput.accept = '.txt, .md, .pdf, .html, .htm';
            fileInput.multiple = true;
            fileInput.addEventListener('change', () => resolve(Array.from(fileInput.files)), { once: true });
            // Without this the promise would stay pending forever when the dialog is cancelled.
            // NOTE(review): relies on the input `cancel` event; browsers that do not fire it keep the old behavior.
            fileInput.addEventListener('cancel', () => resolve(undefined), { once: true });
            fileInput.click();
        });
    }
}
|
||||||
|
|
||||||
|
/**
 * Scrape data from a Fandom wiki.
 * Uses the server's `/api/plugins/fandom/*` endpoints.
 * @implements {Scraper}
 */
class FandomScraper {
    constructor() {
        this.id = 'fandom';
        this.name = 'Fandom';
        this.description = 'Download a page from the Fandom wiki.';
        this.iconClass = 'fa-solid fa-fire';
    }

    /**
     * Check if the scraper is available by probing the Fandom plugin endpoint.
     * @returns {Promise<boolean>} True if the probe request succeeded, false otherwise
     */
    async isAvailable() {
        try {
            const result = await fetch('/api/plugins/fandom/probe', {
                method: 'POST',
                headers: getRequestHeaders(),
            });

            return result.ok;
        } catch (error) {
            console.debug('Could not probe Fandom plugin', error);
            return false;
        }
    }

    /**
     * Get the ID of a fandom from a URL or name.
     * For a URL the ID is the first label of the host name; anything that does not parse as a URL is returned as is.
     * @param {string} fandom URL or name of the fandom
     * @returns {string} ID of the fandom
     */
    getFandomId(fandom) {
        try {
            const url = new URL(fandom);
            return url.hostname.split('.')[0] || fandom;
        } catch {
            return fandom;
        }
    }

    /**
     * Scrape file attachments from a Fandom wiki.
     * Asks for the fandom, an optional filter and the output mode, then returns either
     * one file per scraped page ("multi") or a single combined file ("single").
     * @returns {Promise<File[]>} File attachments scraped from the wiki;
     * resolves to `undefined` if the dialog was cancelled or no fandom was entered
     * @throws {Error} If the scrape request does not succeed (the message is the response text)
     */
    async scrape() {
        let fandom = '';
        let filter = '';
        let output = 'single';

        const template = $(await renderExtensionTemplateAsync('attachments', 'fandom-scrape', {}));
        template.find('input[name="fandomScrapeInput"]').on('input', function () {
            fandom = String($(this).val()).trim();
        });
        template.find('input[name="fandomScrapeFilter"]').on('input', function () {
            filter = String($(this).val());
        });
        template.find('input[name="fandomScrapeOutput"]').on('input', function () {
            output = String($(this).val());
        });

        const confirm = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel' });

        if (confirm !== POPUP_RESULT.AFFIRMATIVE) {
            return;
        }

        if (!fandom) {
            toastr.error('Fandom name is required');
            return;
        }

        const toast = toastr.info('Working, please wait...');
        let data;

        try {
            const result = await fetch('/api/plugins/fandom/scrape', {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({ fandom, filter }),
            });

            if (!result.ok) {
                const error = await result.text();
                throw new Error(error);
            }

            data = await result.json();
        } finally {
            // Dismiss the progress toast on success and on failure alike.
            toastr.clear(toast);
        }

        if (output === 'multi') {
            return data.map((attachment) => new File([String(attachment.content).trim()], `${String(attachment.title).trim()}.txt`, { type: 'text/plain' }));
        }

        if (output === 'single') {
            const combinedContent = data.map((a) => String(a.title).trim() + '\n\n' + String(a.content).trim()).join('\n\n\n\n');
            // Name the file after the fandom ID so a pasted URL does not end up as the file name.
            const file = new File([combinedContent], `${this.getFandomId(fandom)}.txt`, { type: 'text/plain' });
            return [file];
        }

        return [];
    }
}
|
||||||
|
|
||||||
|
// Built-in scrapers. ScraperManager keeps them in registration order, so this list also fixes their listing order.
[FileScraper, WebScraper, FandomScraper].forEach((ScraperType) => {
    ScraperManager.registerDataBankScraper(new ScraperType());
});
|
@@ -709,7 +709,8 @@ body .panelControlBar {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#options,
|
#options,
|
||||||
#extensionsMenu {
|
#extensionsMenu,
|
||||||
|
.shadow_popup .popper-modal {
|
||||||
display: flex;
|
display: flex;
|
||||||
z-index: 29999;
|
z-index: 29999;
|
||||||
background-color: var(--SmartThemeBlurTintColor);
|
background-color: var(--SmartThemeBlurTintColor);
|
||||||
|
Reference in New Issue
Block a user