Mirror of https://github.com/SillyTavern/SillyTavern.git, synced 2025-06-05 21:59:27 +02:00
Add generic MediaWiki downloader
54 public/scripts/extensions/attachments/mediawiki-scrape.html Normal file
@@ -0,0 +1,54 @@
<div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeInput" data-i18n="Enter a base URL of the MediaWiki to scrape.">
            Enter a <strong>base URL</strong> of the MediaWiki to scrape.
        </label>
        <i data-i18n="Don't include the page name!">
            Don't include the page name!
        </i>
        <small>
            <span data-i18n="Examples:">Examples:</span>
            <code>https://streetcat.wiki/index.php</code>
            <span data-i18n="or">or</span>
            <code>https://tcrf.net</code>
        </small>
        <input type="text" id="scrapeInput" name="scrapeInput" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeFilter">
            Optional regex to pick the content by its title:
        </label>
        <small>
            <span data-i18n="Example:">Example:</span>
            <code>/Mr. (Fresh|Snack)/gi</code>
        </small>
        <input type="text" id="scrapeFilter" name="scrapeFilter" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label>
            Output format:
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputSingle">
            <input id="scrapeOutputSingle" type="radio" name="scrapeOutput" value="single" checked>
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="Single file">
                    Single file
                </span>
                <small data-i18n="All articles will be concatenated into a single file.">
                    All articles will be concatenated into a single file.
                </small>
            </div>
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputMulti">
            <input id="scrapeOutputMulti" type="radio" name="scrapeOutput" value="multi">
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="File per article">
                    File per article
                </span>
                <small data-i18n="Not recommended. Each article will be saved as a separate file.">
                    Not recommended. Each article will be saved as a separate file.
                </small>
            </div>
        </label>
    </div>
</div>
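The filter field above takes a "/pattern/flags" literal. The actual parsing happens server-side in the Fandom/MediaWiki plugin; a minimal sketch of how such a string can be turned into a RegExp and matched against article titles might look like this (parseRegexFromString and the sample titles are illustrative, not part of this commit):

// Hypothetical sketch, not part of this commit: parse a "/pattern/flags"
// string like the example above and use it to pick articles by title.
function parseRegexFromString(input) {
    const match = input.match(/^\/(.+)\/([a-z]*)$/s);
    if (!match) {
        return null; // not in /pattern/flags form
    }
    try {
        return new RegExp(match[1], match[2]);
    } catch {
        return null; // invalid pattern or flags
    }
}

const regex = parseRegexFromString('/Mr. (Fresh|Snack)/gi');
const titles = ['Mr. Fresh', 'Mr. Snack', 'Mittens'];
console.log(titles.filter((title) => regex && title.match(regex)));
// ['Mr. Fresh', 'Mr. Snack']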
@@ -238,6 +238,91 @@ class FileScraper {
    }
}

class MediaWikiScraper {
    constructor() {
        this.id = 'mediawiki';
        this.name = 'MediaWiki';
        this.description = 'Download a page from a MediaWiki wiki.';
        this.iconClass = 'fa-brands fa-wikipedia-w';
        this.iconAvailable = true;
    }

    async isAvailable() {
        try {
            // The generic MediaWiki downloader is served by the same server plugin as the Fandom scraper.
            const result = await fetch('/api/plugins/fandom/probe-mediawiki', {
                method: 'POST',
                headers: getRequestHeaders(),
            });

            return result.ok;
        } catch (error) {
            console.debug('Could not probe Fandom/MediaWiki plugin', error);
            return false;
        }
    }

    async scrape() {
        let url = '';
        let filter = '';
        let output = 'single';

        const template = $(await renderExtensionTemplateAsync('attachments', 'mediawiki-scrape', {}));
        template.find('input[name="scrapeInput"]').on('input', function () {
            url = String($(this).val()).trim();
        });
        template.find('input[name="scrapeFilter"]').on('input', function () {
            filter = String($(this).val());
        });
        template.find('input[name="scrapeOutput"]').on('input', function () {
            output = String($(this).val());
        });

        const confirm = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel' });

        if (confirm !== POPUP_RESULT.AFFIRMATIVE) {
            return;
        }

        if (!url) {
            toastr.error('URL is required');
            return;
        }

        const toast = toastr.info('Working, please wait...');

        const result = await fetch('/api/plugins/fandom/scrape-mediawiki', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ url, filter }),
        });

        if (!result.ok) {
            const error = await result.text();
            throw new Error(error);
        }

        const data = await result.json();
        toastr.clear(toast);

        if (output === 'multi') {
            // One text file per scraped article, named after its title.
            const files = [];
            for (const attachment of data) {
                const file = new File([String(attachment.content).trim()], `${String(attachment.title).trim()}.txt`, { type: 'text/plain' });
                files.push(file);
            }
            return files;
        }

        if (output === 'single') {
            // Concatenate all articles: title, blank line, content, with blank lines between articles.
            const combinedContent = data.map((a) => String(a.title).trim() + '\n\n' + String(a.content).trim()).join('\n\n\n\n');
            const file = new File([combinedContent], `${url}.txt`, { type: 'text/plain' });
            return [file];
        }

        return [];
    }
}

/**
 * Scrape data from a Fandom wiki.
 * @implements {Scraper}
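Note that the 'single' branch above names the file after the raw URL, so the resulting file name carries the scheme and any slashes (e.g. "https://streetcat.wiki/index.php.txt"). If a friendlier name is wanted, one option is to use just the hostname; a small sketch of that idea, not part of the commit:

// Hypothetical alternative: derive the file name from the wiki's hostname,
// falling back to the raw string if URL parsing fails.
function fileNameFromUrl(url) {
    try {
        return `${new URL(url).hostname}.txt`;
    } catch {
        return `${url}.txt`;
    }
}

console.log(fileNameFromUrl('https://streetcat.wiki/index.php')); // 'streetcat.wiki.txt'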
@@ -419,5 +504,6 @@ class YouTubeScraper {
ScraperManager.registerDataBankScraper(new FileScraper());
ScraperManager.registerDataBankScraper(new Notepad());
ScraperManager.registerDataBankScraper(new WebScraper());
ScraperManager.registerDataBankScraper(new MediaWikiScraper());
ScraperManager.registerDataBankScraper(new FandomScraper());
ScraperManager.registerDataBankScraper(new YouTubeScraper());
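Registration works because every object passed to ScraperManager.registerDataBankScraper exposes the same surface. Inferred from MediaWikiScraper above (the real Scraper typedef lives elsewhere in scrapers.js and may differ in detail), the implied shape is roughly:

/**
 * @typedef {object} Scraper
 * @property {string} id - Unique identifier of the scraper.
 * @property {string} name - Display name shown in the Data Bank menu.
 * @property {string} description - One-line description shown to the user.
 * @property {string} iconClass - Font Awesome class used for the menu icon.
 * @property {boolean} iconAvailable - Whether iconClass can be rendered.
 * @property {() => Promise<boolean>} isAvailable - Probes for the backing plugin.
 * @property {() => Promise<File[]|undefined>} scrape - Runs the UI flow and returns files.
 */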