Add generic mediawiki downloader

Cohee
2024-05-05 22:26:13 +03:00
parent 181b5aff97
commit 55d31a976f
2 changed files with 140 additions and 0 deletions

View File

@ -0,0 +1,54 @@
<div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeInput" data-i18n="Enter a base URL of the MediaWiki to scrape.">
            Enter a <strong>base URL</strong> of the MediaWiki to scrape.
        </label>
        <i data-i18n="Don't include the page name!">
            Don't include the page name!
        </i>
        <small>
            <span data-i18n="Examples:">Examples:</span>
            <code>https://streetcat.wiki/index.php</code>
            <span data-i18n="or">or</span>
            <code>https://tcrf.net</code>
        </small>
        <input type="text" id="scrapeInput" name="scrapeInput" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeFilter">
            Optional regex to pick the content by its title:
        </label>
        <small>
            <span data-i18n="Example:">Example:</span>
            <code>/Mr. (Fresh|Snack)/gi</code>
        </small>
        <input type="text" id="scrapeFilter" name="scrapeFilter" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label>
            Output format:
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputSingle">
            <input id="scrapeOutputSingle" type="radio" name="scrapeOutput" value="single" checked>
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="Single file">
                    Single file
                </span>
                <small data-i18n="All articles will be concatenated into a single file.">
                    All articles will be concatenated into a single file.
                </small>
            </div>
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputMulti">
            <input id="scrapeOutputMulti" type="radio" name="scrapeOutput" value="multi">
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="File per article">
                    File per article
                </span>
                <small data-i18n="Not recommended. Each article will be saved as a separate file.">
                    Not recommended. Each article will be saved as a separate file.
                </small>
            </div>
        </label>
    </div>
</div>

View File

@ -238,6 +238,91 @@ class FileScraper {
}
}

/**
 * Scrape data from a MediaWiki wiki.
 * @implements {Scraper}
 */
class MediaWikiScraper {
    constructor() {
        this.id = 'mediawiki';
        this.name = 'MediaWiki';
        this.description = 'Download a page from a MediaWiki wiki.';
        this.iconClass = 'fa-brands fa-wikipedia-w';
        this.iconAvailable = true;
    }

    /**
     * Check if the Fandom/MediaWiki server plugin is available.
     * @returns {Promise<boolean>} Whether the plugin responded to the probe request
     */
    async isAvailable() {
        try {
            const result = await fetch('/api/plugins/fandom/probe-mediawiki', {
                method: 'POST',
                headers: getRequestHeaders(),
            });

            return result.ok;
        } catch (error) {
            console.debug('Could not probe Fandom/MediaWiki plugin', error);
            return false;
        }
    }

    /**
     * Scrape articles from a MediaWiki wiki into file attachments.
     * @returns {Promise<File[]|undefined>} Scraped files, or undefined if the user cancels
     */
    async scrape() {
        let url = '';
        let filter = '';
        let output = 'single';

        const template = $(await renderExtensionTemplateAsync('attachments', 'mediawiki-scrape', {}));
        template.find('input[name="scrapeInput"]').on('input', function () {
            url = String($(this).val()).trim();
        });
        template.find('input[name="scrapeFilter"]').on('input', function () {
            filter = String($(this).val());
        });
        template.find('input[name="scrapeOutput"]').on('input', function () {
            output = String($(this).val());
        });

        const confirm = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel' });

        if (confirm !== POPUP_RESULT.AFFIRMATIVE) {
            return;
        }

        if (!url) {
            toastr.error('URL is required');
            return;
        }

        const toast = toastr.info('Working, please wait...');

        const result = await fetch('/api/plugins/fandom/scrape-mediawiki', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ url, filter }),
        });

        if (!result.ok) {
            toastr.clear(toast);
            const error = await result.text();
            throw new Error(error);
        }

        const data = await result.json();
        toastr.clear(toast);

        if (output === 'multi') {
            const files = [];

            for (const attachment of data) {
                const file = new File([String(attachment.content).trim()], `${String(attachment.title).trim()}.txt`, { type: 'text/plain' });
                files.push(file);
            }

            return files;
        }

        if (output === 'single') {
            const combinedContent = data.map((a) => String(a.title).trim() + '\n\n' + String(a.content).trim()).join('\n\n\n\n');
            const file = new File([combinedContent], `${url}.txt`, { type: 'text/plain' });
            return [file];
        }

        return [];
    }
}
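
/**
 * Assumed shape of one item in the array returned by the Fandom/MediaWiki
 * server plugin's scrape-mediawiki endpoint, inferred from how scrape() reads
 * the response above; the actual plugin may return additional fields.
 * @typedef {object} MediaWikiArticle
 * @property {string} title - Article title, used as the file name in 'multi' output mode
 * @property {string} content - Plain-text article content
 */
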
/**
* Scrape data from a Fandom wiki.
* @implements {Scraper}
@ -419,5 +504,6 @@ class YouTubeScraper {
ScraperManager.registerDataBankScraper(new FileScraper());
ScraperManager.registerDataBankScraper(new Notepad());
ScraperManager.registerDataBankScraper(new WebScraper());
ScraperManager.registerDataBankScraper(new MediaWikiScraper());
ScraperManager.registerDataBankScraper(new FandomScraper());
ScraperManager.registerDataBankScraper(new YouTubeScraper());