Add generic mediawiki downloader

Cohee
2024-05-05 22:26:13 +03:00
parent 181b5aff97
commit 55d31a976f
2 changed files with 140 additions and 0 deletions

View File

@ -0,0 +1,54 @@
<div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeInput" data-i18n="Enter a base URL of the MediaWiki to scrape.">
            Enter a <strong>base URL</strong> of the MediaWiki to scrape.
        </label>
        <i data-i18n="Don't include the page name!">
            Don't include the page name!
        </i>
        <small>
            <span data-i18n="Examples:">Examples:</span>
            <code>https://streetcat.wiki/index.php</code>
            <span data-i18n="or">or</span>
            <code>https://tcrf.net</code>
        </small>
        <input type="text" id="scrapeInput" name="scrapeInput" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeFilter">
            Optional regex to pick the content by its title:
        </label>
        <small>
            <span data-i18n="Example:">Example:</span>
            <code>/Mr. (Fresh|Snack)/gi</code>
        </small>
        <input type="text" id="scrapeFilter" name="scrapeFilter" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label>
            Output format:
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputSingle">
            <input id="scrapeOutputSingle" type="radio" name="scrapeOutput" value="single" checked>
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="Single file">
                    Single file
                </span>
                <small data-i18n="All articles will be concatenated into a single file.">
                    All articles will be concatenated into a single file.
                </small>
            </div>
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputMulti">
            <input id="scrapeOutputMulti" type="radio" name="scrapeOutput" value="multi">
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="File per article">
                    File per article
                </span>
                <small data-i18n="Not recommended. Each article will be saved as a separate file.">
                    Not recommended. Each article will be saved as a separate file.
                </small>
            </div>
        </label>
    </div>
</div>

View File

@ -238,6 +238,91 @@ class FileScraper {
}
}

/**
 * Scrape data from a MediaWiki wiki.
 * @implements {Scraper}
 */
class MediaWikiScraper {
    constructor() {
        this.id = 'mediawiki';
        this.name = 'MediaWiki';
        this.description = 'Download a page from a MediaWiki wiki.';
        this.iconClass = 'fa-brands fa-wikipedia-w';
        this.iconAvailable = true;
    }

    /**
     * Check if the Fandom/MediaWiki server plugin is available.
     * @returns {Promise<boolean>} Whether the plugin responded to the probe request
     */
    async isAvailable() {
        try {
            const result = await fetch('/api/plugins/fandom/probe-mediawiki', {
                method: 'POST',
                headers: getRequestHeaders(),
            });

            return result.ok;
        } catch (error) {
            console.debug('Could not probe Fandom/MediaWiki plugin', error);
            return false;
        }
    }

    /**
     * Scrape articles from a MediaWiki wiki into file attachments.
     * @returns {Promise<File[]|undefined>} Scraped files, or undefined if the user cancels
     */
    async scrape() {
        let url = '';
        let filter = '';
        let output = 'single';

        const template = $(await renderExtensionTemplateAsync('attachments', 'mediawiki-scrape', {}));
        template.find('input[name="scrapeInput"]').on('input', function () {
            url = String($(this).val()).trim();
        });
        template.find('input[name="scrapeFilter"]').on('input', function () {
            filter = String($(this).val());
        });
        template.find('input[name="scrapeOutput"]').on('input', function () {
            output = String($(this).val());
        });

        const confirm = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel' });

        if (confirm !== POPUP_RESULT.AFFIRMATIVE) {
            return;
        }

        if (!url) {
            toastr.error('URL is required');
            return;
        }

        const toast = toastr.info('Working, please wait...');

        const result = await fetch('/api/plugins/fandom/scrape-mediawiki', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ url, filter }),
        });

        if (!result.ok) {
            toastr.clear(toast);
            const error = await result.text();
            throw new Error(error);
        }

        const data = await result.json();
        toastr.clear(toast);

        if (output === 'multi') {
            const files = [];

            for (const attachment of data) {
                const file = new File([String(attachment.content).trim()], `${String(attachment.title).trim()}.txt`, { type: 'text/plain' });
                files.push(file);
            }

            return files;
        }

        if (output === 'single') {
            const combinedContent = data.map((a) => String(a.title).trim() + '\n\n' + String(a.content).trim()).join('\n\n\n\n');
            const file = new File([combinedContent], `${url}.txt`, { type: 'text/plain' });
            return [file];
        }

        return [];
    }
}
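
/**
 * Assumed shape of one item in the array returned by the Fandom/MediaWiki
 * server plugin's scrape-mediawiki endpoint, inferred from how scrape() reads
 * the response above; the actual plugin may return additional fields.
 * @typedef {object} MediaWikiArticle
 * @property {string} title - Article title, used as the file name in 'multi' output mode
 * @property {string} content - Plain-text article content
 */
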
/**
* Scrape data from a Fandom wiki.
* @implements {Scraper}
@ -419,5 +504,6 @@ class YouTubeScraper {
ScraperManager.registerDataBankScraper(new FileScraper());
ScraperManager.registerDataBankScraper(new Notepad());
ScraperManager.registerDataBankScraper(new WebScraper());
ScraperManager.registerDataBankScraper(new MediaWikiScraper());
ScraperManager.registerDataBankScraper(new FandomScraper());
ScraperManager.registerDataBankScraper(new YouTubeScraper());