Mirror of https://github.com/SillyTavern/SillyTavern.git, synced 2025-06-05 21:59:27 +02:00
Add generic mediawiki downloader
public/scripts/extensions/attachments/mediawiki-scrape.html (new file, 54 lines)

@@ -0,0 +1,54 @@
<div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeInput" data-i18n="Enter a base URL of the MediaWiki to scrape.">
            Enter a <strong>base URL</strong> of the MediaWiki to scrape.
        </label>
        <i data-i18n="Don't include the page name!">
            Don't include the page name!
        </i>
        <small>
            <span data-i18n="Examples:">Examples:</span>
            <code>https://streetcat.wiki/index.php</code>
            <span data-i18n="or">or</span>
            <code>https://tcrf.net</code>
        </small>
        <input type="text" id="scrapeInput" name="scrapeInput" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeFilter">
            Optional regex to pick the content by its title:
        </label>
        <small>
            <span data-i18n="Example:">Example:</span>
            <code>/Mr. (Fresh|Snack)/gi</code>
        </small>
        <input type="text" id="scrapeFilter" name="scrapeFilter" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label>
            Output format:
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputSingle">
            <input id="scrapeOutputSingle" type="radio" name="scrapeOutput" value="single" checked>
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="Single file">
                    Single file
                </span>
                <small data-i18n="All articles will be concatenated into a single file.">
                    All articles will be concatenated into a single file.
                </small>
            </div>
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputMulti">
            <input id="scrapeOutputMulti" type="radio" name="scrapeOutput" value="multi">
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="File per article">
                    File per article
                </span>
                <small data-i18n="Not recommended. Each article will be saved as a separate file.">
                    Not recommended. Each article will be saved as a separate file.
                </small>
            </div>
        </label>
    </div>
</div>
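
Note on the filter field: the template advertises a /pattern/flags string (e.g. /Mr. (Fresh|Snack)/gi), and the scraper below forwards that string to the server verbatim. How the Fandom/MediaWiki plugin actually parses it is not part of this diff; a minimal sketch of one plausible approach, using a hypothetical parseRegexFilter helper, is:

// Sketch only: turn a "/pattern/flags" string, as typed into the
// scrapeFilter field, into a RegExp. Not part of this commit.
function parseRegexFilter(filter) {
    const match = filter.match(/^\/(.*)\/([a-z]*)$/s);
    if (!match) {
        return null; // not in /pattern/flags form
    }
    try {
        return new RegExp(match[1], match[2]);
    } catch {
        return null; // invalid pattern or flags
    }
}

// Example: keep only articles whose title matches "Mr. Fresh" or "Mr. Snack".
const titleFilter = parseRegexFilter('/Mr. (Fresh|Snack)/gi');
console.log(titleFilter && titleFilter.test('Mr. Fresh')); // true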

@@ -238,6 +238,91 @@ class FileScraper {
    }
}

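/**
 * Scrape data from a MediaWiki wiki.
 * @implements {Scraper}
 */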
class MediaWikiScraper {
    constructor() {
        this.id = 'mediawiki';
        this.name = 'MediaWiki';
        this.description = 'Download a page from a MediaWiki wiki.';
        this.iconClass = 'fa-brands fa-wikipedia-w';
        this.iconAvailable = true;
    }

    async isAvailable() {
        try {
            const result = await fetch('/api/plugins/fandom/probe-mediawiki', {
                method: 'POST',
                headers: getRequestHeaders(),
            });

            return result.ok;
        } catch (error) {
            console.debug('Could not probe Fandom/MediaWiki plugin', error);
            return false;
        }
    }

    async scrape() {
        let url = '';
        let filter = '';
        let output = 'single';

        const template = $(await renderExtensionTemplateAsync('attachments', 'mediawiki-scrape', {}));
        template.find('input[name="scrapeInput"]').on('input', function () {
            url = String($(this).val()).trim();
        });
        template.find('input[name="scrapeFilter"]').on('input', function () {
            filter = String($(this).val());
        });
        template.find('input[name="scrapeOutput"]').on('input', function () {
            output = String($(this).val());
        });

        const confirm = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel' });

        if (confirm !== POPUP_RESULT.AFFIRMATIVE) {
            return;
        }

        if (!url) {
            toastr.error('URL is required');
            return;
        }

        const toast = toastr.info('Working, please wait...');

        const result = await fetch('/api/plugins/fandom/scrape-mediawiki', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ url, filter }),
        });

        if (!result.ok) {
            const error = await result.text();
            throw new Error(error);
        }

        const data = await result.json();
        toastr.clear(toast);

        if (output === 'multi') {
            const files = [];
            for (const attachment of data) {
                const file = new File([String(attachment.content).trim()], `${String(attachment.title).trim()}.txt`, { type: 'text/plain' });
                files.push(file);
            }
            return files;
        }

        if (output === 'single') {
            const combinedContent = data.map((a) => String(a.title).trim() + '\n\n' + String(a.content).trim()).join('\n\n\n\n');
            const file = new File([combinedContent], `${url}.txt`, { type: 'text/plain' });
            return [file];
        }

        return [];
    }
}
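
From the way scrape() consumes the response, the /api/plugins/fandom/scrape-mediawiki endpoint is assumed to return a JSON array of { title, content } objects, one per article. A sketch of that contract as a JSDoc typedef (illustrative only; the plugin's actual response schema is not shown in this diff):

/**
 * @typedef {object} ScrapedArticle
 * @property {string} title - Article title; used as the file name in 'multi' mode.
 * @property {string} content - Article text; concatenated in 'single' mode.
 */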

/**
 * Scrape data from a Fandom wiki.
 * @implements {Scraper}
@@ -419,5 +504,6 @@ class YouTubeScraper {

ScraperManager.registerDataBankScraper(new FileScraper());
ScraperManager.registerDataBankScraper(new Notepad());
ScraperManager.registerDataBankScraper(new WebScraper());
ScraperManager.registerDataBankScraper(new MediaWikiScraper());
ScraperManager.registerDataBankScraper(new FandomScraper());
ScraperManager.registerDataBankScraper(new YouTubeScraper());
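
Every scraper handed to ScraperManager.registerDataBankScraper() appears to follow the same duck-typed shape as MediaWikiScraper above. A sketch of that assumed contract (the real Scraper interface is defined elsewhere in this file and is not part of the diff):

/**
 * @typedef {object} Scraper
 * @property {string} id
 * @property {string} name
 * @property {string} description
 * @property {string} iconClass
 * @property {boolean} iconAvailable
 * @property {() => Promise<boolean>} isAvailable
 * @property {() => Promise<File[]|undefined>} scrape
 */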