Mirror of https://github.com/SillyTavern/SillyTavern.git, synced 2025-06-05 21:59:27 +02:00
Add generic MediaWiki downloader
54 public/scripts/extensions/attachments/mediawiki-scrape.html Normal file
@@ -0,0 +1,54 @@
<div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeInput" data-i18n="Enter a base URL of the MediaWiki to scrape.">
            Enter a <strong>base URL</strong> of the MediaWiki to scrape.
        </label>
        <i data-i18n="Don't include the page name!">
            Don't include the page name!
        </i>
        <small>
            <span data-i18n="Examples:">Examples:</span>
            <code>https://streetcat.wiki/index.php</code>
            <span data-i18n="or">or</span>
            <code>https://tcrf.net</code>
        </small>
        <input type="text" id="scrapeInput" name="scrapeInput" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label for="scrapeFilter">
            Optional regex to pick the content by its title:
        </label>
        <small>
            <span data-i18n="Example:">Example:</span>
            <code>/Mr. (Fresh|Snack)/gi</code>
        </small>
        <input type="text" id="scrapeFilter" name="scrapeFilter" class="text_pole" placeholder="">
    </div>
    <div class="flex-container flexFlowColumn">
        <label>
            Output format:
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputSingle">
            <input id="scrapeOutputSingle" type="radio" name="scrapeOutput" value="single" checked>
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="Single file">
                    Single file
                </span>
                <small data-i18n="All articles will be concatenated into a single file.">
                    All articles will be concatenated into a single file.
                </small>
            </div>
        </label>
        <label class="checkbox_label justifyLeft" for="scrapeOutputMulti">
            <input id="scrapeOutputMulti" type="radio" name="scrapeOutput" value="multi">
            <div class="flex-container flexFlowColumn flexNoGap">
                <span data-i18n="File per article">
                    File per article
                </span>
                <small data-i18n="Not recommended. Each article will be saved as a separate file.">
                    Not recommended. Each article will be saved as a separate file.
                </small>
            </div>
        </label>
    </div>
</div>
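The filter field above takes a "/pattern/flags" literal. The actual parsing happens server-side in the Fandom/MediaWiki plugin; a minimal sketch of how such a string can be turned into a RegExp and matched against article titles might look like this (parseRegexFromString and the sample titles are illustrative, not part of this commit):

// Hypothetical sketch, not part of this commit: parse a "/pattern/flags"
// string like the example above and use it to pick articles by title.
function parseRegexFromString(input) {
    const match = input.match(/^\/(.+)\/([a-z]*)$/s);
    if (!match) {
        return null; // not in /pattern/flags form
    }
    try {
        return new RegExp(match[1], match[2]);
    } catch {
        return null; // invalid pattern or flags
    }
}

const regex = parseRegexFromString('/Mr. (Fresh|Snack)/gi');
const titles = ['Mr. Fresh', 'Mr. Snack', 'Mittens'];
console.log(titles.filter((title) => regex && title.match(regex)));
// ['Mr. Fresh', 'Mr. Snack']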
@@ -238,6 +238,91 @@ class FileScraper {
    }
}

class MediaWikiScraper {
    constructor() {
        this.id = 'mediawiki';
        this.name = 'MediaWiki';
        this.description = 'Download a page from a MediaWiki wiki.';
        this.iconClass = 'fa-brands fa-wikipedia-w';
        this.iconAvailable = true;
    }

    async isAvailable() {
        try {
            // The generic MediaWiki downloader is served by the same server plugin as the Fandom scraper.
            const result = await fetch('/api/plugins/fandom/probe-mediawiki', {
                method: 'POST',
                headers: getRequestHeaders(),
            });

            return result.ok;
        } catch (error) {
            console.debug('Could not probe Fandom/MediaWiki plugin', error);
            return false;
        }
    }

    async scrape() {
        let url = '';
        let filter = '';
        let output = 'single';

        const template = $(await renderExtensionTemplateAsync('attachments', 'mediawiki-scrape', {}));
        template.find('input[name="scrapeInput"]').on('input', function () {
            url = String($(this).val()).trim();
        });
        template.find('input[name="scrapeFilter"]').on('input', function () {
            filter = String($(this).val());
        });
        template.find('input[name="scrapeOutput"]').on('input', function () {
            output = String($(this).val());
        });

        const confirm = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel' });

        if (confirm !== POPUP_RESULT.AFFIRMATIVE) {
            return;
        }

        if (!url) {
            toastr.error('URL is required');
            return;
        }

        const toast = toastr.info('Working, please wait...');

        const result = await fetch('/api/plugins/fandom/scrape-mediawiki', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ url, filter }),
        });

        if (!result.ok) {
            const error = await result.text();
            throw new Error(error);
        }

        const data = await result.json();
        toastr.clear(toast);

        if (output === 'multi') {
            // One text file per scraped article, named after its title.
            const files = [];
            for (const attachment of data) {
                const file = new File([String(attachment.content).trim()], `${String(attachment.title).trim()}.txt`, { type: 'text/plain' });
                files.push(file);
            }
            return files;
        }

        if (output === 'single') {
            // Concatenate all articles: title, blank line, content, with blank lines between articles.
            const combinedContent = data.map((a) => String(a.title).trim() + '\n\n' + String(a.content).trim()).join('\n\n\n\n');
            const file = new File([combinedContent], `${url}.txt`, { type: 'text/plain' });
            return [file];
        }

        return [];
    }
}

/**
 * Scrape data from a Fandom wiki.
 * @implements {Scraper}
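Note that the 'single' branch above names the file after the raw URL, so the resulting file name carries the scheme and any slashes (e.g. "https://streetcat.wiki/index.php.txt"). If a friendlier name is wanted, one option is to use just the hostname; a small sketch of that idea, not part of the commit:

// Hypothetical alternative: derive the file name from the wiki's hostname,
// falling back to the raw string if URL parsing fails.
function fileNameFromUrl(url) {
    try {
        return `${new URL(url).hostname}.txt`;
    } catch {
        return `${url}.txt`;
    }
}

console.log(fileNameFromUrl('https://streetcat.wiki/index.php')); // 'streetcat.wiki.txt'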
@@ -419,5 +504,6 @@ class YouTubeScraper {
ScraperManager.registerDataBankScraper(new FileScraper());
ScraperManager.registerDataBankScraper(new Notepad());
ScraperManager.registerDataBankScraper(new WebScraper());
ScraperManager.registerDataBankScraper(new MediaWikiScraper());
ScraperManager.registerDataBankScraper(new FandomScraper());
ScraperManager.registerDataBankScraper(new YouTubeScraper());
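Registration works because every object passed to ScraperManager.registerDataBankScraper exposes the same surface. Inferred from MediaWikiScraper above (the real Scraper typedef lives elsewhere in scrapers.js and may differ in detail), the implied shape is roughly:

/**
 * @typedef {object} Scraper
 * @property {string} id - Unique identifier of the scraper.
 * @property {string} name - Display name shown in the Data Bank menu.
 * @property {string} description - One-line description shown to the user.
 * @property {string} iconClass - Font Awesome class used for the menu icon.
 * @property {boolean} iconAvailable - Whether iconClass can be rendered.
 * @property {() => Promise<boolean>} isAvailable - Probes for the backing plugin.
 * @property {() => Promise<File[]|undefined>} scrape - Runs the UI flow and returns files.
 */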