mirror of
https://gitlab.com/octtspacc/sitoctt
synced 2025-06-05 22:09:20 +02:00
Agg. misti; Nuovo articolo: 🐕 Epicyon, piattaforma del Fediverso durata 2 giorni
This commit is contained in:
@ -7,7 +7,7 @@ const BlogURL = 'https://listed.to/@u8'; // Full base URL of the Listed blog (an
|
||||
const SiteName = 'sitoctt';
|
||||
//const DefaultMode = 'Include' // 'Include' or 'Exclude' | Not implemented
|
||||
const PostsFileDate = true; // Append dates (YYYY-MM-DD) to posts file names
|
||||
const Replacements = { // Format: { ReplaceWithString: [ToFindString] }
|
||||
let Replacements = { // Format: { ReplaceWithString: [ToFindString] }
|
||||
"<h2>[:HNotesRefsHTML:]</h2>": "<h2>🏷️ Note e Riferimenti</h2>",
|
||||
'<div class="footnotes">': ['<div class="footnotes"><hr>', '<div class="footnotes">\n<hr>'],
|
||||
'"><a class="footnote-ref" href="#fn': '"><a href="#fn',
|
||||
@ -15,9 +15,12 @@ const Replacements = { // Format: { ReplaceWithString: [ToFindString] }
|
||||
" src=\"[staticoso:CustomPath:Assets]/": " src=\"https://sitoctt-assets.octt.eu.org/",
|
||||
// TODO: Fix anchor rels
|
||||
};
|
||||
// URL of a single Listed post; Test() fetches it and prints the processed content to stdout.
const TestURL = 'https://listed.to/p/hDaMhJ2ts7';
|
||||
|
||||
// CSS selectors for the special marker blocks that HandlePost looks for in a post's HTML.
// The first three match the marker element itself plus a Div/Pre/Code element that
// immediately follows a marker (adjacent-sibling combinator `+`).
const MetadataBlockSelect = '.MetadataBlock, .MetadataBlock + :Where(Div, Pre, Code)';
|
||||
// Blocks whose text is parsed as JSON and merged into Replacements.
const ReplacementsBlockSelect = '.ReplacementsBlock, .ReplacementsBlock + :Where(Div, Pre, Code)';
|
||||
// Blocks whose text is shown as plain text on Listed and extracted when processed here.
const ExtractCodeBlockSelect = '.ExtractCodeBlock, .ExtractCodeBlock + :Where(Div, Pre, Code)';
|
||||
// Marker for elements that stay visible on Listed but are removed from the output here.
const DeleteElementBlockSelect = '.DeleteElementBlock';
|
||||
|
||||
const TryReadFileSync = Path => {
|
||||
if (fs.existsSync(Path)) {
|
||||
@ -44,6 +47,10 @@ const GetFragHTML = Frag => {
|
||||
return Dom.window.document.body.innerHTML.trim();
|
||||
};
|
||||
|
||||
/**
 * Return the bare name of the first token in a CSS selector string.
 *
 * The selector is trimmed and split on any run of whitespace; the first
 * token then has every '.' and ',' removed. For example,
 * '.ExtractCodeBlock, .ExtractCodeBlock + :Where(Div, Pre, Code)'
 * yields 'ExtractCodeBlock'.
 *
 * @param {string} Select - CSS selector (or selector list).
 * @returns {string} First token without dots and commas ('' for an empty selector).
 */
const CSSFirstTokenSelector = Select => {
	// Split before stripping so that tokens separated by a newline or tab
	// (not just a single space) are never merged into the result.
	const FirstToken = Select.trim().split(/\s+/)[0];
	return FirstToken.replaceAll('.', '').replaceAll(',', '');
};
|
||||
|
||||
const CheckDownsync = Body => {
|
||||
if (Body) {
|
||||
const Lines = Body.trim().toLowerCase().split('\n');
|
||||
@ -124,7 +131,7 @@ const MakeMetaStr = Post => {
|
||||
return Str;
|
||||
};
|
||||
|
||||
const HandlePost = PostSrc => {
|
||||
const HandlePost = (PostSrc, Output) => {
|
||||
let ContentDom, LinkPath;
|
||||
let Post = {'Meta': {}, 'Macros': {}};
|
||||
|
||||
@ -135,7 +142,7 @@ const HandlePost = PostSrc => {
|
||||
|
||||
ContentDom = JSDOM.fragment(Post.Content);
|
||||
|
||||
// Handle MetadataBlock elements
|
||||
// Handle .MetadataBlock elements
|
||||
let MetadataBlocks = ContentDom.querySelectorAll(MetadataBlockSelect);
|
||||
for (let i=0; i<MetadataBlocks.length; i++) {
|
||||
const Elem = MetadataBlocks[i];
|
||||
@ -162,6 +169,24 @@ const HandlePost = PostSrc => {
|
||||
};
|
||||
};
|
||||
|
||||
// Handle .ReplacementsBlock elements: Add replacements to do to the default ones or override them.
|
||||
let ReplBlocks = ContentDom.querySelectorAll(ReplacementsBlockSelect);
|
||||
for (let i=0; i<ReplBlocks.length; i++) {
|
||||
const Elem = ReplBlocks[i];
|
||||
let Text = Elem.textContent.trim();
|
||||
if (Text) {
|
||||
if (!(Text.startsWith('{') && Text.endsWith('}'))) {
|
||||
Text = `{${Text}}`;
|
||||
};
|
||||
try {
|
||||
Replacements = Object.assign(Replacements, JSON.parse(Text));
|
||||
} catch(e) {
|
||||
console.log(`[W] : Problem parsing JSON in a ReplacementsBlock; Ignoring!`);
|
||||
};
|
||||
};
|
||||
ReplBlocks[i].outerHTML = '';
|
||||
};
|
||||
|
||||
Post.Content = GetFragHTML(ContentDom);
|
||||
|
||||
const PathFile = LinkPath.split('/').slice(-1)[0];
|
||||
@ -189,12 +214,21 @@ const HandlePost = PostSrc => {
|
||||
|
||||
ContentDom = JSDOM.fragment(Post.Content);
|
||||
|
||||
// Handle ExtractCodeBlock elements
|
||||
// TODO: Opposite of extract blocks? (Allowing some HTML to remain on Listed but get deleted from here)
|
||||
// Handle .DeleteElementBlock elements: Elements that must be visible on Listed but deleted here.
|
||||
let DelElemBlocks = ContentDom.querySelectorAll(DeleteElementBlockSelect);
|
||||
for (let i=0; i<DelElemBlocks.length; i++) {
|
||||
const Elem = DelElemBlocks[i];
|
||||
if (!Elem.textContent) {
|
||||
DelElemBlocks[i].nextElementSibling.outerHTML = '';
|
||||
};
|
||||
DelElemBlocks[i].outerHTML = '';
|
||||
};
|
||||
|
||||
// Handle .ExtractCodeBlock elements: Allow for text to be treated as plain on Listed, and then extracted here.
|
||||
let ExtCodeBlocks = ContentDom.querySelectorAll(ExtractCodeBlockSelect);
|
||||
for (let i=0; i<ExtCodeBlocks.length; i++) {
|
||||
const Elem = ExtCodeBlocks[i];
|
||||
const Find = ExtractCodeBlockSelect.trim().replaceAll('.', '').replaceAll(',', '').split(' ')[0];
|
||||
const Find = CSSFirstTokenSelector(ExtractCodeBlockSelect);
|
||||
if (Array.from(Elem.classList).includes(Find)) {
|
||||
ExtCodeBlocks[i].outerHTML = ''; // Remove the ExtractCodeBlock upper-marker
|
||||
} else {
|
||||
@ -204,13 +238,17 @@ const HandlePost = PostSrc => {
|
||||
|
||||
Post.Content = GetFragHTML(ContentDom);
|
||||
|
||||
TryMkdirSync(PathDir);
|
||||
fs.writeFileSync(FinalFilePath, `\
|
||||
if (Output == 'file') {
|
||||
TryMkdirSync(PathDir);
|
||||
fs.writeFileSync(FinalFilePath, `\
|
||||
${MakeMetaStr(Post)}
|
||||
<h1>${Post.Meta.HTMLTitle ? Post.Meta.HTMLTitle : Post.Meta.Title}</h1>
|
||||
|
||||
${Post.Content}
|
||||
`);
|
||||
} else if (Output == 'stdout') {
|
||||
console.log(Post.Content);
|
||||
};
|
||||
};
|
||||
|
||||
const Main = _ => {
|
||||
@ -220,9 +258,19 @@ const Main = _ => {
|
||||
const Elem = JSDOM.fragment(Data).querySelector('script[data-component-name="AuthorAll"]');
|
||||
const Posts = JSON.parse(Elem.childNodes[0].data).posts;
|
||||
for (let i=0; i<Posts.length; i++) {
|
||||
HandlePost(Posts[i]);
|
||||
HandlePost(Posts[i], 'file');
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
// Fetch the single post at TestURL, run it through HandlePost and print the
// processed content to stdout instead of writing files.
// Failures (network error, missing or malformed embedded post data) are logged
// and reported through a non-zero exit code rather than left as an unhandled
// promise rejection.
const Test = _ => {
	console.log('[I] Testing...');
	fetch(TestURL)
		.then(Response => Response.text())
		.then(Data => {
			// The post data is embedded as JSON inside this script element of the page.
			const Elem = JSDOM.fragment(Data).querySelector('script[data-component-name="PostShow"]');
			if (!Elem) {
				throw new Error(`No PostShow data found at ${TestURL}`);
			};
			const Post = JSON.parse(Elem.childNodes[0].data).post;
			HandlePost(Post, 'stdout');
		})
		.catch(Err => {
			console.error(`[E] Test failed: ${Err.message}`);
			process.exitCode = 1;
		});
};
|
||||
|
||||
Main();
|
||||
//Test();
|
||||
|
@ -2,9 +2,13 @@
|
||||
import hashlib, os, shutil, sys, time
|
||||
from pathlib import Path
|
||||
|
||||
GoogleLangs = ['af', 'sq', 'am', 'ar', 'hy', 'as', 'ay', 'az', 'bm', 'eu', 'be', 'bn', 'bho', 'bs', 'bg', 'ca', 'ceb', 'ny', 'zh', 'zh_HANT', 'co', 'hr', 'cs', 'da', 'dv', 'doi', 'nl', 'en', 'eo', 'et', 'ee', 'tl', 'fi', 'fr', 'fy', 'gl', 'ka', 'de', 'el', 'gn', 'gu', 'ht', 'ha', 'haw', 'iw', 'hi', 'hmn', 'hu', 'is', 'ig', 'ilo', 'id', 'ga', 'it', 'ja', 'jw', 'kn', 'kk', 'km', 'rw', 'gom', 'ko', 'kri', 'ku', 'ckb', 'ky', 'lo', 'la', 'lv', 'ln', 'lt', 'lg', 'lb', 'mk', 'mai', 'mg', 'ms', 'ml', 'mt', 'mi', 'mr', 'mni-Mtei', 'lus', 'mn', 'my', 'ne', 'no', 'or', 'om', 'ps', 'fa', 'pl', 'pt', 'pa', 'qu', 'ro', 'ru', 'sm', 'sa', 'gd', 'nso', 'sr', 'st', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'es', 'su', 'sw', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'ti', 'ts', 'tr', 'tk', 'ak', 'uk', 'ur', 'ug', 'uz', 'vi', 'cy', 'xh', 'yi', 'yo', 'zu'] # {LingvaURL}/api/v1/languages
|
||||
DeepLLangs = ['et', 'ja', 'lt', 'lv', 'de', 'hu', 'ru', 'zh', 'ro', 'da', 'it', 'es', 'nl', 'fr', 'sk', 'sl', 'pt', 'en', 'sv', 'fi', 'pl', 'el', 'bg', 'cs'] # All from output of `deepl --help`
|
||||
UserAgent = 'sitoctt:Translate.py'
|
||||
|
||||
Engine = 'Google' # 'Google' or 'DeepL'
|
||||
SourceLang = 'it'
|
||||
DestLangs = ['de', 'en', 'es', 'fr', 'it', 'jp', 'ko', 'pt', 'ru', 'zh'] #['et', 'ja', 'lt', 'lv', 'de', 'hu', 'ru', 'zh', 'ro', 'da', 'it', 'es', 'nl', 'fr', 'sk', 'sl', 'pt', 'en', 'sv', 'fi', 'pl', 'el', 'bg', 'cs'] # All from output of `deepl --help`
|
||||
DestLangs = GoogleLangs
|
||||
|
||||
# With shutil.copytree copy only folder struct, no files; https://stackoverflow.com/a/15664273
|
||||
def IgnoreFiles(Dir, Files):
|
||||
@ -29,7 +33,7 @@ def StrReverse(Str):
|
||||
def TryTranslate(Text):
|
||||
try:
|
||||
if Engine.lower() == 'google':
|
||||
return json.loads(urlopen(Request(f'{LingvaInstance}/api/v1/{SourceLang}/{Lang}/{URLParse.quote(Text, safe="")}')).read())["translation"]
|
||||
return json.loads(urlopen(Request(f'{LingvaURL}/api/v1/{SourceLang}/{Lang}/{URLParse.quote(Text, safe="")}', headers={'User-Agent':UserAgent})).read())["translation"]
|
||||
elif Engine.lower() == 'deepl':
|
||||
return Translate.translate(Text)
|
||||
except Exception as e:
|
||||
@ -37,7 +41,7 @@ def TryTranslate(Text):
|
||||
return False
|
||||
|
||||
if Engine.lower() == 'google':
|
||||
LingvaInstance = sys.argv[1]
|
||||
LingvaURL = sys.argv[1]
|
||||
import json
|
||||
from urllib import parse as URLParse
|
||||
from urllib.request import urlopen, Request
|
||||
|
Reference in New Issue
Block a user