sitoctt/Scripts/ListedDownsync.js

282 lines
9.4 KiB
JavaScript
Raw Permalink Normal View History

2022-12-12 23:40:40 +01:00
#!/usr/bin/env -S node --experimental-fetch
2023-05-07 16:35:05 +02:00
require('./Lib/Syncers.js').importAll();
2022-12-12 23:40:40 +01:00
const JSDOM = require('jsdom').JSDOM;
const Html2Markdown = require('./Lib/Html2Markdown.js').Transform;
2022-12-12 23:40:40 +01:00
// ---- Configuration ----

const BlogURL = 'https://listed.to/@u8'; // Full base URL of the Listed blog (any server)
const SiteName = 'sitoctt'; // Used to build the `.Mirror-<SiteName>` link selectors
//const DefaultMode = 'Include' // 'Include' or 'Exclude' | Not implemented
const PostsFileDate = true; // Append dates (YYYY-MM-DD) to posts file names

// Maps lowercased source metadata keys to frontmatter keys (consumed by MakeMetaStr):
// - string: the key is renamed to that string;
// - array:  the value is split on spaces into a list, and the key is renamed to the
//           first array item (an empty array keeps the original key name);
// - false:  the key is dropped from the output.
const FrontmatterNew = {
	createdon: "Date",
	updatedon: "Lastmod",
	editedon: "Lastmod",
	image: "Featured_Image",
	categories: [],
	urls: ["Aliases"],
	htmltitle: false,
};

// String replacements applied to the post HTML before the Markdown conversion.
let Replacements = { // Format: { ReplaceWithString: [ToFindString] }
	"<h2>{{% i18n notes-refs %}}</h2>": "<h2>🏷️ Note e Riferimenti</h2>",
	//'<div class="footnotes">': ['<div class="footnotes"><hr>', '<div class="footnotes">\n<hr>'],
	//'"><a class="footnote-ref" href="#fn': '"><a href="#fn',
	'<div class="footnotes"><span class="footnotes"><hr></span>': '<div class="footnotes"><hr>',
	' href="{{< assetsRoot >}}/': ' href="https://sitoctt-assets.octt.eu.org/',
	' src="{{< assetsRoot >}}/': ' src="https://sitoctt-assets.octt.eu.org/',
	// TODO: Fix anchor rels
};

// Same format as Replacements, but applied after the Markdown conversion.
let LateReplacements = {
	"{{< assetsRoot >}}": "{{&lt; assetsRoot &gt;}}",
	'<script async="async" defer="defer" src="https://liminalgici.spacc.eu.org/embed.js"></script><iframe title="Pixelfed Post Embed" src="https://liminalgici.spacc.eu.org/': '<iframe title="Pixelfed Post Embed" src="https://liminalgici.spacc.eu.org/',
};

const TestURL = 'https://listed.to/p/hDaMhJ2ts7'; // Single post fetched by Test()

// Selectors for the marker elements recognised inside a post body. The "+ :Where(...)"
// part also matches the Div/Pre/Code element that immediately follows a marker.
const MetadataBlockSelect = '.MetadataBlock, .MetadataBlock + :Where(Div, Pre, Code)';
const ReplacementsBlockSelect = '.ReplacementsBlock, .ReplacementsBlock + :Where(Div, Pre, Code)';
const ExtractCodeBlockSelect = '.ExtractCodeBlock, .ExtractCodeBlock + :Where(Div, Pre, Code)';
const DeleteElementBlockSelect = '.DeleteElementBlock';
2022-12-12 23:40:40 +01:00
/**
 * Read a text file, tolerating its absence.
 *
 * NOTE(review): `Fs` is not declared in this file; presumably it is exposed by
 * the `./Lib/Syncers.js` `importAll()` call - confirm.
 *
 * @param {string} Path - Path of the file to read.
 * @returns {string|undefined} The file content decoded as UTF-8, or undefined
 *   when nothing exists at Path.
 */
const TryReadFileSync = Path => {
	if (Fs.existsSync(Path)) {
		return Fs.readFileSync(Path, 'utf8');
	}
};
2022-12-12 23:40:40 +01:00
/**
 * Create a directory (and any missing parents) unless something already
 * exists at that path.
 *
 * NOTE(review): `Fs` is not declared in this file; presumably it is exposed by
 * the `./Lib/Syncers.js` `importAll()` call - confirm.
 *
 * @param {string} Path - Directory path to create.
 * @returns {string|undefined} Whatever `Fs.mkdirSync` returns when the
 *   directory is created; undefined when the path already existed.
 */
const TryMkdirSync = Path => {
	if (!Fs.existsSync(Path)) {
		return Fs.mkdirSync(Path, {recursive: true});
	}
};
/**
 * Strip the scheme and host from an absolute http(s) URL.
 *
 * @param {string} URL - Absolute http(s) URL, or any other string.
 * @returns {string} Everything after the third "/" for http(s) URLs (no
 *   leading slash); any other input is returned unchanged.
 */
const GetPath = URL => {
	const HasHttpScheme = ['http://', 'https://'].some(Scheme => URL.startsWith(Scheme));
	// "https://host/a/b" splits into ["https:", "", "host", "a", "b"].
	return HasHttpScheme ? URL.split('/').slice(3).join('/') : URL;
};
/**
 * Serialise a DOM fragment to an HTML string.
 *
 * The fragment is appended to the body of a fresh, empty JSDOM document and
 * that body's innerHTML is read back.
 *
 * @param {DocumentFragment} Frag - Fragment to serialise; it is appended to
 *   the temporary document's body.
 * @returns {string} The trimmed HTML of the fragment.
 */
const GetFragHTML = Frag => {
	const { body: Body } = new JSDOM('<body></body>').window.document;
	Body.appendChild(Frag);
	const Html = Body.innerHTML;
	return Html.trim();
};
/**
 * Extract the bare name of the first token of a selector list, e.g.
 * ".Foo, .Foo + div" gives "Foo".
 *
 * @param {string} Select - Selector string.
 * @returns {string} The text before the first space, with every "." and ","
 *   removed.
 */
const CSSFirstTokenSelector = Select => {
	// Dots and commas are never spaces, so they can be stripped after the split.
	const [FirstToken] = Select.trim().split(' ');
	return FirstToken.replaceAll('.', '').replaceAll(',', '');
};
/**
 * Look for a "// % Downsync = <value>" (or "// % Downsync: <value>") flag in
 * the body of an already existing destination file.
 *
 * The whole body is lowercased before it is scanned, so matching is
 * case-insensitive and a returned path is lowercase too.
 *
 * @param {string} [Body] - Text to scan; a missing or empty body means "no flag".
 * @returns {boolean|string} false when the flag is set to one of
 *   false/disabled/off/no/0; the rest of the line from its first "/" after the
 *   comment marker when the flag value starts with "/"; true otherwise.
 */
const CheckDownsync = Body => {
	if (!Body) {
		return true;
	}
	const DisablingValues = ['false', 'disabled', 'off', 'no', '0'];
	for (const RawLine of Body.trim().toLowerCase().split('\n')) {
		const Line = RawLine.trim();
		// Turn every whitespace character into a plain space and pad the
		// separators, so that "//\t%Downsync=No"-like spellings tokenize
		// the same way as "// % Downsync = No".
		const CheckLine = Line.replace(/\s/g, ' ').replaceAll(':', ' : ').replaceAll('=', ' = ');
		if (!CheckLine.startsWith('// ')) {
			continue;
		}
		const Tokens = CheckLine.split(' ').filter(Token => Token !== '');
		const IsFlag = Tokens[1] === '%' && Tokens[2] === 'downsync' && [':', '='].includes(Tokens[3]);
		const Value = Tokens[4];
		// A flag without a value ("// % Downsync =") is ignored instead of
		// crashing on an undefined token.
		if (!IsFlag || Value === undefined) {
			continue;
		}
		if (DisablingValues.includes(Value)) {
			return false;
		}
		if (Value.startsWith('/')) {
			// Start the search at index 2 to skip the leading "//".
			return Line.substring(Line.indexOf('/', 2));
		}
	}
	return true;
};
/**
 * Find the element that marks a post as mirrored on this site.
 *
 * @param {ParentNode} Dom - Node to search (anything with `querySelector`).
 * @returns {Element|null} The first `.Mirror-<SiteName>` element or, failing
 *   that, the first `.Mirror-<SiteName>-Include` element; null when neither
 *   exists.
 */
const GetLinkElem = Dom => {
	return Dom.querySelector(`.Mirror-${SiteName}`)
		|| Dom.querySelector(`.Mirror-${SiteName}-Include`);
};
/**
 * Build the "+++"-delimited frontmatter block of a post.
 *
 * Every key of Post.Meta and then of Post.Macros is emitted as a
 * `Key = Value` line, after being renamed, turned into a list or dropped as
 * directed by FrontmatterNew (looked up by lowercased key).
 *
 * Value formatting:
 * - lists become `[ "a", "b" ]`;
 * - strings that are numeric once their "-" are removed (numbers, dates like
 *   2024-01-02) are emitted bare; any other string is JSON-quoted;
 * - finite numbers and booleans are emitted bare;
 * - null/undefined values are skipped.
 *
 * @param {{Meta: Object, Macros: Object}} Post - Post whose metadata is serialised.
 * @returns {string} The frontmatter block, including both "+++" fences.
 */
const MakeMetaStr = Post => {
	// TODO: should this handle bools properly?
	let Str = '';
	for (const Type of ['Meta', 'Macros']) {
		for (const [Key, RawValue] of Object.entries(Post[Type])) {
			const LowerKey = Key.toLowerCase();
			// Own-property check: a key such as "constructor" must not pick up
			// something inherited from Object.prototype.
			let KeyNew = Object.hasOwn(FrontmatterNew, LowerKey) ? FrontmatterNew[LowerKey] : undefined;
			if (KeyNew === false) {
				continue;
			}
			// There is nothing meaningful to write for a missing value.
			if (RawValue == null) {
				continue;
			}
			let Value = RawValue;
			if (Array.isArray(KeyNew)) {
				KeyNew = KeyNew[0];
				Value = String(Value).split(' ');
			}
			KeyNew ||= Key;
			switch (KeyNew.toLowerCase()) {
				case 'featured_image':
					if (typeof Value === 'string') {
						Value = Value.replace('[staticoso:CustomPath:Assets]', '@').replace('https://sitoctt-assets.octt.eu.org', '@');
					}
					break;
				case 'categories': {
					// Drop the first category when it just repeats one of the
					// segments of the Downsync path.
					const Downsync = Post[Type].Downsync;
					const First = Value[0];
					if (typeof Downsync === 'string' && typeof First === 'string'
						&& Downsync.toLowerCase().split('/').includes(First.toLowerCase())) {
						Value = Value.slice(1);
					}
					break;
				}
				default:
					break;
			}
			let Rendered;
			if (Array.isArray(Value)) {
				Rendered = '[ ' + Value.map(Item => JSON.stringify(Item)).join(', ') + ' ]';
			} else if (typeof Value === 'string') {
				const Digits = Value.replaceAll('-', '');
				// Number('') and Number(' ') are 0, so blank strings need an
				// explicit check or they would be written without quotes.
				const IsBare = Digits.trim() !== '' && !Number.isNaN(Number(Digits));
				Rendered = IsBare ? Value : JSON.stringify(Value);
			} else if (typeof Value === 'boolean' || (typeof Value === 'number' && Number.isFinite(Value))) {
				Rendered = String(Value);
			} else {
				Rendered = JSON.stringify(String(Value));
			}
			Str += `${KeyNew} = ${Rendered}\n`;
		}
	}
	return `+++\n${Str}+++\n`;
};
/**
 * Apply a replacement table to a string.
 *
 * @param {string} Content - Text to transform.
 * @param {Object} Table - { ReplaceWithString: ToFindString | [ToFindString, ...] }.
 * @returns {string} Content with every occurrence of each find-string replaced.
 */
const ApplyReplacements = (Content, Table) => {
	let Result = Content;
	for (const [To, From] of Object.entries(Table)) {
		const FromList = Array.isArray(From) ? From : [From];
		for (const Needle of FromList) {
			// An empty needle would make replaceAll insert To between every character.
			if (typeof Needle === 'string' && Needle !== '') {
				Result = Result.replaceAll(Needle, To);
			}
		}
	}
	return Result;
};

/**
 * Convert one Listed post to a Markdown file of the site (or print it).
 *
 * Steps, in order:
 * 1. read and remove the .MetadataBlock elements;
 * 2. find the destination path, from the site mirror link or from the
 *    "Downsync" metadata, and skip the post when there is none;
 * 3. read and remove the .ReplacementsBlock elements;
 * 4. skip the post when the existing destination file disables downsync;
 * 5. apply the replacements, process the .DeleteElementBlock and
 *    .ExtractCodeBlock markers, convert to Markdown, apply LateReplacements;
 * 6. write the file or print the content.
 *
 * NOTE(review): `ParseMeta` and `Fs` are not declared in this file; presumably
 * they are exposed by the `./Lib/Syncers.js` `importAll()` call - confirm.
 *
 * @param {Object} PostSrc - Post object from Listed; `title`, `created_at`
 *   and `rendered_text` are read.
 * @param {string} Output - 'file' writes the destination file, 'stdout' prints
 *   the converted content; any other value produces no output.
 * @returns {undefined}
 */
const HandlePost = (PostSrc, Output) => {
	let LinkPath;
	const Post = {'Meta': {}, 'Macros': {}};
	Post.Meta.Title = PostSrc.title;
	Post.Meta.CreatedOn = PostSrc.created_at.split('T')[0];
	Post.Content = PostSrc.rendered_text;
	console.log(`[I] => [${Post.Meta.CreatedOn}] ${Post.Meta.Title}`);
	let ContentDom = JSDOM.fragment(Post.Content);

	// Handle .MetadataBlock elements
	Array.from(ContentDom.querySelectorAll(MetadataBlockSelect)).forEach((Elem) => {
		if (Elem.textContent) {
			Post.RawMeta = Elem.textContent;
			const Meta = ParseMeta(Post.RawMeta);
			Object.assign(Post.Meta, Meta.Meta);
			Object.assign(Post.Macros, Meta.Macros);
			Post.IsToml = Meta.IsToml;
		}
		Elem.outerHTML = '';
	});

	// NOTE: Maybe would be better to first do string replacements?
	const LinkElem = GetLinkElem(ContentDom);
	if (LinkElem) {
		// Re-parse the element so that an href on the element itself is found
		// too (querySelector only looks at descendants).
		const HrefElem = JSDOM.fragment(LinkElem.outerHTML).querySelector('[href]');
		if (!HrefElem) {
			console.log(`[W] : Mirror link element has no href; Skipping!`);
			return;
		}
		LinkPath = GetPath(HrefElem.href);
		LinkElem.outerHTML = '';
	} else {
		const Check = Post.Meta.Downsync;
		if (typeof Check === 'string' && Check.startsWith('/')) {
			LinkPath = Check.substring(1);
		} else {
			console.log(`[I] : No Downsync flag set with URL in source body; Skipping!`);
			return;
		}
	}

	// Handle .ReplacementsBlock elements: Add replacements to do to the default ones or override them.
	// They are collected in a per-post copy, so one post's overrides never
	// leak into the posts processed after it.
	const PostReplacements = { ...Replacements };
	Array.from(ContentDom.querySelectorAll(ReplacementsBlockSelect)).forEach((Elem) => {
		let Text = Elem.textContent.trim();
		if (Text) {
			if (!(Text.startsWith('{') && Text.endsWith('}'))) {
				Text = `{${Text}}`;
			}
			try {
				Object.assign(PostReplacements, JSON.parse(Text));
			} catch(e) {
				console.log(`[W] : Problem parsing JSON in a ReplacementsBlock; Ignoring!`);
			}
		}
		Elem.outerHTML = '';
	});
	Post.Content = GetFragHTML(ContentDom);

	// NOTE(review): LinkPath comes from the post itself and is not checked for
	// ".." segments - confirm that post content is always trusted.
	const PathParts = LinkPath.split('/');
	const PathFile = PathParts[PathParts.length - 1];
	const PathDir = './content/' + PathParts.slice(0, -1).join('/');
	const DatePrefix = PostsFileDate ? Post.Meta.CreatedOn + '-' : '';
	// The last 4 characters of the file name are replaced with "md"
	// (presumably turning "name.html" into "name.md").
	const FinalFilePath = `${PathDir}/${DatePrefix}${PathFile.substring(0, (PathFile.length - 4))}md`;
	if (!CheckDownsync(TryReadFileSync(FinalFilePath))) {
		console.log(`[I] : Downsync disabled in destination body; Skipping!`);
		return;
	}

	// Do string replacements
	Post.Content = ApplyReplacements(Post.Content, PostReplacements);
	ContentDom = JSDOM.fragment(Post.Content);

	// Handle .DeleteElementBlock elements: Elements that must be visible on Listed but deleted here.
	Array.from(ContentDom.querySelectorAll(DeleteElementBlockSelect)).forEach((Elem) => {
		if (!Elem.textContent) {
			// An empty marker also deletes the element that follows it, if any.
			const Next = Elem.nextElementSibling;
			if (Next) {
				Next.outerHTML = '';
			}
		}
		Elem.outerHTML = '';
	});

	// Handle .ExtractCodeBlock elements: Allow for text to be treated as plain on Listed, and then extracted here.
	const ExtractMarkerClass = CSSFirstTokenSelector(ExtractCodeBlockSelect);
	Array.from(ContentDom.querySelectorAll(ExtractCodeBlockSelect)).forEach((Elem) => {
		if (Elem.classList.contains(ExtractMarkerClass)) {
			// Remove the ExtractCodeBlock upper-marker
			Elem.outerHTML = '';
		} else {
			// Extract the marker's text as raw HTML
			Elem.outerHTML = Elem.textContent;
		}
	});
	Post.Content = GetFragHTML(ContentDom);
	Post.Content = Html2Markdown(Post.Content);
	Post.Content = ApplyReplacements(Post.Content, LateReplacements);

	if (Output === 'file') {
		TryMkdirSync(PathDir);
		Fs.writeFileSync(FinalFilePath, `\
${Post.IsToml ? Post.RawMeta : MakeMetaStr(Post)}
<!-- Autogenerated by ListedDownsync.js. Do not edit (unless also set "% Downsync = False") - it would be overwritten. -->
${Post.Content}
`);
	} else if (Output === 'stdout') {
		console.log(Post.Content);
	}
};
/**
 * Download the list of all posts of the blog and downsync each one to a file.
 *
 * Any failure (network error, unexpected HTTP status, missing or malformed
 * data, error while handling a post) is logged and makes the process exit
 * with a non-zero code; posts after a failing one are not processed.
 *
 * @returns {Promise<void>} Settles once every post has been handled or the
 *   failure has been reported; it never rejects.
 */
const Main = _ => {
	console.log('[I] Downloading...');
	const ListURL = `${BlogURL}/all`;
	return fetch(ListURL)
		.then((Response) => {
			if (!Response.ok) {
				throw new Error(`Unexpected HTTP status ${Response.status} for ${ListURL}`);
			}
			return Response.text();
		})
		.then((Data) => {
			console.log('[I] Parsing...');
			// The posts are embedded as JSON in a script element of the page.
			const Elem = JSDOM.fragment(Data).querySelector('script[data-component-name="AuthorAll"]');
			if (!Elem) {
				throw new Error(`No "AuthorAll" data script found in ${ListURL}`);
			}
			const Posts = JSON.parse(Elem.textContent).posts;
			for (const Post of Posts) {
				HandlePost(Post, 'file');
			}
		})
		.catch((Err) => {
			console.error('[E] Downsync failed:', Err);
			process.exitCode = 1;
		});
};
/**
 * Download the single post at TestURL and print its converted content
 * instead of writing a file.
 *
 * Any failure (network error, unexpected HTTP status, missing or malformed
 * data, error while handling the post) is logged and makes the process exit
 * with a non-zero code.
 *
 * @returns {Promise<void>} Settles once the post has been handled or the
 *   failure has been reported; it never rejects.
 */
const Test = _ => {
	console.log('[I] Testing...');
	return fetch(TestURL)
		.then((Response) => {
			if (!Response.ok) {
				throw new Error(`Unexpected HTTP status ${Response.status} for ${TestURL}`);
			}
			return Response.text();
		})
		.then((Data) => {
			// The post is embedded as JSON in a script element of the page.
			const Elem = JSDOM.fragment(Data).querySelector('script[data-component-name="PostShow"]');
			if (!Elem) {
				throw new Error(`No "PostShow" data script found in ${TestURL}`);
			}
			const Post = JSON.parse(Elem.textContent).post;
			HandlePost(Post, 'stdout');
		})
		.catch((Err) => {
			console.error('[E] Test failed:', Err);
			process.exitCode = 1;
		});
};
2022-12-12 23:40:40 +01:00
// Entry point: downsync every post of the blog. To try the conversion on the
// single TestURL post instead (printed, not written), swap the two calls below.
Main();
//Test();