From 55647f0ad0db0dec778077affacff87346c29a00 Mon Sep 17 00:00:00 2001 From: octospacc Date: Wed, 13 Jul 2022 00:14:37 +0200 Subject: [PATCH] Gemlog format fix; Cringe XML sitemap generation --- README.md | 2 +- Source/Build.py | 23 ++++++++++++++++++++--- Source/Modules/Feed.py | 18 +++++++++++------- Source/Modules/Gemini.py | 15 ++++++++++++++- 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 9d00f09..c2f54c3 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Feel free to experiment with all of this stuff! - [ ] Pug support for base templates and page side parts - [ ] Differential recompile (to optimize resource waste on non-ephemeral servers) - [ ] Hot-recompile (for website development) -- [ ] XML sitemap generation +- [x] XML sitemap generation - [x] Atom + RSS feed generation for posts - [x] Generation of website page tree in left sidebar - [x] Generation of titles in right sidebar with clickable links diff --git a/Source/Build.py b/Source/Build.py index f89ddaf..a5ccc20 100755 --- a/Source/Build.py +++ b/Source/Build.py @@ -472,7 +472,7 @@ def GetConfMenu(Conf): print(Menu) return Menu -def Main(Args, FeedEntries): +def Main(Args, FeedEntries, SitemapOut): HavePages, HavePosts = False, False SiteConf = LoadConf('Site.ini') #SiteMenu = GetConfMenu(SiteConf) @@ -541,6 +541,19 @@ def Main(Args, FeedEntries): SiteDomain=SiteDomain, MaxEntries=FeedEntries, Lang=SiteLang, + FullMap=False, + Minify=True if Args.Minify and Args.Minify not in ('False', 'None') else False) + + if SitemapOut: + print("[I] Generating Sitemap") + MakeFeed( + Pages=Pages, + SiteName=SiteName, + SiteTagline=SiteTagline, + SiteDomain=SiteDomain, + MaxEntries=FeedEntries, + Lang=SiteLang, + FullMap=True, Minify=True if Args.Minify and Args.Minify not in ('False', 'None') else False) if ActivityPub and MastodonURL and MastodonToken and SiteDomain: @@ -593,6 +606,7 @@ if __name__ == '__main__': Parser.add_argument('--GemtextOut', type=bool) Parser.add_argument('--GemtextHeader', type=str) Parser.add_argument('--SiteTagline', type=str) + Parser.add_argument('--SitemapOut', type=bool) Parser.add_argument('--FeedEntries', type=int) Parser.add_argument('--FolderRoots', type=str) Parser.add_argument('--ContextParts', type=str) @@ -606,10 +620,13 @@ if __name__ == '__main__': import lxml from Modules.Feed import * FeedEntries = Args.FeedEntries if Args.FeedEntries or Args.FeedEntries == 0 else 10 + SitemapOut = True if Args.SitemapOut else False except: - print("[E] Can't load the Atom/RSS feed libraries. Their generation is disabled. Make sure the 'lxml' library is installed.") + print("[E] Can't load the XML libraries. XML Feeds and Sitemaps generation is disabled. Make sure the 'lxml' library is installed.") FeedEntries = 0 + SitemapOut = False Main( Args=Args, - FeedEntries=FeedEntries) + FeedEntries=FeedEntries, + SitemapOut=SitemapOut) diff --git a/Source/Modules/Feed.py b/Source/Modules/Feed.py index b4dd7d9..bcfc5fd 100644 --- a/Source/Modules/Feed.py +++ b/Source/Modules/Feed.py @@ -12,7 +12,7 @@ from Libs.feedgen.feed import FeedGenerator from Modules.Utils import * -def MakeFeed(Pages, SiteName, SiteTagline, SiteDomain, MaxEntries, Lang, Minify=False): +def MakeFeed(Pages, SiteName, SiteTagline, SiteDomain, MaxEntries, Lang, FullMap=False, Minify=False): Feed = FeedGenerator() Link = SiteDomain if SiteDomain else ' ' Feed.id(Link) @@ -24,21 +24,22 @@ def MakeFeed(Pages, SiteName, SiteTagline, SiteDomain, MaxEntries, Lang, Minify= Feed.language(Lang) DoPages = [] + if FullMap: + MaxEntries = 50000 # Sitemap standard limit for e in Pages: - if MaxEntries != 0 and e[3]['Type'] == 'Post': + if MaxEntries != 0 and (FullMap or (not FullMap and e[3]['Type'] == 'Post')): DoPages += [e] MaxEntries -= 1 DoPages.reverse() for File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image in DoPages: - if Meta['Type'] == 'Post': + if FullMap or (not FullMap and Meta['Type'] == 'Post'): Entry = Feed.add_entry() File = '{}.html'.format(StripExt(File)) Content = ReadFile('public/'+File) Link = SiteDomain + '/' + File if SiteDomain else ' ' CreatedOn = GetFullDate(Meta['CreatedOn']) EditedOn = GetFullDate(Meta['EditedOn']) - Entry.id(Link) Entry.title(Meta['Title'] if Meta['Title'] else ' ') Entry.description(Description) @@ -49,6 +50,9 @@ def MakeFeed(Pages, SiteName, SiteTagline, SiteDomain, MaxEntries, Lang, Minify= EditedOn = EditedOn if EditedOn else CreatedOn if CreatedOn and not EditedOn else '1970-01-01T00:00+00:00' Entry.updated(EditedOn) - os.mkdir('public/feed') - Feed.atom_file('public/feed/atom.xml', pretty=(not Minify)) - Feed.rss_file('public/feed/rss.xml', pretty=(not Minify)) + if FullMap: + Feed.atom_file('public/sitemap.xml', pretty=(not Minify)) + else: + os.mkdir('public/feed') + Feed.atom_file('public/feed/atom.xml', pretty=(not Minify)) + Feed.rss_file('public/feed/rss.xml', pretty=(not Minify)) diff --git a/Source/Modules/Gemini.py b/Source/Modules/Gemini.py index f368ab4..abc5509 100644 --- a/Source/Modules/Gemini.py +++ b/Source/Modules/Gemini.py @@ -31,6 +31,15 @@ def StripAttrs(HTML): t.attrs = {} return str(Soup) +def FixGemlogDateLine(Line): + if len(Line) >= 2 and Line[0] == '[' and Line[1].isdigit(): + Line = Line[1:] + else: + Words = Line.split(' ') + if len(Words) >= 2 and len(Words[1]) >= 2 and Words[1][0] == '[' and Words[1][1].isdigit(): + Line = Words[0] + '\n' + Words[1][1:] + ' ' + ' '.join(Words[2:]) + return Line + def GemtextCompileList(Pages, Header=''): Cmd = '' for File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image in Pages: @@ -45,7 +54,11 @@ def GemtextCompileList(Pages, Header=''): os.system(Cmd) for File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image in Pages: Dst = 'public.gmi/{}.gmi'.format(StripExt(File)) - WriteFile(Dst, Header + ReadFile(Dst)) + Gemtext = '' + for Line in ReadFile(Dst).splitlines(): + Line = FixGemlogDateLine(Line) + Gemtext += Line + '\n' + WriteFile(Dst, Header + Gemtext) def FindEarliest(Str, Items): Pos, Item = 0, ''