From 784d8e4732b0d41994a0973759e9ab9d8a454526 Mon Sep 17 00:00:00 2001 From: octospacc Date: Fri, 10 Mar 2023 17:52:40 +0100 Subject: [PATCH] Minor changes; Mastodon posting message with better description + tags --- App/Source/Modules/ActivityPub.py | 82 +++++++++++++++++-------------- App/Source/Modules/HTML.py | 15 +++++- App/Source/Modules/Logging.py | 1 + App/Source/Modules/Site.py | 31 +++++++----- App/Source/Modules/Social.py | 13 +++-- App/Source/Modules/Utils.py | 47 +++++++++++++++--- 6 files changed, 127 insertions(+), 62 deletions(-) diff --git a/App/Source/Modules/ActivityPub.py b/App/Source/Modules/ActivityPub.py index d2d94de..1f42d0c 100644 --- a/App/Source/Modules/ActivityPub.py +++ b/App/Source/Modules/ActivityPub.py @@ -11,6 +11,7 @@ import time from Libs.dateutil.parser import parse as date_parse from Libs.mastodon import Mastodon from Modules.HTML import * +from Modules.Logging import * from Modules.Utils import * def MastodonGetSession(InstanceURL, Token): @@ -18,22 +19,19 @@ def MastodonGetSession(InstanceURL, Token): api_base_url=InstanceURL, access_token=Token) -def MastodonGetMyID(Session): - return Session.me()['id'] - def MastodonGetPostsFromUserID(Session, UserID): return Session.account_statuses( UserID, exclude_replies=True) -def MastodonDoPost(Session, Text, Lang=None, Visibility='public'): +def MastodonDoPost(Session, Text:str, Lang:str=None, Visibility:str='public'): if Text: return Session.status_post( Text, language=Lang, visibility=Visibility) -def MastodonGetLinkFromPost(Post, Domain=None): +def MastodonGetLinkFromPost(Post, Domain:str=None): Parse = MkSoup(Post['content']) if Parse.a: Link = Parse.find_all('a')[-1]['href'] @@ -43,56 +41,66 @@ def MastodonGetLinkFromPost(Post, Domain=None): 'Link': Link} return None -def MastodonGetAllLinkPosts(Session, Domain=None): +def MastodonGetAllLinkPosts(Session, Domain:str=None): Posts = [] - for p in MastodonGetPostsFromUserID(Session, MastodonGetMyID(Session)): + for p in MastodonGetPostsFromUserID(Session, Session.me()['id']): Post = MastodonGetLinkFromPost(p, Domain) if Post: Posts += [Post] return Posts -def MastodonShare(Flags, Pages, Locale): +# TODO: +# - Get post lang from page lang instead of site +# - Fix message including some messed up paragraphs with the new methods +def MastodonShare(Flags:dict, Pages:list, Locale:dict): + f = NameSpace(Flags) SaidPosting = False - SiteDomain, SiteLang = Flags['SiteDomain'], Flags['SiteLang'] - InstanceURL, Token = Flags['MastodonURL'], Flags['MastodonToken'] - TypeFilter, HoursLimit, CategoryFilter = Flags['ActivityPubTypeFilter'], Flags['ActivityPubHoursLimit'], Flags['FeedCategoryFilter'] - Session = MastodonGetSession(InstanceURL, Token) - Posts = MastodonGetAllLinkPosts(Session, SiteDomain) - Pages.sort() + NoteLimit, UrlLen = 500, 24 + Token = f.MastodonToken + Check = ';Debug=True' + if Token.endswith(Check): + Debug = True + Token = Token[:-len(Check)] + else: + Debug = False + TypeFilter, HoursLimit, CategoryFilter = f.ActivityPubTypeFilter, f.ActivityPubHoursLimit, f.FeedCategoryFilter + Session = MastodonGetSession(f.MastodonURL, Token) + Posts = MastodonGetAllLinkPosts(Session, f.SiteDomain) + Pages.sort() # Ensure new posts are sent in order from oldest to newest for File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image in Pages: if (not TypeFilter or (TypeFilter and (Meta['Type'] == TypeFilter or TypeFilter == '*'))) and (not CategoryFilter or (CategoryFilter and (CategoryFilter in Meta['Categories'] or CategoryFilter == '*'))): - URL = f"{SiteDomain}/{StripExt(File)}.html" + URL = f"{f.SiteDomain}/{StripExt(File)}.html" DoPost = True for p in Posts: - if p['Link'] in [URL]+Meta['URLs']: + # If already a post linking to this page exists on the net, don't repost + if p['Link'] in [URL]+Meta['URLs'] and not Debug: DoPost = False break if DoPost and Meta['Feed'] == 'True' and (not HoursLimit or (Meta['CreatedOn'] and time.time() - time.mktime(date_parse(Meta['CreatedOn']).timetuple()) < 60*60*HoursLimit)): - Desc = '' - Paragraphs = MkSoup(ContentHTML).p.get_text().split('\n') - Read = '...' + Locale['ReadFullPost'] + ':\n' - for p in Paragraphs: - if p and len(Read+Desc+p)+25 < 500: - Desc += p + '\n\n' - else: - if Desc: - break - else: - Desc = p[:500-25-5-len(Read)] + '...' + Read = f'\n\n...{Locale["ReadFullPost"]}:\n' + Hashtags = '' + for Cat in Meta['Categories']: + Hashtags += f' #{Cat.replace("-", "")}' + Hashtags = '\n\n' + Hashtags.strip() + Desc = LimitText(HtmlParagraphsToText(ContentHTML, '\n'), NoteLimit - len(Read) - UrlLen - len(Hashtags)) if not SaidPosting: - print("[I] Posting to Mastodon") + logging.info("Posting to Mastodon") SaidPosting = True - time.sleep(5) # Prevent flooding - Post = MastodonGetLinkFromPost( - Post=MastodonDoPost( - Session, - Text=Desc+Read+URL, - Lang=SiteLang), - Domain=SiteDomain) - if Post: - Posts += [Post] + if Debug: + Text = Desc + Read + URL + Hashtags + print(f'{len(Desc+Read+Hashtags)+UrlLen}:\n{Text}\n\n\n\n') + else: + time.sleep(5) # Prevent flooding + Post = MastodonGetLinkFromPost( + Post=MastodonDoPost( + Session, + Text=Desc+Read+URL+Hashtags, + Lang=f.SiteLang), + Domain=f.SiteDomain) + if Post: + Posts += [Post] return Posts diff --git a/App/Source/Modules/HTML.py b/App/Source/Modules/HTML.py index a6a44a9..8a485e5 100644 --- a/App/Source/Modules/HTML.py +++ b/App/Source/Modules/HTML.py @@ -17,8 +17,11 @@ from Modules.Utils import * warnings.filterwarnings('ignore', message='The input looks more like a filename than markup.') warnings.filterwarnings('ignore', message='The soupsieve package is not installed.') -def MkSoup(Html:str): - return BeautifulSoup(Html, 'html.parser') +def MkSoup(Html): + if type(Html) == str: + return BeautifulSoup(Html, 'html.parser') + elif type(Html) == BeautifulSoup: + return Html def StripAttrs(Html:str): Soup = MkSoup(Html) @@ -88,6 +91,14 @@ def SquareFnrefs(Html:str): # Different combinations of formatting for Soup .pre s.replace_with(f'[{t}]') return str(Soup.prettify(formatter=None)) +def HtmlParagraphsToText(Html:str, Sep:str='\n\n'): + Soup, Text = MkSoup(Html), '' + for Par in Soup.find_all('p'): + Par = Par.get_text().strip() + if Par: + Text += Par + Sep + return Text + def DoMinifyHTML(Html:str, KeepComments:bool): return htmlmin.minify( input=Html, diff --git a/App/Source/Modules/Logging.py b/App/Source/Modules/Logging.py index 3a2548f..5a80fd5 100644 --- a/App/Source/Modules/Logging.py +++ b/App/Source/Modules/Logging.py @@ -9,6 +9,7 @@ import logging import sys +from traceback import format_exc as TracebackText from Modules.Config import * LoggingFormat = '[%(levelname)s] %(message)s' diff --git a/App/Source/Modules/Site.py b/App/Source/Modules/Site.py index 6cde703..9805830 100644 --- a/App/Source/Modules/Site.py +++ b/App/Source/Modules/Site.py @@ -77,12 +77,8 @@ def PatchHtml(Flags:dict, Pages:list, Page:dict, Context:dict, Snippets:dict, Lo BodyDescription, BodyImage = '', '' if not File.lower().endswith('.txt'): Soup = MkSoup(Content) - if not BodyDescription:# and Soup.p: - #BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...' - for t in Soup.find_all('p'): - if t.get_text(): - BodyDescription = t.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...' - break + if not BodyDescription: + BodyDescription = html.escape(LimitText(HtmlParagraphsToText(Soup, Sep='\n'), 150).replace('\n', ' ')) if not BodyImage and Soup.img and Soup.img['src']: BodyImage = Soup.img['src'] @@ -338,12 +334,6 @@ def HandlePage(Flags:dict, Page:list, Pages:list, Categories, LimitFiles, Snippe return {"File": File, "Content": Content, "Titles": Titles, "Meta": Meta, "ContentHtml": ContentHTML, "SlimHtml": SlimHTML, "Description": Description, "Image": Image} -def MultiprocPagePreprocessor(d:dict): - return PagePreprocessor(d['Flags'], d['Page'], d['GlobalMacros'], d['LightRun']) - -def MultiprocHandlePage(d:dict): - return HandlePage(d['Flags'], d['Page'], d['Pages'], d['Categories'], d['LimitFiles'], d['Snippets'], d['ConfMenu'], d['Locale']) - def FindPagesPaths(): Paths = {"Pages":[], "Posts":[]} for Ext in FileExtensions['Pages']: @@ -400,12 +390,25 @@ def PreprocessSourcePages(Flags:dict, PagesPaths:dict, LimitFiles, GlobalMacros: MultiprocPages += [{'Flags': Flags, 'Page': [f"{Type}s/{File}", TempPath, Type, None], 'GlobalMacros': GlobalMacros, 'LightRun': LightRun}] return DoMultiProc(MultiprocPagePreprocessor, MultiprocPages, PoolSize, True) +def MultiprocPagePreprocessor(d:dict): + return PagePreprocessor(d['Flags'], d['Page'], d['GlobalMacros'], d['LightRun']) + def WriteProcessedPages(Flags:dict, Pages:list, Categories, ConfMenu, Snippets, LimitFiles, PoolSize:int, Locale:dict): MultiprocPages = [] for i, Page in enumerate(Pages): MultiprocPages += [{'Flags': Flags, 'Page': Page, 'Pages': Pages, 'Categories': Categories, 'LimitFiles': LimitFiles, 'Snippets': Snippets, 'ConfMenu': ConfMenu, 'Locale': Locale}] return DoMultiProc(MultiprocHandlePage, MultiprocPages, PoolSize, True) +def MultiprocHandlePage(d:dict): + return HandlePage(d['Flags'], d['Page'], d['Pages'], d['Categories'], d['LimitFiles'], d['Snippets'], d['ConfMenu'], d['Locale']) + +def HandleTransclusionsCaller(Base:str, Caller:str, Pages:list): + MultiPages = [] + return DoMultiProc(MultiprocHandleTransclusions, MultiPages, PoolSize, True) + +def MultiprocHandleTransclusions(d:dict): + return + def MakeSite(Flags:dict, LimitFiles, Snippets, ConfMenu, GlobalMacros:dict, Locale:dict, Threads:int): Pages, MadePages, Categories = [], [], {} PoolSize = cpu_count() if Threads <= 0 else Threads @@ -442,4 +445,8 @@ def MakeSite(Flags:dict, LimitFiles, Snippets, ConfMenu, GlobalMacros:dict, Loca logging.info("Writing Pages") MadePages = WriteProcessedPages(Flags, Pages, Categories, ConfMenu, Snippets, LimitFiles, PoolSize, Locale) + # TODO: Finish this and remove the transclusion feature from above + #logging.info("Resolving Page Transclusions") + #HandleTransclusionsCaller(Pages) + return MadePages diff --git a/App/Source/Modules/Social.py b/App/Source/Modules/Social.py index aad717c..72583d4 100644 --- a/App/Source/Modules/Social.py +++ b/App/Source/Modules/Social.py @@ -7,6 +7,7 @@ | Copyright (C) 2022-2023, OctoSpacc | | ================================== """ +from Modules.Elements import * from Modules.Logging import * from Modules.Utils import * @@ -19,13 +20,14 @@ except: def ApplySocialIntegrations(Flags, Pages, LimitFiles, Locale): f = NameSpace(Flags) - FinalPaths = [] + FinalPaths, MastodonPosts = [], [] if ActivityPub and f.MastodonURL and f.MastodonToken and f.SiteDomain: logging.info("Mastodon Operations") - MastodonPosts = MastodonShare(Flags, Pages, Locale) - else: - MastodonPosts = [] + try: + MastodonPosts = MastodonShare(Flags, Pages, Locale) + except: + print(TracebackText()) for File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image in Pages: if IsLightRun(File, LimitFiles): @@ -34,7 +36,7 @@ def ApplySocialIntegrations(Flags, Pages, LimitFiles, Locale): Content = ReadFile(File) Post = '' for p in MastodonPosts: - if p['Link'] == SiteDomain + '/' + File[len(f'{f.OutDir}/'):]: + if p['Link'] == f.SiteDomain + '/' + File[len(f'{f.OutDir}/'):]: Post = HTMLCommentsBlock.format( StrComments=Locale['Comments'], StrOpen=Locale['OpenInNewTab'], @@ -42,6 +44,7 @@ def ApplySocialIntegrations(Flags, Pages, LimitFiles, Locale): break #Content = ReplWithEsc(Content, '[staticoso:Comments]', Post) Content = ReplWithEsc(Content, '', Post) + Content = ReplWithEsc(Content, '', Post) WriteFile(File, Content) FinalPaths += [File] diff --git a/App/Source/Modules/Utils.py b/App/Source/Modules/Utils.py index 435779b..47241d4 100644 --- a/App/Source/Modules/Utils.py +++ b/App/Source/Modules/Utils.py @@ -90,7 +90,7 @@ def FindAllIndex(Str:str, Sub:str): i = Str.find(Sub, i+1) # Replace substrings in a string, except when an escape char is prepended -def ReplWithEsc(Str:str, Find:str, Repl:str, Esc:str='\\'): +def ReplWithEsc(Str:str, Find:str, Repl:str, Html:bool=True, Esc:str='\\'): New = '' Sects = Str.split(Find) # Every time a substring is found @@ -100,16 +100,17 @@ def ReplWithEsc(Str:str, Find:str, Repl:str, Esc:str='\\'): New += e # Wrapping parts of the escaped substrings in HTML tags is done to avoid multiple calls of this function nullifying escaping elif i > 0: - # If prev. split ends with 2 times the escape (= escaping of the escape) + # If prev. split ends with 2 times the escape (escaping of the escape) if Sects[i-1].endswith(Esc*2): - Wrap = f'{New[-1]}' - New = New[:-2] + Wrap + Wrap1 = f'{New[-1]}' if Html else New[-1] + Wrap2 = f'{New[-2]}' if Html else New[-2] + New = New[:-3] + Wrap2 + Wrap1 New += Repl + e # If prev. split ends with 1 time the escape (escaping of the substring) elif Sects[i-1].endswith(Esc): New = New[:-1] - Wrap = f'{Find[0]}' - New += Wrap + Find[1:] + e + Wrap1 = f'{Find[0]}' if Html else Find[0] + New += Wrap1 + Find[1:] + e # If no escape char else: New += Repl + e @@ -216,3 +217,37 @@ def WhileFuncResultChanges(Func, Args:dict, ResultKey:str): Result = Func(**Args) if ResultOld == Result: return Result + +# Ellipsize text if it isn't already, optionally writing over the last chars instead of appending +def TryEllipsizeText(Text:str, Overwrite:bool=False, Ellipses:str='...'): + if not Text.endswith(Ellipses): + if Overwrite: + Text = Text[:-len(Ellipses)] + Ellipses + # Append normally + else: + Text += Ellipses + return Text + +# Limit the length of a text, and account for if paragraphs should be sliced or entirely deleted to fit the limit +def LimitText(Text:str, MaxChars:int, SliceParagraphs:bool=False, ParagraphSep:str='\n'): + New = '' + Paras = Text.split(ParagraphSep) + if not Paras: + return '' + # The first paragraph; must always be present, ellipsized if needed + New = Paras[0] + if len(New) > MaxChars: + New = TryEllipsizeText(New[:MaxChars], Overwrite=True) + # Add a newline to the first paragraph if it wasn't ellipsized + else: + New += ParagraphSep + # All other paragraphs + for Par in Paras[1:]: + # If adding this paragraph to the new text would go over the limit, and we are allowed to slice, append and ellipsize it + if len(New + Par) > MaxChars: + if SliceParagraphs: + New = TryEllipsizeText(New[:MaxChars], Overwrite=True) + # If we still are whitin the limit, just append the paragraph + else: + New += Par + ParagraphSep + return New.strip()