From 784d8e4732b0d41994a0973759e9ab9d8a454526 Mon Sep 17 00:00:00 2001
From: octospacc <octo@tutamail.com>
Date: Fri, 10 Mar 2023 17:52:40 +0100
Subject: [PATCH] Minor changes; Mastodon posting message with better
 description + tags

---
 App/Source/Modules/ActivityPub.py | 82 +++++++++++++++++--------------
 App/Source/Modules/HTML.py        | 15 +++++-
 App/Source/Modules/Logging.py     |  1 +
 App/Source/Modules/Site.py        | 31 +++++++-----
 App/Source/Modules/Social.py      | 13 +++--
 App/Source/Modules/Utils.py       | 47 +++++++++++++++---
 6 files changed, 127 insertions(+), 62 deletions(-)

diff --git a/App/Source/Modules/ActivityPub.py b/App/Source/Modules/ActivityPub.py
index d2d94de..1f42d0c 100644
--- a/App/Source/Modules/ActivityPub.py
+++ b/App/Source/Modules/ActivityPub.py
@@ -11,6 +11,7 @@ import time
 from Libs.dateutil.parser import parse as date_parse
 from Libs.mastodon import Mastodon
 from Modules.HTML import *
+from Modules.Logging import *
 from Modules.Utils import *
 
 def MastodonGetSession(InstanceURL, Token):
@@ -18,22 +19,19 @@ def MastodonGetSession(InstanceURL, Token):
 		api_base_url=InstanceURL,
 		access_token=Token)
 
-def MastodonGetMyID(Session):
-	return Session.me()['id']
-
 def MastodonGetPostsFromUserID(Session, UserID):
 	return Session.account_statuses(
 		UserID,
 		exclude_replies=True)
 
-def MastodonDoPost(Session, Text, Lang=None, Visibility='public'):
+def MastodonDoPost(Session, Text:str, Lang:str=None, Visibility:str='public'):
 	if Text:
 		return Session.status_post(
 			Text,
 			language=Lang,
 			visibility=Visibility)
 
-def MastodonGetLinkFromPost(Post, Domain=None):
+def MastodonGetLinkFromPost(Post, Domain:str=None):
 	Parse = MkSoup(Post['content'])
 	if Parse.a:
 		Link = Parse.find_all('a')[-1]['href']
@@ -43,56 +41,66 @@ def MastodonGetLinkFromPost(Post, Domain=None):
 				'Link': Link}
 	return None
 
-def MastodonGetAllLinkPosts(Session, Domain=None):
+def MastodonGetAllLinkPosts(Session, Domain:str=None):
 	Posts = []
-	for p in MastodonGetPostsFromUserID(Session, MastodonGetMyID(Session)):
+	for p in MastodonGetPostsFromUserID(Session, Session.me()['id']):
 		Post = MastodonGetLinkFromPost(p, Domain)
 		if Post:
 			Posts += [Post]
 	return Posts
 
-def MastodonShare(Flags, Pages, Locale):
+# TODO:
+# - Get post lang from page lang instead of site
+# - Fix message including some messed up paragraphs with the new methods
+def MastodonShare(Flags:dict, Pages:list, Locale:dict):
+	f = NameSpace(Flags)
 	SaidPosting = False
-	SiteDomain, SiteLang = Flags['SiteDomain'], Flags['SiteLang']
-	InstanceURL, Token = Flags['MastodonURL'], Flags['MastodonToken']
-	TypeFilter, HoursLimit, CategoryFilter = Flags['ActivityPubTypeFilter'], Flags['ActivityPubHoursLimit'], Flags['FeedCategoryFilter']
-	Session = MastodonGetSession(InstanceURL, Token)
-	Posts = MastodonGetAllLinkPosts(Session, SiteDomain)
-	Pages.sort()
+	NoteLimit, UrlLen = 500, 24
+	Token = f.MastodonToken
+	Check = ';Debug=True'
+	if Token.endswith(Check):
+		Debug = True
+		Token = Token[:-len(Check)]
+	else:
+		Debug = False
+	TypeFilter, HoursLimit, CategoryFilter = f.ActivityPubTypeFilter, f.ActivityPubHoursLimit, f.FeedCategoryFilter
+	Session = MastodonGetSession(f.MastodonURL, Token)
+	Posts = MastodonGetAllLinkPosts(Session, f.SiteDomain)
+	Pages.sort() # Ensure new posts are sent in order from oldest to newest
 	for File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image in Pages:
 		if (not TypeFilter or (TypeFilter and (Meta['Type'] == TypeFilter or TypeFilter == '*'))) and (not CategoryFilter or (CategoryFilter and (CategoryFilter in Meta['Categories'] or CategoryFilter == '*'))):
-			URL = f"{SiteDomain}/{StripExt(File)}.html"
+			URL = f"{f.SiteDomain}/{StripExt(File)}.html"
 			DoPost = True
 			for p in Posts:
-				if p['Link'] in [URL]+Meta['URLs']:
+				# If a post linking to this page already exists on the net, don't repost
+				if p['Link'] in [URL]+Meta['URLs'] and not Debug:
 					DoPost = False
 					break
 
 			if DoPost and Meta['Feed'] == 'True' and (not HoursLimit or (Meta['CreatedOn'] and time.time() - time.mktime(date_parse(Meta['CreatedOn']).timetuple()) < 60*60*HoursLimit)):
-				Desc = ''
-				Paragraphs = MkSoup(ContentHTML).p.get_text().split('\n')
-				Read = '...' + Locale['ReadFullPost'] + ':\n'
-				for p in Paragraphs:
-					if p and len(Read+Desc+p)+25 < 500:
-						Desc += p + '\n\n'
-					else:
-						if Desc:
-							break
-						else:
-							Desc = p[:500-25-5-len(Read)] + '...'
+				Read = f'\n\n...{Locale["ReadFullPost"]}:\n'
+				Hashtags = ''
+				for Cat in Meta['Categories']:
+					Hashtags += f' #{Cat.replace("-", "")}'
+				Hashtags = '\n\n' + Hashtags.strip()
+				Desc = LimitText(HtmlParagraphsToText(ContentHTML, '\n'), NoteLimit - len(Read) - UrlLen - len(Hashtags))
 
 				if not SaidPosting:
-					print("[I] Posting to Mastodon")
+					logging.info("Posting to Mastodon")
 					SaidPosting = True
 
-				time.sleep(5) # Prevent flooding
-				Post = MastodonGetLinkFromPost(
-					Post=MastodonDoPost(
-						Session,
-						Text=Desc+Read+URL,
-						Lang=SiteLang),
-					Domain=SiteDomain)
-				if Post:
-					Posts += [Post]
+				if Debug:
+					Text = Desc + Read + URL + Hashtags
+					print(f'{len(Desc+Read+Hashtags)+UrlLen}:\n{Text}\n\n\n\n')
+				else:
+					time.sleep(5) # Prevent flooding
+					Post = MastodonGetLinkFromPost(
+						Post=MastodonDoPost(
+							Session,
+							Text=Desc+Read+URL+Hashtags,
+							Lang=f.SiteLang),
+						Domain=f.SiteDomain)
+					if Post:
+						Posts += [Post]
 
 	return Posts
diff --git a/App/Source/Modules/HTML.py b/App/Source/Modules/HTML.py
index a6a44a9..8a485e5 100644
--- a/App/Source/Modules/HTML.py
+++ b/App/Source/Modules/HTML.py
@@ -17,8 +17,11 @@ from Modules.Utils import *
 warnings.filterwarnings('ignore', message='The input looks more like a filename than markup.')
 warnings.filterwarnings('ignore', message='The soupsieve package is not installed.')
 
-def MkSoup(Html:str):
-	return BeautifulSoup(Html, 'html.parser')
+def MkSoup(Html):
+	# An already parsed soup is handed back untouched, so callers may pass either markup or a soup
+	if isinstance(Html, BeautifulSoup):
+		return Html
+	return BeautifulSoup(Html, 'html.parser') # Parse anything else (str, str subclasses, bytes) rather than silently returning None
 
 def StripAttrs(Html:str):
 	Soup = MkSoup(Html)
@@ -88,6 +91,14 @@ def SquareFnrefs(Html:str): # Different combinations of formatting for Soup .pre
 			s.replace_with(f'[{t}]')
 	return str(Soup.prettify(formatter=None))
 
+# Join the stripped text of every non-empty <p> found in Html; each paragraph is followed by Sep, the last one too
+def HtmlParagraphsToText(Html:str, Sep:str='\n\n'):
+	Pieces = []
+	for Tag in MkSoup(Html).find_all('p'):
+		Stripped = Tag.get_text().strip()
+		Pieces += [Stripped + Sep] if Stripped else []
+	return ''.join(Pieces)
+
 def DoMinifyHTML(Html:str, KeepComments:bool):
 	return htmlmin.minify(
 		input=Html,
diff --git a/App/Source/Modules/Logging.py b/App/Source/Modules/Logging.py
index 3a2548f..5a80fd5 100644
--- a/App/Source/Modules/Logging.py
+++ b/App/Source/Modules/Logging.py
@@ -9,6 +9,7 @@
 
 import logging
 import sys
+from traceback import format_exc as TracebackText
 from Modules.Config import *
 
 LoggingFormat = '[%(levelname)s] %(message)s'
diff --git a/App/Source/Modules/Site.py b/App/Source/Modules/Site.py
index 6cde703..9805830 100644
--- a/App/Source/Modules/Site.py
+++ b/App/Source/Modules/Site.py
@@ -77,12 +77,8 @@ def PatchHtml(Flags:dict, Pages:list, Page:dict, Context:dict, Snippets:dict, Lo
 	BodyDescription, BodyImage = '', ''
 	if not File.lower().endswith('.txt'):
 		Soup = MkSoup(Content)
-		if not BodyDescription:# and Soup.p:
-			#BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
-			for t in Soup.find_all('p'):
-				if t.get_text():
-					BodyDescription = t.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
-					break
+		if not BodyDescription:
+			BodyDescription = html.escape(LimitText(HtmlParagraphsToText(Soup, Sep='\n'), 150).replace('\n', ' '))
 		if not BodyImage and Soup.img and Soup.img['src']:
 			BodyImage = Soup.img['src']
 
@@ -338,12 +334,6 @@ def HandlePage(Flags:dict, Page:list, Pages:list, Categories, LimitFiles, Snippe
 
 	return {"File": File, "Content": Content, "Titles": Titles, "Meta": Meta, "ContentHtml": ContentHTML, "SlimHtml": SlimHTML, "Description": Description, "Image": Image}
 
-def MultiprocPagePreprocessor(d:dict):
-	return PagePreprocessor(d['Flags'], d['Page'], d['GlobalMacros'], d['LightRun'])
-
-def MultiprocHandlePage(d:dict):
-	return HandlePage(d['Flags'], d['Page'], d['Pages'], d['Categories'], d['LimitFiles'], d['Snippets'], d['ConfMenu'], d['Locale'])
-
 def FindPagesPaths():
 	Paths = {"Pages":[], "Posts":[]}
 	for Ext in FileExtensions['Pages']:
@@ -400,12 +390,25 @@ def PreprocessSourcePages(Flags:dict, PagesPaths:dict, LimitFiles, GlobalMacros:
 			MultiprocPages += [{'Flags': Flags, 'Page': [f"{Type}s/{File}", TempPath, Type, None], 'GlobalMacros': GlobalMacros, 'LightRun': LightRun}]
 	return DoMultiProc(MultiprocPagePreprocessor, MultiprocPages, PoolSize, True)
 
+def MultiprocPagePreprocessor(d:dict): # Unpacks one job dict (as built by PreprocessSourcePages) into PagePreprocessor's positional arguments
+	return PagePreprocessor(*[d[Key] for Key in ('Flags', 'Page', 'GlobalMacros', 'LightRun')])
+
 def WriteProcessedPages(Flags:dict, Pages:list, Categories, ConfMenu, Snippets, LimitFiles, PoolSize:int, Locale:dict):
 	MultiprocPages = []
 	for i, Page in enumerate(Pages):
 		MultiprocPages += [{'Flags': Flags, 'Page': Page, 'Pages': Pages, 'Categories': Categories, 'LimitFiles': LimitFiles, 'Snippets': Snippets, 'ConfMenu': ConfMenu, 'Locale': Locale}]
 	return DoMultiProc(MultiprocHandlePage, MultiprocPages, PoolSize, True)
 
+def MultiprocHandlePage(d:dict): # Unpacks one job dict (as built by WriteProcessedPages) into HandlePage's positional arguments
+	return HandlePage(*[d[Key] for Key in ('Flags', 'Page', 'Pages', 'Categories', 'LimitFiles', 'Snippets', 'ConfMenu', 'Locale')])
+
+def HandleTransclusionsCaller(Base:str, Caller:str, Pages:list): # Unfinished stub: none of the parameters is used yet
+	MultiPages = [] # No jobs are built yet, so DoMultiProc is handed an empty list
+	return DoMultiProc(MultiprocHandleTransclusions, MultiPages, PoolSize, True) # NOTE(review): PoolSize is neither a parameter nor a local of this function — confirm it is defined at module level before enabling the call
+
+def MultiprocHandleTransclusions(d:dict): # Unfinished stub: the job dict is ignored
+	return # Nothing is computed yet, the result is None
+
 def MakeSite(Flags:dict, LimitFiles, Snippets, ConfMenu, GlobalMacros:dict, Locale:dict, Threads:int):
 	Pages, MadePages, Categories = [], [], {}
 	PoolSize = cpu_count() if Threads <= 0 else Threads
@@ -442,4 +445,8 @@ def MakeSite(Flags:dict, LimitFiles, Snippets, ConfMenu, GlobalMacros:dict, Loca
 	logging.info("Writing Pages")
 	MadePages = WriteProcessedPages(Flags, Pages, Categories, ConfMenu, Snippets, LimitFiles, PoolSize, Locale)
 
+	# TODO: Finish this and remove the transclusion feature from above
+	#logging.info("Resolving Page Transclusions")
+	#HandleTransclusionsCaller(Pages)
+
 	return MadePages
diff --git a/App/Source/Modules/Social.py b/App/Source/Modules/Social.py
index aad717c..72583d4 100644
--- a/App/Source/Modules/Social.py
+++ b/App/Source/Modules/Social.py
@@ -7,6 +7,7 @@
 |   Copyright (C) 2022-2023, OctoSpacc |
 | ================================== """
 
+from Modules.Elements import *
 from Modules.Logging import *
 from Modules.Utils import *
 
@@ -19,13 +20,14 @@ except:
 
 def ApplySocialIntegrations(Flags, Pages, LimitFiles, Locale):
 	f = NameSpace(Flags)
-	FinalPaths = []
+	FinalPaths, MastodonPosts = [], []
 
 	if ActivityPub and f.MastodonURL and f.MastodonToken and f.SiteDomain:
 		logging.info("Mastodon Operations")
-		MastodonPosts = MastodonShare(Flags, Pages, Locale)
-	else:
-		MastodonPosts = []
+		try:
+			MastodonPosts = MastodonShare(Flags, Pages, Locale)
+		except:
+			print(TracebackText())
 
 	for File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image in Pages:
 		if IsLightRun(File, LimitFiles):
@@ -34,7 +36,7 @@ def ApplySocialIntegrations(Flags, Pages, LimitFiles, Locale):
 		Content = ReadFile(File)
 		Post = ''
 		for p in MastodonPosts:
-			if p['Link'] == SiteDomain + '/' + File[len(f'{f.OutDir}/'):]:
+			if p['Link'] == f.SiteDomain + '/' + File[len(f'{f.OutDir}/'):]:
 				Post = HTMLCommentsBlock.format(
 					StrComments=Locale['Comments'],
 					StrOpen=Locale['OpenInNewTab'],
@@ -42,6 +44,7 @@ def ApplySocialIntegrations(Flags, Pages, LimitFiles, Locale):
 				break
 		#Content = ReplWithEsc(Content, '[staticoso:Comments]', Post)
 		Content = ReplWithEsc(Content, '<staticoso:Comments>', Post)
+		Content = ReplWithEsc(Content, '<staticoso:comments>', Post)
 		WriteFile(File, Content)
 		FinalPaths += [File]
 
diff --git a/App/Source/Modules/Utils.py b/App/Source/Modules/Utils.py
index 435779b..47241d4 100644
--- a/App/Source/Modules/Utils.py
+++ b/App/Source/Modules/Utils.py
@@ -90,7 +90,7 @@ def FindAllIndex(Str:str, Sub:str):
 		i = Str.find(Sub, i+1)
 
 # Replace substrings in a string, except when an escape char is prepended
-def ReplWithEsc(Str:str, Find:str, Repl:str, Esc:str='\\'):
+def ReplWithEsc(Str:str, Find:str, Repl:str, Html:bool=True, Esc:str='\\'):
 	New = ''
 	Sects = Str.split(Find)
 	# Every time a substring is found
@@ -100,16 +100,17 @@ def ReplWithEsc(Str:str, Find:str, Repl:str, Esc:str='\\'):
 			New += e
 		# Wrapping parts of the escaped substrings in HTML tags is done to avoid multiple calls of this function nullifying escaping
 		elif i > 0:
-			# If prev. split ends with 2 times the escape (= escaping of the escape)
+			# If prev. split ends with 2 times the escape (escaping of the escape)
 			if Sects[i-1].endswith(Esc*2):
-				Wrap = f'<span>{New[-1]}</span>'
-				New = New[:-2] + Wrap
+				Wrap1 = f'<span>{New[-1]}</span>' if Html else New[-1]
+				Wrap2 = f'<span>{New[-2]}</span>' if Html else New[-2]
+				New = New[:-3] + Wrap2 + Wrap1
 				New += Repl + e
 			# If prev. split ends with 1 time the escape (escaping of the substring)
 			elif Sects[i-1].endswith(Esc):
 				New = New[:-1]
-				Wrap = f'<span>{Find[0]}</span>'
-				New += Wrap + Find[1:] + e
+				Wrap1 = f'<span>{Find[0]}</span>' if Html else Find[0]
+				New += Wrap1 + Find[1:] + e
 			# If no escape char
 			else:
 				New += Repl + e
@@ -216,3 +217,37 @@ def WhileFuncResultChanges(Func, Args:dict, ResultKey:str):
 		Result = Func(**Args)
 		if ResultOld == Result:
 			return Result
+
+# Make sure a text ends with an ellipsis, either appending it or writing it over the last chars
+def TryEllipsizeText(Text:str, Overwrite:bool=False, Ellipses:str='...'):
+	# Already ellipsized, hand it back untouched
+	if Text.endswith(Ellipses):
+		return Text
+	# Overwriting drops the last len(Ellipses) chars to make room for the ellipsis
+	if Overwrite:
+		return Text[:-len(Ellipses)] + Ellipses
+	return Text + Ellipses
+
+# Limit the length of a text to MaxChars, keeping whole paragraphs (split on ParagraphSep) for as long as they fit.
+# - The first paragraph is always kept, cut and ellipsized if it is too long on its own.
+# - Output stops at the first paragraph that doesn't fit, so the result is always a contiguous prefix of Text;
+#   that paragraph is dropped, or (SliceParagraphs=True) appended and cut at the limit with an ellipsis.
+# Returns the limited text, stripped of leading and trailing whitespace.
+def LimitText(Text:str, MaxChars:int, SliceParagraphs:bool=False, ParagraphSep:str='\n'):
+	Paras = Text.split(ParagraphSep)
+	# The first paragraph; must always be present, ellipsized if needed
+	New = Paras[0]
+	if len(New) > MaxChars:
+		return TryEllipsizeText(New[:MaxChars], Overwrite=True).strip()
+	# All other paragraphs
+	for Par in Paras[1:]:
+		Joined = New + ParagraphSep + Par
+		if len(Joined) > MaxChars:
+			# This paragraph overflows: if allowed, keep the part of it that fits, ellipsized
+			if SliceParagraphs:
+				New = TryEllipsizeText(Joined[:MaxChars], Overwrite=True)
+			# Stop in any case, or shorter later paragraphs would be glued on after a gap
+			break
+		# Still within the limit, keep the whole paragraph
+		New = Joined
+	return New.strip()