From 491b99f339f440c8e5ad808babcb53c85858befb Mon Sep 17 00:00:00 2001 From: "octo@swiss" Date: Mon, 22 Aug 2022 21:12:05 +0200 Subject: [PATCH] Handle items older than last 20; TODO: Fix order --- .gitignore | 1 + MastodonFeedHTML/MastodonFeedHTML.py | 210 +++++++++++++----------- MastodonFeedHTML/Start.MastodonFeedHTML | 3 + 3 files changed, 118 insertions(+), 96 deletions(-) create mode 100755 MastodonFeedHTML/Start.MastodonFeedHTML diff --git a/.gitignore b/.gitignore index 2a7e9c1..5f4b2dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc +*.db Config.py diff --git a/MastodonFeedHTML/MastodonFeedHTML.py b/MastodonFeedHTML/MastodonFeedHTML.py index 3d67b7a..44ec853 100755 --- a/MastodonFeedHTML/MastodonFeedHTML.py +++ b/MastodonFeedHTML/MastodonFeedHTML.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import base64 -import feedparser import os import time import email, smtplib, ssl @@ -22,49 +21,77 @@ def MakePathStr(Str): Str = Str.replace(' ', '_') return Str +def HandleFeedsList(List): + for Feed in List: + print(f"[I] Handling Feed ->\n: {Feed}") + HandleFeed( + URLs=SureList(Feed['URLs']), + IncludeRetoots=Feed['IncludeRetoots'] if 'IncludeRetoots' in Feed else True, + IncludeReplies=Feed['IncludeReplies'] if 'IncludeReplies' in Feed else True, + LocalSave=Feed['LocalSave'] if 'LocalSave' in Feed else True, + SendMail=Feed['SendMail'] if 'SendMail' in Feed else True if 'To' in Feed and Feed['To'] else False, + MailTo=SureList(Feed['MailTo']) if 'MailTo' in Feed and Feed['MailTo'] else []) + def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo): for URL in URLs: + if not (LocalSave or SendMail): + print("[I] Partial dry-run for this URL (LocalSave and SendMail are disabled).") URL = URL.removesuffix('/').removesuffix('/with_replies') + '/with_replies' Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}" - try: - Response = urlopen(Request(URL, headers={'User-Agent':UserAgent})) - Data = Response.read() - Soup = BeautifulSoup(Data, 'html.parser') - Feed = Soup.find_all('div', class_='entry') - Feed.reverse() # Order from oldest to newest + LastEntryIsNew, PageOlder = HandleURL(URL, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo) + while LastEntryIsNew and PageOlder: # TODO: Fix this, make a single run for all items / start from oldest, otherwise order is messed up + LastEntryIsNew, PageOlder = HandleURL(PageOlder, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo) - for Entry in Feed: - Attached = '' - GlobalId = Entry.find('a', class_='u-url') - if GlobalId: - GlobalId = GlobalId['href'].removeprefix('https://').removeprefix('http://') - else: - continue +def HandleURL(URL, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo): + LastEntryIsNew = False + PageOlder = '' + try: + Response = urlopen(Request(URL, headers={'User-Agent':UserAgent})) + Data = Response.read() + Soup = BeautifulSoup(Data, 'html.parser') + Feed = Soup.find_all('div', class_='entry') + Feed.reverse() # Order from oldest to newest - if os.path.isfile(f'{AppName}.db'): - with open(f'{AppName}.db', 'r') as Db: - if f'{Usertag} {GlobalId}' in Db.read().splitlines(): - continue + Index = 0 + for Entry in Feed: + Attached = '' + Anchor = Entry.find('a', class_='u-url') + if Anchor: + GlobalId = Anchor['href'].removeprefix('https://').removeprefix('http://') + Index += 1 + else: + Anchor = Entry.find('a', class_='load-more') + if Anchor: + if '?max_id=' in Anchor['href']: + PageOlder = Anchor['href'] + continue - LocalId = 
GlobalId.split('/')[-1] - Username = Entry.find('a', class_='status__display-name').get_text().strip() - Content = Entry.find('div', class_='e-content') - StatusPrepend = Entry.find('div', class_='status__prepend') - StatusPrepend = StatusPrepend.get_text().strip()[len(Username):] if StatusPrepend else '' - StatusPrepend = ' ' + StatusPrepend.strip() if StatusPrepend else '' - if not IncludeRetoots and StatusPrepend: - continue - if not StatusPrepend and IncludeReplies and Entry.find('i', class_='fa-reply-all'): - StatusPrepend = ' replied' - Title = Content.get_text().strip() - Title = f"{Usertag}{StatusPrepend}: {Title[:32]}..." - for Emoji in Entry.find_all('img', class_='custom-emoji'): # Custom emojis in text - Emoji['style'] = 'max-height:1em;' - Entry.find('img', class_='u-photo account__avatar')['style'] = 'max-height:4em;' # Profile pics - Entry.find('div', class_='status__action-bar').replace_with('') + if os.path.isfile(f'{AppName}.db'): + with open(f'{AppName}.db', 'r') as Db: + if f'{Usertag} {GlobalId}' in Db.read().splitlines(): + continue - print(f"-> {LocalId} - {Title}") - HTML = f"""\ + if Index == 1: + LastEntryIsNew = True + LocalId = GlobalId.split('/')[-1] + Username = Entry.find('a', class_='status__display-name').get_text().strip() + Content = Entry.find('div', class_='e-content') + StatusPrepend = Entry.find('div', class_='status__prepend') + StatusPrepend = StatusPrepend.get_text().strip()[len(Username):] if StatusPrepend else '' + StatusPrepend = ' ' + StatusPrepend.strip() if StatusPrepend else '' + if not IncludeRetoots and StatusPrepend: + continue + if not StatusPrepend and IncludeReplies and Entry.find('i', class_='fa-reply-all'): + StatusPrepend = ' replied' + Title = Content.get_text().strip() + Title = f"{Usertag}{StatusPrepend}: {Title[:32]}..." + for Emoji in Entry.find_all('img', class_='custom-emoji'): # Custom emojis in text + Emoji['style'] = 'max-height:1em;' + Entry.find('img', class_='u-photo account__avatar')['style'] = 'max-height:4em;' # Profile pics + Entry.find('div', class_='status__action-bar').replace_with('') + + print(f"-> {LocalId} - {Title}") + HTML = f"""\

{Title}
@@ -77,76 +104,67 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
Via https://gitlab.com/octospacc/bottocto/-/tree/main/MastodonFeedHTML
""" - if SendMail: - Message = MIMEMultipart() - Message['From'] = MailUsername - Message['To'] = ', '.join(MailTo) - Message['Subject'] = Title - Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html')) + if SendMail: + Message = MIMEMultipart() + Message['From'] = MailUsername + Message['To'] = ', '.join(MailTo) + Message['Subject'] = Title + Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html')) - Attachments = Entry.find('ul', class_='attachment-list__list') - if Attachments: - for Attachment in Attachments: - Href, Alt = '', '' - Attachment = str(Attachment).strip().replace("'",'"').split('"') - for i,e in enumerate(Attachment): - if e.endswith('" - Attached += f"""{Opening} style="max-width:100%; max-height:100vh;" src="data:{Mime};base64,{base64.b64encode(Data).decode()}"{Closing}\n""" - if SendMail: - File = MIMEBase(Mime.split('/')[0], Mime.split('/')[1]) - File.set_payload(Data) - encoders.encode_base64(File) - File.add_header( - "Content-Disposition", - f"attachment; filename={Href.split('/')[-1]}") - Message.attach(File) + Attachments = Entry.find('ul', class_='attachment-list__list') + if Attachments and (LocalSave or SendMail): + for Attachment in Attachments: + Href, Alt = '', '' + Attachment = str(Attachment).strip().replace("'",'"').split('"') + for i,e in enumerate(Attachment): + if e.endswith('" + Attached += f"""{Opening} style="max-width:100%; max-height:100vh;" src="data:{Mime};base64,{base64.b64encode(Data).decode()}"{Closing}\n""" + if SendMail: + File = MIMEBase(Mime.split('/')[0], Mime.split('/')[1]) + File.set_payload(Data) + encoders.encode_base64(File) + File.add_header( + "Content-Disposition", + f"attachment; filename={Href.split('/')[-1]}") + Message.attach(File) - if SendMail: - with smtplib.SMTP_SSL(MailServer, MailPort, context=ssl.create_default_context()) as Client: - Client.login(MailUsername, MailPassword) - Client.sendmail(MailUsername, MailTo, Message.as_string()) - time.sleep(MailSleep) + if SendMail: + with smtplib.SMTP_SSL(MailServer, MailPort, context=ssl.create_default_context()) as Client: + Client.login(MailUsername, MailPassword) + Client.sendmail(MailUsername, MailTo, Message.as_string()) + time.sleep(MailSleep) - if LocalSave: - LocalBackupDir = MakePathStr(Usertag) - if not os.path.isdir(LocalBackupDir): - os.mkdir(LocalBackupDir) - FileName = MakePathStr(f"{GlobalId.split('/')[-1]} - {Title}") - with open(f'{LocalBackupDir}/{FileName}.html', 'w') as File: - File.write(HTML.replace('{ Replace:Attached }', Attached)) + if LocalSave: + LocalBackupDir = MakePathStr(Usertag) + if not os.path.isdir(LocalBackupDir): + os.mkdir(LocalBackupDir) + FileName = MakePathStr(f"{GlobalId.split('/')[-1]} - {Title}") + with open(f'{LocalBackupDir}/{FileName}.html', 'w') as File: + File.write(HTML.replace('{ Replace:Attached }', Attached)) - with open(f'{AppName}.db', 'a') as Db: - Db.write(f'{Usertag} {GlobalId}' + '\n') + with open(f'{AppName}.db', 'a') as Db: + Db.write(f'{Usertag} {GlobalId}' + '\n') - except Exception: - raise + return LastEntryIsNew, PageOlder -def HandleFeedList(List): - for Feed in List: - print(f"[I] Handling Feed ->\n: {Feed}") - HandleFeed( - URLs=SureList(Feed['URLs']), - IncludeRetoots=Feed['IncludeRetoots'] if 'IncludeRetoots' in Feed else True, - IncludeReplies=Feed['IncludeReplies'] if 'IncludeReplies' in Feed else True, - LocalSave=Feed['LocalSave'] if 'LocalSave' in Feed else True, - SendMail=Feed['SendMail'] if 'SendMail' in Feed else True if 'To' in Feed and Feed['To'] else False, - 
MailTo=SureList(Feed['MailTo']) if 'MailTo' in Feed and Feed['MailTo'] else []) + except Exception: + raise if __name__ == '__main__': while True: print("[I] Scraping...") - HandleFeedList(Feeds) + HandleFeedsList(Feeds) if LoopTime <= 0: exit() print(f"[I] Sleeping for {LoopTime}s...") diff --git a/MastodonFeedHTML/Start.MastodonFeedHTML b/MastodonFeedHTML/Start.MastodonFeedHTML new file mode 100755 index 0000000..00bb1c3 --- /dev/null +++ b/MastodonFeedHTML/Start.MastodonFeedHTML @@ -0,0 +1,3 @@ +#!/bin/sh +cd "$( dirname "$( realpath "$0" )" )" +./MastodonFeedHTML.py
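The per-feed defaults in HandleFeedsList are built from chained conditional expressions. The sketch below expresses the same defaults with dict.get(); it is not part of the patch, and FeedOptions is a hypothetical helper. Note that the patch's SendMail fallback tests a 'To' key while the recipient list is read from 'MailTo'; the sketch assumes 'MailTo' is the intended key for both, and the isinstance check stands in for the script's SureList helper.

def FeedOptions(Feed):
    # Hypothetical helper, equivalent in intent to the patch's inline defaults.
    MailTo = Feed.get('MailTo') or []
    MailTo = MailTo if isinstance(MailTo, list) else [MailTo]  # stands in for SureList()
    return {
        'IncludeRetoots': Feed.get('IncludeRetoots', True),
        'IncludeReplies': Feed.get('IncludeReplies', True),
        'LocalSave': Feed.get('LocalSave', True),
        # The patch checks a 'To' key here; 'MailTo' is assumed to be the intended one.
        'SendMail': Feed.get('SendMail', bool(MailTo)),
        'MailTo': MailTo,
    }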
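Both the subject line and the in-code comment flag that processing pages newest-first leaves cross-page order scrambled. Below is a minimal sketch of one way the TODO could be addressed, not the patch's implementation: walk the whole '?max_id=' chain first, then return every entry oldest-to-newest so a single pass (and the seen-items database) stays chronological. CollectEntries, FetchSoup and MaxPages are hypothetical names; UserAgent is assumed to come from Config.py as in the script.

from urllib.request import Request, urlopen

from bs4 import BeautifulSoup

UserAgent = 'MastodonFeedHTML'  # assumption; the script takes this from Config.py

def FetchSoup(URL):
    # Download and parse one profile page, as HandleURL already does.
    Data = urlopen(Request(URL, headers={'User-Agent': UserAgent})).read()
    return BeautifulSoup(Data, 'html.parser')

def CollectEntries(StartURL, MaxPages=25):
    # Follow the "load more" (?max_id=) chain from the newest page to the oldest,
    # keeping each page's entries; MaxPages is only a runaway safeguard.
    Pages, URL = [], StartURL
    for _ in range(MaxPages):
        Soup = FetchSoup(URL)
        Pages.append(Soup.find_all('div', class_='entry'))
        Older = Soup.find('a', class_='load-more',
                          href=lambda h: h and '?max_id=' in h)
        if not Older:
            break
        URL = Older['href']
    # Flatten oldest page first and oldest entry first within each page, so one
    # pass over the result processes (and records) everything chronologically.
    Entries = []
    for Page in reversed(Pages):
        Entries.extend(reversed(Page))
    return Entries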
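HandleURL re-opens and re-reads {AppName}.db for every entry it inspects. The sketch below loads the seen-item lines into a set once per scrape; it is not part of the patch, and LoadSeen/MarkSeen are hypothetical helpers that keep the same one-line-per-item "Usertag GlobalId" format the patch writes.

import os

AppName = 'MastodonFeedHTML'  # assumption; the script defines its own AppName

def LoadSeen():
    # Read the whole database once per scrape instead of once per entry.
    if not os.path.isfile(f'{AppName}.db'):
        return set()
    with open(f'{AppName}.db', 'r') as Db:
        return set(Db.read().splitlines())

def MarkSeen(Seen, Usertag, GlobalId):
    # Same "Usertag GlobalId" line the patch appends after handling an entry.
    Line = f'{Usertag} {GlobalId}'
    if Line not in Seen:
        Seen.add(Line)
        with open(f'{AppName}.db', 'a') as Db:
            Db.write(Line + '\n')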
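The attachment-parsing block is not fully legible in this diff (the markup-matching code inside the e.endswith(...) branch was lost), so the sketch below only illustrates the visible pattern rather than reconstructing the original: fetch a media URL, inline it as a base64 data: URI for the saved HTML, and attach the raw bytes to the outgoing message with MIMEBase. AttachMedia, MediaURL and Mime are hypothetical names.

import base64
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from urllib.request import Request, urlopen

def AttachMedia(Message: MIMEMultipart, MediaURL: str, Mime: str, UserAgent: str) -> str:
    # Download the media once and reuse the bytes for both outputs.
    Data = urlopen(Request(MediaURL, headers={'User-Agent': UserAgent})).read()
    # Inline copy for the saved/mailed HTML body, as the patch does with { Replace:Attached }.
    Inline = (f'<img style="max-width:100%; max-height:100vh;" '
              f'src="data:{Mime};base64,{base64.b64encode(Data).decode()}">')
    # Attached copy, mirroring the MIMEBase/encode_base64 calls visible in the patch.
    File = MIMEBase(*Mime.split('/', 1))
    File.set_payload(Data)
    encoders.encode_base64(File)
    File.add_header('Content-Disposition',
                    f"attachment; filename={MediaURL.split('/')[-1]}")
    Message.attach(File)
    return Inline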