#!/usr/bin/env python3
"""Scrape public Mastodon profile pages and forward each status.

For every configured feed the script downloads ``<profile>/with_replies``,
walks the ``div.entry`` elements from oldest to newest and, for each status
not yet listed in ``<AppName>.db``, optionally e-mails it (SMTP over SSL)
and/or saves it as a standalone HTML file.  Settings (``Feeds``,
``UserAgent``, ``AppName``, ``Mail*``, ``LoopTime``, ``SpacesInFiles``) are
pulled in by the star-import from ``Config``.

NOTE(review): this file was restored from a copy whose line breaks,
indentation and every ``<...>`` span had been stripped.  Two regions could
not be recovered verbatim and were rebuilt; they are marked with
``NOTE(review)`` below and should be compared against the upstream script.
"""
import base64
import feedparser
import html
import os
import time
import email
import smtplib
import ssl
from bs4 import BeautifulSoup
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from urllib.request import urlopen, Request
from Config import *


def SureList(Item):
    """Return *Item* unchanged if it is a list, otherwise wrap it in a list."""
    return Item if isinstance(Item, list) else [Item]


def MakePathStr(Str):
    """Return *Str* with the characters ``<>:"/\\|?*`` replaced by ``_``.

    Spaces are replaced as well unless ``SpacesInFiles`` (from Config) is
    truthy.
    """
    for c in '<>:"/\\|?*':
        Str = Str.replace(c, '_')
    if not SpacesInFiles:
        Str = Str.replace(' ', '_')
    return Str


def _LoadSeen():
    """Return the set of ``'<Usertag> <GlobalId>'`` lines stored in the DB file."""
    DbPath = f'{AppName}.db'
    if not os.path.isfile(DbPath):
        return set()
    with open(DbPath, 'r') as Db:
        return set(Db.read().splitlines())


def _Download(Target):
    """Fetch *Target* with the configured User-Agent; return ``(Data, Mime)``."""
    with urlopen(Request(Target, headers={'User-Agent': UserAgent})) as Response:
        return Response.read(), Response.info().get_content_type()


def _EmbedMarkup(Data, Mime, Alt):
    """Return an inline (data: URI) HTML element for one downloaded attachment.

    NOTE(review): how Tag/Opening/Closing were chosen was lost from the
    damaged source; only the final f-string survived.  The mapping below
    (image -> <img>, audio/video -> matching tag with controls, anything
    else -> not embedded) is a reconstruction -- confirm against upstream.
    """
    Kind = Mime.split('/')[0]
    AltAttr = html.escape(Alt, quote=True)
    if Kind == 'image':
        Tag = 'img'
        Opening = f'<{Tag} alt="{AltAttr}" title="{AltAttr}"'
        Closing = '>'
    elif Kind in ('audio', 'video'):
        Tag = Kind
        Opening = f'<{Tag} controls title="{AltAttr}"'
        Closing = f"></{Tag}>"
    else:
        return ''
    return f"""{Opening} style="max-width:100%; max-height:100vh;" src="data:{Mime};base64,{base64.b64encode(Data).decode()}"{Closing}\n"""


def _HandleEntry(Entry, Usertag, Seen, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
    """Mail and/or save one ``div.entry`` status, then record it as seen."""
    Permalink = Entry.find('a', class_='u-url')
    if Permalink is None:
        return
    GlobalId = Permalink['href'].removeprefix('https://').removeprefix('http://')
    DbKey = f'{Usertag} {GlobalId}'
    if DbKey in Seen:
        return
    LocalId = GlobalId.split('/')[-1]

    NameTag = Entry.find('a', class_='status__display-name')
    Content = Entry.find('div', class_='e-content')
    if NameTag is None or Content is None:
        # Previously this raised AttributeError and aborted the whole run.
        print(f"[W] Skipping {LocalId}: display name or content not found")
        return
    Username = NameTag.get_text().strip()

    # The prepend line starts with the display name; drop that prefix so only
    # the trailing text remains (used as a verb in the title).
    Prepend = Entry.find('div', class_='status__prepend')
    StatusPrepend = Prepend.get_text().strip()[len(Username):] if Prepend else ''
    StatusPrepend = ' ' + StatusPrepend.strip() if StatusPrepend else ''
    if not IncludeRetoots and StatusPrepend:
        return
    # An 'fa-reply-all' icon is what this script treats as "is a reply".
    # Must be checked before the action bar is removed further down.
    if not StatusPrepend and Entry.find('i', class_='fa-reply-all'):
        if not IncludeReplies:
            # The page is always fetched via /with_replies, so replies have
            # to be filtered here; the flag used to only suppress the label.
            return
        StatusPrepend = ' replied'

    Title = Content.get_text().strip()
    Title = f"{Usertag}{StatusPrepend}: {Title[:32]}..."

    # Keep inline images at a sane size once the page stylesheet is gone.
    for Emoji in Entry.find_all('img', class_='custom-emoji'):  # Custom emojis in text
        Emoji['style'] = 'max-height:1em;'
    Avatar = Entry.find('img', class_='u-photo account__avatar')  # Profile pics
    if Avatar is not None:
        Avatar['style'] = 'max-height:4em;'
    ActionBar = Entry.find('div', class_='status__action-bar')
    if ActionBar is not None:
        ActionBar.replace_with('')

    print(f"-> {LocalId} - {Title}")

    # NOTE(review): the template markup was stripped from the damaged source.
    # Known for certain: it contains the literal placeholder
    # '{ Replace:Attached }' (replaced below) and the "Via ..." credit text.
    # Embedding {Entry} is inferred from the styling tweaks above; any other
    # original elements (e.g. a heading) are unknown -- confirm upstream.
    HTML = f"""\
{Entry}
{{ Replace:Attached }}
<p><i>Via <a href="https://gitlab.com/octospacc/bottocto/-/tree/main/MastodonFeedHTML">https://gitlab.com/octospacc/bottocto/-/tree/main/MastodonFeedHTML</a></i></p>
"""

    Message = None
    if SendMail:
        Message = MIMEMultipart()
        Message['From'] = MailUsername
        Message['To'] = ', '.join(MailTo)
        Message['Subject'] = Title
        # Mail gets media as real attachments, so the inline slot is emptied.
        Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html'))

    Attached = ''
    Attachments = Entry.find('ul', class_='attachment-list__list')
    if Attachments is not None:
        # NOTE(review): the original extracted Href/Alt by splitting the
        # element's text on quote characters; that code was lost.  Reading
        # the <a> href/title attributes is a reconstruction -- confirm.
        for Link in Attachments.find_all('a', href=True):
            Href = Link['href']
            Alt = Link.get('title') or ''
            Data, Mime = _Download(Href)
            if LocalSave:
                Attached += _EmbedMarkup(Data, Mime, Alt)
            if SendMail:
                File = MIMEBase(Mime.split('/')[0], Mime.split('/')[1])
                File.set_payload(Data)
                encoders.encode_base64(File)
                # Let the email package quote/encode the filename parameter.
                File.add_header(
                    'Content-Disposition', 'attachment',
                    filename=Href.split('/')[-1])
                Message.attach(File)

    if SendMail:
        with smtplib.SMTP_SSL(MailServer, MailPort, context=ssl.create_default_context()) as Client:
            Client.login(MailUsername, MailPassword)
            Client.sendmail(MailUsername, MailTo, Message.as_string())
        time.sleep(MailSleep)

    if LocalSave:
        LocalBackupDir = MakePathStr(Usertag)
        os.makedirs(LocalBackupDir, exist_ok=True)
        FileName = MakePathStr(f"{LocalId} - {Title}")
        # Explicit UTF-8: statuses routinely contain non-ASCII text.
        with open(f'{LocalBackupDir}/{FileName}.html', 'w', encoding='utf-8') as File:
            File.write(HTML.replace('{ Replace:Attached }', Attached))

    # Record the status only after every requested action succeeded.
    with open(f'{AppName}.db', 'a') as Db:
        Db.write(DbKey + '\n')
    Seen.add(DbKey)


def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
    """Process every not-yet-seen status of each profile in *URLs*.

    Args:
        URLs: list of profile URLs; '/with_replies' is appended if missing.
        IncludeRetoots: when false, entries with a status-prepend line are skipped.
        IncludeReplies: when false, entries flagged as replies are skipped.
        LocalSave: when true, write each status to ``<Usertag>/<id> - <title>.html``.
        SendMail: when true, e-mail each status to *MailTo*.
        MailTo: list of recipient addresses.

    Network, SMTP and file errors propagate to the caller.
    """
    Seen = _LoadSeen()  # read once instead of re-reading the file per entry
    for URL in URLs:
        URL = URL.removesuffix('/').removesuffix('/with_replies') + '/with_replies'
        Parts = URL.split('/')
        Usertag = f"{Parts[-2]}@{Parts[-3]}"
        Data, _ = _Download(URL)
        Soup = BeautifulSoup(Data, 'html.parser')
        Feed = Soup.find_all('div', class_='entry')
        Feed.reverse()  # Order from oldest to newest
        for Entry in Feed:
            _HandleEntry(
                Entry, Usertag, Seen,
                IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo)


def HandleFeedList(List):
    """Run HandleFeed for every feed dict in *List*, filling in defaults.

    Missing 'IncludeRetoots', 'IncludeReplies' and 'LocalSave' default to
    True; 'SendMail' defaults to True only when 'MailTo' is non-empty.
    """
    for Feed in List:
        print(f"[I] Handling Feed ->\n: {Feed}")
        MailTo = SureList(Feed['MailTo']) if Feed.get('MailTo') else []
        HandleFeed(
            URLs=SureList(Feed['URLs']),
            IncludeRetoots=Feed.get('IncludeRetoots', True),
            IncludeReplies=Feed.get('IncludeReplies', True),
            LocalSave=Feed.get('LocalSave', True),
            # The default used to test the key 'To', which nothing else reads.
            SendMail=Feed.get('SendMail', bool(MailTo)),
            MailTo=MailTo)


if __name__ == '__main__':
    while True:
        print("[I] Scraping...")
        HandleFeedList(Feeds)
        if LoopTime <= 0:
            break  # single-shot mode
        print(f"[I] Sleeping for {LoopTime}s...")
        time.sleep(LoopTime)