{Title}
+ +-
Published on {Entry['published']}
-From {Entry['link']}
- -- -
From 60d5b90866c88fe048eb649bb531d599fb43ffa3 Mon Sep 17 00:00:00 2001 From: octt <6083316-octospacc@users.noreply.gitlab.com> Date: Mon, 15 Aug 2022 22:32:34 +0000 Subject: [PATCH] Parse HTML instead of RSS (to get more content) --- MastodonFeedHTML.py | 134 ++++++++++++++++++++++++++++---------------- 1 file changed, 87 insertions(+), 47 deletions(-) diff --git a/MastodonFeedHTML.py b/MastodonFeedHTML.py index 1b3973c..ebfc2cc 100644 --- a/MastodonFeedHTML.py +++ b/MastodonFeedHTML.py @@ -4,6 +4,7 @@ import feedparser import os import urllib.request import email, smtplib, ssl +from bs4 import BeautifulSoup from email import encoders from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart @@ -18,60 +19,97 @@ Password = "Example" Server = "smtp.example.com" Port = 465 +OnlyOwnPosts = False +MailSend = False LocalSave = True -MailSend = True +NoSpacesFile = False +StripWS = '\t\r\n' + def MakePathStr(Str): - for c in ('<>:"/\\|?* '): + for c in ('<>:"/\\|?*'): Str = Str.replace(c, '_') + if NoSpacesFile: + Str = Str.replace(' ', '_') return Str def Main(): - Feeds = [feedparser.parse(URL)['entries'] for URL in URLs] - for Feed in Feeds: - Feed.reverse() # Order from oldest to newest - for Entry in Feed: - if os.path.isfile('MastodonFeedToHTML.db'): - with open('MastodonFeedToHTML.db', 'r') as Db: - if Entry['id'] in Db.read().splitlines(): - continue + for URL in URLs: + URL = URL.rstrip('/').rstrip('/with_replies') + '/with_replies' + Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}" + try: + Response = urllib.request.urlopen(URL) + Data = Response.read() + Soup = BeautifulSoup(Data, 'html.parser') + Feed = Soup.find_all('div', class_='entry') + Feed.reverse() # Order from oldest to newest - try: - print(f"{Entry['id']} - {Entry['title']}") + for Entry in Feed: Attached = '' - HTML = f"""\ -
Published on {Entry['published']}
-From {Entry['link']}
- -