Update MastodonFeedHTML to handle media correctly and support more feeds

This commit is contained in:
octospacc 2022-08-16 12:50:39 +02:00
parent 60d5b90866
commit cdbc07120a
1 changed file with 92 additions and 65 deletions

View File

@ -2,6 +2,7 @@
import base64
import feedparser
import os
import time
import urllib.request
import email, smtplib, ssl
from bs4 import BeautifulSoup
@ -9,24 +10,37 @@ from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# from Config import *
Feeds = [
{
"URLs": ["https://botsin.space/@sitoctt"],
"IncludeRetoots": True,
"IncludeReplies": True,
"LocalSave": True,
"SendMail": True,
"MailTo": ["example@example.com"]
}
]
URLs = ["https://botsin.space/@sitoctt.rss"]
MailUsername = "example@example.com"
MailPassword = "Example"
MailServer = "smtp.example.com"
MailPort = 465
Receivers = ["example@example.com"]
Sender = "example@example.com"
Password = "Example"
Server = "smtp.example.com"
Port = 465
OnlyOwnPosts = False
MailSend = False
LocalSave = True
NoSpacesFile = False
LoopTime = 0
MailSleep = 10
AppName = "MastodonFeedHTML"
StripWS = '\t\r\n'
def SureList(Item):
    """Return ``Item`` unchanged if it is a list, otherwise wrap it in one.

    Lets callers accept either a single value or a list of values and
    always iterate over the result.

    Any list instance (including subclasses) is returned as-is, i.e. the
    very same object, not a copy. Every other value -- including strings,
    tuples and ``None`` -- becomes the sole element of a new list.
    """
    # isinstance() rather than an exact type comparison, so that list
    # subclasses are not wrapped in a second list.
    return Item if isinstance(Item, list) else [Item]
def MakePathStr(Str):
for c in ('<>:"/\\|?*'):
Str = Str.replace(c, '_')
@ -34,7 +48,7 @@ def MakePathStr(Str):
Str = Str.replace(' ', '_')
return Str
def Main():
def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
for URL in URLs:
URL = URL.rstrip('/').rstrip('/with_replies') + '/with_replies'
Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}"
@ -47,45 +61,29 @@ def Main():
for Entry in Feed:
Attached = ''
GlobalId = Entry.find('a', class_='u-url')['href'].lstrip('https://').lstrip('http://')
print(Entry)
GlobalId = Entry.find('a', class_='u-url')
if GlobalId:
GlobalId = GlobalId['href'].lstrip('https://').lstrip('http://')
else:
continue
LocalId = GlobalId.split('/')[-1]
Username = Entry.find('a', class_='status__display-name').get_text().strip(StripWS)
Content = Entry.find('div', class_='e-content')
StatusPrepend = Entry.find('div', class_='status__prepend')
StatusPrepend = StatusPrepend.get_text().strip(StripWS)[len(Username):] if StatusPrepend else ''
StatusPrepend = ' ' + StatusPrepend.strip(StripWS) if StatusPrepend else ''
if OnlyOwnPosts and StatusPrepend:
if not IncludeRetoots and StatusPrepend:
continue
if not StatusPrepend and IncludeReplies and Entry.find('i', class_='fa-reply-all'):
StatusPrepend = ' replied'
Title = Content.get_text().strip(StripWS)
Title = f"{Usertag}{StatusPrepend}: {Title[:32]}..."
for Emoji in Entry.find_all('img', class_='custom-emoji'):
Emoji['style'] = 'max-height:1em;'
Attachments = Entry.find('ul', class_='attachment-list__list')
if Attachments:
for Attachment in Attachments:
Href, Alt = '', ''
Attachment = str(Attachment).strip(StripWS).replace("'",'"').split('"')
for i,e in enumerate(Attachment):
if e.endswith('<a href='):
Href = Attachment[i+1]
elif e.endswith('title='):
Alt = "'".join(Attachment[i+1:-1])
if Href:
Response = urllib.request.urlopen(Href)
Data = Response.read()
Type = 'img' if Href.lower().endswith(('.png','.jpg','.jpeg')) else 'img'
Mime = f"image/{Href.lower().split('.')[-1]}"
Opening = f'<{Type} alt="{Alt}" title="{Alt}"' if Type == 'img' else f'<{Type} controls'
Closing = '>' if Type == 'img' else f"></{Type}>"
Attached += f"""{Opening} style="max-width:100%;max-height:100vh;" src="data:{Mime};base64,{base64.b64encode(Data).decode()}"{Closing}\n"""
Entry.find('img', class_='u-photo account__avatar')['style'] = 'max-height:2em;'#'display:none; visibility:hidden;'
Entry.find('div', class_='status__action-bar').replace_with('')
if os.path.isfile('MastodonFeedToHTML.db'):
with open('MastodonFeedToHTML.db', 'r') as Db:
if GlobalId in Db.read().splitlines():
pass #continue
print(f"-> {LocalId} - {Title}")
HTML = f"""\
<h1>{Title}</h1>
@ -100,38 +98,50 @@ def Main():
<p><i>Via <a href="https://gitlab.com/-/snippets/2388397">https://gitlab.com/-/snippets/2388397</a></i></p>
"""
#print(HTML)
if MailSend:
if SendMail:
Message = MIMEMultipart()
Message['From'] = Sender
Message['To'] = ', '.join(Receivers)
Message['Subject'] = Entry['title']
Message['From'] = MailUsername
Message['To'] = ', '.join(MailTo)
Message['Subject'] = Title
Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html'))
for Link in []:#Entry['links']:
if Link['type'].startswith(('audio/', 'image/', 'video/')):
Response = urllib.request.urlopen(Link['href'])
Attachments = Entry.find('ul', class_='attachment-list__list')
if Attachments:
for Attachment in Attachments:
Href, Alt = '', ''
Attachment = str(Attachment).strip(StripWS).replace("'",'"').split('"')
for i,e in enumerate(Attachment):
if e.endswith('<a href='):
Href = Attachment[i+1]
elif e.endswith('title='):
Alt = "'".join(Attachment[i+1:-1])
if Href:
Response = urllib.request.urlopen(Href)
Data = Response.read()
Type = 'img' if Link['type'].startswith('image/') else Link['type'].split('/')[0]
Opening = f"<{Type}" if Type == 'img' else f"<{Type} controls"
Closing = '>' if Type == 'img' else f"></{Type}>"
Attached += f"""{Opening} style="max-width:100%;max-height:100vh;" src="data:{Link['type']};base64,{base64.b64encode(Data).decode()}"{Closing}\n"""
if MailSend:
File = MIMEBase(Link['type'].split('/')[0], Link['type'].split('/')[1])
Mime = Response.info().get_content_type()
Tag = 'img' if Mime.split('/')[0] == 'image' else Mime.split('/')[0]
Opening = f'<{Tag} alt="{Alt}" title="{Alt}"' if Tag == 'img' else f'<{Tag} controls'
Closing = '>' if Tag == 'img' else f"></{Tag}>"
Attached += f"""{Opening} style="max-width:100%;max-height:100vh;" src="data:{Mime};base64,{base64.b64encode(Data).decode()}"{Closing}\n"""
if SendMail:
File = MIMEBase(Mime.split('/')[0], Mime.split('/')[1])
File.set_payload(Data)
encoders.encode_base64(File)
File.add_header(
"Content-Disposition",
f"attachment; filename= {Link['href'].split('/')[-1]}",
)
f"attachment; filename={Href.split('/')[-1]}")
Message.attach(File)
if MailSend:
with smtplib.SMTP_SSL(Server, Port, context=ssl.create_default_context()) as Client:
Client.login(Sender, Password)
Client.sendmail(Sender, Receivers, Message.as_string())
if os.path.isfile(f'{AppName}.db'):
with open(f'{AppName}.db', 'r') as Db:
if GlobalId in Db.read().splitlines():
pass #continue
if SendMail:
with smtplib.SMTP_SSL(MailServer, MailPort, context=ssl.create_default_context()) as Client:
Client.login(MailUsername, MailPassword)
Client.sendmail(MailUsername, MailTo, Message.as_string())
time.sleep(MailSleep)
if LocalSave:
LocalBackupDir = MakePathStr(Usertag)
@ -141,11 +151,28 @@ def Main():
with open(f'{LocalBackupDir}/{FileName}.html', 'w') as File:
File.write(HTML.replace('{ Replace:Attached }', Attached))
with open('MastodonFeedToHTML.db', 'a') as Db:
with open(f'{AppName}.db', 'a') as Db:
pass #Db.write(GlobalId + '\n')
except Exception:
raise
def HandleFeedList(List):
    """Call ``HandleFeed`` once for every feed configuration in ``List``.

    Each item of ``List`` is a dict. ``'URLs'`` is required (a single URL
    or a list of URLs); a missing key raises ``KeyError``. The optional
    keys and their defaults are:

    * ``'IncludeRetoots'`` -- default ``True``
    * ``'IncludeReplies'`` -- default ``True``
    * ``'LocalSave'``      -- default ``True``
    * ``'MailTo'``         -- single recipient or list; default ``[]``
    * ``'SendMail'``       -- default ``True`` when ``'MailTo'`` is set
      to a non-empty value, otherwise ``False``
    """
    for Feed in List:
        print(f"[I] Handling item:\n{Feed}")
        # Normalise the recipients first: the SendMail default depends on it.
        MailTo = SureList(Feed['MailTo']) if Feed.get('MailTo') else []
        HandleFeed(
            URLs=SureList(Feed['URLs']),
            IncludeRetoots=Feed.get('IncludeRetoots', True),
            IncludeReplies=Feed.get('IncludeReplies', True),
            LocalSave=Feed.get('LocalSave', True),
            # The recipients key is 'MailTo'. The previous fallback looked
            # for a non-existent 'To' key, so mail was never enabled
            # implicitly even when recipients were configured.
            SendMail=Feed['SendMail'] if 'SendMail' in Feed else bool(MailTo),
            MailTo=MailTo)
if __name__ == '__main__':
Main()
while True:
print("[I] Scraping...")
HandleFeedList(Feeds)
if LoopTime <= 0:
exit()
print(f"[I] Sleeping for {LoopTime}s...")
time.sleep(LoopTime)