Upd. MastodonFeedHTML to handle media correctly and more feeds

This commit is contained in:
2022-08-16 12:50:39 +02:00
parent 60d5b90866
commit cdbc07120a

View File

@ -2,6 +2,7 @@
import base64 import base64
import feedparser import feedparser
import os import os
import time
import urllib.request import urllib.request
import email, smtplib, ssl import email, smtplib, ssl
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -9,24 +10,37 @@ from email import encoders
from email.mime.base import MIMEBase from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
# from Config import *
Feeds = [
{
"URLs": ["https://botsin.space/@sitoctt"],
"IncludeRetoots": True,
"IncludeReplies": True,
"LocalSave": True,
"SendMail": True,
"MailTo": ["example@example.com"]
}
]
URLs = ["https://botsin.space/@sitoctt.rss"] MailUsername = "example@example.com"
MailPassword = "Example"
MailServer = "smtp.example.com"
MailPort = 465
Receivers = ["example@example.com"]
Sender = "example@example.com"
Password = "Example"
Server = "smtp.example.com"
Port = 465
OnlyOwnPosts = False
MailSend = False
LocalSave = True
NoSpacesFile = False NoSpacesFile = False
LoopTime = 0
MailSleep = 10
AppName = "MastodonFeedHTML"
StripWS = '\t\r\n' StripWS = '\t\r\n'
def SureList(Item):
    """Return Item as a list.

    A value that is already a list (or an instance of a list subclass) is
    returned unchanged, as the very same object. Any other value, including
    strings, tuples and None, is wrapped in a new one-element list.
    """
    # isinstance instead of an exact type comparison, so a list subclass is
    # passed through rather than being nested inside another list.
    return Item if isinstance(Item, list) else [Item]
def MakePathStr(Str): def MakePathStr(Str):
for c in ('<>:"/\\|?*'): for c in ('<>:"/\\|?*'):
Str = Str.replace(c, '_') Str = Str.replace(c, '_')
@ -34,7 +48,7 @@ def MakePathStr(Str):
Str = Str.replace(' ', '_') Str = Str.replace(' ', '_')
return Str return Str
def Main(): def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
for URL in URLs: for URL in URLs:
URL = URL.rstrip('/').rstrip('/with_replies') + '/with_replies' URL = URL.rstrip('/').rstrip('/with_replies') + '/with_replies'
Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}" Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}"
@ -47,45 +61,29 @@ def Main():
for Entry in Feed: for Entry in Feed:
Attached = '' Attached = ''
GlobalId = Entry.find('a', class_='u-url')['href'].lstrip('https://').lstrip('http://') print(Entry)
GlobalId = Entry.find('a', class_='u-url')
if GlobalId:
GlobalId = GlobalId['href'].lstrip('https://').lstrip('http://')
else:
continue
LocalId = GlobalId.split('/')[-1] LocalId = GlobalId.split('/')[-1]
Username = Entry.find('a', class_='status__display-name').get_text().strip(StripWS) Username = Entry.find('a', class_='status__display-name').get_text().strip(StripWS)
Content = Entry.find('div', class_='e-content') Content = Entry.find('div', class_='e-content')
StatusPrepend = Entry.find('div', class_='status__prepend') StatusPrepend = Entry.find('div', class_='status__prepend')
StatusPrepend = StatusPrepend.get_text().strip(StripWS)[len(Username):] if StatusPrepend else '' StatusPrepend = StatusPrepend.get_text().strip(StripWS)[len(Username):] if StatusPrepend else ''
StatusPrepend = ' ' + StatusPrepend.strip(StripWS) if StatusPrepend else '' StatusPrepend = ' ' + StatusPrepend.strip(StripWS) if StatusPrepend else ''
if OnlyOwnPosts and StatusPrepend: if not IncludeRetoots and StatusPrepend:
continue continue
if not StatusPrepend and IncludeReplies and Entry.find('i', class_='fa-reply-all'):
StatusPrepend = ' replied'
Title = Content.get_text().strip(StripWS) Title = Content.get_text().strip(StripWS)
Title = f"{Usertag}{StatusPrepend}: {Title[:32]}..." Title = f"{Usertag}{StatusPrepend}: {Title[:32]}..."
for Emoji in Entry.find_all('img', class_='custom-emoji'): for Emoji in Entry.find_all('img', class_='custom-emoji'):
Emoji['style'] = 'max-height:1em;' Emoji['style'] = 'max-height:1em;'
Attachments = Entry.find('ul', class_='attachment-list__list')
if Attachments:
for Attachment in Attachments:
Href, Alt = '', ''
Attachment = str(Attachment).strip(StripWS).replace("'",'"').split('"')
for i,e in enumerate(Attachment):
if e.endswith('<a href='):
Href = Attachment[i+1]
elif e.endswith('title='):
Alt = "'".join(Attachment[i+1:-1])
if Href:
Response = urllib.request.urlopen(Href)
Data = Response.read()
Type = 'img' if Href.lower().endswith(('.png','.jpg','.jpeg')) else 'img'
Mime = f"image/{Href.lower().split('.')[-1]}"
Opening = f'<{Type} alt="{Alt}" title="{Alt}"' if Type == 'img' else f'<{Type} controls'
Closing = '>' if Type == 'img' else f"></{Type}>"
Attached += f"""{Opening} style="max-width:100%;max-height:100vh;" src="data:{Mime};base64,{base64.b64encode(Data).decode()}"{Closing}\n"""
Entry.find('img', class_='u-photo account__avatar')['style'] = 'max-height:2em;'#'display:none; visibility:hidden;' Entry.find('img', class_='u-photo account__avatar')['style'] = 'max-height:2em;'#'display:none; visibility:hidden;'
Entry.find('div', class_='status__action-bar').replace_with('') Entry.find('div', class_='status__action-bar').replace_with('')
if os.path.isfile('MastodonFeedToHTML.db'):
with open('MastodonFeedToHTML.db', 'r') as Db:
if GlobalId in Db.read().splitlines():
pass #continue
print(f"-> {LocalId} - {Title}") print(f"-> {LocalId} - {Title}")
HTML = f"""\ HTML = f"""\
<h1>{Title}</h1> <h1>{Title}</h1>
@ -100,38 +98,50 @@ def Main():
<p><i>Via <a href="https://gitlab.com/-/snippets/2388397">https://gitlab.com/-/snippets/2388397</a></i></p> <p><i>Via <a href="https://gitlab.com/-/snippets/2388397">https://gitlab.com/-/snippets/2388397</a></i></p>
""" """
#print(HTML) if SendMail:
if MailSend:
Message = MIMEMultipart() Message = MIMEMultipart()
Message['From'] = Sender Message['From'] = MailUsername
Message['To'] = ', '.join(Receivers) Message['To'] = ', '.join(MailTo)
Message['Subject'] = Entry['title'] Message['Subject'] = Title
Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html')) Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html'))
for Link in []:#Entry['links']: Attachments = Entry.find('ul', class_='attachment-list__list')
if Link['type'].startswith(('audio/', 'image/', 'video/')): if Attachments:
Response = urllib.request.urlopen(Link['href']) for Attachment in Attachments:
Data = Response.read() Href, Alt = '', ''
Type = 'img' if Link['type'].startswith('image/') else Link['type'].split('/')[0] Attachment = str(Attachment).strip(StripWS).replace("'",'"').split('"')
Opening = f"<{Type}" if Type == 'img' else f"<{Type} controls" for i,e in enumerate(Attachment):
Closing = '>' if Type == 'img' else f"></{Type}>" if e.endswith('<a href='):
Attached += f"""{Opening} style="max-width:100%;max-height:100vh;" src="data:{Link['type']};base64,{base64.b64encode(Data).decode()}"{Closing}\n""" Href = Attachment[i+1]
elif e.endswith('title='):
Alt = "'".join(Attachment[i+1:-1])
if Href:
Response = urllib.request.urlopen(Href)
Data = Response.read()
Mime = Response.info().get_content_type()
Tag = 'img' if Mime.split('/')[0] == 'image' else Mime.split('/')[0]
Opening = f'<{Tag} alt="{Alt}" title="{Alt}"' if Tag == 'img' else f'<{Tag} controls'
Closing = '>' if Tag == 'img' else f"></{Tag}>"
Attached += f"""{Opening} style="max-width:100%;max-height:100vh;" src="data:{Mime};base64,{base64.b64encode(Data).decode()}"{Closing}\n"""
if SendMail:
File = MIMEBase(Mime.split('/')[0], Mime.split('/')[1])
File.set_payload(Data)
encoders.encode_base64(File)
File.add_header(
"Content-Disposition",
f"attachment; filename={Href.split('/')[-1]}")
Message.attach(File)
if MailSend: if os.path.isfile(f'{AppName}.db'):
File = MIMEBase(Link['type'].split('/')[0], Link['type'].split('/')[1]) with open(f'{AppName}.db', 'r') as Db:
File.set_payload(Data) if GlobalId in Db.read().splitlines():
encoders.encode_base64(File) pass #continue
File.add_header(
"Content-Disposition",
f"attachment; filename= {Link['href'].split('/')[-1]}",
)
Message.attach(File)
if MailSend: if SendMail:
with smtplib.SMTP_SSL(Server, Port, context=ssl.create_default_context()) as Client: with smtplib.SMTP_SSL(MailServer, MailPort, context=ssl.create_default_context()) as Client:
Client.login(Sender, Password) Client.login(MailUsername, MailPassword)
Client.sendmail(Sender, Receivers, Message.as_string()) Client.sendmail(MailUsername, MailTo, Message.as_string())
time.sleep(MailSleep)
if LocalSave: if LocalSave:
LocalBackupDir = MakePathStr(Usertag) LocalBackupDir = MakePathStr(Usertag)
@ -141,11 +151,28 @@ def Main():
with open(f'{LocalBackupDir}/{FileName}.html', 'w') as File: with open(f'{LocalBackupDir}/{FileName}.html', 'w') as File:
File.write(HTML.replace('{ Replace:Attached }', Attached)) File.write(HTML.replace('{ Replace:Attached }', Attached))
with open('MastodonFeedToHTML.db', 'a') as Db: with open(f'{AppName}.db', 'a') as Db:
pass #Db.write(GlobalId + '\n') pass #Db.write(GlobalId + '\n')
except Exception: except Exception:
raise raise
def HandleFeedList(List):
    """Call HandleFeed once for every feed configuration dict in List.

    Each item must have a 'URLs' key (a single value or a list). The other
    keys are optional:
      - 'IncludeRetoots', 'IncludeReplies', 'LocalSave': default to True.
      - 'MailTo': a single recipient or a list; missing or empty gives [].
      - 'SendMail': when absent, mail is sent only if 'MailTo' is non-empty.

    Raises KeyError if an item has no 'URLs' key.
    """
    for Feed in List:
        print(f"[I] Handling item:\n{Feed}")
        MailTo = SureList(Feed['MailTo']) if Feed.get('MailTo') else []
        HandleFeed(
            URLs=SureList(Feed['URLs']),
            IncludeRetoots=Feed.get('IncludeRetoots', True),
            IncludeReplies=Feed.get('IncludeReplies', True),
            LocalSave=Feed.get('LocalSave', True),
            # The fallback used to test a 'To' key, which no feed item
            # defines (recipients live under 'MailTo'), so it was always
            # False. Derive it from the resolved recipient list instead.
            SendMail=Feed['SendMail'] if 'SendMail' in Feed else bool(MailTo),
            MailTo=MailTo)
if __name__ == '__main__': if __name__ == '__main__':
Main() while True:
print("[I] Scraping...")
HandleFeedList(Feeds)
if LoopTime <= 0:
exit()
print(f"[I] Sleeping for {LoopTime}s...")
time.sleep(LoopTime)