bottocto/MastodonFeedHTML.py

179 lines
5.7 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import base64
import email
import html
import os
import smtplib
import ssl
import time
import urllib.parse
import urllib.request
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import feedparser
from bs4 import BeautifulSoup
# from Config import *
# Accounts to mirror. HandleFeedList() forwards every dict in this list to
# HandleFeed(), using the keys below as keyword arguments.
Feeds = [
    {
        # Profile page(s) to scrape; a single string is accepted as well.
        "URLs": ["https://botsin.space/@sitoctt"],
        "IncludeRetoots": True,
        "IncludeReplies": True,
        # Write every post as an HTML file into a per-account directory.
        "LocalSave": True,
        # Send every post by mail to the "MailTo" addresses.
        "SendMail": True,
        "MailTo": ["example@example.com"],
    },
]

# SMTP account used both to log in and as the "From" address.
MailUsername = "example@example.com"
MailPassword = "Example"
# The connection is opened with smtplib.SMTP_SSL on this host and port.
MailServer = "smtp.example.com"
MailPort = 465

# When true, MakePathStr() also turns spaces into underscores.
NoSpacesFile = False
# Seconds to wait between two scraping rounds; a value <= 0 means "run once".
LoopTime = 0
# Seconds to sleep after each mail that is sent.
MailSleep = 10
# Base name of the "<AppName>.db" file kept in the working directory.
AppName = "MastodonFeedHTML"
# Characters stripped from both ends of scraped text (note: no plain space).
StripWS = '\t\r\n'
def SureList(Item):
    """Return Item as a list.

    A list (or an instance of a list subclass) is returned unchanged, the
    very same object; any other value, including None, a string or a tuple,
    is wrapped in a new one-element list.
    """
    # isinstance() rather than `type(Item) == list`, so that list subclasses
    # are recognised as lists instead of being wrapped a second time.
    return Item if isinstance(Item, list) else [Item]
def MakePathStr(Str):
    """Return Str with characters unsuitable for file names replaced by '_'.

    The characters < > : " / \\ | ? * are always replaced; spaces are
    replaced too when the module-level NoSpacesFile flag is true.
    """
    Unsafe = '<>:"/\\|?*'
    if NoSpacesFile:
        Unsafe += ' '
    # One translation pass maps every unsafe character to an underscore.
    return Str.translate(str.maketrans(Unsafe, '_' * len(Unsafe)))
def _WithRepliesUrl(ProfileUrl):
    """Return ProfileUrl normalised to end in exactly one '/with_replies'."""
    Suffix = '/with_replies'
    ProfileUrl = ProfileUrl.rstrip('/')
    # str.rstrip(chars) removes a *set of characters*, not a suffix, so it
    # cannot be used for this: it would also eat trailing letters of the
    # username (e.g. '@sitoctt' -> '@sitoc').
    if ProfileUrl.endswith(Suffix):
        ProfileUrl = ProfileUrl[:-len(Suffix)].rstrip('/')
    return ProfileUrl + Suffix


def _StripUrlScheme(Url):
    """Return Url without its leading 'https://' or 'http://', if present."""
    # Same pitfall as above: lstrip('https://') would strip any leading
    # h/t/p/s/:// characters, including the first letters of the host name.
    for Scheme in ('https://', 'http://'):
        if Url.startswith(Scheme):
            return Url[len(Scheme):]
    return Url


def _LoadSeenIds(DbPath):
    """Return the set of post ids listed in DbPath, one per line (empty if missing)."""
    if not os.path.isfile(DbPath):
        return set()
    with open(DbPath, 'r') as Db:
        return set(Db.read().splitlines())


def _FetchAttachments(Entry):
    """Download every file linked from the entry's attachment list.

    Returns a list of (Href, Alt, Mime, Data) tuples; Alt is the link's
    title attribute, or '' when it has none.
    """
    ListNode = Entry.find('ul', class_='attachment-list__list')
    if not ListNode:
        return []
    Fetched = []
    for Link in ListNode.find_all('a', href=True):
        Href = Link['href']
        with urllib.request.urlopen(Href) as Response:
            Data = Response.read()
            Mime = Response.info().get_content_type()
        Fetched.append((Href, Link.get('title') or '', Mime, Data))
    return Fetched


def _AttachmentName(Href):
    """Return the last path component of Href, ignoring query and fragment."""
    return urllib.parse.urlsplit(Href).path.rsplit('/', 1)[-1] or 'attachment'


def _EmbedAttachment(Href, Alt, Mime, Data):
    """Return an HTML snippet carrying Data inline as a base64 data: URI."""
    Kind = Mime.split('/')[0]
    Source = f"data:{Mime};base64,{base64.b64encode(Data).decode()}"
    Style = 'style="max-width:100%;max-height:100vh;"'
    if Kind == 'image':
        Alt = html.escape(Alt, quote=True)
        return f'<img alt="{Alt}" title="{Alt}" {Style} src="{Source}">\n'
    if Kind in ('audio', 'video'):
        return f'<{Kind} controls {Style} src="{Source}"></{Kind}>\n'
    # Any other type has no matching media element: offer it as a download.
    Name = html.escape(_AttachmentName(Href), quote=True)
    return f'<a download="{Name}" href="{Source}">{Name}</a>\n'


def _MailAttachment(Href, Mime, Data):
    """Return a base64-encoded MIME part holding Data as a file attachment."""
    MainType, SubType = Mime.split('/', 1)
    Part = MIMEBase(MainType, SubType)
    Part.set_payload(Data)
    encoders.encode_base64(Part)
    # Passing the name as a parameter lets the email package quote/encode it.
    Part.add_header('Content-Disposition', 'attachment', filename=_AttachmentName(Href))
    return Part


def _RenderHTML(Title, EntryHtml, Attached):
    """Return the HTML document body for one post."""
    return (
        f"<h1>{html.escape(Title)}</h1>\n"
        "<div>\n"
        f"{EntryHtml}\n"
        f"{Attached}\n"
        "</div>\n"
        "<br><hr><br>\n"
        '<p><i>Via <a href="https://gitlab.com/-/snippets/2388397">'
        'https://gitlab.com/-/snippets/2388397</a></i></p>\n'
    )


def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
    """Scrape the given profile pages and mail and/or save every new post.

    Each page is fetched with its '/with_replies' view and its
    'div.entry' elements are processed from the last to the first one found
    (the original code's comment calls this oldest to newest). The id of
    every processed post is appended to '<AppName>.db' in the working
    directory, and posts already listed there are skipped.

    Parameters:
        URLs: list of profile URLs (e.g. 'https://host/@user').
        IncludeRetoots: when false, entries carrying a 'status__prepend'
            banner are skipped.
        IncludeReplies: when false, entries marked with the 'fa-reply-all'
            icon are skipped.
        LocalSave: when true, write '<id> - <title>.html' (attachments
            embedded as data: URIs) into a directory named after the account.
        SendMail: when true, send the post to MailTo through the SMTP
            account configured at module level, attachments as MIME parts.
        MailTo: list of recipient addresses.

    Network and SMTP errors are not caught here; they propagate to the caller.
    """
    for URL in URLs:
        DbPath = f'{AppName}.db'
        # Ids of the posts that were already handled.
        Seen = _LoadSeenIds(DbPath)
        URL = _WithRepliesUrl(URL)
        Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}"
        with urllib.request.urlopen(URL) as Response:
            Soup = BeautifulSoup(Response.read(), 'html.parser')
        # Order from oldest to newest
        for Entry in reversed(Soup.find_all('div', class_='entry')):
            Permalink = Entry.find('a', class_='u-url', href=True)
            if not Permalink:
                continue
            GlobalId = _StripUrlScheme(Permalink['href'])
            if GlobalId in Seen:
                continue
            LocalId = GlobalId.split('/')[-1]

            NameNode = Entry.find('a', class_='status__display-name')
            Username = NameNode.get_text().strip(StripWS) if NameNode else ''
            StatusPrepend = ''
            PrependNode = Entry.find('div', class_='status__prepend')
            if PrependNode:
                # Presumably the banner text starts with the display name
                # found above; only what follows it is kept (e.g. "boosted").
                StatusPrepend = PrependNode.get_text().strip(StripWS)[len(Username):].strip(StripWS)
                if StatusPrepend:
                    StatusPrepend = ' ' + StatusPrepend
            if StatusPrepend and not IncludeRetoots:
                continue
            # Must be checked before the action bar is removed below.
            if not StatusPrepend and Entry.find('i', class_='fa-reply-all'):
                if not IncludeReplies:
                    continue
                StatusPrepend = ' replied'

            Content = Entry.find('div', class_='e-content')
            # Collapse whitespace: the title ends up in a mail header and in
            # a file name, neither of which may contain line breaks.
            Text = ' '.join(Content.get_text().split()) if Content else ''
            Title = f"{Usertag}{StatusPrepend}: {Text[:32]}..."

            for Emoji in Entry.find_all('img', class_='custom-emoji'):
                Emoji['style'] = 'max-height:1em;'
            Avatar = Entry.find('img', class_='u-photo account__avatar')
            if Avatar:
                Avatar['style'] = 'max-height:2em;'
            ActionBar = Entry.find('div', class_='status__action-bar')
            if ActionBar:
                ActionBar.replace_with('')
            print(f"-> {LocalId} - {Title}")

            EntryHtml = str(Entry)
            Attachments = _FetchAttachments(Entry)

            if SendMail:
                Message = MIMEMultipart()
                Message['From'] = MailUsername
                Message['To'] = ', '.join(MailTo)
                Message['Subject'] = Title
                # In the mail, attachments travel as MIME parts, not inline.
                Message.attach(MIMEText(_RenderHTML(Title, EntryHtml, ''), 'html'))
                for Href, _, Mime, Data in Attachments:
                    Message.attach(_MailAttachment(Href, Mime, Data))
                with smtplib.SMTP_SSL(MailServer, MailPort, context=ssl.create_default_context()) as Client:
                    Client.login(MailUsername, MailPassword)
                    Client.sendmail(MailUsername, MailTo, Message.as_string())
                time.sleep(MailSleep)

            if LocalSave:
                LocalBackupDir = MakePathStr(Usertag)
                os.makedirs(LocalBackupDir, exist_ok=True)
                FileName = MakePathStr(f"{LocalId} - {Title}")
                Attached = ''.join(_EmbedAttachment(*Item) for Item in Attachments)
                # Explicit UTF-8: posts routinely contain characters that a
                # legacy locale encoding cannot represent.
                with open(f'{LocalBackupDir}/{FileName}.html', 'w', encoding='utf-8') as File:
                    File.write(_RenderHTML(Title, EntryHtml, Attached))

            # Record the post only once it has been fully handled.
            with open(DbPath, 'a') as Db:
                Db.write(GlobalId + '\n')
            Seen.add(GlobalId)
def HandleFeedList(List):
    """Run HandleFeed() once for every feed description in List.

    Each item is a dict. 'URLs' is required (a string or a list of strings).
    Optional keys and their defaults:
        'IncludeRetoots', 'IncludeReplies', 'LocalSave': True
        'MailTo': no recipients (a single string is accepted as well)
        'SendMail': True when 'MailTo' names at least one recipient,
            False otherwise
    """
    for Feed in List:
        print(f"[I] Handling item:\n{Feed}")
        MailTo = SureList(Feed['MailTo']) if Feed.get('MailTo') else []
        HandleFeed(
            URLs=SureList(Feed['URLs']),
            IncludeRetoots=Feed.get('IncludeRetoots', True),
            IncludeReplies=Feed.get('IncludeReplies', True),
            LocalSave=Feed.get('LocalSave', True),
            # The fallback used to look at a 'To' key, which the feed
            # descriptions do not have; the recipients live under 'MailTo'.
            SendMail=Feed.get('SendMail', bool(MailTo)),
            MailTo=MailTo)
if __name__ == '__main__':
    # Scrape all configured feeds, then either stop (LoopTime <= 0) or sleep
    # LoopTime seconds and start over.
    while True:
        print("[I] Scraping...")
        HandleFeedList(Feeds)
        if LoopTime <= 0:
            # Leave the loop instead of calling exit(): that helper is
            # injected by the site module and is not guaranteed to exist
            # (e.g. under `python -S`). The script ends right after the loop.
            break
        print(f"[I] Sleeping for {LoopTime}s...")
        time.sleep(LoopTime)