mirror of
https://gitlab.com/octospacc/bottocto
synced 2025-06-05 22:19:22 +02:00
Handle items older than last 20; TODO: Fix order
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
|||||||
*.pyc
|
*.pyc
|
||||||
|
*.db
|
||||||
Config.py
|
Config.py
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import base64
|
import base64
|
||||||
import feedparser
|
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import email, smtplib, ssl
|
import email, smtplib, ssl
|
||||||
@ -22,10 +21,30 @@ def MakePathStr(Str):
|
|||||||
Str = Str.replace(' ', '_')
|
Str = Str.replace(' ', '_')
|
||||||
return Str
|
return Str
|
||||||
|
|
||||||
|
def HandleFeedsList(List):
|
||||||
|
for Feed in List:
|
||||||
|
print(f"[I] Handling Feed ->\n: {Feed}")
|
||||||
|
HandleFeed(
|
||||||
|
URLs=SureList(Feed['URLs']),
|
||||||
|
IncludeRetoots=Feed['IncludeRetoots'] if 'IncludeRetoots' in Feed else True,
|
||||||
|
IncludeReplies=Feed['IncludeReplies'] if 'IncludeReplies' in Feed else True,
|
||||||
|
LocalSave=Feed['LocalSave'] if 'LocalSave' in Feed else True,
|
||||||
|
SendMail=Feed['SendMail'] if 'SendMail' in Feed else True if 'To' in Feed and Feed['To'] else False,
|
||||||
|
MailTo=SureList(Feed['MailTo']) if 'MailTo' in Feed and Feed['MailTo'] else [])
|
||||||
|
|
||||||
def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
|
def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
|
||||||
for URL in URLs:
|
for URL in URLs:
|
||||||
|
if not (LocalSave or SendMail):
|
||||||
|
print("[I] Partial dry-run for this URL (LocalSave and SendMail are disabled).")
|
||||||
URL = URL.removesuffix('/').removesuffix('/with_replies') + '/with_replies'
|
URL = URL.removesuffix('/').removesuffix('/with_replies') + '/with_replies'
|
||||||
Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}"
|
Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}"
|
||||||
|
LastEntryIsNew, PageOlder = HandleURL(URL, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo)
|
||||||
|
while LastEntryIsNew and PageOlder: # TODO: Fix this, make a single run for all items / start from oldest, otherwise order is messed up
|
||||||
|
LastEntryIsNew, PageOlder = HandleURL(PageOlder, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo)
|
||||||
|
|
||||||
|
def HandleURL(URL, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
|
||||||
|
LastEntryIsNew = False
|
||||||
|
PageOlder = ''
|
||||||
try:
|
try:
|
||||||
Response = urlopen(Request(URL, headers={'User-Agent':UserAgent}))
|
Response = urlopen(Request(URL, headers={'User-Agent':UserAgent}))
|
||||||
Data = Response.read()
|
Data = Response.read()
|
||||||
@ -33,12 +52,18 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
|
|||||||
Feed = Soup.find_all('div', class_='entry')
|
Feed = Soup.find_all('div', class_='entry')
|
||||||
Feed.reverse() # Order from oldest to newest
|
Feed.reverse() # Order from oldest to newest
|
||||||
|
|
||||||
|
Index = 0
|
||||||
for Entry in Feed:
|
for Entry in Feed:
|
||||||
Attached = ''
|
Attached = ''
|
||||||
GlobalId = Entry.find('a', class_='u-url')
|
Anchor = Entry.find('a', class_='u-url')
|
||||||
if GlobalId:
|
if Anchor:
|
||||||
GlobalId = GlobalId['href'].removeprefix('https://').removeprefix('http://')
|
GlobalId = Anchor['href'].removeprefix('https://').removeprefix('http://')
|
||||||
|
Index += 1
|
||||||
else:
|
else:
|
||||||
|
Anchor = Entry.find('a', class_='load-more')
|
||||||
|
if Anchor:
|
||||||
|
if '?max_id=' in Anchor['href']:
|
||||||
|
PageOlder = Anchor['href']
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if os.path.isfile(f'{AppName}.db'):
|
if os.path.isfile(f'{AppName}.db'):
|
||||||
@ -46,6 +71,8 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
|
|||||||
if f'{Usertag} {GlobalId}' in Db.read().splitlines():
|
if f'{Usertag} {GlobalId}' in Db.read().splitlines():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if Index == 1:
|
||||||
|
LastEntryIsNew = True
|
||||||
LocalId = GlobalId.split('/')[-1]
|
LocalId = GlobalId.split('/')[-1]
|
||||||
Username = Entry.find('a', class_='status__display-name').get_text().strip()
|
Username = Entry.find('a', class_='status__display-name').get_text().strip()
|
||||||
Content = Entry.find('div', class_='e-content')
|
Content = Entry.find('div', class_='e-content')
|
||||||
@ -85,7 +112,7 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
|
|||||||
Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html'))
|
Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html'))
|
||||||
|
|
||||||
Attachments = Entry.find('ul', class_='attachment-list__list')
|
Attachments = Entry.find('ul', class_='attachment-list__list')
|
||||||
if Attachments:
|
if Attachments and (LocalSave or SendMail):
|
||||||
for Attachment in Attachments:
|
for Attachment in Attachments:
|
||||||
Href, Alt = '', ''
|
Href, Alt = '', ''
|
||||||
Attachment = str(Attachment).strip().replace("'",'"').split('"')
|
Attachment = str(Attachment).strip().replace("'",'"').split('"')
|
||||||
@ -129,24 +156,15 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
|
|||||||
with open(f'{AppName}.db', 'a') as Db:
|
with open(f'{AppName}.db', 'a') as Db:
|
||||||
Db.write(f'{Usertag} {GlobalId}' + '\n')
|
Db.write(f'{Usertag} {GlobalId}' + '\n')
|
||||||
|
|
||||||
|
return LastEntryIsNew, PageOlder
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def HandleFeedList(List):
|
|
||||||
for Feed in List:
|
|
||||||
print(f"[I] Handling Feed ->\n: {Feed}")
|
|
||||||
HandleFeed(
|
|
||||||
URLs=SureList(Feed['URLs']),
|
|
||||||
IncludeRetoots=Feed['IncludeRetoots'] if 'IncludeRetoots' in Feed else True,
|
|
||||||
IncludeReplies=Feed['IncludeReplies'] if 'IncludeReplies' in Feed else True,
|
|
||||||
LocalSave=Feed['LocalSave'] if 'LocalSave' in Feed else True,
|
|
||||||
SendMail=Feed['SendMail'] if 'SendMail' in Feed else True if 'To' in Feed and Feed['To'] else False,
|
|
||||||
MailTo=SureList(Feed['MailTo']) if 'MailTo' in Feed and Feed['MailTo'] else [])
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
while True:
|
while True:
|
||||||
print("[I] Scraping...")
|
print("[I] Scraping...")
|
||||||
HandleFeedList(Feeds)
|
HandleFeedsList(Feeds)
|
||||||
if LoopTime <= 0:
|
if LoopTime <= 0:
|
||||||
exit()
|
exit()
|
||||||
print(f"[I] Sleeping for {LoopTime}s...")
|
print(f"[I] Sleeping for {LoopTime}s...")
|
||||||
|
3
MastodonFeedHTML/Start.MastodonFeedHTML
Executable file
3
MastodonFeedHTML/Start.MastodonFeedHTML
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
cd "$( dirname "$( realpath "$0" )" )"
|
||||||
|
./MastodonFeedHTML.py
|
Reference in New Issue
Block a user