mirror of
				https://gitlab.com/octospacc/bottocto
				synced 2025-06-05 22:19:22 +02:00 
			
		
		
		
	Handle items older than last 20; TODO: Fix order
This commit is contained in:
		
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -1,2 +1,3 @@
 | 
			
		||||
*.pyc
 | 
			
		||||
*.db
 | 
			
		||||
Config.py
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,5 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
import base64
 | 
			
		||||
import feedparser
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
import email, smtplib, ssl
 | 
			
		||||
@@ -22,10 +21,30 @@ def MakePathStr(Str):
 | 
			
		||||
		Str = Str.replace(' ', '_')
 | 
			
		||||
	return Str
 | 
			
		||||
 | 
			
		||||
def HandleFeedsList(List):
 | 
			
		||||
	for Feed in List:
 | 
			
		||||
		print(f"[I] Handling Feed ->\n: {Feed}")
 | 
			
		||||
		HandleFeed(
 | 
			
		||||
			URLs=SureList(Feed['URLs']),
 | 
			
		||||
			IncludeRetoots=Feed['IncludeRetoots'] if 'IncludeRetoots' in Feed else True,
 | 
			
		||||
			IncludeReplies=Feed['IncludeReplies'] if 'IncludeReplies' in Feed else True,
 | 
			
		||||
			LocalSave=Feed['LocalSave'] if 'LocalSave' in Feed else True,
 | 
			
		||||
			SendMail=Feed['SendMail'] if 'SendMail' in Feed else True if 'To' in Feed and Feed['To'] else False,
 | 
			
		||||
			MailTo=SureList(Feed['MailTo']) if 'MailTo' in Feed and Feed['MailTo'] else [])
 | 
			
		||||
 | 
			
		||||
def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
 | 
			
		||||
	for URL in URLs:
 | 
			
		||||
		if not (LocalSave or SendMail):
 | 
			
		||||
			print("[I] Partial dry-run for this URL (LocalSave and SendMail are disabled).")
 | 
			
		||||
		URL = URL.removesuffix('/').removesuffix('/with_replies') + '/with_replies'
 | 
			
		||||
		Usertag = f"{URL.split('/')[-2]}@{URL.split('/')[-3]}"
 | 
			
		||||
		LastEntryIsNew, PageOlder = HandleURL(URL, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo)
 | 
			
		||||
		while LastEntryIsNew and PageOlder: # TODO: Fix this, make a single run for all items / start from oldest, otherwise order is messed up
 | 
			
		||||
			LastEntryIsNew, PageOlder = HandleURL(PageOlder, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo)
 | 
			
		||||
 | 
			
		||||
def HandleURL(URL, Usertag, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo):
 | 
			
		||||
	LastEntryIsNew = False
 | 
			
		||||
	PageOlder = ''
 | 
			
		||||
	try:
 | 
			
		||||
		Response = urlopen(Request(URL, headers={'User-Agent':UserAgent}))
 | 
			
		||||
		Data = Response.read()
 | 
			
		||||
@@ -33,12 +52,18 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
 | 
			
		||||
		Feed = Soup.find_all('div', class_='entry')
 | 
			
		||||
		Feed.reverse() # Order from oldest to newest
 | 
			
		||||
 | 
			
		||||
		Index = 0
 | 
			
		||||
		for Entry in Feed:
 | 
			
		||||
			Attached = ''
 | 
			
		||||
				GlobalId = Entry.find('a', class_='u-url')
 | 
			
		||||
				if GlobalId:
 | 
			
		||||
					GlobalId = GlobalId['href'].removeprefix('https://').removeprefix('http://')
 | 
			
		||||
			Anchor = Entry.find('a', class_='u-url')
 | 
			
		||||
			if Anchor:
 | 
			
		||||
				GlobalId = Anchor['href'].removeprefix('https://').removeprefix('http://')
 | 
			
		||||
				Index += 1
 | 
			
		||||
			else:
 | 
			
		||||
				Anchor = Entry.find('a', class_='load-more')
 | 
			
		||||
				if Anchor:
 | 
			
		||||
					if '?max_id=' in Anchor['href']:
 | 
			
		||||
						PageOlder = Anchor['href']
 | 
			
		||||
				continue
 | 
			
		||||
 | 
			
		||||
			if os.path.isfile(f'{AppName}.db'):
 | 
			
		||||
@@ -46,6 +71,8 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
 | 
			
		||||
					if f'{Usertag} {GlobalId}' in Db.read().splitlines():
 | 
			
		||||
						continue
 | 
			
		||||
 | 
			
		||||
			if Index == 1:
 | 
			
		||||
				LastEntryIsNew = True
 | 
			
		||||
			LocalId = GlobalId.split('/')[-1]
 | 
			
		||||
			Username = Entry.find('a', class_='status__display-name').get_text().strip()
 | 
			
		||||
			Content = Entry.find('div', class_='e-content')
 | 
			
		||||
@@ -85,7 +112,7 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
 | 
			
		||||
				Message.attach(MIMEText(HTML.replace('{ Replace:Attached }', ''), 'html'))
 | 
			
		||||
 | 
			
		||||
			Attachments = Entry.find('ul', class_='attachment-list__list')
 | 
			
		||||
				if Attachments:
 | 
			
		||||
			if Attachments and (LocalSave or SendMail):
 | 
			
		||||
				for Attachment in Attachments:
 | 
			
		||||
					Href, Alt = '', ''
 | 
			
		||||
					Attachment = str(Attachment).strip().replace("'",'"').split('"')
 | 
			
		||||
@@ -129,24 +156,15 @@ def HandleFeed(URLs, IncludeRetoots, IncludeReplies, LocalSave, SendMail, MailTo
 | 
			
		||||
			with open(f'{AppName}.db', 'a') as Db:
 | 
			
		||||
				Db.write(f'{Usertag} {GlobalId}' + '\n')
 | 
			
		||||
 | 
			
		||||
		return LastEntryIsNew, PageOlder
 | 
			
		||||
 | 
			
		||||
	except Exception:
 | 
			
		||||
		raise
 | 
			
		||||
 | 
			
		||||
def HandleFeedList(List):
 | 
			
		||||
	for Feed in List:
 | 
			
		||||
		print(f"[I] Handling Feed ->\n: {Feed}")
 | 
			
		||||
		HandleFeed(
 | 
			
		||||
			URLs=SureList(Feed['URLs']),
 | 
			
		||||
			IncludeRetoots=Feed['IncludeRetoots'] if 'IncludeRetoots' in Feed else True,
 | 
			
		||||
			IncludeReplies=Feed['IncludeReplies'] if 'IncludeReplies' in Feed else True,
 | 
			
		||||
			LocalSave=Feed['LocalSave'] if 'LocalSave' in Feed else True,
 | 
			
		||||
			SendMail=Feed['SendMail'] if 'SendMail' in Feed else True if 'To' in Feed and Feed['To'] else False,
 | 
			
		||||
			MailTo=SureList(Feed['MailTo']) if 'MailTo' in Feed and Feed['MailTo'] else [])
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
	while True:
 | 
			
		||||
		print("[I] Scraping...")
 | 
			
		||||
		HandleFeedList(Feeds)
 | 
			
		||||
		HandleFeedsList(Feeds)
 | 
			
		||||
		if LoopTime <= 0:
 | 
			
		||||
			exit()
 | 
			
		||||
		print(f"[I] Sleeping for {LoopTime}s...")
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										3
									
								
								MastodonFeedHTML/Start.MastodonFeedHTML
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										3
									
								
								MastodonFeedHTML/Start.MastodonFeedHTML
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,3 @@
 | 
			
		||||
#!/bin/sh
 | 
			
		||||
cd "$( dirname "$( realpath "$0" )" )"
 | 
			
		||||
./MastodonFeedHTML.py
 | 
			
		||||
		Reference in New Issue
	
	Block a user