RSSToMisskey/x61bot.py

93 lines
3.4 KiB
Python
Raw Normal View History

2021-02-03 01:06:44 +01:00
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
2021-02-03 12:20:28 +01:00
2021-02-03 01:06:44 +01:00
import json
2021-02-04 14:11:08 +01:00
import re
2021-02-03 01:06:44 +01:00
import sqlite3
import requests
import xmltodict
import time
2021-02-03 12:20:28 +01:00
from misskey import Misskey
2021-02-04 08:00:07 +01:00
from bs4 import BeautifulSoup
2021-02-03 01:06:44 +01:00
2021-02-03 04:56:54 +01:00
# Wall-clock start of the run; read again at the very end of the script to
# report the total elapsed time.
time_start = time.time()
# Print the ASCII-art startup banner.
# NOTE(review): this non-raw literal mixes real escapes (\n, \\) with
# backslash sequences such as \/ and \_ that are not recognised escapes;
# recent Python versions emit a warning for those. Converting it needs care
# because \n and \\ must keep their current meaning.
print(
''' __ __ _ _ __ __ _ _ \n | \/ (_) | | / //_ | | | | \n | \ / |_ ___ ___| | _____ _ _ __ __/ /_ | | |__ ___ | |_ \n | |\/| | / __/ __| |/ / _ \ | | | \ \/ / '_ \| | '_ \ / _ \| __|\n | | | | \__ \__ \ < __/ |_| | > <| (_) | | |_) | (_) | |_ \n |_| |_|_|___/___/_|\_\___|\__, | /_/\_\\___/|_|_.__/ \___/ \__|\n __/ | \n |___/ ''')
2021-02-03 01:06:44 +01:00
# Module-level SQLite connection, opened as a side effect of running/importing
# this file. The path is relative, so the database file is resolved against
# the current working directory. spider() and the __main__ block both use
# this shared connection.
conn = sqlite3.connect('DB.sqlite3')
print("Opened database successfully")
2021-02-03 12:20:28 +01:00
def json_read(file):
    """Load a JSON document from disk and return the decoded Python object.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The object produced by decoding the file's contents (a dict for the
        config/rules files this script reads).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # The context manager guarantees the handle is released even when decoding
    # fails; JSON text is UTF-8, so do not depend on the platform's locale.
    with open(file, 'r', encoding='utf-8') as config_file:
        return json.load(config_file)
2021-02-04 08:00:07 +01:00
2021-02-03 01:06:44 +01:00
def xml_to_json(xml):
    """Return the given XML document re-serialised as a JSON string.

    The XML is parsed with ``xmltodict.parse`` and the resulting mapping is
    encoded with ``json.dumps`` using its default settings.
    """
    return json.dumps(xmltodict.parse(xml))
# Matches JavaScript-style CDATA guards of the form "//<![CDATA[ ... //]]>".
# The square brackets have to be escaped: left bare, "[CDATA[[^>]" is parsed
# as a character class and the pattern never matches what it was written for.
# Compiled once here instead of once per feed item.
_RE_CDATA = re.compile(r'//<!\[CDATA\[[^>]*//\]\]>', re.I)


def _as_item_list(items):
    """Normalise a channel's ``item`` value to a list of item mappings."""
    if items is None:
        return []
    if isinstance(items, list):
        return items
    # xmltodict yields a single mapping, not a list, when the channel has
    # exactly one <item>; iterating that mapping would walk its keys.
    return [items]


def spider(rule_name, rss_url):
    """Fetch one RSS feed, log the raw result and store unseen items.

    The parsed feed is written to the ``spider_log`` table as JSON. Every
    item whose (cleaned) title is not yet present in the ``result`` table is
    inserted there with its link, title and description. Nothing is
    committed here; the caller owns the transaction on the shared ``conn``.

    Args:
        rule_name: Name of the rule the feed belongs to; stored with each row.
        rss_url: URL of the RSS feed to download.

    Returns:
        ``False`` when the HTTP status is not 200, otherwise the mapping
        produced by ``xmltodict.parse`` for the feed.

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError: If the document has no ``rss``/``channel`` structure.
    """
    print("Fetch RSS: [" + rule_name + "] ", rss_url)
    start = time.time()

    # A timeout keeps a single unresponsive feed from hanging the whole run.
    fetch = requests.get(rss_url, timeout=30)
    if fetch.status_code != 200:
        print("Failed to fetch")
        return False
    result = xmltodict.parse(fetch.content)

    # The cursor is opened only once there is something to store and is
    # always closed, including when an insert raises.
    c = conn.cursor()
    try:
        c.execute('INSERT INTO "main"."spider_log" ("rule_name", "rss_url", "result_json", "timestamp") '
                  'VALUES (?, ?, ?, ?)', (rule_name, rss_url, json.dumps(result), time.time()))

        for i in _as_item_list(result['rss']['channel'].get('item')):
            # Empty elements are parsed as None; treat them as empty strings.
            title = _RE_CDATA.sub('', i.get('title') or '')

            # Look up the cleaned title, because that is the value stored.
            unique = c.execute('SELECT * FROM "main"."result" WHERE "title" = ? LIMIT 0,1',
                               (title,)).fetchone()
            if unique is not None:
                print("Skip: ", title)
                continue

            print("Got: ", title)
            desc = (i.get('description') or '').replace("<blockquote>", "").replace("</blockquote>", "")
            c.execute('INSERT INTO "main"."result" ("rule_name", "url", "title", "description", "timestamp")'
                      ' VALUES (?, ?, ?, ?, ?)', (rule_name, i.get('link') or '', title, desc, time.time()))
    finally:
        c.close()

    end = time.time()
    print("Fetch done in", end - start, "s")
    return result
2021-02-04 08:00:07 +01:00
def fetch_img(url):
    """Placeholder: ``url`` is currently ignored and only a blank line is printed."""
    print("")
2021-02-03 01:06:44 +01:00
if __name__ == '__main__':
    # Script entry point: for every rule, refresh its feed, then publish at
    # most one not-yet-posted item through the identity the rule names.
    print("Misskey X61 RSS Bot initialized")
    config = json_read("config.json")
    rules = json_read("rules.json")
    try:
        for key, rule in rules.items():
            spider(key, rule['rss_source'])
            name = rule['identity']
            Misskey.baseurl = config[name]['url']
            c = conn.cursor()
            try:
                # Pick the unposted row with the highest "rid" for this rule.
                r = c.execute('SELECT * FROM "main"."result" '
                              'WHERE "rule_name" = ? AND '
                              '"post_time" = \'0\' ORDER BY "rid" DESC', (key,)).fetchone()
                if r is not None:
                    # NOTE(review): positional access assumes the column order
                    # of "result" is (rid, rule_name, url, title, ...), so that
                    # r[0] identifies the row, r[2] is the link and r[3] the
                    # title -- confirm against the table schema.
                    # execute() returns the cursor itself, so its result is
                    # not worth testing against None.
                    c.execute('UPDATE "main"."result" SET "post_time" = ? WHERE rowid = ?',
                              (time.time(), r[0]))
                    content = r[3] + "\n<" + r[2] + ">\n\n" + rule['extra_content']
                    # NOTE(review): post() is invoked on the class with the
                    # class passed as self; confirm this matches the Misskey
                    # helper's intended usage.
                    Misskey.post(self=Misskey,
                                 content=content,
                                 i=config[name]['token'], visibility=config[name]['visibility'])
            finally:
                c.close()
            # Commit per rule: if a later rule fails, notes that were already
            # published keep their post_time and are not posted again on the
            # next run. If post() itself raises, this commit is skipped and
            # the pending UPDATE is discarded, so the item is retried.
            conn.commit()
    finally:
        conn.close()
    time_end = time.time()
    print("X61 bot: done in", time_end - time_start, "s")