2021-02-03 01:06:44 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
# -*- coding: UTF-8 -*-
|
2021-02-03 12:20:28 +01:00
|
|
|
|
2021-02-03 01:06:44 +01:00
|
|
|
import json
|
2021-02-04 14:11:08 +01:00
|
|
|
import re
|
2021-02-03 01:06:44 +01:00
|
|
|
import sqlite3
|
|
|
|
import requests
|
|
|
|
import xmltodict
|
|
|
|
import time
|
2021-02-03 12:20:28 +01:00
|
|
|
from misskey import Misskey
|
2021-02-04 08:00:07 +01:00
|
|
|
from bs4 import BeautifulSoup
|
2021-02-03 01:06:44 +01:00
|
|
|
|
2021-02-03 04:56:54 +01:00
|
|
|
# Wall-clock reference taken at import time; the script's final "done in"
# message is computed relative to this value.
time_start = time.time()

# Start-up banner, printed once when the module is loaded.
banner_text = ''' __ __ _ _ __ __ _ _ \n | \/ (_) | | / //_ | | | | \n | \ / |_ ___ ___| | _____ _ _ __ __/ /_ | | |__ ___ | |_ \n | |\/| | / __/ __| |/ / _ \ | | | \ \/ / '_ \| | '_ \ / _ \| __|\n | | | | \__ \__ \ < __/ |_| | > <| (_) | | |_) | (_) | |_ \n |_| |_|_|___/___/_|\_\___|\__, | /_/\_\\___/|_|_.__/ \___/ \__|\n __/ | \n |___/ '''
print(banner_text)

# Module-wide SQLite connection shared by spider() and the script body.
conn = sqlite3.connect('DB.sqlite3')
print("Opened database successfully")
|
|
|
|
|
|
|
|
|
2021-02-03 12:20:28 +01:00
|
|
|
def json_read(file):
    """Load and return the JSON document stored at path ``file``.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The decoded JSON value, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # The context manager closes the handle even when decoding fails.
    # UTF-8 is requested explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(file, 'r', encoding='utf-8') as config_file:
        return json.load(config_file)
|
|
|
|
|
2021-02-04 08:00:07 +01:00
|
|
|
|
2021-02-03 01:06:44 +01:00
|
|
|
def xml_to_json(xml):
    """Return the JSON text for an XML document.

    ``xml`` is passed to ``xmltodict.parse`` and the resulting mapping is
    serialised with ``json.dumps``.
    """
    return json.dumps(xmltodict.parse(xml))
|
|
|
|
|
|
|
|
|
|
|
|
def spider(rule_name, rss_url):
    """Fetch one RSS feed, log the parsed result and store unseen items.

    The feed at ``rss_url`` is downloaded with ``requests`` and parsed with
    ``xmltodict``. The parsed feed is written to the ``spider_log`` table and
    every item whose cleaned title is not yet present in the ``result`` table
    is inserted there. All writes go through the module-level connection
    ``conn``; nothing is committed inside this function.

    Args:
        rule_name: Name of the rule the feed belongs to; stored with each row.
        rss_url: URL of the RSS feed to download.

    Returns:
        The parsed feed as returned by ``xmltodict.parse``, or ``False`` when
        the HTTP response status is not 200.
    """
    print("Fetch RSS: [" + rule_name + "] ", rss_url)
    start = time.time()

    # NOTE(review): no timeout is passed, so a stalled server blocks the
    # whole run. Consider adding one together with explicit error handling.
    fetch = requests.get(rss_url)
    if fetch.status_code != 200:
        print("Failed to fetch")
        return False

    result = xmltodict.parse(fetch.content)

    # Compiled once instead of once per item. The pattern text is unchanged.
    # NOTE(review): the brackets are unescaped, so "[CDATA[[^>]" is parsed as
    # a character class rather than the literal "[CDATA[" -- confirm intent.
    re_cdata = re.compile('//<![CDATA[[^>]*//]]>', re.I)

    # The cursor is opened only after the early return above, and is closed
    # in ``finally`` so it is released on every exit path.
    c = conn.cursor()
    try:
        c.execute('INSERT INTO "main"."spider_log" ("rule_name", "rss_url", "result_json", "timestamp") '
                  'VALUES (?, ?, ?, ?)', (rule_name, rss_url, json.dumps(result), time.time()))

        # xmltodict returns a single mapping (not a list) when the channel
        # holds exactly one <item>, and omits the key when it holds none.
        item_list = result['rss']['channel'].get('item') or []
        if isinstance(item_list, dict):
            item_list = [item_list]

        for i in item_list:
            title = re_cdata.sub('', i['title'])

            # Look up the cleaned title, because that is the value stored by
            # the INSERT below; using the raw title could miss duplicates.
            unique = c.execute('SELECT * FROM "main"."result" WHERE "title" = ? LIMIT 0,1',
                               (title,)).fetchone()
            if unique is not None:
                print("Skip: ", title)
                continue

            print("Got: ", title)
            # Replace blockquote tags with typographic quotation marks.
            desc = i['description'].replace("<blockquote>", "“").replace("</blockquote>", "”")
            c.execute('INSERT INTO "main"."result" ("rule_name", "url", "title", "description", "timestamp")'
                      ' VALUES (?, ?, ?, ?, ?)', (rule_name, i['link'], title, desc, time.time()))
    finally:
        c.close()

    end = time.time()
    print("Fetch done in", end - start, "s")
    return result
|
|
|
|
|
|
|
|
|
2021-02-04 08:00:07 +01:00
|
|
|
def fetch_img(url):
    """Placeholder: ignores ``url`` and only prints an empty line."""
    print("")
|
|
|
|
|
|
|
|
|
2021-02-03 01:06:44 +01:00
|
|
|
if __name__ == '__main__':
    # Script entry point: for every rule, refresh its feed, then post one
    # not-yet-posted item (the one with the highest "rid") to Misskey.
    print("Misskey X61 RSS Bot initialized")

    config = json_read("config.json")
    rules = json_read("rules.json")

    for key in rules:
        spider(key, rules[key]['rss_source'])

        # The rule's "identity" selects which entry of config.json supplies
        # the instance URL, token and visibility.
        name = rules[key]['identity']
        Misskey.baseurl = config[name]['url']

        c = conn.cursor()
        try:
            r = c.execute('''SELECT * FROM "main"."result"
                             WHERE "rule_name" = ? AND
                             "post_time" = '0' ORDER BY "rid" DESC''', (key,)).fetchone()
            if r is not None:
                # Cursor.execute() returns the cursor itself, so its result
                # needs no None check. r[0] is used as the rowid of the
                # selected row.
                c.execute('UPDATE "main"."result" SET "post_time" = ? WHERE rowid = ?', (time.time(), r[0]))

                # NOTE(review): r[3] and r[2] are presumably title and url;
                # the column order comes from the table schema, which is not
                # visible here -- confirm.
                content = r[3] + "\n<" + r[2] + ">\n\n" + rules[key]['extra_content']
                Misskey.post(self=Misskey,
                             content=content,
                             i=config[name]['token'], visibility=config[name]['visibility'])
        finally:
            # Release the per-rule cursor on every path.
            c.close()

    # Persist everything written by spider() and the UPDATEs above.
    conn.commit()
    conn.close()

    time_end = time.time()
    print("X61 bot: done in", time_end - time_start, "s")
|