diff --git a/rules.json b/rules.json index 6a0e381..07d01ee 100644 --- a/rules.json +++ b/rules.json @@ -13,5 +13,15 @@ "rss_source": "https://rsshub.app/solidot/www", "identity": "misskey.dev", "extra_content": "" + }, + "ZaoBao": { + "rss_source": "https://rsshub.app/zaobao/realtime/china", + "identity": "ZaobaoBot@x61.uk", + "extra_content": "#News" + }, + "NIKKEI": { + "rss_source": "https://rsshub.app/nikkei/index", + "identity": "NIKKEI@x61.uk", + "extra_content": "#News" } } \ No newline at end of file diff --git a/x61bot.py b/x61bot.py index bc8e9ae..b58e6d6 100644 --- a/x61bot.py +++ b/x61bot.py @@ -2,6 +2,7 @@ # -*- coding: UTF-8 -*- import json +import re import sqlite3 import requests import xmltodict @@ -42,13 +43,15 @@ def spider(rule_name, rss_url): item_list = result['rss']['channel']['item'] for i in item_list: unique = c.execute('SELECT * FROM "main"."result" WHERE "title" = ? LIMIT 0,1', (i['title'],)).fetchone() + re_cdata = re.compile('//]*//]]>', re.I) + title = re_cdata.sub('', i['title']) if not (unique is None): - print("Skip: ", i['title']) + print("Skip: ", title) continue - print("Got: ", i['title']) + print("Got: ", title) desc = i['description'].replace("
", "“").replace("", "”") c.execute('INSERT INTO "main"."result" ("rule_name", "url", "title", "description", "timestamp")' - ' VALUES (?, ?, ?, ?, ?)', (rule_name, i['link'], i['title'], desc, time.time())) + ' VALUES (?, ?, ?, ?, ?)', (rule_name, i['link'], title, desc, time.time())) c.close() end = time.time()