过滤CDATA
This commit is contained in:
parent
03bd04c4f9
commit
83a0aeea34
10
rules.json
10
rules.json
|
@@ -13,5 +13,15 @@
|
|||
"rss_source": "https://rsshub.app/solidot/www",
|
||||
"identity": "misskey.dev",
|
||||
"extra_content": ""
|
||||
},
|
||||
"ZaoBao": {
|
||||
"rss_source": "https://rsshub.app/zaobao/realtime/china",
|
||||
"identity": "ZaobaoBot@x61.uk",
|
||||
"extra_content": "#News"
|
||||
},
|
||||
"NIKKEI": {
|
||||
"rss_source": "https://rsshub.app/nikkei/index",
|
||||
"identity": "NIKKEI@x61.uk",
|
||||
"extra_content": "#News"
|
||||
}
|
||||
}
|
|
@@ -2,6 +2,7 @@
|
|||
# -*- coding: UTF-8 -*-
|
||||
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
import requests
|
||||
import xmltodict
|
||||
|
@@ -42,13 +43,15 @@ def spider(rule_name, rss_url):
|
|||
item_list = result['rss']['channel']['item']
|
||||
for i in item_list:
|
||||
unique = c.execute('SELECT * FROM "main"."result" WHERE "title" = ? LIMIT 0,1', (i['title'],)).fetchone()
|
||||
re_cdata = re.compile('//<![CDATA[[^>]*//]]>', re.I)
|
||||
title = re_cdata.sub('', i['title'])
|
||||
if not (unique is None):
|
||||
print("Skip: ", i['title'])
|
||||
print("Skip: ", title)
|
||||
continue
|
||||
print("Got: ", i['title'])
|
||||
print("Got: ", title)
|
||||
desc = i['description'].replace("<blockquote>", "“").replace("</blockquote>", "”")
|
||||
c.execute('INSERT INTO "main"."result" ("rule_name", "url", "title", "description", "timestamp")'
|
||||
' VALUES (?, ?, ?, ?, ?)', (rule_name, i['link'], i['title'], desc, time.time()))
|
||||
' VALUES (?, ?, ?, ?, ?)', (rule_name, i['link'], title, desc, time.time()))
|
||||
|
||||
c.close()
|
||||
end = time.time()
|
||||
|
|
Loading…
Reference in New Issue