mirror of
https://github.com/martinrotter/rssguard.git
synced 2025-01-16 19:03:25 +01:00
44 lines
1.8 KiB
Python
44 lines
1.8 KiB
Python
|
# Obtains Abclinuxu's "mé komentáře" ("my comments") listing as a JSON feed.
|
||
|
|
||
|
import urllib.request
|
||
|
import re
|
||
|
import json
|
||
|
import sys
|
||
|
from datetime import datetime
|
||
|
from html.parser import HTMLParser
|
||
|
|
||
|
# Site root; article links captured from the listing are appended to it.
base_url = "https://www.abclinuxu.cz"

# Comment-history page of the user whose id is the first command-line argument.
url = "https://www.abclinuxu.cz/History?type=comments&uid={uid}".format(uid=sys.argv[1])

# Read the whole page inside a context manager so the HTTP response is
# closed deterministically, even if reading or decoding raises.
with urllib.request.urlopen(url) as response:
    text = response.read().decode("utf-8")
|
||
|
|
||
|
# First <h1> of the page; used as the feed title.
abc_title = re.search(r"<h1>(.+?)</h1>", text).group(1)

# Inner HTML of the table with class "ds"; re.S lets "." match across newlines.
abc_table = re.search(r'<table class="ds">(.+?)</table>', text, re.S).group(1)

# One match per table row:
#   group 1 - href of the row's link,
#   group 2 - inner HTML of that link,
#   group 3 - timestamp formatted as "D.M.YYYY HH:MM".
# Raw strings keep "\d" and "\." as regex escapes instead of relying on
# Python passing unknown string escapes through unchanged.
articles_iter = re.finditer(
    r'<tr>.+?href="(.+?)">(.+?)</a>.+?"td-datum">.+?(\d{1,2}\.\d{1,2}\.\d{4} \d{2}:\d{2}).+?</tr>',
    abc_table,
    re.S,
)
|
||
|
|
||
|
# Iterate all articles and generate JSON feed entries.
|
||
|
|
||
|
class HTMLFilter(HTMLParser):
    """Collect the character data of an HTML fragment, discarding the tags.

    Feed markup with ``feed()``; the text found between tags is accumulated,
    in document order, in the ``text`` attribute.
    """

    def __init__(self):
        super().__init__()
        # Per-instance accumulator, so each parser starts from an empty string.
        self.text = ""

    def handle_data(self, data):
        """Append one run of character data reported by the parser to ``text``."""
        self.text += data
|
||
|
|
||
|
# Build one feed item per row matched by articles_iter.
items = []

for article in articles_iter:
    # Group 2 is the link's inner HTML; reduce it to plain text.
    f = HTMLFilter()
    f.feed(article.group(2))
    # close() flushes any text the parser is still buffering.
    f.close()
    article_title = f.text

    items.append({
        "title": article_title,
        # The same text serves as both the item title and its content.
        "content_html": article_title,
        # Group 1 is the link target, joined onto the site root.
        "url": base_url + article.group(1),
        # Group 3 is "D.M.YYYY HH:MM"; emitted as an ISO 8601 string.
        "date_published": datetime.strptime(article.group(3), "%d.%m.%Y %H:%M").isoformat(),
    })

# Serialise the whole feed in one step so every string, including the page
# title, is JSON-escaped; a title containing a quote or backslash would
# otherwise produce invalid JSON.
json_feed = json.dumps({"title": abc_title, "items": items})

print(json_feed)
|