nvidia drivers scraper

This commit is contained in:
Martin Rotter 2021-03-20 20:06:02 +01:00 committed by Martin Rotter
parent 1538c1261f
commit 0b5d2d16c6
2 changed files with 27 additions and 1 deletions

@ -1 +1 @@
Subproject commit 47f4125753452eff8800dbd6600c5a05540b15d9
Subproject commit 9c10723bfbaf6cb85107d6ee16e0324e9e487749

View File

@ -0,0 +1,26 @@
# Parses the output of Nvidia's GDC web service, which provides
# a list of articles.
# Sample input file whose contents must be provided as stdin: "https://www.nvidia.com/bin/nvidiaGDC/servlet/article.json?locale=en_US&region=us&type=both&tag=drivers&offset=0s"
# This script outputs JSON Feed 1.1: https://jsonfeed.org/version/1.1
import json
import sys
from datetime import datetime
def _build_item(article):
    """Convert one entry of ``articlePagesList`` into a JSON Feed item dict.

    Raises:
        KeyError: if an expected article field is missing.
        ValueError: if ``articleDate`` is not in "Month DD, YYYY" form.
    """
    # Dates arrive as e.g. "March 18, 2021"; the result is a naive datetime,
    # so the emitted timestamp carries no UTC offset.
    published = datetime.strptime(article["articleDate"], "%B %d, %Y")
    return {
        "title": article["articleTitle"],
        "authors": [{"name": article["authorName"]}],
        "content_text": article["articleShortDescription"],
        "url": article["articlePath"],
        "date_published": published.isoformat(),
    }


def build_feed(json_data):
    """Return the JSON Feed document (as a string) for a parsed GDC response.

    Args:
        json_data: the decoded service response; its first element must hold
            ``articleLocalizedTag`` and ``articlePagesList``.

    Returns:
        A JSON string with ``version``, ``title`` and ``items`` members.
    """
    json_root = json_data[0]
    feed = {
        # "version" is a required top-level member of JSON Feed.
        "version": "https://jsonfeed.org/version/1.1",
        "title": "Nvidia " + json_root["articleLocalizedTag"],
        "items": [_build_item(ite) for ite in json_root["articlePagesList"]],
    }
    # Serializing the whole structure at once escapes every string, including
    # the feed title, so quotes or backslashes cannot break the output.
    return json.dumps(feed)


def main():
    """Read the service response from stdin and print the feed to stdout."""
    print(build_feed(json.loads(sys.stdin.read())))


if __name__ == "__main__":
    main()