Make scripts work for JSON guessing too.

This commit is contained in:
Martin Rotter 2021-02-05 08:57:38 +01:00
parent da6cb6ea3a
commit b78ebd1d2a
4 changed files with 52 additions and 3 deletions

View File

@ -30,7 +30,7 @@
<url type="donation">https://martinrotter.github.io/donate/</url> <url type="donation">https://martinrotter.github.io/donate/</url>
<content_rating type="oars-1.1" /> <content_rating type="oars-1.1" />
<releases> <releases>
<release version="3.8.4" date="2021-02-04"/> <release version="3.8.4" date="2021-02-05"/>
</releases> </releases>
<content_rating type="oars-1.0"> <content_rating type="oars-1.0">
<content_attribute id="violence-cartoon">none</content_attribute> <content_attribute id="violence-cartoon">none</content_attribute>

View File

@ -78,6 +78,8 @@ RSS Guard offers placeholder `%data%` which is automatically replaced with full
Also, working directory of process executing the script is set to RSS Guard's user data folder. Also, working directory of process executing the script is set to RSS Guard's user data folder.
There are some examples of website scrapers [here](https://github.com/martinrotter/rssguard/tree/master/resources/scripts/scrapers), most of them are written in Python 3, thus their execution line is `python.exe#script.py`.
After your source feed data are downloaded either via URL or custom script, you can optionally post-process the data with one more custom script, which will take **raw source data as input** and must produce processed valid feed data to **standard output** while printing all error messages to **error output**. After your source feed data are downloaded either via URL or custom script, you can optionally post-process the data with one more custom script, which will take **raw source data as input** and must produce processed valid feed data to **standard output** while printing all error messages to **error output**.
Format of post-process script execution line is the same as above. Format of post-process script execution line is the same as above.

View File

@ -0,0 +1,39 @@
# Obtains Wikipedia's "In the news" today's articles.
#
# Downloads the English Wikipedia main page, extracts the first bullet list
# that follows the "In the news" text and prints it to standard output as
# a JSON feed document. Problems are reported on standard error.

import json
import re
import sys
import urllib.parse
import urllib.request
from html.parser import HTMLParser

url = "https://en.wikipedia.org/wiki/Main_Page"
wiki_base_url = "https://en.wikipedia.org"


class HTMLFilter(HTMLParser):
  """Accumulates the plain-text content of an HTML fragment in `text`."""

  def __init__(self):
    super().__init__()
    self.text = ""

  def handle_data(self, data):
    self.text += data


# Read the whole page and release the connection as soon as we are done.
with urllib.request.urlopen(url) as response:
  text = response.read().decode("utf-8")

# Raw strings keep "\S" and friends as regex escapes instead of (invalid)
# Python string escapes.
news_match = re.search(r"In the news[\S\n\t\v ]+?<ul>([\S\n\t\v ]+?)</ul>", text)

if news_match is None:
  # sys.exit() with a string prints it to standard error and exits with code 1.
  sys.exit("Cannot find the \"In the news\" list on Wikipedia's main page.")

articles_li = re.findall(r"<li>([\S\n\t\v ]+?)</li>", news_match.group(1))

# Iterate all articles and generate JSON feed entries.
items = []

for article in articles_li:
  link_match = re.search(r"^.+?href=\"(.+?)\"", article)

  if link_match is None:
    # An entry without any link cannot be turned into a feed item with URL.
    print("Skipping \"In the news\" entry without a link.", file=sys.stderr)
    continue

  f = HTMLFilter()
  f.feed(article)
  # Flush any text the parser may still be holding in its buffer.
  f.close()

  items.append({
    "title": f.text,
    "content_html": "<div>{}</div>".format(article),
    # urljoin() resolves site-relative links against the base URL and leaves
    # already-absolute links untouched.
    "url": urllib.parse.urljoin(wiki_base_url, link_match.group(1)),
  })

# Serialize the whole document at once so that escaping is always consistent.
print(json.dumps({"title": "Wikipedia - In the news", "items": items}))

View File

@ -261,7 +261,8 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
StandardFeed* feed = nullptr; StandardFeed* feed = nullptr;
if (content_type.contains(QSL("json"), Qt::CaseSensitivity::CaseInsensitive)) { if (content_type.contains(QSL("json"), Qt::CaseSensitivity::CaseInsensitive) ||
feed_contents.startsWith('{')) {
feed = new StandardFeed(); feed = new StandardFeed();
// We have JSON feed. // We have JSON feed.
@ -693,8 +694,15 @@ QString StandardFeed::runScriptProcess(const QStringList& cmd_args, const QStrin
process.closeWriteChannel(); process.closeWriteChannel();
} }
if (process.waitForFinished(run_timeout)) { if (process.waitForFinished(run_timeout) && process.exitStatus() == QProcess::ExitStatus::NormalExit) {
auto raw_output = process.readAllStandardOutput(); auto raw_output = process.readAllStandardOutput();
auto raw_error = process.readAllStandardError();
if (!raw_error.simplified().isEmpty()) {
qWarningNN << LOGSEC_CORE
<< "Received error output from custom script even if it reported that it exited normally:"
<< QUOTE_W_SPACE_DOT(raw_error);
}
return raw_output; return raw_output;
} }