diff --git a/resources/desktop/com.github.rssguard.appdata.xml b/resources/desktop/com.github.rssguard.appdata.xml
index 0743bf1c4..937f5e74d 100644
--- a/resources/desktop/com.github.rssguard.appdata.xml
+++ b/resources/desktop/com.github.rssguard.appdata.xml
@@ -30,7 +30,7 @@
https://martinrotter.github.io/donate/
-
+
none
diff --git a/resources/docs/Feed-formats.md b/resources/docs/Feed-formats.md
index 52502c689..0092976f4 100755
--- a/resources/docs/Feed-formats.md
+++ b/resources/docs/Feed-formats.md
@@ -78,6 +78,8 @@ RSS Guard offers placeholder `%data%` which is automatically replaced with full
Also, working directory of process executing the script is set to RSS Guard's user data folder.
+There are some examples of website scrapers [here](https://github.com/martinrotter/rssguard/tree/master/resources/scripts/scrapers), most of them are written in Python 3, thus their execution line is `python.exe#script.py`.
+
After your source feed data are downloaded either via URL or custom script, you can optionally post-process the data with one more custom script, which will take **raw source data as input** and must produce processed valid feed data to **standard output** while printing all error messages to **error output**.
Format of post-process script execution line is the same as above.
diff --git a/resources/scripts/scrapers/wiki-inthenews.py b/resources/scripts/scrapers/wiki-inthenews.py
new file mode 100755
index 000000000..d95847f54
--- /dev/null
+++ b/resources/scripts/scrapers/wiki-inthenews.py
@@ -0,0 +1,39 @@
+# Obtains Wikipedia's "In the news" today's articles.
+
+import urllib.request
+import re
+import json
+from html.parser import HTMLParser
+
+url = "https://en.wikipedia.org/wiki/Main_Page"
+response = urllib.request.urlopen(url)
+text = response.read().decode("utf-8")
+
+text_li = re.search("In the news[\S\n\t\v ]+?<ul>([\S\n\t\v ]+?)<\/ul>", text).group(1)
+articles_li = re.findall("<li>([\S\n\t\v ]+?)<\/li>", text_li)
+
+# Iterate all articles and generate JSON feed entries.
+wiki_base_url = "https://en.wikipedia.org"
+
+class HTMLFilter(HTMLParser):
+ text = ""
+ def handle_data(self, data):
+ self.text += data
+
+json_feed = "{{\"title\": \"Wikipedia - In the news\", \"items\": [{items}]}}"
+items = list()
+
+for article in articles_li:
+ article_url = json.dumps(wiki_base_url + re.search("^.+?href=\"(.+?)\"", article).group(1))
+ f = HTMLFilter()
+ f.feed(article)
+ f.text
+ article_title = json.dumps(f.text)
+ article_html = json.dumps("<li>{}</li>".format(article))
+ items.append("{{\"title\": {title}, \"content_html\": {html}, \"url\": {url}}}".format(title=article_title,
+ html=article_html,
+ url=article_url))
+
+json_feed = json_feed.format(items=", ".join(items))
+
+print(json_feed)
\ No newline at end of file
diff --git a/src/librssguard/services/standard/standardfeed.cpp b/src/librssguard/services/standard/standardfeed.cpp
index 9af4aa02f..299d1f636 100644
--- a/src/librssguard/services/standard/standardfeed.cpp
+++ b/src/librssguard/services/standard/standardfeed.cpp
@@ -261,7 +261,8 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
StandardFeed* feed = nullptr;
- if (content_type.contains(QSL("json"), Qt::CaseSensitivity::CaseInsensitive)) {
+ if (content_type.contains(QSL("json"), Qt::CaseSensitivity::CaseInsensitive) ||
+ feed_contents.startsWith('{')) {
feed = new StandardFeed();
// We have JSON feed.
@@ -693,8 +694,15 @@ QString StandardFeed::runScriptProcess(const QStringList& cmd_args, const QStrin
process.closeWriteChannel();
}
- if (process.waitForFinished(run_timeout)) {
+ if (process.waitForFinished(run_timeout) && process.exitStatus() == QProcess::ExitStatus::NormalExit) {
auto raw_output = process.readAllStandardOutput();
+ auto raw_error = process.readAllStandardError();
+
+ if (!raw_error.simplified().isEmpty()) {
+ qWarningNN << LOGSEC_CORE
+ << "Received error output from custom script even if it reported that it exited normally:"
+ << QUOTE_W_SPACE_DOT(raw_error);
+ }
return raw_output;
}