From d9330210da1b6bb92e4153c86d8a3d89dc29f0ac Mon Sep 17 00:00:00 2001
From: Martin Rotter <rotter.martinos@gmail.com>
Date: Tue, 18 Oct 2022 06:29:20 +0200
Subject: [PATCH] fix full site scraper!

---
 resources/desktop/com.github.rssguard.appdata.xml  | 2 +-
 resources/scripts/scrapers/scrape-full-articles.py | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/resources/desktop/com.github.rssguard.appdata.xml b/resources/desktop/com.github.rssguard.appdata.xml
index 651d60dc3..01c9643f5 100644
--- a/resources/desktop/com.github.rssguard.appdata.xml
+++ b/resources/desktop/com.github.rssguard.appdata.xml
@@ -24,7 +24,7 @@
   <url type="donation">https://github.com/sponsors/martinrotter</url>
   <content_rating type="oars-1.1" />
   <releases>
-    <release version="4.2.5" date="2022-10-14"/>
+    <release version="4.2.5" date="2022-10-18"/>
   </releases>
   <content_rating type="oars-1.0">
     <content_attribute id="violence-cartoon">none</content_attribute>
diff --git a/resources/scripts/scrapers/scrape-full-articles.py b/resources/scripts/scrapers/scrape-full-articles.py
index 60616299e..466530c1e 100644
--- a/resources/scripts/scrapers/scrape-full-articles.py
+++ b/resources/scripts/scrapers/scrape-full-articles.py
@@ -16,8 +16,7 @@ import xml.etree.ElementTree as ET
 
 # Globals.
 atom_ns = {"atom": "http://www.w3.org/2005/Atom"}
-article_parser_url = "https://demos.pwshub.com/article-parser?url="
-
+article_parser_url = "https://extract-article.deta.dev/?url="
 
 # Methods.
 def process_article(article, is_rss, is_atom):
@@ -59,7 +58,7 @@ def main():
 
   sys.stdin.reconfigure(encoding="utf-8")
 
-  #feed_data = urllib.request.urlopen("https://dilbert.com/feed").read()
+  #feed_data = urllib.request.urlopen("http://feeds.hanselman.com/ScottHanselman").read()
   feed_data = sys.stdin.read()
   feed_document = ET.fromstring(feed_data)
 
@@ -89,7 +88,7 @@ def main():
     for article in feed_articles:
       process_article(article, is_rss, is_atom)
 
-  print(ET.tostring(feed_document, encoding="unicode"))
+  print(ET.tostring(feed_document).decode())
 
 
 if __name__ == '__main__':