From 7446d7323b6399792718996b0585b4d8c5816ce8 Mon Sep 17 00:00:00 2001
From: Martin Rotter <rotter.martinos@gmail.com>
Date: Sat, 13 Mar 2021 21:07:01 +0100
Subject: [PATCH] scraper

---
 .../scripts/scrapers/funnyjunk-embed-gifs.py  | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 resources/scripts/scrapers/funnyjunk-embed-gifs.py

diff --git a/resources/scripts/scrapers/funnyjunk-embed-gifs.py b/resources/scripts/scrapers/funnyjunk-embed-gifs.py
new file mode 100644
index 000000000..7423d0ce6
--- /dev/null
+++ b/resources/scripts/scrapers/funnyjunk-embed-gifs.py
@@ -0,0 +1,27 @@
+# Embeds images (GIF, PNG, JPG) linked from each message's page into individual messages from Funnyjunk.com
+# Sample input feed whose contents must be provided on stdin: "https://funnyjunk.com/rss/most_popular.rss"
+
+import json
+import re
+import sys
+import urllib.request
+import xml.etree.ElementTree as ET
+from datetime import datetime
+
+tree = ET.fromstring(sys.stdin.read())
+# Raw string avoids the invalid "\." escape; excluding '"' keeps each match inside one href value.
+pattern = re.compile(r'href="(https://[^<>"]+\.(gif|png|jpg))"')
+
+for ite in tree.find("channel").iter("item"):
+  link = (ite.findtext("link") or "").strip()
+  try:
+    # The context manager closes the connection; undecodable bytes are replaced, not fatal.
+    with urllib.request.urlopen(link) as response:
+      text = response.read().decode("utf-8", errors="replace")
+  except (OSError, ValueError) as error:
+    print("Cannot fetch", repr(link), "-", error, file=sys.stderr)
+    continue  # One unreachable page must not abort the whole feed; the item stays unchanged.
+  for pic_url, ext in pattern.findall(text):
+    # "image/jpg" is not a registered MIME type, so JPEG files are announced as "image/jpeg".
+    ET.SubElement(ite, "enclosure", {"url": pic_url, "type": "image/" + ("jpeg" if ext == "jpg" else ext)})
+print(ET.tostring(tree, encoding="unicode"))
\ No newline at end of file