From 239a6c892cf58a0d85184f5eda05437743b8c8b7 Mon Sep 17 00:00:00 2001
From: Omar Roth <omarroth@hotmail.com>
Date: Wed, 28 Mar 2018 22:29:54 -0500
Subject: [PATCH] Use separate table for videos pulled from RSS

---
 config/sql/channel_videos.sql | 20 ++++++++++++
 config/sql/channels.sql       | 14 ++------
 setup.sh                      |  1 +
 src/helpers.cr                | 61 ++++++++++++++++++++++++++---------
 src/invidious.cr              | 29 +++++------------
 src/views/subscriptions.ecr   | 10 +++---
 6 files changed, 81 insertions(+), 54 deletions(-)
 create mode 100644 config/sql/channel_videos.sql

diff --git a/config/sql/channel_videos.sql b/config/sql/channel_videos.sql
new file mode 100644
index 00000000..e6cb3c8f
--- /dev/null
+++ b/config/sql/channel_videos.sql
@@ -0,0 +1,20 @@
+-- Table: public.channel_videos
+
+-- DROP TABLE public.channel_videos;
+
+CREATE TABLE public.channel_videos
+(
+    id text COLLATE pg_catalog."default" NOT NULL,
+    title text COLLATE pg_catalog."default",
+    published timestamp with time zone,
+    updated timestamp with time zone,
+    ucid text COLLATE pg_catalog."default",
+    author text COLLATE pg_catalog."default",
+    CONSTRAINT channel_videos_id_key UNIQUE (id)
+)
+WITH (
+    OIDS = FALSE
+)
+TABLESPACE pg_default;
+
+GRANT ALL ON TABLE public.channel_videos TO kemal;
diff --git a/config/sql/channels.sql b/config/sql/channels.sql
index a908c785..d24329f3 100644
--- a/config/sql/channels.sql
+++ b/config/sql/channels.sql
@@ -5,9 +5,8 @@
 CREATE TABLE public.channels
 (
     id text COLLATE pg_catalog."default" NOT NULL,
-    rss text COLLATE pg_catalog."default",
-    updated timestamp with time zone,
-    author text COLLATE pg_catalog."default"
+    author text COLLATE pg_catalog."default",
+    updated timestamp with time zone
 )
 WITH (
     OIDS = FALSE
@@ -15,12 +14,3 @@ WITH (
 TABLESPACE pg_default;
 
 GRANT ALL ON TABLE public.channels TO kemal;
-
--- Index: channel_id_idx
-
--- DROP INDEX public.channel_id_idx;
-
-CREATE UNIQUE INDEX channel_id_idx
-    ON public.channels USING btree
-    (id COLLATE pg_catalog."default")
-    TABLESPACE pg_default;
\ No newline at end of file
diff --git a/setup.sh b/setup.sh
index 333312d0..664073e4 100755
--- a/setup.sh
+++ b/setup.sh
@@ -4,3 +4,4 @@ createdb invidious
 createuser kemal
 psql invidious < config/sql/channels.sql
 psql invidious < config/sql/videos.sql
+psql invidious < config/sql/channel_videos.sql
diff --git a/src/helpers.cr b/src/helpers.cr
index c44405b1..08473de9 100644
--- a/src/helpers.cr
+++ b/src/helpers.cr
@@ -62,15 +62,20 @@ class InvidiousChannel
   end
 
   add_mapping({
-    id:  String,
-    rss: {
-      type:      XML::Node,
-      default:   XML.parse_html(""),
-      converter: InvidiousChannel::XMLConverter,
-
-    },
-    updated: Time,
+    id:      String,
     author:  String,
+    updated: Time,
+  })
+end
+
+class ChannelVideo
+  add_mapping({
+    id:        String,
+    title:     String,
+    published: Time,
+    updated:   Time,
+    ucid:      String,
+    author:    String,
   })
 end
 
@@ -203,10 +208,16 @@ def get_video(id, client, db, refresh = true)
 
     # If record was last updated over an hour ago, refresh (expire param in response lasts for 6 hours)
     if refresh && Time.now - video.updated > 1.hours
-      db.exec("DELETE FROM videos * WHERE id = $1", id)
+      begin
       video = fetch_video(id, client)
+        video_array = video.to_a[1..-1] # skips the first field (presumably id -- confirm against Video's mapping)
+        args = arg_array(video_array)
+
+        db.exec("UPDATE videos SET (info,updated,title,views,likes,dislikes,wilson_score,published,description,language)\
+        = (#{args}) WHERE id = '#{video.id}'", video_array)
+      rescue ex # refresh failed: drop the cached row rather than keep serving it
+        db.exec("DELETE FROM videos * WHERE id = $1", id)
+      end
     end
   else
     video = fetch_video(id, client)
@@ -490,14 +501,14 @@ def get_channel(id, client, db)
     channel = db.query_one("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)
 
     if Time.now - channel.updated > 1.minutes
-      channel = fetch_channel(id, client)
+      channel = fetch_channel(id, client, db)
       channel_array = channel.to_a[1..-1]
       args = arg_array(channel_array)
 
-      db.exec("UPDATE channels SET (rss,updated,author) = (#{args}) WHERE id = '#{channel.id}'", channel_array)
+      db.exec("UPDATE channels SET (author,updated) = (#{args}) WHERE id = '#{channel.id}'", channel_array)
     end
   else
-    channel = fetch_channel(id, client)
+    channel = fetch_channel(id, client, db)
     args = arg_array(channel.to_a)
     db.exec("INSERT INTO channels VALUES (#{args})", channel.to_a)
   end
@@ -505,13 +516,31 @@ def get_channel(id, client, db)
   return channel
 end
 
-def fetch_channel(id, client)
+def fetch_channel(id, client, db)
   rss = client.get("/feeds/videos.xml?channel_id=#{id}").body
   rss = XML.parse_html(rss)
 
+  rss.xpath_nodes("//feed/entry").each do |entry|
+    video_id = entry.xpath_node("videoid").not_nil!.content
+    title = entry.xpath_node("title").not_nil!.content
+    published = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z")
+    updated = Time.parse(entry.xpath_node("updated").not_nil!.content, "%FT%X%z")
+    author = entry.xpath_node("author/name").not_nil!.content
+    ucid = entry.xpath_node("channelid").not_nil!.content
+
+    video = ChannelVideo.new(video_id, title, published, updated, ucid, author)
+
+    video_array = video.to_a # full row: the INSERT below supplies every column
+    args = arg_array(video_array)
+
+    # TODO: Update record on conflict
+    db.exec("INSERT INTO channel_videos VALUES (#{args})\
+      ON CONFLICT (id) DO NOTHING", video_array)
+  end
+
   author = rss.xpath_node("//feed/author/name").not_nil!.content
 
-  channel = InvidiousChannel.new(id, rss, Time.now, author)
+  channel = InvidiousChannel.new(id, author, Time.now)
 
   return channel
 end
diff --git a/src/invidious.cr b/src/invidious.cr
index 67bbc875..82a40ed1 100644
--- a/src/invidious.cr
+++ b/src/invidious.cr
@@ -574,35 +574,22 @@ get "/feed/subscriptions" do |env|
 
     feed = client.get("/subscription_manager?action_takeout=1", headers).body
 
-    videos = Array(Hash(String, String | Time)).new
+    channels = [] of String
 
     feed = XML.parse_html(feed)
     feed.xpath_nodes("//opml/outline/outline").each do |channel|
       id = channel["xmlurl"][-24..-1]
-      rss = get_channel(id, client, PG_DB).rss
+      get_channel(id, client, PG_DB)
 
-      rss.xpath_nodes("//feed/entry").each do |entry|
-        video = {} of String => String | Time
-
-        video["id"] = entry.xpath_node("videoid").not_nil!.content
-        video["title"] = entry.xpath_node("title").not_nil!.content
-        video["published"] = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z")
-        video["author"] = entry.xpath_node("author/name").not_nil!.content
-        video["ucid"] = entry.xpath_node("channelid").not_nil!.content
-        video["thumbnail"] = entry.xpath_node("group/thumbnail").not_nil!["url"].gsub(/hqdefault\.jpg$/, "mqdefault.jpg")
-
-        videos << video
-      end
+      channels << id
     end
-
     youtube_pool << client
 
-    videos.sort_by! { |video| video["published"].as(Time).epoch }
-    videos.reverse!
-
-    start = (page - 1)*max_results
-    stop = start + max_results - 1
-    videos = videos[start..stop]
+    time = Time.now
+    args = arg_array(channels)
+    offset = (page - 1) * max_results
+    videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args})\
+      ORDER BY published DESC LIMIT #{max_results} OFFSET #{offset}", channels, as: ChannelVideo)
 
     templated "subscriptions"
   else
diff --git a/src/views/subscriptions.ecr b/src/views/subscriptions.ecr
index 26bd3fc4..0b94e6a3 100644
--- a/src/views/subscriptions.ecr
+++ b/src/views/subscriptions.ecr
@@ -7,15 +7,15 @@
     <% slice.each do |video| %>
     <div class="pure-u-1 pure-u-md-1-4">
         <div style="overflow-wrap:break-word; word-wrap:break-word;" class="h-box">
-            <a style="width:100%;" href="/watch?v=<%= video["id"] %>">
-                <img style="width:100%;" src="<%= video["thumbnail"] %>"/>
-                <p style="height:100%"><%= video["title"] %></p>
+            <a style="width:100%;" href="/watch?v=<%= video.id %>">
+                <img style="width:100%;" src="https://i.ytimg.com/vi/<%= video.id %>/mqdefault.jpg"/>
+                <p style="height:100%"><%= video.title %></p>
             </a>
             <p>
-                <b><a style="width:100%;" href="https://youtube.com/channel/<%= video["ucid"] %>"><%= video["author"] %></a></b>
+                <b><a style="width:100%;" href="https://youtube.com/channel/<%= video.ucid %>"><%= video.author %></a></b>
             </p>
             <p>
-                <h5>Shared <%= video["published"].as(Time).to_s("%B %-d, %Y at %r") %></h5>
+                <h5>Shared <%= video.published.to_s("%B %-d, %Y at %r") %></h5>
             </p>
         </div>
     </div>