mirror of
				https://gitea.invidious.io/iv-org/invidious
				synced 2025-06-05 23:29:12 +02:00 
			
		
		
		
	Performance: Improve speed of automatic instance redirection (#4193)
The automatic instance redirection implemented in #1940 fetches a new list of instances each time someone queries the /redirect endpoint. This is extremely inefficient... This PR optimizes all that into a background job that only fetches a single list every 30 minutes. This should improve performance quite a bit. No related issue was opened.
This commit is contained in:
		| @@ -189,6 +189,8 @@ Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL | |||||||
|  |  | ||||||
| Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new | Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new | ||||||
|  |  | ||||||
|  | Invidious::Jobs.register Invidious::Jobs::InstanceListRefreshJob.new | ||||||
|  |  | ||||||
| Invidious::Jobs.start_all | Invidious::Jobs.start_all | ||||||
|  |  | ||||||
| def popular_videos | def popular_videos | ||||||
|   | |||||||
| @@ -323,68 +323,6 @@ def parse_range(range) | |||||||
|   return 0_i64, nil |   return 0_i64, nil | ||||||
| end | end | ||||||
|  |  | ||||||
| def fetch_random_instance |  | ||||||
|   begin |  | ||||||
|     instance_api_client = make_client(URI.parse("https://api.invidious.io")) |  | ||||||
|  |  | ||||||
|     # Timeouts |  | ||||||
|     instance_api_client.connect_timeout = 10.seconds |  | ||||||
|     instance_api_client.dns_timeout = 10.seconds |  | ||||||
|  |  | ||||||
|     instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a |  | ||||||
|     instance_api_client.close |  | ||||||
|   rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException |  | ||||||
|     instance_list = [] of JSON::Any |  | ||||||
|   end |  | ||||||
|  |  | ||||||
|   filtered_instance_list = [] of String |  | ||||||
|  |  | ||||||
|   instance_list.each do |data| |  | ||||||
|     # TODO Check if current URL is onion instance and use .onion types if so. |  | ||||||
|     if data[1]["type"] == "https" |  | ||||||
|       # Instances can have statistics disabled, which is an requirement of version validation. |  | ||||||
|       # as_nil? doesn't exist. Thus we'll have to handle the error raised if as_nil fails. |  | ||||||
|       begin |  | ||||||
|         data[1]["stats"].as_nil |  | ||||||
|         next |  | ||||||
|       rescue TypeCastError |  | ||||||
|       end |  | ||||||
|  |  | ||||||
|       # stats endpoint could also lack the software dict. |  | ||||||
|       next if data[1]["stats"]["software"]?.nil? |  | ||||||
|  |  | ||||||
|       # Makes sure the instance isn't too outdated. |  | ||||||
|       if remote_version = data[1]["stats"]?.try &.["software"]?.try &.["version"] |  | ||||||
|         remote_commit_date = remote_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/) |  | ||||||
|         next if !remote_commit_date |  | ||||||
|  |  | ||||||
|         remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC) |  | ||||||
|         local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC) |  | ||||||
|  |  | ||||||
|         next if (remote_commit_date - local_commit_date).abs.days > 30 |  | ||||||
|  |  | ||||||
|         begin |  | ||||||
|           data[1]["monitor"].as_nil |  | ||||||
|           health = data[1]["monitor"].as_h["dailyRatios"][0].as_h["ratio"] |  | ||||||
|           filtered_instance_list << data[0].as_s if health.to_s.to_f > 90 |  | ||||||
|         rescue TypeCastError |  | ||||||
|           # We can't check the health if the monitoring is broken. Thus we'll just add it to the list |  | ||||||
|           # and move on. Ideally we'll ignore any instance that has broken health monitoring but due to the fact that |  | ||||||
|           # it's an error that often occurs with all the instances at the same time, we have to just skip the check. |  | ||||||
|           filtered_instance_list << data[0].as_s |  | ||||||
|         end |  | ||||||
|       end |  | ||||||
|     end |  | ||||||
|   end |  | ||||||
|  |  | ||||||
|   # If for some reason no instances managed to get fetched successfully then we'll just redirect to redirect.invidious.io |  | ||||||
|   if filtered_instance_list.size == 0 |  | ||||||
|     return "redirect.invidious.io" |  | ||||||
|   end |  | ||||||
|  |  | ||||||
|   return filtered_instance_list.sample(1)[0] |  | ||||||
| end |  | ||||||
|  |  | ||||||
| def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String | def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String | ||||||
|   str = uri.to_s.sub(/^https?:\/\//, "") |   str = uri.to_s.sub(/^https?:\/\//, "") | ||||||
|   if str.size > max_length |   if str.size > max_length | ||||||
|   | |||||||
							
								
								
									
										97
									
								
								src/invidious/jobs/instance_refresh_job.cr
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								src/invidious/jobs/instance_refresh_job.cr
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | |||||||
# Background job that periodically rebuilds the list of instances used for
# cross-instance redirects, so that the redirect route can read a cached list
# instead of fetching one on every request.
class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob
  # The constant itself is never reassigned; only the array stored under the
  # "INSTANCES" key is swapped out, which lets the latest list be read from
  # anywhere within the codebase.
  #
  # "INSTANCES" => Array(Tuple(String, String))  # region, instance
  INSTANCES = {"INSTANCES" => [] of Tuple(String, String)}

  def initialize
  end

  # Job entry point: refreshes the instance list, then sleeps for 30 minutes,
  # forever.
  def begin
    loop do
      refresh_instances
      LOGGER.info("InstanceListRefreshJob: Done, sleeping for 30 minutes")
      sleep 30.minutes
      Fiber.yield
    end
  end

  # Refreshes the list of instances used for redirects.
  #
  # Does the following three checks for each instance
  # -  Is it a clear-net instance?
  # -  Is it an instance with a good uptime?
  # -  Is it an updated instance?
  #
  # The published list is only replaced when at least one instance passed all
  # checks, so a failed fetch keeps the previous list in place.
  private def refresh_instances
    filtered_instance_list = [] of Tuple(String, String)

    fetch_instances.each do |instance_data|
      # TODO allow Tor hidden service instances when the current instance
      # is also a hidden service. Same for i2p and any other non-clearnet instances.
      begin
        domain = instance_data[0]
        info = instance_data[1]

        # The cheap type check comes first so that non-clearnet instances are
        # skipped quietly instead of logging a failure when they lack stats.
        next unless info["type"] == "https"
        next if bad_uptime?(info["monitor"])
        next if outdated?(info["stats"]["software"]["version"])

        filtered_instance_list << {info["region"].as_s, domain.as_s}
      rescue ex
        # Any missing key or unexpected JSON type ends up here; the instance is
        # logged and skipped.
        if domain
          LOGGER.info("InstanceListRefreshJob: failed to parse information from '#{domain}' because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\"  ")
        else
          LOGGER.info("InstanceListRefreshJob: failed to parse information from an instance because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\"  ")
        end
      end
    end

    INSTANCES["INSTANCES"] = filtered_instance_list unless filtered_instance_list.empty?
  end

  # Fetches information regarding instances from api.invidious.io.
  #
  # Returns an empty array when the request fails, times out, or the response
  # is not a JSON array.
  private def fetch_instances : Array(JSON::Any)
    # We directly call the stdlib HTTP::Client here as it allows us to negate the effects
    # of the force_resolve config option. This is needed as api.invidious.io does not support ipv6
    # and as such the following request raises if we were to use force_resolve with the ipv6 value.
    instance_api_client = HTTP::Client.new(URI.parse("https://api.invidious.io"))

    # Timeouts
    instance_api_client.connect_timeout = 10.seconds
    instance_api_client.dns_timeout = 10.seconds

    begin
      response = instance_api_client.get("/instances.json")

      # `as_a?` returns nil for a non-array payload instead of raising a
      # TypeCastError that would terminate the job's loop.
      JSON.parse(response.body).as_a? || [] of JSON::Any
    rescue ex : Socket::ConnectError | IO::TimeoutError | JSON::ParseException
      LOGGER.info("InstanceListRefreshJob: failed to fetch the instance list because \"#{ex.message}\"")
      [] of JSON::Any
    ensure
      # Release the connection on the failure paths as well.
      instance_api_client.close
    end
  end

  # Checks if the given target instance is outdated, i.e. whether the date in
  # its version string is more than 30 days away from the date in CURRENT_VERSION.
  #
  # A version string that contains no `YYYY.MM.DD` date is treated as not outdated.
  private def outdated?(target_instance_version) : Bool
    remote_commit_date = target_instance_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/)
    return false if !remote_commit_date

    remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC)
    local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)

    return (remote_commit_date - local_commit_date).abs.days > 30
  end

  # Checks the health monitor data of the target instance: the uptime is bad
  # when the instance is currently reported as down or when its reported
  # "uptime" value is below 90.
  private def bad_uptime?(target_instance_health_monitor) : Bool
    return true if target_instance_health_monitor["down"].as_bool
    return true if target_instance_health_monitor["uptime"].as_f < 90

    return false
  end
end
| @@ -40,7 +40,16 @@ module Invidious::Routes::Misc | |||||||
|  |  | ||||||
|   def self.cross_instance_redirect(env) |   def self.cross_instance_redirect(env) | ||||||
|     referer = get_referer(env) |     referer = get_referer(env) | ||||||
|     instance_url = fetch_random_instance |  | ||||||
|  |     instance_list = Invidious::Jobs::InstanceListRefreshJob::INSTANCES["INSTANCES"] | ||||||
|  |     if instance_list.empty? | ||||||
|  |       instance_url = "redirect.invidious.io" | ||||||
|  |     else | ||||||
|  |       # Sample returns an array | ||||||
|  |       # Instances are packaged as {region, domain} in the instance list | ||||||
|  |       instance_url = instance_list.sample(1)[0][1] | ||||||
|  |     end | ||||||
|  |  | ||||||
|     env.redirect "https://#{instance_url}#{referer}" |     env.redirect "https://#{instance_url}#{referer}" | ||||||
|   end |   end | ||||||
| end | end | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user