mirror of
				https://gitea.invidious.io/iv-org/invidious
				synced 2025-06-05 23:29:12 +02:00 
			
		
		
		
	Refactor instance fetching logic into separate job
This commit is contained in:
		@@ -185,6 +185,8 @@ Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL
 | 
			
		||||
 | 
			
		||||
# Register the remaining background jobs.
Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new

# Keeps the instance list used for cross-instance redirects up to date.
Invidious::Jobs.register Invidious::Jobs::InstanceListRefreshJob.new

# NOTE(review): presumably launches every job registered above — confirm
# against the Invidious::Jobs implementation.
Invidious::Jobs.start_all
 | 
			
		||||
 | 
			
		||||
def popular_videos
 | 
			
		||||
 
 | 
			
		||||
@@ -323,68 +323,6 @@ def parse_range(range)
 | 
			
		||||
  return 0_i64, nil
 | 
			
		||||
end
 | 
			
		||||
 | 
			
		||||
# Picks a random public instance to redirect to.
#
# Downloads "/instances.json" from api.invidious.io, keeps the entries whose
# type is "https", that expose statistics with a software version dated within
# 30 days of CURRENT_VERSION, and returns the domain of one of them chosen at
# random. Returns "redirect.invidious.io" when no instance qualifies (including
# when the download or the JSON parsing fails).
def fetch_random_instance
  begin
    instance_api_client = make_client(URI.parse("https://api.invidious.io"))

    # Timeouts
    instance_api_client.connect_timeout = 10.seconds
    instance_api_client.dns_timeout = 10.seconds

    instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a
    # NOTE(review): the client is only closed on the success path; if the
    # request or the parsing raises, `close` is never reached.
    instance_api_client.close
  rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException
    # Fall back to an empty list so that the default redirect below is used.
    instance_list = [] of JSON::Any
  end

  filtered_instance_list = [] of String

  # Each entry is indexed as data[0] (the domain) and data[1] (its metadata).
  instance_list.each do |data|
    # TODO Check if current URL is onion instance and use .onion types if so.
    if data[1]["type"] == "https"
      # Instances can have statistics disabled, which is a requirement of version validation.
      # as_nil? doesn't exist. Thus we'll have to handle the error raised if as_nil fails.
      begin
        # Succeeds only when "stats" is null, in which case the instance is skipped.
        data[1]["stats"].as_nil
        next
      rescue TypeCastError
      end

      # stats endpoint could also lack the software dict.
      next if data[1]["stats"]["software"]?.nil?

      # Makes sure the instance isn't too outdated.
      if remote_version = data[1]["stats"]?.try &.["software"]?.try &.["version"]
        # Versions are expected to embed a "YYYY.MM.DD" date; skip the instance otherwise.
        remote_commit_date = remote_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/)
        next if !remote_commit_date

        remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC)
        local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)

        # Skip instances whose version differs from ours by more than 30 days (in either direction).
        next if (remote_commit_date - local_commit_date).abs.days > 30

        begin
          # NOTE(review): given the as_nil behavior relied on above (it raises
          # when the value is not null), a non-null "monitor" raises here and a
          # null one raises on `as_h` below, so the health comparison looks
          # unreachable and every instance ends up in the rescue branch — confirm.
          data[1]["monitor"].as_nil
          health = data[1]["monitor"].as_h["dailyRatios"][0].as_h["ratio"]
          filtered_instance_list << data[0].as_s if health.to_s.to_f > 90
        rescue TypeCastError
          # We can't check the health if the monitoring is broken. Thus we'll just add it to the list
          # and move on. Ideally we'll ignore any instance that has broken health monitoring but due to the fact that
          # it's an error that often occurs with all the instances at the same time, we have to just skip the check.
          filtered_instance_list << data[0].as_s
        end
      end
    end
  end

  # If for some reason no instances managed to get fetched successfully then we'll just redirect to redirect.invidious.io
  if filtered_instance_list.size == 0
    return "redirect.invidious.io"
  end

  # `sample(1)` returns a one-element array, hence the `[0]`.
  return filtered_instance_list.sample(1)[0]
end
 | 
			
		||||
 | 
			
		||||
def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String
 | 
			
		||||
  str = uri.to_s.sub(/^https?:\/\//, "")
 | 
			
		||||
  if str.size > max_length
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										94
									
								
								src/invidious/jobs/instance_refresh_job.cr
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								src/invidious/jobs/instance_refresh_job.cr
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,94 @@
 | 
			
		||||
# Background job that periodically rebuilds the list of instances used for
# cross-instance redirects.
class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob
  # We update the internals of a constant so that it can be accessed from
  # anywhere within the codebase.
  #
  # "INSTANCES" => Array(Tuple(String, String))  # region, instance

  INSTANCES = {"INSTANCES" => [] of Tuple(String, String)}

  def initialize
  end

  # Job entry point: refreshes the instance list, then sleeps for 30 minutes,
  # forever.
  def begin
    loop do
      refresh_instances
      LOGGER.info("InstanceListRefreshJob: Done, sleeping for 30 minutes")
      sleep 30.minute
      Fiber.yield
    end
  end

  # Refreshes the list of instances used for redirects.
  #
  # Does the following three checks for each instance
  # -  Is it a clear-net instance?
  # -  Is it an instance with a good uptime?
  # -  Is it an updated instance?
  #
  # The previously stored list is kept when no instance passes the checks
  # (for example when the instance API could not be reached).
  private def refresh_instances
    raw_instance_list = self.fetch_instances
    filtered_instance_list = [] of Tuple(String, String)

    raw_instance_list.each do |instance_data|
      # TODO allow Tor hidden service instances when the current instance
      # is also a hidden service. Same for i2p and any other non-clearnet instances.
      begin
        # Each entry is indexed as [0] (the domain) and [1] (its metadata).
        domain = instance_data[0]
        info = instance_data[1]
        stats = info["stats"]

        next unless info["type"] == "https"
        next if bad_uptime?(info["monitor"])
        next if outdated?(stats["software"]["version"])

        filtered_instance_list << {info["region"].as_s, domain.as_s}
      rescue ex
        # Any missing key or unexpected type in the entry ends up here: the
        # instance is logged and skipped rather than aborting the refresh.
        if domain
          LOGGER.info("InstanceListRefreshJob: failed to parse information from '#{domain}' because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\"  ")
        else
          LOGGER.info("InstanceListRefreshJob: failed to parse information from an instance because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\"  ")
        end
      end
    end

    if !filtered_instance_list.empty?
      INSTANCES["INSTANCES"] = filtered_instance_list
    end
  end

  # Fetches information regarding instances from api.invidious.io.
  #
  # Returns the parsed "/instances.json" array, or an empty array when the
  # connection fails, times out, or the response is not valid JSON.
  private def fetch_instances : Array(JSON::Any)
    begin
      instance_api_client = make_client(URI.parse("https://api.invidious.io"))

      # Timeouts
      instance_api_client.connect_timeout = 10.seconds
      instance_api_client.dns_timeout = 10.seconds

      JSON.parse(instance_api_client.get("/instances.json").body).as_a
    rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException
      [] of JSON::Any
    ensure
      # Close the client on every path, including when the request or the
      # parsing raised. The variable is nilable here because it is assigned
      # inside the `begin` body.
      instance_api_client.try &.close
    end
  end

  # Checks if the given target instance is outdated, i.e. whether the date
  # embedded in its version differs from CURRENT_VERSION by more than 30 days.
  #
  # A version without a "YYYY.MM.DD" date is not considered outdated.
  private def outdated?(target_instance_version) : Bool
    remote_commit_date = target_instance_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/)
    return false if !remote_commit_date

    remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC)
    local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)

    return (remote_commit_date - local_commit_date).abs.days > 30
  end

  # Checks if the target instance has a bad uptime.
  #
  # Returns true when the monitor does not report a "success" status, or when
  # the uptime ratio over a 30 day period is below 90%.
  private def bad_uptime?(target_instance_health_monitor) : Bool
    # `!value == "success"` would negate the value before comparing it and
    # could never be true, hence the explicit `!=`.
    return true if target_instance_health_monitor["statusClass"] != "success"
    return true if target_instance_health_monitor["30dRatio"]["ratio"].as_s.to_f < 90

    return false
  end
end
 | 
			
		||||
@@ -40,7 +40,16 @@ module Invidious::Routes::Misc
 | 
			
		||||
 | 
			
		||||
  # Redirects the client to another instance, appending the value returned by
  # `get_referer` to the target URL.
  #
  # The target is picked at random from the list maintained by
  # InstanceListRefreshJob; "redirect.invidious.io" is used while that list
  # is empty.
  def self.cross_instance_redirect(env)
    referer = get_referer(env)

    instance_list = Invidious::Jobs::InstanceListRefreshJob::INSTANCES["INSTANCES"]
    if instance_list.empty?
      instance_url = "redirect.invidious.io"
    else
      # Instances are packaged as {region, domain} in the instance list
      instance_url = instance_list.sample[1]
    end

    env.redirect "https://#{instance_url}#{referer}"
  end
 | 
			
		||||
end
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user