From 2e67b90540d35ede212866e1fb597fd57ced35d5 Mon Sep 17 00:00:00 2001 From: syeopite Date: Sat, 22 Jul 2023 23:55:05 -0700 Subject: [PATCH 01/11] Add method to query /youtubei/v1/get_transcript --- src/invidious/yt_backend/youtube_api.cr | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/invidious/yt_backend/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr index 3dd9e9d8..f8aca04d 100644 --- a/src/invidious/yt_backend/youtube_api.cr +++ b/src/invidious/yt_backend/youtube_api.cr @@ -557,6 +557,30 @@ module YoutubeAPI return self._post_json("/youtubei/v1/search", data, client_config) end + #################################################################### + # transcript(params) + # + # Requests the youtubei/v1/get_transcript endpoint with the required headers + # and POST data in order to get a JSON reply. + # + # The requested data is a specially encoded protobuf string that denotes the specific language requested. + # + # An optional ClientConfig parameter can be passed, too (see + # `struct ClientConfig` above for more details). + # + + def transcript( + params : String, + client_config : ClientConfig | Nil = nil + ) : Hash(String, JSON::Any) + data = { + "context" => self.make_context(client_config), + "params" => params, + } + + return self._post_json("/youtubei/v1/get_transcript", data, client_config) + end + #################################################################### # _post_json(endpoint, data, client_config?) # From 7e5935a9da5355bbdd4c047edf692b0ce57722c7 Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 23 Jul 2023 00:54:43 -0700 Subject: [PATCH 02/11] Rename Caption struct to CaptionMetadata The Caption object does not actually store any text lines for the subtitles. Instead it stores the metadata needed to display and fetch the actual captions from the YT timedtext API. Therefore it may be wiser to rename the struct to be more reflective of its current usage as well as the future usage once the current caption retrival system is replaced via InnerTube's transcript API --- src/invidious/frontend/watch_page.cr | 2 +- src/invidious/videos.cr | 6 +++--- src/invidious/videos/caption.cr | 8 ++++---- src/invidious/views/user/preferences.ecr | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/invidious/frontend/watch_page.cr b/src/invidious/frontend/watch_page.cr index e3214469..b860dba7 100644 --- a/src/invidious/frontend/watch_page.cr +++ b/src/invidious/frontend/watch_page.cr @@ -7,7 +7,7 @@ module Invidious::Frontend::WatchPage getter full_videos : Array(Hash(String, JSON::Any)) getter video_streams : Array(Hash(String, JSON::Any)) getter audio_streams : Array(Hash(String, JSON::Any)) - getter captions : Array(Invidious::Videos::Caption) + getter captions : Array(Invidious::Videos::CaptionMetadata) def initialize( @full_videos, diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index f38b33e5..2b1d2603 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -24,7 +24,7 @@ struct Video property updated : Time @[DB::Field(ignore: true)] - @captions = [] of Invidious::Videos::Caption + @captions = [] of Invidious::Videos::CaptionMetadata @[DB::Field(ignore: true)] property adaptive_fmts : Array(Hash(String, JSON::Any))? @@ -215,9 +215,9 @@ struct Video keywords.includes? "YouTube Red" end - def captions : Array(Invidious::Videos::Caption) + def captions : Array(Invidious::Videos::CaptionMetadata) if @captions.empty? && @info.has_key?("captions") - @captions = Invidious::Videos::Caption.from_yt_json(info["captions"]) + @captions = Invidious::Videos::CaptionMetadata.from_yt_json(info["captions"]) end return @captions diff --git a/src/invidious/videos/caption.cr b/src/invidious/videos/caption.cr index 13f81a31..c85b46c3 100644 --- a/src/invidious/videos/caption.cr +++ b/src/invidious/videos/caption.cr @@ -1,7 +1,7 @@ require "json" module Invidious::Videos - struct Caption + struct CaptionMetadata property name : String property language_code : String property base_url : String @@ -10,12 +10,12 @@ module Invidious::Videos end # Parse the JSON structure from Youtube - def self.from_yt_json(container : JSON::Any) : Array(Caption) + def self.from_yt_json(container : JSON::Any) : Array(CaptionMetadata) caption_tracks = container .dig?("playerCaptionsTracklistRenderer", "captionTracks") .try &.as_a - captions_list = [] of Caption + captions_list = [] of CaptionMetadata return captions_list if caption_tracks.nil? caption_tracks.each do |caption| @@ -25,7 +25,7 @@ module Invidious::Videos language_code = caption["languageCode"].to_s base_url = caption["baseUrl"].to_s - captions_list << Caption.new(name, language_code, base_url) + captions_list << CaptionMetadata.new(name, language_code, base_url) end return captions_list diff --git a/src/invidious/views/user/preferences.ecr b/src/invidious/views/user/preferences.ecr index dfda1434..b1061ee8 100644 --- a/src/invidious/views/user/preferences.ecr +++ b/src/invidious/views/user/preferences.ecr @@ -89,7 +89,7 @@ <% preferences.captions.each_with_index do |caption, index| %> From 8e18d445a7adf9a0c0887249003a7b84f0fb95af Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 23 Jul 2023 01:52:53 -0700 Subject: [PATCH 03/11] Add method to generate params for transcripts api --- src/invidious/videos/transcript.cr | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/invidious/videos/transcript.cr diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr new file mode 100644 index 00000000..c50f7569 --- /dev/null +++ b/src/invidious/videos/transcript.cr @@ -0,0 +1,34 @@ +module Invidious::Videos + # Namespace for methods primarily relating to Transcripts + module Transcript + def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String + if !auto_generated + is_auto_generated = "" + elsif is_auto_generated = "asr" + end + + object = { + "1:0:string" => video_id, + + "2:base64" => { + "1:string" => is_auto_generated, + "2:string" => language_code, + "3:string" => "", + }, + + "3:varint" => 1_i64, + "5:string" => "engagement-panel-searchable-transcript-search-panel", + "6:varint" => 1_i64, + "7:varint" => 1_i64, + "8:varint" => 1_i64, + } + + params = object.try { |i| Protodec::Any.cast_json(i) } + .try { |i| Protodec::Any.from_json(i) } + .try { |i| Base64.urlsafe_encode(i) } + .try { |i| URI.encode_www_form(i) } + + return params + end + end +end From 4b3ac1a757a5ee14919e83a84de31a3d0bd14a4c Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 23 Jul 2023 03:22:19 -0700 Subject: [PATCH 04/11] Add method to parse transcript JSON into structs --- src/invidious/videos/transcript.cr | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index c50f7569..0d8b0b25 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -1,6 +1,8 @@ module Invidious::Videos # Namespace for methods primarily relating to Transcripts module Transcript + record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String + def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String if !auto_generated is_auto_generated = "" @@ -30,5 +32,40 @@ module Invidious::Videos return params end + + def self.convert_transcripts_to_vtt(initial_data : JSON::Any, target_language : String) : String + # Convert into TranscriptLine + + vtt = String.build do |vtt| + result << <<-END_VTT + WEBVTT + Kind: captions + Language: #{tlang} + + + END_VTT + + vtt << "\n\n" + end + end + + def self.parse(initial_data : Hash(String, JSON::Any)) + body = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer", + "content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer", + "initialSegments").as_a + + lines = [] of TranscriptLine + body.each do |line| + line = line["transcriptSegmentRenderer"] + start_ms = line["startMs"].as_s.to_i.millisecond + end_ms = line["endMs"].as_s.to_i.millisecond + + text = extract_text(line["snippet"]) || "" + + lines << TranscriptLine.new(start_ms, end_ms, text) + end + + return lines + end end end From caac7e21668dd88eaf3d57ddc300427885af0a23 Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 23 Jul 2023 03:52:26 -0700 Subject: [PATCH 05/11] Add method to convert transcripts response to vtt --- src/invidious/videos/transcript.cr | 39 ++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index 0d8b0b25..ec990883 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -33,23 +33,52 @@ module Invidious::Videos return params end - def self.convert_transcripts_to_vtt(initial_data : JSON::Any, target_language : String) : String - # Convert into TranscriptLine + def self.convert_transcripts_to_vtt(initial_data : Hash(String, JSON::Any), target_language : String) : String + # Convert into array of TranscriptLine + lines = self.parse(initial_data) + # Taken from Invidious::Videos::CaptionMetadata.timedtext_to_vtt() vtt = String.build do |vtt| - result << <<-END_VTT + vtt << <<-END_VTT WEBVTT Kind: captions - Language: #{tlang} + Language: #{target_language} END_VTT vtt << "\n\n" + + lines.each do |line| + start_time = line.start_ms + end_time = line.end_ms + + # start_time + vtt << start_time.hours.to_s.rjust(2, '0') + vtt << ':' << start_time.minutes.to_s.rjust(2, '0') + vtt << ':' << start_time.seconds.to_s.rjust(2, '0') + vtt << '.' << start_time.milliseconds.to_s.rjust(3, '0') + + vtt << " --> " + + # end_time + vtt << end_time.hours.to_s.rjust(2, '0') + vtt << ':' << end_time.minutes.to_s.rjust(2, '0') + vtt << ':' << end_time.seconds.to_s.rjust(2, '0') + vtt << '.' << end_time.milliseconds.to_s.rjust(3, '0') + + vtt << "\n" + vtt << line.line + + vtt << "\n" + vtt << "\n" + end end + + return vtt end - def self.parse(initial_data : Hash(String, JSON::Any)) + private def self.parse(initial_data : Hash(String, JSON::Any)) body = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer", "content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer", "initialSegments").as_a From e4942b188f5c192d5693687698db9b106571332c Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 23 Jul 2023 05:02:02 -0700 Subject: [PATCH 06/11] Integrate transcript captions into captions API --- config/config.example.yml | 13 +++ src/invidious/config.cr | 3 + src/invidious/routes/api/v1/videos.cr | 112 ++++++++++++++------------ src/invidious/videos/caption.cr | 11 ++- src/invidious/videos/transcript.cr | 6 ++ 5 files changed, 91 insertions(+), 54 deletions(-) diff --git a/config/config.example.yml b/config/config.example.yml index 34070fe5..51beab89 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -182,6 +182,19 @@ https_only: false #force_resolve: +## +## Use Innertube's transcripts API instead of timedtext for closed captions +## +## Useful for larger instances as InnerTube is **not ratelimited**. See https://github.com/iv-org/invidious/issues/2567 +## +## Subtitle experience may differ slightly on Invidious. +## +## Accepted values: true, false +## Default: false +## +# use_innertube_for_captions: false + + # ----------------------------- # Logging # ----------------------------- diff --git a/src/invidious/config.cr b/src/invidious/config.cr index e5f1e822..c88a4837 100644 --- a/src/invidious/config.cr +++ b/src/invidious/config.cr @@ -129,6 +129,9 @@ class Config # Use quic transport for youtube api property use_quic : Bool = false + # Use Innertube's transcripts API instead of timedtext for closed captions + property use_innertube_for_captions : Bool = false + # Saved cookies in "name1=value1; name2=value2..." format @[YAML::Field(converter: Preferences::StringToCookies)] property cookies : HTTP::Cookies = HTTP::Cookies.new diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index af4fc806..000e64b9 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -87,70 +87,78 @@ module Invidious::Routes::API::V1::Videos caption = caption[0] end - url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target + if CONFIG.use_innertube_for_captions + params = Invidious::Videos::Transcript.generate_param(id, caption.language_code, caption.auto_generated) + initial_data = YoutubeAPI.transcript(params.to_s) - # Auto-generated captions often have cues that aren't aligned properly with the video, - # as well as some other markup that makes it cumbersome, so we try to fix that here - if caption.name.includes? "auto-generated" - caption_xml = YT_POOL.client &.get(url).body + webvtt = Invidious::Videos::Transcript.convert_transcripts_to_vtt(initial_data, caption.language_code) + else + # Timedtext API handling + url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target - if caption_xml.starts_with?(" i + 1 - end_time = caption_nodes[i + 1]["start"].to_f.seconds - else - end_time = start_time + duration + if caption_nodes.size > i + 1 + end_time = caption_nodes[i + 1]["start"].to_f.seconds + else + end_time = start_time + duration + end + + start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}" + end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}" + + text = HTML.unescape(node.content) + text = text.gsub(//, "") + text = text.gsub(/<\/font>/, "") + if md = text.match(/(?.*) : (?.*)/) + text = "#{md["text"]}" + end + + str << <<-END_CUE + #{start_time} --> #{end_time} + #{text} + + + END_CUE end - - start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}" - end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}" - - text = HTML.unescape(node.content) - text = text.gsub(//, "") - text = text.gsub(/<\/font>/, "") - if md = text.match(/(?.*) : (?.*)/) - text = "#{md["text"]}" - end - - str << <<-END_CUE - #{start_time} --> #{end_time} - #{text} - - - END_CUE end end - end - else - # Some captions have "align:[start/end]" and "position:[num]%" - # attributes. Those are causing issues with VideoJS, which is unable - # to properly align the captions on the video, so we remove them. - # - # See: https://github.com/iv-org/invidious/issues/2391 - webvtt = YT_POOL.client &.get("#{url}&format=vtt").body - if webvtt.starts_with?(" [0-9:.]{12}).+/, "\\1") + if webvtt.starts_with?(" [0-9:.]{12}).+/, "\\1") + end end end diff --git a/src/invidious/videos/caption.cr b/src/invidious/videos/caption.cr index c85b46c3..1e2abde9 100644 --- a/src/invidious/videos/caption.cr +++ b/src/invidious/videos/caption.cr @@ -6,7 +6,9 @@ module Invidious::Videos property language_code : String property base_url : String - def initialize(@name, @language_code, @base_url) + property auto_generated : Bool + + def initialize(@name, @language_code, @base_url, @auto_generated) end # Parse the JSON structure from Youtube @@ -25,7 +27,12 @@ module Invidious::Videos language_code = caption["languageCode"].to_s base_url = caption["baseUrl"].to_s - captions_list << CaptionMetadata.new(name, language_code, base_url) + auto_generated = false + if caption["kind"]? && caption["kind"] == "asr" + auto_generated = true + end + + captions_list << CaptionMetadata.new(name, language_code, base_url, auto_generated) end return captions_list diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index ec990883..ba2728cd 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -85,7 +85,13 @@ module Invidious::Videos lines = [] of TranscriptLine body.each do |line| + # Transcript section headers. They are not apart of the captions and as such we can safely skip them. + if line.as_h.has_key?("transcriptSectionHeaderRenderer") + next + end + line = line["transcriptSegmentRenderer"] + start_ms = line["startMs"].as_s.to_i.millisecond end_ms = line["endMs"].as_s.to_i.millisecond From 3509752b791b12bcf20e12656e3b871e5034b1a7 Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 23 Jul 2023 16:50:40 -0700 Subject: [PATCH 07/11] Rename transcript() to get_transcript() in YT API --- src/invidious/routes/api/v1/videos.cr | 2 +- src/invidious/yt_backend/youtube_api.cr | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index 000e64b9..25e766d2 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -89,7 +89,7 @@ module Invidious::Routes::API::V1::Videos if CONFIG.use_innertube_for_captions params = Invidious::Videos::Transcript.generate_param(id, caption.language_code, caption.auto_generated) - initial_data = YoutubeAPI.transcript(params.to_s) + initial_data = YoutubeAPI.get_transcript(params) webvtt = Invidious::Videos::Transcript.convert_transcripts_to_vtt(initial_data, caption.language_code) else diff --git a/src/invidious/yt_backend/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr index f8aca04d..a3335bbf 100644 --- a/src/invidious/yt_backend/youtube_api.cr +++ b/src/invidious/yt_backend/youtube_api.cr @@ -558,7 +558,7 @@ module YoutubeAPI end #################################################################### - # transcript(params) + # get_transcript(params, client_config?) # # Requests the youtubei/v1/get_transcript endpoint with the required headers # and POST data in order to get a JSON reply. @@ -569,7 +569,7 @@ module YoutubeAPI # `struct ClientConfig` above for more details). # - def transcript( + def get_transcript( params : String, client_config : ClientConfig | Nil = nil ) : Hash(String, JSON::Any) From 1f7592e599054131c689246b0dd6aad45f2d8e7a Mon Sep 17 00:00:00 2001 From: syeopite Date: Thu, 24 Aug 2023 16:00:02 -0700 Subject: [PATCH 08/11] Refactor structure of caption.cr Rename CaptionsMetadata to Metadata Nest Metadata under Captions Unnest LANGUAGES constant from Metadata to main Captions module --- src/invidious/frontend/watch_page.cr | 2 +- src/invidious/videos.cr | 6 +- src/invidious/videos/caption.cr | 166 ++++++++++++----------- src/invidious/videos/transcript.cr | 2 +- src/invidious/views/user/preferences.ecr | 2 +- 5 files changed, 90 insertions(+), 88 deletions(-) diff --git a/src/invidious/frontend/watch_page.cr b/src/invidious/frontend/watch_page.cr index b860dba7..5fd81168 100644 --- a/src/invidious/frontend/watch_page.cr +++ b/src/invidious/frontend/watch_page.cr @@ -7,7 +7,7 @@ module Invidious::Frontend::WatchPage getter full_videos : Array(Hash(String, JSON::Any)) getter video_streams : Array(Hash(String, JSON::Any)) getter audio_streams : Array(Hash(String, JSON::Any)) - getter captions : Array(Invidious::Videos::CaptionMetadata) + getter captions : Array(Invidious::Videos::Captions::Metadata) def initialize( @full_videos, diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 2b1d2603..9fbd1374 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -24,7 +24,7 @@ struct Video property updated : Time @[DB::Field(ignore: true)] - @captions = [] of Invidious::Videos::CaptionMetadata + @captions = [] of Invidious::Videos::Captions::Metadata @[DB::Field(ignore: true)] property adaptive_fmts : Array(Hash(String, JSON::Any))? @@ -215,9 +215,9 @@ struct Video keywords.includes? "YouTube Red" end - def captions : Array(Invidious::Videos::CaptionMetadata) + def captions : Array(Invidious::Videos::Captions::Metadata) if @captions.empty? && @info.has_key?("captions") - @captions = Invidious::Videos::CaptionMetadata.from_yt_json(info["captions"]) + @captions = Invidious::Videos::Captions::Metadata.from_yt_json(info["captions"]) end return @captions diff --git a/src/invidious/videos/caption.cr b/src/invidious/videos/caption.cr index 1e2abde9..82b68dcd 100644 --- a/src/invidious/videos/caption.cr +++ b/src/invidious/videos/caption.cr @@ -1,107 +1,109 @@ require "json" module Invidious::Videos - struct CaptionMetadata - property name : String - property language_code : String - property base_url : String + module Captions + struct Metadata + property name : String + property language_code : String + property base_url : String - property auto_generated : Bool + property auto_generated : Bool - def initialize(@name, @language_code, @base_url, @auto_generated) - end - - # Parse the JSON structure from Youtube - def self.from_yt_json(container : JSON::Any) : Array(CaptionMetadata) - caption_tracks = container - .dig?("playerCaptionsTracklistRenderer", "captionTracks") - .try &.as_a - - captions_list = [] of CaptionMetadata - return captions_list if caption_tracks.nil? - - caption_tracks.each do |caption| - name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"] - name = name.to_s.split(" - ")[0] - - language_code = caption["languageCode"].to_s - base_url = caption["baseUrl"].to_s - - auto_generated = false - if caption["kind"]? && caption["kind"] == "asr" - auto_generated = true - end - - captions_list << CaptionMetadata.new(name, language_code, base_url, auto_generated) + def initialize(@name, @language_code, @base_url, @auto_generated) end - return captions_list - end + # Parse the JSON structure from Youtube + def self.from_yt_json(container : JSON::Any) : Array(Captions::Metadata) + caption_tracks = container + .dig?("playerCaptionsTracklistRenderer", "captionTracks") + .try &.as_a - def timedtext_to_vtt(timedtext : String, tlang = nil) : String - # In the future, we could just directly work with the url. This is more of a POC - cues = [] of XML::Node - tree = XML.parse(timedtext) - tree = tree.children.first + captions_list = [] of Captions::Metadata + return captions_list if caption_tracks.nil? - tree.children.each do |item| - if item.name == "body" - item.children.each do |cue| - if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n") - cues << cue + caption_tracks.each do |caption| + name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"] + name = name.to_s.split(" - ")[0] + + language_code = caption["languageCode"].to_s + base_url = caption["baseUrl"].to_s + + auto_generated = false + if caption["kind"]? && caption["kind"] == "asr" + auto_generated = true + end + + captions_list << Captions::Metadata.new(name, language_code, base_url, auto_generated) + end + + return captions_list + end + + def timedtext_to_vtt(timedtext : String, tlang = nil) : String + # In the future, we could just directly work with the url. This is more of a POC + cues = [] of XML::Node + tree = XML.parse(timedtext) + tree = tree.children.first + + tree.children.each do |item| + if item.name == "body" + item.children.each do |cue| + if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n") + cues << cue + end end + break end - break end - end - result = String.build do |result| - result << <<-END_VTT - WEBVTT - Kind: captions - Language: #{tlang || @language_code} + result = String.build do |result| + result << <<-END_VTT + WEBVTT + Kind: captions + Language: #{tlang || @language_code} - END_VTT + END_VTT - result << "\n\n" + result << "\n\n" - cues.each_with_index do |node, i| - start_time = node["t"].to_f.milliseconds + cues.each_with_index do |node, i| + start_time = node["t"].to_f.milliseconds - duration = node["d"]?.try &.to_f.milliseconds + duration = node["d"]?.try &.to_f.milliseconds - duration ||= start_time + duration ||= start_time - if cues.size > i + 1 - end_time = cues[i + 1]["t"].to_f.milliseconds - else - end_time = start_time + duration + if cues.size > i + 1 + end_time = cues[i + 1]["t"].to_f.milliseconds + else + end_time = start_time + duration + end + + # start_time + result << start_time.hours.to_s.rjust(2, '0') + result << ':' << start_time.minutes.to_s.rjust(2, '0') + result << ':' << start_time.seconds.to_s.rjust(2, '0') + result << '.' << start_time.milliseconds.to_s.rjust(3, '0') + + result << " --> " + + # end_time + result << end_time.hours.to_s.rjust(2, '0') + result << ':' << end_time.minutes.to_s.rjust(2, '0') + result << ':' << end_time.seconds.to_s.rjust(2, '0') + result << '.' << end_time.milliseconds.to_s.rjust(3, '0') + + result << "\n" + + node.children.each do |s| + result << s.content + end + result << "\n" + result << "\n" end - - # start_time - result << start_time.hours.to_s.rjust(2, '0') - result << ':' << start_time.minutes.to_s.rjust(2, '0') - result << ':' << start_time.seconds.to_s.rjust(2, '0') - result << '.' << start_time.milliseconds.to_s.rjust(3, '0') - - result << " --> " - - # end_time - result << end_time.hours.to_s.rjust(2, '0') - result << ':' << end_time.minutes.to_s.rjust(2, '0') - result << ':' << end_time.seconds.to_s.rjust(2, '0') - result << '.' << end_time.milliseconds.to_s.rjust(3, '0') - - result << "\n" - - node.children.each do |s| - result << s.content - end - result << "\n" - result << "\n" end + return result end - return result end # List of all caption languages available on Youtube. diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index ba2728cd..c86b3988 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -37,7 +37,7 @@ module Invidious::Videos # Convert into array of TranscriptLine lines = self.parse(initial_data) - # Taken from Invidious::Videos::CaptionMetadata.timedtext_to_vtt() + # Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt() vtt = String.build do |vtt| vtt << <<-END_VTT WEBVTT diff --git a/src/invidious/views/user/preferences.ecr b/src/invidious/views/user/preferences.ecr index b1061ee8..55349c5a 100644 --- a/src/invidious/views/user/preferences.ecr +++ b/src/invidious/views/user/preferences.ecr @@ -89,7 +89,7 @@ <% preferences.captions.each_with_index do |caption, index| %> From 7d435f082bf24c1122c95ecc92efee4a39a7b539 Mon Sep 17 00:00:00 2001 From: syeopite <70992037+syeopite@users.noreply.github.com> Date: Thu, 24 Aug 2023 23:20:20 +0000 Subject: [PATCH 09/11] Update src/invidious/videos/transcript.cr Co-authored-by: Samantaz Fox --- src/invidious/videos/transcript.cr | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index c86b3988..f3360a52 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -4,16 +4,13 @@ module Invidious::Videos record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String - if !auto_generated - is_auto_generated = "" - elsif is_auto_generated = "asr" - end + kind = auto_generated ? "asr" : "" object = { "1:0:string" => video_id, "2:base64" => { - "1:string" => is_auto_generated, + "1:string" => kind, "2:string" => language_code, "3:string" => "", }, From 3615bb0e62209cfad4825e8c40d8e6de69aac687 Mon Sep 17 00:00:00 2001 From: syeopite Date: Thu, 24 Aug 2023 16:21:05 -0700 Subject: [PATCH 10/11] Update src/invidious/videos/caption.cr Co-authored-by: Samantaz Fox --- src/invidious/videos/caption.cr | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/invidious/videos/caption.cr b/src/invidious/videos/caption.cr index 82b68dcd..256dfcc0 100644 --- a/src/invidious/videos/caption.cr +++ b/src/invidious/videos/caption.cr @@ -28,10 +28,7 @@ module Invidious::Videos language_code = caption["languageCode"].to_s base_url = caption["baseUrl"].to_s - auto_generated = false - if caption["kind"]? && caption["kind"] == "asr" - auto_generated = true - end + auto_generated = (caption["kind"]? == "asr") captions_list << Captions::Metadata.new(name, language_code, base_url, auto_generated) end From eabcea6f4a16a47555d945460ac824588ff546e5 Mon Sep 17 00:00:00 2001 From: syeopite <70992037+syeopite@users.noreply.github.com> Date: Tue, 29 Aug 2023 06:18:35 +0000 Subject: [PATCH 11/11] Remove trailing whitespace in config documentation Co-authored-by: Samantaz Fox --- config/config.example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.example.yml b/config/config.example.yml index 51beab89..c6051bce 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -187,7 +187,7 @@ https_only: false ## ## Useful for larger instances as InnerTube is **not ratelimited**. See https://github.com/iv-org/invidious/issues/2567 ## -## Subtitle experience may differ slightly on Invidious. +## Subtitle experience may differ slightly on Invidious. ## ## Accepted values: true, false ## Default: false