From 3148c1822f66533998278f0a1cf842b9bea1526a Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sat, 19 Oct 2024 21:41:14 +0000
Subject: [PATCH] [ie/substack] Resolve podcast file extensions (#11275)

Closes #4601
Authored by: bashonly
---
 yt_dlp/extractor/substack.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py
index 30cb322dc2..b70d40f2ca 100644
--- a/yt_dlp/extractor/substack.py
+++ b/yt_dlp/extractor/substack.py
@@ -2,7 +2,13 @@ import re
 import urllib.parse
 
 from .common import InfoExtractor
-from ..utils import js_to_json, str_or_none, traverse_obj
+from ..networking import HEADRequest
+from ..utils import (
+    determine_ext,
+    js_to_json,
+    str_or_none,
+)
+from ..utils.traversal import traverse_obj
 
 
 class SubstackIE(InfoExtractor):
@@ -43,6 +49,19 @@ class SubstackIE(InfoExtractor):
             'uploader': "Andrew Zimmern's Spilled Milk ",
             'uploader_id': '577659',
         },
+    }, {
+        # Podcast that needs its file extension resolved to mp3
+        'url': 'https://persuasion1.substack.com/p/summers',
+        'md5': '1456a755d46084744facdfac9edf900f',
+        'info_dict': {
+            'id': '141970405',
+            'ext': 'mp3',
+            'title': 'Larry Summers on What Went Wrong on Campus',
+            'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.',
+            'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg',
+            'uploader': 'Persuasion',
+            'uploader_id': '61579',
+        },
     }]
 
     @classmethod
@@ -89,7 +108,15 @@ class SubstackIE(InfoExtractor):
         post_type = webpage_info['post']['type']
         formats, subtitles = [], {}
         if post_type == 'podcast':
-            formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
+            fmt = {'url': webpage_info['post']['podcast_url']}
+            if not determine_ext(fmt['url'], default_ext=None):
+                # The redirected format URL expires but the original URL doesn't,
+                # so keep the original URL and use this request only to get the extension
+                fmt['ext'] = determine_ext(self._request_webpage(
+                    HEADRequest(fmt['url']), display_id,
+                    'Resolving podcast file extension',
+                    'Podcast URL is invalid').url)
+            formats.append(fmt)
         elif post_type == 'video':
             formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
         else: