class InstagramIOSIE(InfoExtractor):
    """Resolve iOS deep links of the form ``instagram://media?id=<media id>``.

    The numeric media ID is converted into the post shortcode and the
    result is handed over, as a ``url_transparent`` entry, to the extractor
    registered under the ``Instagram`` ie_key.
    """

    # The capture group must carry a name: a bare ``(?P`` without ``<name>``
    # is rejected by ``re``. The name ``id`` is presumably what
    # ``_match_id`` looks up -- confirm against InfoExtractor.
    _VALID_URL = r'instagram://media\?id=(?P<id>[\d_]+)'
    _TESTS = [{
        'url': 'instagram://media?id=482584233761418119',
        'md5': '0d2da106a9d2631273e192b372806516',
        'info_dict': {
            'id': 'aye83DjauH',
            'ext': 'mp4',
            'title': 'Video by naomipq',
            'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 0,
            'timestamp': 1371748545,
            'upload_date': '20130620',
            'uploader_id': 'naomipq',
            'uploader': 'B E A U T Y F O R A S H E S',
            'like_count': int,
            'comment_count': int,
            'comments': list,
        },
        'add_ie': ['Instagram']
    }]

    def _get_id(self, id):
        """Convert a numeric media ID into the corresponding post shortcode.

        Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id

        Only the part of ``id`` before the first ``_`` is used; it is read
        as a base-10 integer and re-encoded in base 64 with the alphabet
        below, most significant digit first.

        Returns the shortcode string (empty when the number is 0).
        Raises ValueError if the part before ``_`` is not a valid integer
        (e.g. an ID that starts with ``_``).
        """
        # NOTE: the parameter name shadows the builtin ``id``; it is kept
        # unchanged so that any keyword callers keep working.
        chrs = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
        media_id = int(id.split('_')[0])
        shortened_id = ''
        while media_id > 0:
            # Peel off the least significant base-64 digit and prepend it.
            media_id, r = divmod(media_id, 64)
            shortened_id = chrs[r] + shortened_id
        return shortened_id

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the web post URL."""
        return {
            '_type': 'url_transparent',
            'url': f'http://instagram.com/tv/{self._get_id(self._match_id(url))}/',
            'ie_key': 'Instagram',
        }
InstagramIE(InstagramBaseIE): class InstagramPlaylistBaseIE(InstagramBaseIE): - _gis_tmpl = None # used to cache GIS request type def _parse_graphql(self, webpage, item_id):