yt-dlp/yt_dlp/downloader/dash.py

from __future__ import unicode_literals

try:
    import concurrent.futures
    can_threaded_download = True
except ImportError:
    can_threaded_download = False

from ..downloader import _get_real_downloader
from .fragment import FragmentFD

from ..compat import compat_urllib_error
from ..utils import (
    DownloadError,
    sanitize_open,
    urljoin,
)


class DashSegmentsFD(FragmentFD):
    """
    Download segments in a DASH manifest. External downloaders can take over
    the fragment downloads by supporting the 'frag_urls' protocol
    """

    FD_NAME = 'dashsegments'

    def real_download(self, filename, info_dict):
        """
        Download every fragment listed in info_dict['fragments'] into filename.

        Each fragment is a dict carrying either an absolute 'url' or a 'path'
        that is resolved against info_dict['fragment_base_url'].

        If _get_real_downloader() yields a downloader for the 'frag_urls'
        protocol, the whole list of pending fragments is delegated to it.
        Otherwise fragments are fetched natively, sequentially or through a
        thread pool when the 'concurrent_fragment_downloads' param is > 1,
        and then appended in manifest order.

        Returns True on success and False on failure. A DownloadError raised
        for a fatal fragment and a KeyboardInterrupt are propagated.
        """
        fragment_base_url = info_dict.get('fragment_base_url')
        # In test mode only the first fragment is downloaded
        fragments = info_dict['fragments'][:1] if self.params.get(
            'test', False) else info_dict['fragments']

        real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)

        ctx = {
            'filename': filename,
            'total_frags': len(fragments),
        }

        # NOTE(review): both helpers are expected to populate
        # ctx['fragment_index'] (read below in either mode) and the native one
        # ctx['tmpfilename'] as well - confirm against FragmentFD
        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

        # Collect the fragments that still have to be fetched; fragments whose
        # 1-based index does not exceed ctx['fragment_index'] are skipped
        fragments_to_download = []
        for i, fragment in enumerate(fragments):
            frag_index = i + 1
            if frag_index <= ctx['fragment_index']:
                continue
            fragment_url = fragment.get('url')
            if not fragment_url:
                assert fragment_base_url
                fragment_url = urljoin(fragment_base_url, fragment['path'])

            fragments_to_download.append({
                'frag_index': frag_index,
                'index': i,
                'url': fragment_url,
            })

        if real_downloader:
            info_copy = info_dict.copy()
            info_copy['fragments'] = fragments_to_download
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            return bool(fd.real_download(filename, info_copy))

        def download_fragment(fragment):
            """
            Fetch a single fragment, retrying on HTTP errors.

            Returns a (frag_content, frag_index) tuple; frag_content is False
            when the fragment could not be obtained.
            """
            i = fragment['index']
            frag_index = fragment['frag_index']
            fragment_url = fragment['url']

            # NOTE(review): ctx is shared by all worker threads when
            # downloading concurrently, so this value is only reliable in
            # the sequential mode
            ctx['fragment_index'] = frag_index

            # In DASH, the first segment contains necessary headers to
            # generate a valid MP4 file, so always abort for the first segment
            fatal = i == 0 or not skip_unavailable_fragments
            count = 0
            while count <= fragment_retries:
                try:
                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                except compat_urllib_error.HTTPError as err:
                    # YouTube may often return 404 HTTP error for a fragment causing the
                    # whole download to fail. However if the same fragment is immediately
                    # retried with the same request data this usually succeeds (1-2 attempts
                    # is usually enough) thus allowing to download the whole file successfully.
                    # To be future-proof we will retry all fragments that fail with any
                    # HTTP error.
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
                    continue
                except DownloadError:
                    # Don't retry fragment if error occurred during HTTP downloading
                    # itself since it has own retry settings
                    if fatal:
                        raise
                    # Report the fragment as unavailable. Falling through to
                    # the final return here would reference frag_content
                    # before it has ever been assigned
                    return False, frag_index
                if not success:
                    return False, frag_index
                return frag_content, frag_index

            # The loop can only be left here once all retries are used up
            if fatal:
                self.report_error('giving up after %s fragment retries' % fragment_retries)
            return False, frag_index

        def append_fragment(frag_content, frag_index):
            """
            Append a downloaded fragment to the output.

            Returns True when the fragment was appended or skipped and False
            when the download must be aborted.
            """
            def fragment_unavailable():
                if skip_unavailable_fragments:
                    self.report_skip_fragment(frag_index)
                    return True
                self.report_error(
                    'fragment %s not found, unable to continue' % frag_index)
                return False

            if not frag_content:
                return fragment_unavailable()

            fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
            try:
                # The fragment file is opened only to obtain its sanitized
                # name (and to make sure that it exists)
                frag_stream, frag_sanitized = sanitize_open(fragment_filename, 'rb')
                ctx['fragment_filename_sanitized'] = frag_sanitized
                frag_stream.close()
                self._append_fragment(ctx, frag_content)
            except FileNotFoundError:
                return fragment_unavailable()
            return True

        # "or 1" also covers the option being present but set to None
        max_workers = self.params.get('concurrent_fragment_downloads') or 1
        if can_threaded_download and max_workers > 1:
            self.report_warning('The download speed shown is only of one thread. This is a known issue')
            with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
                futures = [pool.submit(download_fragment, fragment) for fragment in fragments_to_download]
                # timeout must be 0 to return instantly
                done, not_done = concurrent.futures.wait(futures, timeout=0)
                try:
                    while not_done:
                        # Check every 1 second for KeyboardInterrupt
                        freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
                        done |= freshly_done
                except KeyboardInterrupt:
                    for future in not_done:
                        future.cancel()
                    # timeout must be none to cancel
                    concurrent.futures.wait(not_done, timeout=None)
                    # Re-raise the original exception to keep its traceback
                    raise
            # futures is ordered like fragments_to_download, hence the
            # fragments are appended in manifest order
            results = [future.result() for future in futures]

            for frag_content, frag_index in results:
                if not append_fragment(frag_content, frag_index):
                    return False
        else:
            for fragment in fragments_to_download:
                frag_content, frag_index = download_fragment(fragment)
                if not append_fragment(frag_content, frag_index):
                    return False

        self._finish_frag_download(ctx)
        return True
[YoutubeDL] Support DASH manifest downloading 2015-06-03 17:10:18 +02:00			`from __future__ import unicode_literals`

Native concurrent downloading of fragments (#166) * Option `--concurrent-fragments` (`-N`) to set the number of threads Related: #165 Known issues: * When receiving Ctrl+C, the process will exit only after finishing the currently downloading fragments * The download progress shows the speed of only one thread Authored by shirt-dev 2021-03-13 05:46:58 +01:00			`try:`
			`import concurrent.futures`
			`can_threaded_download = True`
			`except ImportError:`
			`can_threaded_download = False`

#55 Add aria2c support for DASH (mpd) and HLS (m3u8) Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-08 17:46:01 +01:00			`from ..downloader import _get_real_downloader`
[downloader/dash] Implement dashsegments fd in terms of fragment fd 2016-02-09 17:25:02 +01:00			`from .fragment import FragmentFD`
#55 Add aria2c support for DASH (mpd) and HLS (m3u8) Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-08 17:46:01 +01:00
[downloader/dash] Add fragment retry capability YouTube may often return 404 HTTP error for a fragment causing the whole download to fail. However if the same fragment is immediately retried with the same request data this usually succeeds (1-2 attempts is usually enough) thus allowing to download the whole file successfully. So, we will retry all fragments that fail with 404 HTTP error for now. 2016-03-19 15:42:23 +01:00			`from ..compat import compat_urllib_error`
[downloader/dash] Improve error handling (#16927) 2018-07-08 03:22:56 +02:00			`from ..utils import (`
			`DownloadError,`
Native concurrent downloading of fragments (#166) * Option `--concurrent-fragments` (`-N`) to set the number of threads Related: #165 Known issues: * When receiving Ctrl+C, the process will exit only after finishing the currently downloading fragments * The download progress shows the speed of only one thread Authored by shirt-dev 2021-03-13 05:46:58 +01:00			`sanitize_open,`
[downloader/dash] Improve error handling (#16927) 2018-07-08 03:22:56 +02:00			`urljoin,`
			`)`
[downloader/dash] Reorder imports 2015-06-04 16:12:05 +02:00
[YoutubeDL] Support DASH manifest downloading 2015-06-03 17:10:18 +02:00
[downloader/dash] Implement dashsegments fd in terms of fragment fd 2016-02-09 17:25:02 +01:00			`class DashSegmentsFD(FragmentFD):`
[YoutubeDL] Support DASH manifest downloading 2015-06-03 17:10:18 +02:00			`"""`
More improvements to HLS/DASH external downloader code * Fix error when there is no `protocol` in `info_dict` * Move HLS byte range detection to `Aria2cFD` so that the download will fall back to the native downloader instead of ffmpeg * Fix bug with getting no fragments in DASH * Convert `check_results` in `can_download` to a generator 2021-03-10 16:26:24 +01:00			`Download segments in a DASH manifest. External downloaders can take over`
			`the fragment downloads by supporting the 'frag_urls' protocol`
[YoutubeDL] Support DASH manifest downloading 2015-06-03 17:10:18 +02:00			`"""`

[downloader/dash] Implement dashsegments fd in terms of fragment fd 2016-02-09 17:25:02 +01:00			`FD_NAME = 'dashsegments'`
[downloader/dash] Add testing facility 2015-06-10 08:45:54 +02:00
[downloader/dash] Implement dashsegments fd in terms of fragment fd 2016-02-09 17:25:02 +01:00			`def real_download(self, filename, info_dict):`
Use relative paths for DASH fragments (closes #12990) 10x reduced JSON size refs #13810 2017-08-05 01:57:19 +02:00			`fragment_base_url = info_dict.get('fragment_base_url')`
			`fragments = info_dict['fragments'][:1] if self.params.get(`
Refactor fragments interface and dash segments downloader - Eliminate segment_urls and initialization_url + Introduce manifest_url (manifest may contain unfragmented data in this case url will be used for direct media URL and manifest_url for manifest itself correspondingly) * Rewrite dashsegments downloader to use fragments data * Improve generic mpd extraction 2016-09-17 15:35:22 +02:00			`'test', False) else info_dict['fragments']`
[downloader/dash] Add testing facility 2015-06-10 08:45:54 +02:00
#55 Add aria2c support for DASH (mpd) and HLS (m3u8) Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-08 17:46:01 +01:00			`real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)`

[downloader/dash] Implement dashsegments fd in terms of fragment fd 2016-02-09 17:25:02 +01:00			`ctx = {`
			`'filename': filename,`
Use relative paths for DASH fragments (closes #12990) 10x reduced JSON size refs #13810 2017-08-05 01:57:19 +02:00			`'total_frags': len(fragments),`
[downloader/dash] Implement dashsegments fd in terms of fragment fd 2016-02-09 17:25:02 +01:00			`}`
[downloader/dash] Add testing facility 2015-06-10 08:45:54 +02:00
#55 Add aria2c support for DASH (mpd) and HLS (m3u8) Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-08 17:46:01 +01:00			`if real_downloader:`
			`self._prepare_external_frag_download(ctx)`
			`else:`
			`self._prepare_and_start_frag_download(ctx)`
[YoutubeDL] Support DASH manifest downloading 2015-06-03 17:10:18 +02:00
[downloader/dash] Add fragment retry capability YouTube may often return 404 HTTP error for a fragment causing the whole download to fail. However if the same fragment is immediately retried with the same request data this usually succeeds (1-2 attempts is usually enough) thus allowing to download the whole file successfully. So, we will retry all fragments that fail with 404 HTTP error for now. 2016-03-19 15:42:23 +01:00			`fragment_retries = self.params.get('fragment_retries', 0)`
[downloader/dash:hls] Respect --fragment-retries and --skip-unavailable-fragments (Closes #10165, closes #10448) 2016-08-26 23:55:55 +02:00			`skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)`
[downloader/dash] Add fragment retry capability YouTube may often return 404 HTTP error for a fragment causing the whole download to fail. However if the same fragment is immediately retried with the same request data this usually succeeds (1-2 attempts is usually enough) thus allowing to download the whole file successfully. So, we will retry all fragments that fail with 404 HTTP error for now. 2016-03-19 15:42:23 +01:00
More improvements to HLS/DASH external downloader code * Fix error when there is no `protocol` in `info_dict` * Move HLS byte range detection to `Aria2cFD` so that the download will fall back to the native downloader instead of ffmpeg * Fix bug with getting no fragments in DASH * Convert `check_results` in `can_download` to a generator 2021-03-10 16:26:24 +01:00			`fragments_to_download = []`
[fragment,hls,f4m,dash,ism] improve fragment downloading - resume immediately - no need to concatenate segments and decrypt them on every resume - no need to save temp files for segments and for hls downloader: - no need to download keys for segments that already downloaded 2016-06-28 19:07:50 +02:00			`frag_index = 0`
Use relative paths for DASH fragments (closes #12990) 10x reduced JSON size refs #13810 2017-08-05 01:57:19 +02:00			`for i, fragment in enumerate(fragments):`
[fragment,hls,f4m,dash,ism] improve fragment downloading - resume immediately - no need to concatenate segments and decrypt them on every resume - no need to save temp files for segments and for hls downloader: - no need to download keys for segments that already downloaded 2016-06-28 19:07:50 +02:00			`frag_index += 1`
[downloader/fragment] use the documented names for fragment progress_hooks fields 2017-04-22 17:42:24 +02:00			`if frag_index <= ctx['fragment_index']:`
[fragment,hls,f4m,dash,ism] improve fragment downloading - resume immediately - no need to concatenate segments and decrypt them on every resume - no need to save temp files for segments and for hls downloader: - no need to download keys for segments that already downloaded 2016-06-28 19:07:50 +02:00			`continue`
#55 Add aria2c support for DASH (mpd) and HLS (m3u8) Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-08 17:46:01 +01:00			`fragment_url = fragment.get('url')`
			`if not fragment_url:`
			`assert fragment_base_url`
			`fragment_url = urljoin(fragment_base_url, fragment['path'])`

Native concurrent downloading of fragments (#166) * Option `--concurrent-fragments` (`-N`) to set the number of threads Related: #165 Known issues: * When receiving Ctrl+C, the process will exit only after finishing the currently downloading fragments * The download progress shows the speed of only one thread Authored by shirt-dev 2021-03-13 05:46:58 +01:00			`fragments_to_download.append({`
			`'frag_index': frag_index,`
			`'index': i,`
			`'url': fragment_url,`
			`})`
[downloader/dash] Implement dashsegments fd in terms of fragment fd 2016-02-09 17:25:02 +01:00
#55 Add aria2c support for DASH (mpd) and HLS (m3u8) Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-08 17:46:01 +01:00			`if real_downloader:`
			`info_copy = info_dict.copy()`
More improvements to HLS/DASH external downloader code * Fix error when there is no `protocol` in `info_dict` * Move HLS byte range detection to `Aria2cFD` so that the download will fall back to the native downloader instead of ffmpeg * Fix bug with getting no fragments in DASH * Convert `check_results` in `can_download` to a generator 2021-03-10 16:26:24 +01:00			`info_copy['fragments'] = fragments_to_download`
#55 Add aria2c support for DASH (mpd) and HLS (m3u8) Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-08 17:46:01 +01:00			`fd = real_downloader(self.ydl, self.params)`
			`# TODO: Make progress updates work without hooking twice`
			`# for ph in self._progress_hooks:`
			`# fd.add_progress_hook(ph)`
			`success = fd.real_download(filename, info_copy)`
			`if not success:`
			`return False`
			`else:`
Native concurrent downloading of fragments (#166) * Option `--concurrent-fragments` (`-N`) to set the number of threads Related: #165 Known issues: * When receiving Ctrl+C, the process will exit only after finishing the currently downloading fragments * The download progress shows the speed of only one thread Authored by shirt-dev 2021-03-13 05:46:58 +01:00			`def download_fragment(fragment):`
			`i = fragment['index']`
			`frag_index = fragment['frag_index']`
			`fragment_url = fragment['url']`

			`ctx['fragment_index'] = frag_index`

			`# In DASH, the first segment contains necessary headers to`
			`# generate a valid MP4 file, so always abort for the first segment`
			`fatal = i == 0 or not skip_unavailable_fragments`
			`count = 0`
			`while count <= fragment_retries:`
			`try:`
			`success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)`
			`if not success:`
			`return False, frag_index`
			`break`
			`except compat_urllib_error.HTTPError as err:`
			`# YouTube may often return 404 HTTP error for a fragment causing the`
			`# whole download to fail. However if the same fragment is immediately`
			`# retried with the same request data this usually succeeds (1-2 attempts`
			`# is usually enough) thus allowing to download the whole file successfully.`
			`# To be future-proof we will retry all fragments that fail with any`
			`# HTTP error.`
			`count += 1`
			`if count <= fragment_retries:`
			`self.report_retry_fragment(err, frag_index, count, fragment_retries)`
			`except DownloadError:`
			`# Don't retry fragment if error occurred during HTTP downloading`
			`# itself since it has own retry settings`
			`if not fatal:`
			`break`
			`raise`

			`if count > fragment_retries:`
			`if not fatal:`
			`return False, frag_index`
			`self.report_error('giving up after %s fragment retries' % fragment_retries)`
			`return False, frag_index`

			`return frag_content, frag_index`

			`def append_fragment(frag_content, frag_index):`
			`if frag_content:`
			`fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)`
			`try:`
			`file, frag_sanitized = sanitize_open(fragment_filename, 'rb')`
			`ctx['fragment_filename_sanitized'] = frag_sanitized`
			`file.close()`
			`self._append_fragment(ctx, frag_content)`
			`return True`
			`except FileNotFoundError:`
			`if skip_unavailable_fragments:`
			`self.report_skip_fragment(frag_index)`
			`return True`
			`else:`
			`self.report_error(`
			`'fragment %s not found, unable to continue' % frag_index)`
			`return False`
			`else:`
			`if skip_unavailable_fragments:`
			`self.report_skip_fragment(frag_index)`
			`return True`
			`else:`
			`self.report_error(`
			`'fragment %s not found, unable to continue' % frag_index)`
			`return False`

			`max_workers = self.params.get('concurrent_fragment_downloads', 1)`
			`if can_threaded_download and max_workers > 1:`
			`self.report_warning('The download speed shown is only of one thread. This is a known issue')`
			`with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:`
			`futures = [pool.submit(download_fragment, fragment) for fragment in fragments_to_download]`
			`# timeout must be 0 to return instantly`
			`done, not_done = concurrent.futures.wait(futures, timeout=0)`
			`try:`
			`while not_done:`
			`# Check every 1 second for KeyboardInterrupt`
			`freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)`
			`done \|= freshly_done`
			`except KeyboardInterrupt:`
			`for future in not_done:`
			`future.cancel()`
			`# timeout must be none to cancel`
			`concurrent.futures.wait(not_done, timeout=None)`
			`raise KeyboardInterrupt`
			`results = [future.result() for future in futures]`

			`for frag_content, frag_index in results:`
			`result = append_fragment(frag_content, frag_index)`
			`if not result:`
			`return False`
			`else:`
			`for fragment in fragments_to_download:`
			`frag_content, frag_index = download_fragment(fragment)`
			`result = append_fragment(frag_content, frag_index)`
			`if not result:`
			`return False`

#55 Add aria2c support for DASH (mpd) and HLS (m3u8) Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-08 17:46:01 +01:00			`self._finish_frag_download(ctx)`
[YoutubeDL] Support DASH manifest downloading 2015-06-03 17:10:18 +02:00			`return True`