From 64fa820ccf61a7aea6c2a48b1362b3a4ec270cad Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 25 May 2022 17:53:46 +0530 Subject: [PATCH] [cleanup] Misc fixes (see desc) * [tvver] Fix bug in 6837633a4a614920b6e43ffc6b4b8590dca8c9d7 - Closes #4054 * [rumble] Fix tests - Closes #3976 * [make] Remove `cat` abuse - Closes #3989 * [make] Revert #3684 - Closes #3814 * [utils] Improve `get_elements_by_class` - Closes #3993 * [utils] Inherit `Namespace` from `types.SimpleNamespace` * [utils] Use `re.fullmatch` for matching filters * [jsinterp] Handle quotes in `_separate` * [make_readme] Allow overshooting last line Authored by: pukkandan, kwconder, MrRawes, Lesmiscore --- .github/workflows/build.yml | 2 +- Makefile | 2 +- README.md | 30 ++++++++++-------------------- devscripts/make_readme.py | 8 ++++++++ yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/downloader/common.py | 2 +- yt_dlp/extractor/rumble.py | 7 +++++++ yt_dlp/jsinterp.py | 8 +++++++- yt_dlp/utils.py | 33 ++++++++++++--------------------- 9 files changed, 49 insertions(+), 47 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bb9507165..687f67b34 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,7 +43,7 @@ jobs: run: git push origin ${{ github.event.ref }} - name: Get Changelog run: | - changelog=$(cat Changelog.md | grep -oPz '(?s)(?<=### ${{ steps.bump_version.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)') || true + changelog=$(grep -oPz '(?s)(?<=### ${{ steps.bump_version.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)' Changelog.md) || true echo "changelog<> $GITHUB_ENV echo "$changelog" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV diff --git a/Makefile b/Makefile index 4c16e88ad..f8b6e556f 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ PYTHON ?= /usr/bin/env python3 SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) # set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 -MARKDOWN = $(shell if [ "$(pandoc -v | head -n1 | cut -d" " -f2 | head -c1)" = "2" ]; then echo markdown-smart; else echo markdown; fi) +MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) install: lazy-extractors yt-dlp yt-dlp.1 completions mkdir -p $(DESTDIR)$(BINDIR) diff --git a/README.md b/README.md index a0a688e23..af613ff7e 100644 --- a/README.md +++ b/README.md @@ -337,8 +337,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi --list-extractors List all supported extractors and exit --extractor-descriptions Output descriptions of all supported extractors and exit - --force-generic-extractor Force extraction to use the generic - extractor + --force-generic-extractor Force extraction to use the generic extractor --default-search PREFIX Use this prefix for unqualified URLs. Eg: "gvsearch2:python" downloads two videos from google videos for the search term "python". @@ -397,8 +396,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi aliases; so be carefull to avoid defining recursive options. As a safety measure, each alias may be triggered a maximum of 100 - times. This option can be used multiple - times + times. This option can be used multiple times ## Network Options: --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. To @@ -425,8 +423,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi explicitly provided two-letter ISO 3166-2 country code --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with - explicitly provided IP block in CIDR - notation + explicitly provided IP block in CIDR notation ## Video Selection: --playlist-start NUMBER Playlist video to start at (default is 1) @@ -636,8 +633,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi modification time (default) --no-mtime Do not use the Last-modified header to set the file modification time - --write-description Write video description to a .description - file + --write-description Write video description to a .description file --no-write-description Do not write video description (default) --write-info-json Write video metadata to a .info.json file (this may contain personal information) @@ -659,8 +655,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi extraction is known to be quick (Alias: --no-get-comments) --load-info-json FILE JSON file containing the video information - (created with the "--write-info-json" - option) + (created with the "--write-info-json" option) --cookies FILE Netscape formatted file to read cookies from and dump cookie jar in --no-cookies Do not read/dump cookies from/to file @@ -676,8 +671,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi for decrypting Chromium cookies on Linux can be (optionally) specified after the browser name separated by a "+". Currently supported - keyrings are: basictext, gnomekeyring, - kwallet + keyrings are: basictext, gnomekeyring, kwallet --no-cookies-from-browser Do not load cookies from browser (default) --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information (such @@ -689,8 +683,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi ## Thumbnail Options: --write-thumbnail Write thumbnail image to disk - --no-write-thumbnail Do not write thumbnail image to disk - (default) + --no-write-thumbnail Do not write thumbnail image to disk (default) --write-all-thumbnails Write all thumbnail image formats to disk --list-thumbnails List available thumbnails of each video. Simulate unless --no-simulate is used @@ -976,8 +969,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi otherwise), force (try fixing even if file already exists) --ffmpeg-location PATH Location of the ffmpeg binary; either the - path to the binary or its containing - directory + path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with when to execute it (after_move if unspecified), separated by a ":". Supported @@ -1004,8 +996,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi be used with "--paths" and "--output" to set the output filename for the split files. See "OUTPUT TEMPLATE" for details - --no-split-chapters Do not split video based on chapters - (default) + --no-split-chapters Do not split video based on chapters (default) --remove-chapters REGEX Remove chapters whose title matches the given regular expression. The syntax is the same as --download-sections. This option can @@ -1036,8 +1027,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi (after downloading and processing all formats of a video), or "playlist" (at end of playlist). This option can be used - multiple times to add different - postprocessors + multiple times to add different postprocessors ## SponsorBlock Options: Make chapter entries for, or remove various segments (sponsor, diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 42578cb0a..015212aa3 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -11,6 +11,7 @@ README_FILE = 'README.md' OPTIONS_START = 'General Options:' OPTIONS_END = 'CONFIGURATION' EPILOG_START = 'See full documentation' +ALLOWED_OVERSHOOT = 2 DISABLE_PATCH = object() @@ -28,6 +29,7 @@ def apply_patch(text, patch): options = take_section(sys.stdin.read(), f'\n {OPTIONS_START}', f'\n{EPILOG_START}', shift=1) +max_width = max(map(len, options.split('\n'))) switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group()) delim = f'\n{" " * switch_col_width}' @@ -44,6 +46,12 @@ PATCHES = ( rf'(?m)({delim}\S+)+$', lambda mobj: ''.join((delim, mobj.group(0).replace(delim, ''))) ), + ( # Allow overshooting last line + rf'(?m)^(?P.+)${delim}(?P.+)$(?!{delim})', + lambda mobj: (mobj.group().replace(delim, ' ') + if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT + else mobj.group()) + ), ( # Avoid newline when a space is available b/w switch and description DISABLE_PATCH, # This creates issues with prepare_manpage r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim), diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index bf62f2820..b8c250d73 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -576,7 +576,7 @@ class YoutubeDL: ) self._allow_colors = Namespace(**{ type_: not self.params.get('no_color') and supports_terminal_sequences(stream) - for type_, stream in self._out_files if type_ != 'console' + for type_, stream in self._out_files.items_ if type_ != 'console' }) if sys.version_info < (3, 6): @@ -3671,7 +3671,7 @@ class YoutubeDL: sys.getfilesystemencoding(), self.get_encoding(), ', '.join( - f'{key} {get_encoding(stream)}' for key, stream in self._out_files + f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ if stream is not None and key != 'console') ) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index cd30d1eff..b559e7cae 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -302,7 +302,7 @@ class FileDownloader: ) def _report_progress_status(self, s, default_template): - for name, style in self.ProgressStyles: + for name, style in self.ProgressStyles.items_: name = f'_{name}_str' if name not in s: continue diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index 83b688532..8c0d0f37d 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -24,6 +24,11 @@ class RumbleEmbedIE(InfoExtractor): 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm', 'timestamp': 1571611968, 'upload_date': '20191020', + 'channel_url': 'https://rumble.com/c/WMAR', + 'channel': 'WMAR', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg', + 'duration': 234, + 'uploader': 'WMAR', } }, { 'url': 'https://rumble.com/embed/vslb7v', @@ -38,6 +43,7 @@ class RumbleEmbedIE(InfoExtractor): 'channel': 'CTNews', 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', 'duration': 901, + 'uploader': 'CTNews', } }, { 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', @@ -96,6 +102,7 @@ class RumbleEmbedIE(InfoExtractor): 'channel': author.get('name'), 'channel_url': author.get('url'), 'duration': int_or_none(video.get('duration')), + 'uploader': author.get('name'), } diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 70857b798..56229cd99 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -24,6 +24,7 @@ _ASSIGN_OPERATORS.append(('=', (lambda cur, right: right))) _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*' _MATCHING_PARENS = dict(zip('({[', ')}]')) +_QUOTES = '\'"' class JS_Break(ExtractorError): @@ -69,12 +70,17 @@ class JSInterpreter: return counters = {k: 0 for k in _MATCHING_PARENS.values()} start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 + in_quote, escaping = None, False for idx, char in enumerate(expr): if char in _MATCHING_PARENS: counters[_MATCHING_PARENS[char]] += 1 elif char in counters: counters[char] -= 1 - if char != delim[pos] or any(counters.values()): + elif not escaping and char in _QUOTES and in_quote in (char, None): + in_quote = None if in_quote else char + escaping = not escaping and in_quote and char == '\\' + + if char != delim[pos] or any(counters.values()) or in_quote: pos = 0 continue elif pos != delim_len: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 137d29d0a..e6e6d2759 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -34,6 +34,7 @@ import sys import tempfile import time import traceback +import types import urllib.parse import xml.etree.ElementTree import zlib @@ -397,14 +398,14 @@ def get_element_html_by_attribute(attribute, value, html, **kargs): def get_elements_by_class(class_name, html, **kargs): """Return the content of all tags with the specified class in the passed HTML document as a list""" return get_elements_by_attribute( - 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), + 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name), html, escape_value=False) def get_elements_html_by_class(class_name, html): """Return the html of all tags with the specified class in the passed HTML document as a list""" return get_elements_html_by_attribute( - 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), + 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name), html, escape_value=False) @@ -3404,16 +3405,15 @@ def _match_one(filter_part, dct, incomplete): else: is_incomplete = lambda k: k in incomplete - operator_rex = re.compile(r'''(?x)\s* + operator_rex = re.compile(r'''(?x) (?P[a-z_]+) \s*(?P!\s*)?(?P%s)(?P\s*\?)?\s* (?: (?P["\'])(?P.+?)(?P=quote)| (?P.+?) ) - \s*$ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) - m = operator_rex.search(filter_part) + m = operator_rex.fullmatch(filter_part.strip()) if m: m = m.groupdict() unnegated_op = COMPARISON_OPERATORS[m['op']] @@ -3449,11 +3449,10 @@ def _match_one(filter_part, dct, incomplete): '': lambda v: (v is True) if isinstance(v, bool) else (v is not None), '!': lambda v: (v is False) if isinstance(v, bool) else (v is None), } - operator_rex = re.compile(r'''(?x)\s* + operator_rex = re.compile(r'''(?x) (?P%s)\s*(?P[a-z_]+) - \s*$ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) - m = operator_rex.search(filter_part) + m = operator_rex.fullmatch(filter_part.strip()) if m: op = UNARY_OPERATORS[m.group('op')] actual_value = dct.get(m.group('key')) @@ -5395,23 +5394,15 @@ class classproperty: return self.func(cls) -class Namespace: +class Namespace(types.SimpleNamespace): """Immutable namespace""" - def __init__(self, **kwargs): - self._dict = kwargs - - def __getattr__(self, attr): - return self._dict[attr] - - def __contains__(self, item): - return item in self._dict.values() - def __iter__(self): - return iter(self._dict.items()) + return iter(self.__dict__.values()) - def __repr__(self): - return f'{type(self).__name__}({", ".join(f"{k}={v}" for k, v in self)})' + @property + def items_(self): + return self.__dict__.items() # Deprecated