mirror of https://github.com/yt-dlp/yt-dlp.git
[cookies] Parse cookies leniently (#4780)
Closes #4776, #3778 Authored by: Grub4K
This commit is contained in:
parent
5736d79172
commit
8817a80d3a
|
@ -3,6 +3,7 @@ from datetime import datetime, timezone
|
|||
|
||||
from yt_dlp import cookies
|
||||
from yt_dlp.cookies import (
|
||||
LenientSimpleCookie,
|
||||
LinuxChromeCookieDecryptor,
|
||||
MacChromeCookieDecryptor,
|
||||
WindowsChromeCookieDecryptor,
|
||||
|
@ -137,3 +138,148 @@ class TestCookies(unittest.TestCase):
|
|||
def test_pbkdf2_sha1(self):
|
||||
key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
|
||||
self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')
|
||||
|
||||
|
||||
class TestLenientSimpleCookie(unittest.TestCase):
|
||||
def _run_tests(self, *cases):
|
||||
for message, raw_cookie, expected in cases:
|
||||
cookie = LenientSimpleCookie(raw_cookie)
|
||||
|
||||
with self.subTest(message, expected=expected):
|
||||
self.assertEqual(cookie.keys(), expected.keys(), message)
|
||||
|
||||
for key, expected_value in expected.items():
|
||||
morsel = cookie[key]
|
||||
if isinstance(expected_value, tuple):
|
||||
expected_value, expected_attributes = expected_value
|
||||
else:
|
||||
expected_attributes = {}
|
||||
|
||||
attributes = {
|
||||
key: value
|
||||
for key, value in dict(morsel).items()
|
||||
if value != ""
|
||||
}
|
||||
self.assertEqual(attributes, expected_attributes, message)
|
||||
|
||||
self.assertEqual(morsel.value, expected_value, message)
|
||||
|
||||
def test_parsing(self):
|
||||
self._run_tests(
|
||||
# Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py
|
||||
(
|
||||
"Test basic cookie",
|
||||
"chips=ahoy; vienna=finger",
|
||||
{"chips": "ahoy", "vienna": "finger"},
|
||||
),
|
||||
(
|
||||
"Test quoted cookie",
|
||||
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
|
||||
{"keebler": 'E=mc2; L="Loves"; fudge=\012;'},
|
||||
),
|
||||
(
|
||||
"Allow '=' in an unquoted value",
|
||||
"keebler=E=mc2",
|
||||
{"keebler": "E=mc2"},
|
||||
),
|
||||
(
|
||||
"Allow cookies with ':' in their name",
|
||||
"key:term=value:term",
|
||||
{"key:term": "value:term"},
|
||||
),
|
||||
(
|
||||
"Allow '[' and ']' in cookie values",
|
||||
"a=b; c=[; d=r; f=h",
|
||||
{"a": "b", "c": "[", "d": "r", "f": "h"},
|
||||
),
|
||||
(
|
||||
"Test basic cookie attributes",
|
||||
'Customer="WILE_E_COYOTE"; Version=1; Path=/acme',
|
||||
{"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})},
|
||||
),
|
||||
(
|
||||
"Test flag only cookie attributes",
|
||||
'Customer="WILE_E_COYOTE"; HttpOnly; Secure',
|
||||
{"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})},
|
||||
),
|
||||
(
|
||||
"Test flag only attribute with values",
|
||||
"eggs=scrambled; httponly=foo; secure=bar; Path=/bacon",
|
||||
{"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})},
|
||||
),
|
||||
(
|
||||
"Test special case for 'expires' attribute, 4 digit year",
|
||||
'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT',
|
||||
{"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})},
|
||||
),
|
||||
(
|
||||
"Test special case for 'expires' attribute, 2 digit year",
|
||||
'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT',
|
||||
{"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})},
|
||||
),
|
||||
(
|
||||
"Test extra spaces in keys and values",
|
||||
"eggs = scrambled ; secure ; path = bar ; foo=foo ",
|
||||
{"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"},
|
||||
),
|
||||
(
|
||||
"Test quoted attributes",
|
||||
'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"',
|
||||
{"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})}
|
||||
),
|
||||
# Our own tests that CPython passes
|
||||
(
|
||||
"Allow ';' in quoted value",
|
||||
'chips="a;hoy"; vienna=finger',
|
||||
{"chips": "a;hoy", "vienna": "finger"},
|
||||
),
|
||||
(
|
||||
"Keep only the last set value",
|
||||
"a=c; a=b",
|
||||
{"a": "b"},
|
||||
),
|
||||
)
|
||||
|
||||
def test_lenient_parsing(self):
|
||||
self._run_tests(
|
||||
(
|
||||
"Ignore and try to skip invalid cookies",
|
||||
'chips={"ahoy;": 1}; vienna="finger;"',
|
||||
{"vienna": "finger;"},
|
||||
),
|
||||
(
|
||||
"Ignore cookies without a name",
|
||||
"a=b; unnamed; c=d",
|
||||
{"a": "b", "c": "d"},
|
||||
),
|
||||
(
|
||||
"Ignore '\"' cookie without name",
|
||||
'a=b; "; c=d',
|
||||
{"a": "b", "c": "d"},
|
||||
),
|
||||
(
|
||||
"Skip all space separated values",
|
||||
"x a=b c=d x; e=f",
|
||||
{"a": "b", "c": "d", "e": "f"},
|
||||
),
|
||||
(
|
||||
"Skip all space separated values",
|
||||
'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x',
|
||||
{"a": "b", "c": "d"},
|
||||
),
|
||||
(
|
||||
"Expect quote mending",
|
||||
'a=b; invalid="; c=d',
|
||||
{"a": "b", "c": "d"},
|
||||
),
|
||||
(
|
||||
"Reset morsel after invalid to not capture attributes",
|
||||
"a=b; invalid; Version=1; c=d",
|
||||
{"a": "b", "c": "d"},
|
||||
),
|
||||
(
|
||||
"Continue after non-flag attribute without value",
|
||||
"a=b; path; Version=1; c=d",
|
||||
{"a": "b", "c": "d"},
|
||||
),
|
||||
)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import base64
|
||||
import contextlib
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
@ -990,3 +991,98 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None, conta
|
|||
if profile is not None and _is_path(profile):
|
||||
profile = os.path.expanduser(profile)
|
||||
return browser_name, profile, keyring, container
|
||||
|
||||
|
||||
class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
"""More lenient version of http.cookies.SimpleCookie"""
|
||||
# From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
|
||||
_LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
|
||||
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"
|
||||
|
||||
_RESERVED = {
|
||||
"expires",
|
||||
"path",
|
||||
"comment",
|
||||
"domain",
|
||||
"max-age",
|
||||
"secure",
|
||||
"httponly",
|
||||
"version",
|
||||
"samesite",
|
||||
}
|
||||
|
||||
_FLAGS = {"secure", "httponly"}
|
||||
|
||||
# Added 'bad' group to catch the remaining value
|
||||
_COOKIE_PATTERN = re.compile(r"""
|
||||
\s* # Optional whitespace at start of cookie
|
||||
(?P<key> # Start of group 'key'
|
||||
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
|
||||
) # End of group 'key'
|
||||
( # Optional group: there may not be a value.
|
||||
\s*=\s* # Equal Sign
|
||||
( # Start of potential value
|
||||
(?P<val> # Start of group 'val'
|
||||
"(?:[^\\"]|\\.)*" # Any doublequoted string
|
||||
| # or
|
||||
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
|
||||
| # or
|
||||
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
|
||||
) # End of group 'val'
|
||||
| # or
|
||||
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
|
||||
) # End of potential value
|
||||
)? # End of optional value group
|
||||
\s* # Any number of spaces.
|
||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
||||
""", re.ASCII | re.VERBOSE)
|
||||
|
||||
def load(self, data):
|
||||
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
|
||||
if not isinstance(data, str):
|
||||
return super().load(data)
|
||||
|
||||
morsel = None
|
||||
index = 0
|
||||
length = len(data)
|
||||
|
||||
while 0 <= index < length:
|
||||
match = self._COOKIE_PATTERN.search(data, index)
|
||||
if not match:
|
||||
break
|
||||
|
||||
index = match.end(0)
|
||||
if match.group("bad"):
|
||||
morsel = None
|
||||
continue
|
||||
|
||||
key, value = match.group("key", "val")
|
||||
|
||||
if key[0] == "$":
|
||||
if morsel is not None:
|
||||
morsel[key[1:]] = True
|
||||
continue
|
||||
|
||||
lower_key = key.lower()
|
||||
if lower_key in self._RESERVED:
|
||||
if morsel is None:
|
||||
continue
|
||||
|
||||
if value is None:
|
||||
if lower_key not in self._FLAGS:
|
||||
morsel = None
|
||||
continue
|
||||
value = True
|
||||
else:
|
||||
value, _ = self.value_decode(value)
|
||||
|
||||
morsel[key] = value
|
||||
|
||||
elif value is not None:
|
||||
morsel = self.get(key, http.cookies.Morsel())
|
||||
real_value, coded_value = self.value_decode(value)
|
||||
morsel.set(key, real_value, coded_value)
|
||||
self[key] = morsel
|
||||
|
||||
else:
|
||||
morsel = None
|
||||
|
|
|
@ -22,6 +22,7 @@ import xml.etree.ElementTree
|
|||
|
||||
from ..compat import functools # isort: split
|
||||
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..downloader import FileDownloader
|
||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||
from ..utils import (
|
||||
|
@ -3632,7 +3633,7 @@ class InfoExtractor:
|
|||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a http.cookies.SimpleCookie with the cookies for the url """
|
||||
return http.cookies.SimpleCookie(self._downloader._calc_cookies(url))
|
||||
return LenientSimpleCookie(self._downloader._calc_cookies(url))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue