1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-01-04 23:47:25 +01:00

[safari] Fix authentication (closes #21090)

This commit is contained in:
Sergey M․ 2019-05-18 03:23:40 +07:00
parent e3c1266f49
commit a9e03736df
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -1,15 +1,18 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
std_headers,
urlencode_postdata,
update_url_query, update_url_query,
) )
@ -31,44 +34,52 @@ class SafariBaseIE(InfoExtractor):
if username is None: if username is None:
return return
headers = std_headers.copy() _, urlh = self._download_webpage_handle(
if 'Referer' not in headers: 'https://learning.oreilly.com/accounts/login-check/', None,
headers['Referer'] = self._LOGIN_URL 'Downloading login page')
login_page = self._download_webpage( def is_logged(urlh):
self._LOGIN_URL, None, 'Downloading login form', headers=headers) return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
def is_logged(webpage): if is_logged(urlh):
return any(re.search(p, webpage) for p in (
r'href=["\']/accounts/logout/', r'>Sign Out<'))
if is_logged(login_page):
self.LOGGED_IN = True self.LOGGED_IN = True
return return
csrf = self._html_search_regex( redirect_url = compat_str(urlh.geturl())
r"name='csrfmiddlewaretoken'\s+value='([^']+)'", parsed_url = compat_urlparse.urlparse(redirect_url)
login_page, 'csrf token') qs = compat_parse_qs(parsed_url.query)
next_uri = compat_urlparse.urljoin(
'https://api.oreilly.com', qs['next'][0])
login_form = { auth, urlh = self._download_json_handle(
'csrfmiddlewaretoken': csrf, 'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
'email': username, data=json.dumps({
'password1': password, 'email': username,
'login': 'Sign In', 'password': password,
'next': '', 'redirect_uri': next_uri,
} }).encode(), headers={
'Content-Type': 'application/json',
'Referer': redirect_url,
}, expected_status=400)
request = sanitized_Request( credentials = auth.get('credentials')
self._LOGIN_URL, urlencode_postdata(login_form), headers=headers) if (not auth.get('logged_in') and not auth.get('redirect_uri')
login_page = self._download_webpage( and credentials):
request, None, 'Logging in')
if not is_logged(login_page):
raise ExtractorError( raise ExtractorError(
'Login failed; make sure your credentials are correct and try again.', 'Unable to login: %s' % credentials, expected=True)
expected=True)
self.LOGGED_IN = True # oreilly serves two same groot_sessionid cookies in Set-Cookie header
# and expects first one to be actually set
self._apply_first_set_cookie_header(urlh, 'groot_sessionid')
_, urlh = self._download_webpage_handle(
auth.get('redirect_uri') or next_uri, None, 'Completing login',)
if is_logged(urlh):
self.LOGGED_IN = True
return
raise ExtractorError('Unable to log in')
class SafariIE(SafariBaseIE): class SafariIE(SafariBaseIE):
@ -76,7 +87,7 @@ class SafariIE(SafariBaseIE):
IE_DESC = 'safaribooksonline.com online video' IE_DESC = 'safaribooksonline.com online video'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
(?: (?:
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html| library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+) videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
@ -107,6 +118,9 @@ class SafariIE(SafariBaseIE):
}, { }, {
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
'only_matching': True,
}] }]
_PARTNER_ID = '1926081' _PARTNER_ID = '1926081'
@ -163,7 +177,7 @@ class SafariIE(SafariBaseIE):
class SafariApiIE(SafariBaseIE): class SafariApiIE(SafariBaseIE):
IE_NAME = 'safari:api' IE_NAME = 'safari:api'
_VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
@ -188,7 +202,7 @@ class SafariCourseIE(SafariBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
(?: (?:
library/view/[^/]+| library/view/[^/]+|
api/v1/book| api/v1/book|
@ -219,6 +233,9 @@ class SafariCourseIE(SafariBaseIE):
}, { }, {
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
'only_matching': True,
}] }]
@classmethod @classmethod