commit a9e03736dfb2038d6a569e6305f4ac727a8ac71b
parent e3c1266f492d710e2acbf0d80f44f7f805eb5187
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 18 May 2019 03:23:40 +0700

[safari] Fix authentication (closes #21090)

Diffstat:
Myoutube_dl/extractor/safari.py | 87+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
1 file changed, 52 insertions(+), 35 deletions(-)

diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py @@ -1,15 +1,18 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urlparse, +) from ..utils import ( ExtractorError, - sanitized_Request, - std_headers, - urlencode_postdata, update_url_query, ) @@ -31,44 +34,52 @@ class SafariBaseIE(InfoExtractor): if username is None: return - headers = std_headers.copy() - if 'Referer' not in headers: - headers['Referer'] = self._LOGIN_URL - - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login form', headers=headers) + _, urlh = self._download_webpage_handle( + 'https://learning.oreilly.com/accounts/login-check/', None, + 'Downloading login page') - def is_logged(webpage): - return any(re.search(p, webpage) for p in ( - r'href=["\']/accounts/logout/', r'>Sign Out<')) + def is_logged(urlh): + return 'learning.oreilly.com/home/' in compat_str(urlh.geturl()) - if is_logged(login_page): + if is_logged(urlh): self.LOGGED_IN = True return - csrf = self._html_search_regex( - r"name='csrfmiddlewaretoken'\s+value='([^']+)'", - login_page, 'csrf token') + redirect_url = compat_str(urlh.geturl()) + parsed_url = compat_urlparse.urlparse(redirect_url) + qs = compat_parse_qs(parsed_url.query) + next_uri = compat_urlparse.urljoin( + 'https://api.oreilly.com', qs['next'][0]) + + auth, urlh = self._download_json_handle( + 'https://www.oreilly.com/member/auth/login/', None, 'Logging in', + data=json.dumps({ + 'email': username, + 'password': password, + 'redirect_uri': next_uri, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Referer': redirect_url, + }, expected_status=400) + + credentials = auth.get('credentials') + if (not auth.get('logged_in') and not auth.get('redirect_uri') + and credentials): + raise ExtractorError( + 'Unable to login: %s' % credentials, expected=True) - login_form = { - 'csrfmiddlewaretoken': csrf, - 'email': username, - 'password1': password, - 'login': 'Sign In', - 'next': '', - } + # oreilly serves two same groot_sessionid cookies in Set-Cookie header + # and expects first one to be actually set + self._apply_first_set_cookie_header(urlh, 'groot_sessionid') - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form), headers=headers) - login_page = self._download_webpage( - request, None, 'Logging in') + _, urlh = self._download_webpage_handle( + auth.get('redirect_uri') or next_uri, None, 'Completing login',) - if not is_logged(login_page): - raise ExtractorError( - 'Login failed; make sure your credentials are correct and try again.', - expected=True) + if is_logged(urlh): + self.LOGGED_IN = True + return - self.LOGGED_IN = True + raise ExtractorError('Unable to log in') class SafariIE(SafariBaseIE): @@ -76,7 +87,7 @@ class SafariIE(SafariBaseIE): IE_DESC = 'safaribooksonline.com online video' _VALID_URL = r'''(?x) https?:// - (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/ + (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ (?: library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html| videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+) @@ -107,6 +118,9 @@ class SafariIE(SafariBaseIE): }, { 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', 'only_matching': True, + }, { + 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html', + 'only_matching': True, }] _PARTNER_ID = '1926081' @@ -163,7 +177,7 @@ class SafariIE(SafariBaseIE): class SafariApiIE(SafariBaseIE): IE_NAME = 'safari:api' - _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', @@ -188,7 +202,7 @@ class SafariCourseIE(SafariBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/ + (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ (?: library/view/[^/]+| api/v1/book| @@ -219,6 +233,9 @@ class SafariCourseIE(SafariBaseIE): }, { 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', 'only_matching': True, + }, { + 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', + 'only_matching': True, }] @classmethod