commit 51c7f40c83a12f9dc0fce0b9e5102a0c13467b6a
parent 4877ffc0e9c1f27262bf6b5a4972d11edd487bfe
Author: Remita Amine <remitamine@gmail.com>
Date:   Mon, 27 Jan 2020 23:37:29 +0100

[vimeo] fix album extraction (closes #23864)

Diffstat:
M youtube_dl/extractor/vimeo.py | 68 +++++++++++++++++++++++++++++++++-----------------------------------
1 file changed, 33 insertions(+), 35 deletions(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py @@ -841,33 +841,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return self._TITLE or self._html_search_regex( self._TITLE_RE, webpage, 'list title', fatal=False) - def _login_list_password(self, page_url, list_id, webpage): - login_form = self._search_regex( - r'(?s)<form[^>]+?id="pw_form"(.*?)</form>', - webpage, 'login form', default=None) - if not login_form: - return webpage - - password = self._downloader.params.get('videopassword') - if password is None: - raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True) - fields = self._hidden_inputs(login_form) - token, vuid = self._extract_xsrft_and_vuid(webpage) - fields['token'] = token - fields['password'] = password - post = urlencode_postdata(fields) - password_path = self._search_regex( - r'action="([^"]+)"', login_form, 'password URL') - password_url = compat_urlparse.urljoin(page_url, password_path) - password_request = sanitized_Request(password_url, post) - password_request.add_header('Content-type', 'application/x-www-form-urlencoded') - self._set_vimeo_cookie('vuid', vuid) - self._set_vimeo_cookie('xsrft', token) - - return self._download_webpage( - password_request, list_id, - 'Verifying the password', 'Wrong password') - def _title_and_entries(self, list_id, base_url): for pagenum in itertools.count(1): page_url = self._page_url(base_url, pagenum) @@ -876,7 +849,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): 'Downloading page %s' % pagenum) if pagenum == 1: - webpage = self._login_list_password(page_url, list_id, webpage) yield self._extract_list_title(webpage) # Try extracting href first since not all videos are available via @@ -923,7 +895,7 @@ class VimeoUserIE(VimeoChannelIE): _BASE_URL_TEMPL = 'https://vimeo.com/%s' -class VimeoAlbumIE(VimeoChannelIE): +class VimeoAlbumIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:album' _VALID_URL = 
r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))' _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' @@ -973,13 +945,39 @@ class VimeoAlbumIE(VimeoChannelIE): def _real_extract(self, url): album_id = self._match_id(url) webpage = self._download_webpage(url, album_id) - webpage = self._login_list_password(url, album_id, webpage) - api_config = self._extract_vimeo_config(webpage, album_id)['api'] + viewer = self._parse_json(self._search_regex( + r'bootstrap_data\s*=\s*({.+?})</script>', + webpage, 'bootstrap data'), album_id)['viewer'] + jwt = viewer['jwt'] + album = self._download_json( + 'https://api.vimeo.com/albums/' + album_id, + album_id, headers={'Authorization': 'jwt ' + jwt}, + query={'fields': 'description,name,privacy'}) + hashed_pass = None + if try_get(album, lambda x: x['privacy']['view']) == 'password': + password = self._downloader.params.get('videopassword') + if not password: + raise ExtractorError( + 'This album is protected by a password, use the --video-password option', + expected=True) + self._set_vimeo_cookie('vuid', viewer['vuid']) + try: + hashed_pass = self._download_json( + 'https://vimeo.com/showcase/%s/auth' % album_id, + album_id, 'Verifying the password', data=urlencode_postdata({ + 'password': password, + 'token': viewer['xsrft'], + }), headers={ + 'X-Requested-With': 'XMLHttpRequest', + })['hashed_pass'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError('Wrong password', expected=True) + raise entries = OnDemandPagedList(functools.partial( - self._fetch_page, album_id, api_config['jwt'], - api_config.get('hashed_pass')), self._PAGE_SIZE) - return self.playlist_result(entries, album_id, self._html_search_regex( - r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False)) + self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE) + return self.playlist_result( + entries, album_id, album.get('name'), 
album.get('description')) class VimeoGroupsIE(VimeoChannelIE):