commit 67ca1a8ef7ea6094e1e34518b93cdb5ba59f31b3
parent 4a733545867a014eb786348f8fb9e6ae95850742
Author: Sergey M․ <dstftw@gmail.com>
Date:   Tue,  1 May 2018 01:48:21 +0700

[zattoo] Improve and simplify (closes #14676)

Diffstat:
M youtube_dl/extractor/zattoo.py | 248 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
1 file changed, 142 insertions(+), 106 deletions(-)

diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py @@ -1,84 +1,82 @@ # coding: utf-8 from __future__ import unicode_literals -from uuid import uuid4 import re +from uuid import uuid4 from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_HTTPError, compat_str, +) +from ..utils import ( ExtractorError, - sanitized_Request, + int_or_none, + try_get, urlencode_postdata, ) class ZattooBaseIE(InfoExtractor): - _NETRC_MACHINE = 'zattoo' _HOST_URL = 'https://zattoo.com' _power_guide_hash = None - def _login(self, uuid, session_id): + def _login(self): (username, password) = self._get_login_info() if not username or not password: - raise ExtractorError( - 'A valid %s account is needed to access this media.' % self._NETRC_MACHINE, - expected=True) - login_form = { - 'login': username, - 'password': password, - 'remember': True, - } - request = sanitized_Request( - '%s/zapi/v2/account/login' % self._HOST_URL, - urlencode_postdata(login_form)) - request.add_header( - 'Referer', '%s/login' % self._HOST_URL) - request.add_header( - 'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') - request.add_header( - 'Cookie', 'uuid=%s; beaker.session.id=%s' % (uuid, session_id)) - response = self._request_webpage( - request, None, 'Logging in') - data = self._parse_json(response.read(), None) - return data['session']['power_guide_hash'] - - def _get_app_token_and_version(self): - host_webpage = self._download_webpage( - self._HOST_URL, None, 'Downloading %s' % self._HOST_URL) + self.raise_login_required( + 'A valid %s account is needed to access this media.' 
+ % self._NETRC_MACHINE) + + try: + data = self._download_json( + '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in', + data=urlencode_postdata({ + 'login': username, + 'password': password, + 'remember': 'true', + }), headers={ + 'Referer': '%s/login' % self._HOST_URL, + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + raise ExtractorError( + 'Unable to login: incorrect username and/or password', + expected=True) + raise + + self._power_guide_hash = data['session']['power_guide_hash'] + + def _real_initialize(self): + webpage = self._download_webpage( + self._HOST_URL, None, 'Downloading app token') app_token = self._html_search_regex( - r'<script.+window\.appToken\s*=\s*\'(.+)\'', host_webpage, 'app token') + r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1', + webpage, 'app token', group='token') app_version = self._html_search_regex( - r'<!--\w+-(.+?)-', host_webpage, 'app version', default='2.8.2') - return app_token, app_version - - def _say_hello(self, uuid, app_token, app_version): - postdata = { - 'client_app_token': app_token, - 'uuid': uuid, - 'lang': 'en', - 'app_version': app_version, - 'format': 'json', - } - request = sanitized_Request( - '%s/zapi/v2/session/hello' % self._HOST_URL, - urlencode_postdata(postdata)) - response = self._request_webpage( - request, None, 'Say hello') + r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2') + + # Will setup appropriate cookies + self._request_webpage( + '%s/zapi/v2/session/hello' % self._HOST_URL, None, + 'Opening session', data=urlencode_postdata({ + 'client_app_token': app_token, + 'uuid': compat_str(uuid4()), + 'lang': 'en', + 'app_version': app_version, + 'format': 'json', + })) - cookie = response.headers.get('Set-Cookie') - session_id = self._search_regex( - r'beaker\.session\.id\s*=\s*(.+?);', cookie, 'session id') - return session_id + self._login() def 
_extract_cid(self, video_id, channel_name): channel_groups = self._download_json( '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL, self._power_guide_hash), - video_id, - 'Downloading available channel list', + video_id, 'Downloading channel list', query={'details': False})['channel_groups'] channel_list = [] for chgrp in channel_groups: @@ -86,7 +84,9 @@ class ZattooBaseIE(InfoExtractor): try: return next( chan['cid'] for chan in channel_list - if chan['display_alias'] == channel_name or chan['cid'] == channel_name) + if chan.get('cid') and ( + chan.get('display_alias') == channel_name or + chan.get('cid') == channel_name)) except StopIteration: raise ExtractorError('Could not extract channel id') @@ -100,72 +100,90 @@ class ZattooBaseIE(InfoExtractor): 'complete': True }) + p = data['program'] + cid = p['cid'] + info_dict = { 'id': video_id, - 'title': data['program']['title'], - 'description': data['program'].get('description'), - 'thumbnail': data['program'].get('image_url') + 'title': p.get('title') or p['episode_title'], + 'description': p.get('description'), + 'thumbnail': p.get('image_url'), + 'creator': p.get('channel_name'), + 'episode': p.get('episode_title'), + 'episode_number': int_or_none(p.get('episode_number')), + 'season_number': int_or_none(p.get('season_number')), + 'release_year': int_or_none(p.get('year')), + 'categories': try_get(p, lambda x: x['categories'], list), } - cid = data['program']['cid'] + return cid, info_dict def _extract_formats(self, cid, video_id, record_id=None, is_live=False): - postdata = { - 'stream_type': 'dash', + postdata_common = { 'https_watch_urls': True, } - if record_id: - url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id) - else: - url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id) if is_live: - postdata.update({'timeshift': 10800}) + postdata_common.update({'timeshift': 10800}) url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid) - - data = self._download_json( - 
sanitized_Request(url, urlencode_postdata(postdata)), - video_id, 'Downloading dash formats') + elif record_id: + url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id) + else: + url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id) formats = [] - for elem in data['stream']['watch_urls']: - audio_channel = elem.get('audio_channel') - maxrate = elem.get('maxrate') - formats.extend( - self._extract_mpd_formats( - elem['url'], video_id, - mpd_id='dash-maxrate-%s-channel-%s' % (maxrate, audio_channel), fatal=False)) - - postdata.update({'stream_type': 'hls'}) - request = sanitized_Request( - url, urlencode_postdata(postdata)) - data = self._download_json( - request, video_id, 'Downloading hls formats') - for elem in data['stream']['watch_urls']: - audio_channel = elem.get('audio_channel') - preference = None - - # Prefer audio channel A: - if audio_channel == 'A': - preference = 1 - - maxrate = elem.get('maxrate') - formats.extend( - self._extract_m3u8_formats( - elem['url'], video_id, 'mp4', entry_protocol='m3u8_native', - preference=preference, - m3u8_id='hls-maxrate-%s-channel-%s' % (maxrate, audio_channel), - fatal=False)) - + for stream_type in ('dash', 'hls', 'hls5', 'hds'): + postdata = postdata_common.copy() + postdata['stream_type'] = stream_type + + data = self._download_json( + url, video_id, 'Downloading %s formats' % stream_type.upper(), + data=urlencode_postdata(postdata), fatal=False) + if not data: + continue + + watch_urls = try_get( + data, lambda x: x['stream']['watch_urls'], list) + if not watch_urls: + continue + + for watch in watch_urls: + if not isinstance(watch, dict): + continue + watch_url = watch.get('url') + if not watch_url or not isinstance(watch_url, compat_str): + continue + format_id_list = [stream_type] + maxrate = watch.get('maxrate') + if maxrate: + format_id_list.append(compat_str(maxrate)) + audio_channel = watch.get('audio_channel') + if audio_channel: + format_id_list.append(compat_str(audio_channel)) + 
preference = 1 if audio_channel == 'A' else None + format_id = '-'.join(format_id_list) + if stream_type in ('dash', 'dash_widevine', 'dash_playready'): + this_formats = self._extract_mpd_formats( + watch_url, video_id, mpd_id=format_id, fatal=False) + elif stream_type in ('hls', 'hls5', 'hls5_fairplay'): + this_formats = self._extract_m3u8_formats( + watch_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id, + fatal=False) + elif stream_type == 'hds': + this_formats = self._extract_f4m_formats( + watch_url, video_id, f4m_id=format_id, fatal=False) + elif stream_type == 'smooth_playready': + this_formats = self._extract_ism_formats( + watch_url, video_id, ism_id=format_id, fatal=False) + else: + assert False + for this_format in this_formats: + this_format['preference'] = preference + formats.extend(this_formats) self._sort_formats(formats) return formats - def _real_initialize(self): - uuid = compat_str(uuid4()) - app_token, app_version = self._get_app_token_and_version() - session_id = self._say_hello(uuid, app_token, app_version) - self._power_guide_hash = self._login(uuid, session_id) - def _extract_video(self, channel_name, video_id, record_id=None, is_live=False): if is_live: cid = self._extract_cid(video_id, channel_name) @@ -190,13 +208,27 @@ class QuicklineBaseIE(ZattooBaseIE): class QuicklineIE(QuicklineBaseIE): _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', + 'only_matching': True, + } + def _real_extract(self, url): channel_name, video_id = re.match(self._VALID_URL, url).groups() return self._extract_video(channel_name, video_id) class QuicklineLiveIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)' + + _TEST = { + 'url': 
'https://mobiltv.quickline.com/watch/srf1', + 'only_matching': True, + } + + @classmethod + def suitable(cls, url): + return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url) def _real_extract(self, url): channel_name = video_id = self._match_id(url) @@ -222,13 +254,17 @@ class ZattooIE(ZattooBaseIE): class ZattooLiveIE(ZattooBaseIE): - _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)' _TEST = { 'url': 'https://zattoo.com/watch/srf1', 'only_matching': True, } + @classmethod + def suitable(cls, url): + return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url) + def _real_extract(self, url): channel_name = video_id = self._match_id(url) return self._extract_video(channel_name, video_id, is_live=True)