commit 8fecc7353df35f6cac305c04a4e203fb2bbb4827
parent 5dda1edef93d94c9a49672f905df0c49c75c5739
Author: Remita Amine <remitamine@gmail.com>
Date:   Wed,  6 Feb 2019 13:59:12 +0100

[toutv] fix authentication(closes #16398)(closes #18700)

Diffstat:
M youtube_dl/extractor/radiocanada.py | 47 ++++++++++++++++++++++++++++++++---------------
M youtube_dl/extractor/toutv.py | 49 ++++++++++++++-----------------------------------
2 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py
@@ -4,12 +4,12 @@ from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
     unified_strdate,
-    unsmuggle_url,
 )
 
 
@@ -58,23 +58,35 @@ class RadioCanadaIE(InfoExtractor):
         }
     ]
     _GEO_COUNTRIES = ['CA']
+    _access_token = None
+    _claims = None
 
-    def _call_api(self, path, video_id, app_code, query):
+    def _call_api(self, path, video_id=None, app_code=None, query=None):
+        if not query:
+            query = {}
         query.update({
-            'appCode': app_code,
-            'idMedia': video_id,
+            'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
             'output': 'json',
         })
-        return self._download_json(
-            'https://services.radio-canada.ca/media/' + path, video_id, headers={
-                'Authorization': 'Client-Key 773aea60-0e80-41bb-9c7f-e6d7c3ad17fb'
-            }, query=query)
+        if video_id:
+            query.update({
+                'appCode': app_code,
+                'idMedia': video_id,
+            })
+        if self._access_token:
+            query['access_token'] = self._access_token
+        try:
+            return self._download_json(
+                'https://services.radio-canada.ca/media/' + path, video_id, query=query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
+                data = self._parse_json(e.cause.read().decode(), None)
+                error = data.get('error_description') or data['errorMessage']['text']
+                raise ExtractorError(error, expected=True)
+            raise
 
-    def _real_extract(self, url):
-        url, smuggled_data = unsmuggle_url(url, {})
-        app_code, video_id = re.match(self._VALID_URL, url).groups()
-
-        metas = self._call_api('meta/v1/index.ashx', video_id, app_code, {})['Metas']
+    def _extract_info(self, app_code, video_id):
+        metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
 
         def get_meta(name):
             for meta in metas:
@@ -93,14 +105,16 @@ class RadioCanadaIE(InfoExtractor):
             'deviceType': 'ipad',
             'multibitrate': 'true',
         }
-        if smuggled_data:
-            query.update(smuggled_data)
+        if self._claims:
+            query['claims'] = self._claims
         v_data = self._call_api('validation/v2/', video_id, app_code, query)
         v_url = v_data.get('url')
         if not v_url:
             error = v_data['message']
             if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
                 raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
+            if error == 'Le contenu sélectionné est disponible seulement en premium':
+                self.raise_login_required(error)
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, error), expected=True)
         formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
@@ -128,6 +142,9 @@ class RadioCanadaIE(InfoExtractor):
             'formats': formats,
         }
 
+    def _real_extract(self, url):
+        return self._extract_info(*re.match(self._VALID_URL, url).groups())
+
 
 class RadioCanadaAudioVideoIE(InfoExtractor):
     'radiocanada:audiovideo'
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py
@@ -3,22 +3,19 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .radiocanada import RadioCanadaIE
 from ..utils import (
+    extract_attributes,
     int_or_none,
-    js_to_json,
+    merge_dicts,
     urlencode_postdata,
-    extract_attributes,
-    smuggle_url,
 )
 
 
-class TouTvIE(InfoExtractor):
+class TouTvIE(RadioCanadaIE):
     _NETRC_MACHINE = 'toutv'
     IE_NAME = 'tou.tv'
     _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
-    _access_token = None
-    _claims = None
 
     _TESTS = [{
         'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
@@ -46,18 +43,14 @@ class TouTvIE(InfoExtractor):
         email, password = self._get_login_info()
         if email is None:
             return
-        state = 'http://ici.tou.tv/'
-        webpage = self._download_webpage(state, None, 'Downloading homepage')
-        toutvlogin = self._parse_json(self._search_regex(
-            r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
-        authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
         login_webpage = self._download_webpage(
-            authorize_url, None, 'Downloading login page', query={
-                'client_id': toutvlogin['clientId'],
-                'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
+            'https://services.radio-canada.ca/auth/oauth/v2/authorize',
+            None, 'Downloading login page', query={
+                'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36',
+                'redirect_uri': 'https://ici.tou.tv/logincallback',
                 'response_type': 'token',
-                'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
-                'state': state,
+                'scope': 'id.write media-validation.read',
+                'state': '/',
             })
 
         def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
@@ -86,12 +79,7 @@ class TouTvIE(InfoExtractor):
         self._access_token = self._search_regex(
             r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
             urlh.geturl(), 'access token')
-        self._claims = self._download_json(
-            'https://services.radio-canada.ca/media/validation/v2/getClaims',
-            None, 'Extracting Claims', query={
-                'token': self._access_token,
-                'access_token': self._access_token,
-            })['claims']
+        self._claims = self._call_api('validation/v2/getClaims')['claims']
 
     def _real_extract(self, url):
         path = self._match_id(url)
@@ -102,19 +90,10 @@ class TouTvIE(InfoExtractor):
             self.report_warning('This video is probably DRM protected.', path)
         video_id = metadata['IdMedia']
         details = metadata['Details']
-        title = details['OriginalTitle']
-        video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
-        if self._access_token and self._claims:
-            video_url = smuggle_url(video_url, {
-                'access_token': self._access_token,
-                'claims': self._claims,
-            })
 
-        return {
-            '_type': 'url_transparent',
-            'url': video_url,
+        return merge_dicts({
             'id': video_id,
-            'title': title,
+            'title': details.get('OriginalTitle'),
             'thumbnail': details.get('ImageUrl'),
             'duration': int_or_none(details.get('LengthInSeconds')),
-        }
+        }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))