commit 96c186e1fd8af4943eeaa8c858f9cb0f15e8b265
parent 4ad159c7b0ce476171aad1eaa8a36f7b9b1cce65
Author: Remita Amine <remitamine@gmail.com>
Date:   Thu, 10 Jan 2019 09:05:00 +0100

[fox] add support National Geographic(closes #17985)(closes #15333)(closes #14698)

Diffstat:
Myoutube_dl/extractor/extractors.py | 6+-----
Myoutube_dl/extractor/fox.py | 93++++++++++++++++++++++++++++++++++++++++---------------------------------------
Myoutube_dl/extractor/nationalgeographic.py | 135-------------------------------------------------------------------------------
3 files changed, 48 insertions(+), 186 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py @@ -687,11 +687,7 @@ from .myvi import ( MyviEmbedIE, ) from .myvidster import MyVidsterIE -from .nationalgeographic import ( - NationalGeographicVideoIE, - NationalGeographicIE, - NationalGeographicEpisodeGuideIE, -) +from .nationalgeographic import NationalGeographicVideoIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals +# import json +# import uuid + from .adobepass import AdobePassIE -from .uplynk import UplynkPreplayIE -from ..compat import compat_str from ..utils import ( - HEADRequest, int_or_none, parse_age_limit, parse_duration, @@ -16,7 +16,7 @@ from ..utils import ( class FOXIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)' + _VALID_URL = r'https?://(?:www\.)?(?:fox\.com|nationalgeographic\.com/tv)/watch/(?P<id>[\da-fA-F]+)' _TESTS = [{ # clip 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', @@ -43,41 +43,47 @@ class FOXIE(AdobePassIE): # episode, geo-restricted, tv provided required 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', 'only_matching': True, + }, { + 'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/', + 'only_matching': True, }] + # _access_token = None + + # def _call_api(self, path, video_id, data=None): + # headers = { + # 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', + # } + # if self._access_token: + # headers['Authorization'] = 'Bearer ' + self._access_token + # return self._download_json( + # 'https://api2.fox.com/v2.0/' + path, video_id, data=data, headers=headers) + + # def _real_initialize(self): + # self._access_token = self._call_api( + # 'login', None, json.dumps({ + # 'deviceId': compat_str(uuid.uuid4()), + # }).encode())['accessToken'] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( - 'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id, + 'https://api.fox.com/fbc-content/v1_5/video/%s' % video_id, video_id, headers={ 'apikey': 'abdcbed02c124d393b39e818a4312055', 'Content-Type': 'application/json', 'Referer': url, }) + # video = self._call_api('vodplayer/' + video_id, video_id) title = video['name'] release_url = video['videoRelease']['url'] - - description = video.get('description') - duration = int_or_none(video.get('durationInSeconds')) or int_or_none( - video.get('duration')) or parse_duration(video.get('duration')) - timestamp = unified_timestamp(video.get('datePublished')) - rating = video.get('contentRating') - age_limit = parse_age_limit(rating) + # release_url = video['url'] data = try_get( video, lambda x: x['trackingData']['properties'], dict) or {} - creator = data.get('brand') or data.get('network') or video.get('network') - - series = video.get('seriesName') or data.get( - 'seriesName') or data.get('show') - season_number = int_or_none(video.get('seasonNumber')) - episode = video.get('name') - episode_number = int_or_none(video.get('episodeNumber')) - release_year = int_or_none(video.get('releaseYear')) - + rating = video.get('contentRating') if data.get('authRequired'): resource = self._get_mvpd_resource( 'fbc-fox', title, video.get('guid'), rating) @@ -86,6 +92,18 @@ class FOXIE(AdobePassIE): 'auth': self._extract_mvpd_auth( url, video_id, 'fbc-fox', resource) }) + m3u8_url = self._download_json(release_url, video_id)['playURL'] + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + duration = int_or_none(video.get('durationInSeconds')) or int_or_none( + video.get('duration')) or parse_duration(video.get('duration')) + timestamp = unified_timestamp(video.get('datePublished')) + creator = data.get('brand') or data.get('network') or video.get('network') + series = video.get('seriesName') or data.get( + 'seriesName') or data.get('show') subtitles = {} for doc_rel in video.get('documentReleases', []): @@ -98,36 +116,19 @@ class FOXIE(AdobePassIE): }] break - info = { + return { 'id': video_id, 'title': title, - 'description': description, + 'formats': formats, + 'description': video.get('description'), 'duration': duration, 'timestamp': timestamp, - 'age_limit': age_limit, + 'age_limit': parse_age_limit(rating), 'creator': creator, 'series': series, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - 'release_year': release_year, + 'season_number': int_or_none(video.get('seasonNumber')), + 'episode': video.get('name'), + 'episode_number': int_or_none(video.get('episodeNumber')), + 'release_year': int_or_none(video.get('releaseYear')), 'subtitles': subtitles, } - - urlh = self._request_webpage(HEADRequest(release_url), video_id) - video_url = compat_str(urlh.geturl()) - - if UplynkPreplayIE.suitable(video_url): - info.update({ - '_type': 'url_transparent', - 'url': video_url, - 'ie_key': UplynkPreplayIE.ie_key(), - }) - else: - m3u8_url = self._download_json(release_url, video_id)['playURL'] - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) - info['formats'] = formats - return info diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py @@ -1,15 +1,9 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from .adobepass import AdobePassIE -from .theplatform import ThePlatformIE from ..utils import ( smuggle_url, url_basename, - update_url_query, - get_element_by_class, ) @@ -64,132 +58,3 @@ class NationalGeographicVideoIE(InfoExtractor): {'force_smil_url': True}), 'id': guid, } - - -class NationalGeographicIE(ThePlatformIE, AdobePassIE): - IE_NAME = 'natgeo' - _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)' - - _TESTS = [ - { - 'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/', - 'md5': '518c9aa655686cf81493af5cc21e2a04', - 'info_dict': { - 'id': 'vKInpacll2pC', - 'ext': 'mp4', - 'title': 'Uncovering a Universal Knowledge', - 'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a', - 'timestamp': 1458680907, - 'upload_date': '20160322', - 'uploader': 'NEWA-FNG-NGTV', - }, - 'add_ie': ['ThePlatform'], - }, - { - 'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/', - 'md5': 'c4912f656b4cbe58f3e000c489360989', - 'info_dict': { - 'id': 'Pok5lWCkiEFA', - 'ext': 'mp4', - 'title': 'The Stunning Red Bird of Paradise', - 'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c', - 'timestamp': 1459362152, - 'upload_date': '20160330', - 'uploader': 'NEWA-FNG-NGTV', - }, - 'add_ie': ['ThePlatform'], - }, - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - release_url = self._search_regex( - r'video_auth_playlist_url\s*=\s*"([^"]+)"', - webpage, 'release url') - theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path') - video_id = theplatform_path.split('/')[-1] - query = { - 'mbr': 'true', - } - is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False) - if is_auth == 'auth': - auth_resource_id = self._search_regex( - r"video_auth_resourceId\s*=\s*'([^']+)'", - webpage, 'auth resource id') - query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id) - - formats = [] - subtitles = {} - for key, value in (('switch', 'http'), ('manifest', 'm3u')): - tp_query = query.copy() - tp_query.update({ - key: value, - }) - tp_formats, tp_subtitles = self._extract_theplatform_smil( - update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value) - formats.extend(tp_formats) - subtitles = self._merge_subtitles(subtitles, tp_subtitles) - self._sort_formats(formats) - - info = self._extract_theplatform_metadata(theplatform_path, display_id) - info.update({ - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - 'display_id': display_id, - }) - return info - - -class NationalGeographicEpisodeGuideIE(InfoExtractor): - IE_NAME = 'natgeo:episodeguide' - _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide' - _TESTS = [ - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/', - 'info_dict': { - 'id': 'the-story-of-god-with-morgan-freeman-season-1', - 'title': 'The Story of God with Morgan Freeman - Season 1', - }, - 'playlist_mincount': 6, - }, - { - 'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2', - 'info_dict': { - 'id': 'underworld-inc-season-2', - 'title': 'Underworld, Inc. - Season 2', - }, - 'playlist_mincount': 7, - }, - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - show = get_element_by_class('show', webpage) - selected_season = self._search_regex( - r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>', - webpage, 'selected season') - entries = [ - self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic') - for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)] - return self.playlist_result( - entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')), - '%s - %s' % (show, selected_season))