commit b96b4be4619b1e090650212380a92fb068f2fd21
parent edb0e17188a1197afae1a0d594e4aab7d27bbcf2
Author: Remita Amine <remitamine@gmail.com>
Date:   Sat, 21 Jul 2018 11:49:55 +0100

[bbc] add support for BBC Radio Play pages(closes #17022)

Diffstat:
Myoutube_dl/extractor/bbc.py | 41+++++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+), 0 deletions(-)

diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py @@ -778,6 +778,17 @@ class BBCIE(BBCCoUkIE): 'params': { 'skip_download': True, } + }, { + # window.__PRELOADED_STATE__ + 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl', + 'info_dict': { + 'id': 'b0b9z4vz', + 'ext': 'mp4', + 'title': 'Prom 6: An American in Paris and Turangalila', + 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8', + 'uploader': 'Radio 3', + 'uploader_id': 'bbc_radio_three', + }, }] @classmethod @@ -1000,6 +1011,36 @@ class BBCIE(BBCCoUkIE): 'subtitles': subtitles, } + preload_state = self._parse_json(self._search_regex( + r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage, + 'preload state', default='{}'), playlist_id, fatal=False) + if preload_state: + current_programme = preload_state.get('programmes', {}).get('current') or {} + programme_id = current_programme.get('id') + if current_programme and programme_id and current_programme.get('type') == 'playable_item': + title = current_programme.get('titles', {}).get('tertiary') or playlist_title + formats, subtitles = self._download_media_selector(programme_id) + self._sort_formats(formats) + synopses = current_programme.get('synopses') or {} + network = current_programme.get('network') or {} + duration = int_or_none( + current_programme.get('duration', {}).get('value')) + thumbnail = None + image_url = current_programme.get('image_url') + if image_url: + thumbnail = image_url.replace('{recipe}', '1920x1920') + return { + 'id': programme_id, + 'title': title, + 'description': dict_get(synopses, ('long', 'medium', 'short')), + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': network.get('short_title'), + 'uploader_id': network.get('id'), + 'formats': formats, + 'subtitles': subtitles, + } + bbc3_config = self._parse_json( self._search_regex( r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,