commit 4b30282616c35b08308975b9d51614039b3f3d12
parent c9b0564ac141bed3766fa65011274cb5c1c5bccb
Author: Remita Amine <remitamine@gmail.com>
Date:   Wed, 10 Jul 2019 13:54:49 +0100

[funk] fix extraction(closes #17915)

Diffstat:
Myoutube_dl/extractor/funk.py | 171+++++++++++--------------------------------------------------------------------
1 file changed, 23 insertions(+), 148 deletions(-)

diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py @@ -1,89 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals -import itertools import re from .common import InfoExtractor from .nexx import NexxIE -from ..compat import compat_str from ..utils import ( int_or_none, - try_get, + str_or_none, ) -class FunkBaseIE(InfoExtractor): - _HEADERS = { - 'Accept': '*/*', - 'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8', - 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4', - } - _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4' - - @staticmethod - def _make_headers(referer): - headers = FunkBaseIE._HEADERS.copy() - headers['Referer'] = referer - return headers - - def _make_url_result(self, video): - return { - '_type': 'url_transparent', - 'url': 'nexx:741:%s' % video['sourceId'], - 'ie_key': NexxIE.ie_key(), - 'id': video['sourceId'], - 'title': video.get('title'), - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'season_number': int_or_none(video.get('seasonNr')), - 'episode_number': int_or_none(video.get('episodeNr')), - } - - -class FunkMixIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)' +class FunkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _TESTS = [{ - 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten', - 'md5': '8edf617c2f2b7c9847dfda313f199009', - 'info_dict': { - 'id': '123748', - 'ext': 'mp4', - 'title': '"Die realste Kifferdoku aller Zeiten"', - 'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d', - 'timestamp': 1490274721, - 'upload_date': '20170323', - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - mix_id = mobj.group('id') - alias = mobj.group('alias') - - lists = self._download_json( - 'https://www.funk.net/api/v3.1/curation/curatedLists/', - mix_id, headers=self._make_headers(url), query={ - 'size': 100, - })['_embedded']['curatedListList'] - - metas = next( - l for l in lists - if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas'] - video = next( - meta['videoDataDelegate'] - for meta in metas - if try_get( - meta, lambda x: x['videoDataDelegate']['alias'], - compat_str) == alias) - - return self._make_url_result(video) - - -class FunkChannelIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2', + 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', + 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', 'info_dict': { 'id': '1155821', 'ext': 'mp4', @@ -92,83 +24,26 @@ class FunkChannelIE(FunkBaseIE): 'timestamp': 1514507395, 'upload_date': '20171229', }, - 'params': { - 'skip_download': True, - }, - }, { - # only available via byIdList API - 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu', - 'info_dict': { - 'id': '205067', - 'ext': 'mp4', - 'title': 'Martin Sonneborn erklärt die EU', - 'description': 'md5:050f74626e4ed87edf4626d2024210c0', - 'timestamp': 1494424042, - 'upload_date': '20170510', - }, - 'params': { - 'skip_download': True, - }, + }, { - 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/', + 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - alias = mobj.group('alias') - - headers = self._make_headers(url) - - video = None - - # Id-based channels are currently broken on their side: webplayer - # tries to process them via byChannelAlias endpoint and fails - # predictably. - for page_num in itertools.count(): - by_channel_alias = self._download_json( - 'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s' - % channel_id, - 'Downloading byChannelAlias JSON page %d' % (page_num + 1), - headers=headers, query={ - 'filterFsk': 'false', - 'sort': 'creationDate,desc', - 'size': 100, - 'page': page_num, - }, fatal=False) - if not by_channel_alias: - break - video_list = try_get( - by_channel_alias, lambda x: x['_embedded']['videoList'], list) - if not video_list: - break - try: - video = next(r for r in video_list if r.get('alias') == alias) - break - except StopIteration: - pass - if not try_get( - by_channel_alias, lambda x: x['_links']['next']): - break - - if not video: - by_id_list = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/byIdList', - channel_id, 'Downloading byIdList JSON', headers=headers, - query={ - 'ids': alias, - }, fatal=False) - if by_id_list: - video = try_get(by_id_list, lambda x: x['result'][0], dict) - - if not video: - results = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/filter', - channel_id, 'Downloading filter JSON', headers=headers, query={ - 'channelId': channel_id, - 'size': 100, - })['result'] - video = next(r for r in results if r.get('alias') == alias) - - return self._make_url_result(video) + display_id, nexx_id = re.match(self._VALID_URL, url).groups() + video = self._download_json( + 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id) + return { + '_type': 'url_transparent', + 'url': 'nexx:741:' + nexx_id, + 'ie_key': NexxIE.ie_key(), + 'id': nexx_id, + 'title': video.get('title'), + 'description': video.get('description'), + 'duration': int_or_none(video.get('duration')), + 'channel_id': str_or_none(video.get('channelId')), + 'display_id': display_id, + 'tags': video.get('tags'), + 'thumbnail': video.get('imageUrlLandscape'), + }