commit 0441d6266ca275b1b4a2ad4efdb4b3f54e318e88
parent 82f68e4a0113f00144b55c5d2a1951793ac78818
Author: Remita Amine <remitamine@gmail.com>
Date:   Fri, 12 Jul 2019 22:31:11 +0100

[rudo] remove extractor(closes #18430)(closes #18474)

Covered by generic extractor

Diffstat:
M youtube_dl/extractor/biobiochiletv.py | 19 ++++++++++++-------
M youtube_dl/extractor/extractors.py | 1 -
D youtube_dl/extractor/rudo.py | 53 -----------------------------------------------------
3 files changed, 12 insertions(+), 61 deletions(-)

diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py @@ -6,7 +6,6 @@ from ..utils import ( ExtractorError, remove_end, ) -from .rudo import RudoIE class BioBioChileTVIE(InfoExtractor): @@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor): }, { 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', 'info_dict': { - 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', + 'id': 'b4xd0LK3SK', 'ext': 'mp4', - 'uploader': '(none)', - 'upload_date': '20160708', - 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', + # TODO: fix url_transparent information overriding + # 'uploader': 'Juan Pablo Echenique', + 'title': 'Comentario Oscar Cáceres', + }, + 'params': { + # empty m3u8 manifest + 'skip_download': True, }, }, { 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', @@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - rudo_url = RudoIE._extract_url(webpage) + rudo_url = self._search_regex( + r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage, 'embed URL', None, group='url') if not rudo_url: raise ExtractorError('No videos found') @@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', + r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', webpage, 'uploader', fatal=False) return { diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py @@ -967,7 +967,6 @@ from .rts import RTSIE from .rtve import RTVEALaCartaIE, 
RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rtvs import RTVSIE -from .rudo import RudoIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py @@ -1,53 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - get_element_by_class, - unified_strdate, -) - - -class RudoIE(InfoExtractor): - _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' - - _TEST = { - 'url': 'http://rudo.video/vod/oTzw0MGnyG', - 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', - 'info_dict': { - 'id': 'oTzw0MGnyG', - 'ext': 'mp4', - 'title': 'Comentario Tomás Mosciatti', - 'upload_date': '20160617', - }, - } - - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', - webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') - - jwplayer_data = self._parse_json(self._search_regex( - r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, - transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) - - info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') - - info_dict.update({ - 'title': self._og_search_title(webpage), - 'upload_date': unified_strdate(get_element_by_class('date', webpage)), - }) - - return info_dict