commit 94db1f7f3b7269d5843b815ef2aa5b71d0361e6f
parent ffa7b2bfee7b94191ffc20ef00c22f708c97cddf
Author: Sergey M․ <dstftw@gmail.com>
Date:   Mon, 29 Oct 2018 23:53:39 +0700

[cnbc] Simplify extraction (closes #14280, closes #17110)

Diffstat:
M youtube_dl/extractor/cnbc.py | 29 ++++++++++-------------------
M youtube_dl/extractor/extractors.py | 2 +-
2 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py
@@ -3,10 +3,7 @@ from __future__ import unicode_literals
 
 
 from .common import InfoExtractor
-from ..utils import (
-    js_to_json,
-    smuggle_url,
-)
+from ..utils import smuggle_url
 
 
 class CNBCIE(InfoExtractor):
@@ -40,36 +37,30 @@ class CNBCIE(InfoExtractor):
         }
 
 
-class CNBCNewIE(InfoExtractor):
-    IE_NAME = 'CNBC:new'
-    _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P<id>[^.]+)'
+class CNBCVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
     _TEST = {
         'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
         'info_dict': {
             'id': '7000031301',
             'ext': 'mp4',
-            'title': 'Trump: I don\'t necessarily agree with raising rates',
+            'title': "Trump: I don't necessarily agree with raising rates",
             'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
             'timestamp': 1531958400,
             'upload_date': '20180719',
             'uploader': 'NBCU-CNBC',
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
     }
 
-    CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s'
-
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        video_id = self._parse_json(
-            self._search_regex(
-                r'(?s).*<script[^>]*>.*?({.+?content_id.+?}).*?</script>',
-                webpage, display_id),
-            display_id, transform_source=js_to_json
-        )['content_id']
-
-        return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC')
+        video_id = self._search_regex(
+            r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
+            'video id')
+        return self.url_result(
+            'http://video.cnbc.com/gallery/?video=%s' % video_id,
+            CNBCIE.ie_key())
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -211,7 +211,7 @@ from .clyp import ClypIE
 from .cmt import CMTIE
 from .cnbc import (
     CNBCIE,
-    CNBCNewIE,
+    CNBCVideoIE,
 )
 from .cnn import (
     CNNIE,