commit fda6d237a5b664cc8a9a45562d4113c51fd0280d
parent 5d9f6cbc5afa033b6f1cfd2abe4327e366da2ad1
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sun, 23 Feb 2020 06:47:11 +0700

[wistia] Add support for multiple generic embeds (closes #8347, closes #11385)

Diffstat:
M youtube_dl/extractor/generic.py | 17 +++++++++--------
M youtube_dl/extractor/wistia.py | 31 ++++++++++++++++---------------
2 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -2537,14 +2537,15 @@ class GenericIE(InfoExtractor):
                 dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
 
         # Look for embedded Wistia player
-        wistia_url = WistiaIE._extract_url(webpage)
-        if wistia_url:
-            return {
-                '_type': 'url_transparent',
-                'url': self._proto_relative_url(wistia_url),
-                'ie_key': WistiaIE.ie_key(),
-                'uploader': video_uploader,
-            }
+        wistia_urls = WistiaIE._extract_urls(webpage)
+        if wistia_urls:
+            playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+            for entry in playlist['entries']:
+                entry.update({
+                    '_type': 'url_transparent',
+                    'uploader': video_uploader,
+                })
+            return playlist
 
         # Look for SVT player
         svt_url = SVTIE._extract_url(webpage)
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
@@ -45,22 +45,23 @@ class WistiaIE(InfoExtractor):
     # https://wistia.com/support/embed-and-share/video-on-your-website
     @staticmethod
     def _extract_url(webpage):
-        match = re.search(
-            r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)
-        if match:
-            return unescapeHTML(match.group('url'))
+        urls = WistiaIE._extract_urls(webpage)
+        return urls[0] if urls else None
 
-        match = re.search(
-            r'''(?sx)
-                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
-                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
-            ''', webpage)
-        if match:
-            return 'wistia:%s' % match.group('id')
-
-        match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage)
-        if match:
-            return 'wistia:%s' % match.group('id')
+    @staticmethod
+    def _extract_urls(webpage):
+        urls = []
+        for match in re.finditer(
+                r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
+            urls.append(unescapeHTML(match.group('url')))
+        for match in re.finditer(
+                r'''(?sx)
+                    <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
+                ''', webpage):
+            urls.append('wistia:%s' % match.group('id'))
+        for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
+            urls.append('wistia:%s' % match.group('id'))
+        return urls
 
     def _real_extract(self, url):
         video_id = self._match_id(url)