commit 562de77f41d0c08df9dbb08cfa86ba6c7d239c5a
parent e1723c4bac4e465991789b5a29beb946d872f508
Author: Sergey M. <dstftw@gmail.com>
Date:   Sat,  6 Jun 2020 02:14:35 +0700

[kaltura] Add support for multiple embeds on a webpage (closes #25523)

Diffstat:
M youtube_dl/extractor/generic.py | 18 +++++++++++++++---
M youtube_dl/extractor/kaltura.py | 19 +++++++++++++------
2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -1709,6 +1709,15 @@ class GenericIE(InfoExtractor): 'add_ie': ['Kaltura'], }, { + # multiple kaltura embeds, nsfw + 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html', + 'info_dict': { + 'id': 'kamila-avec-video-jaime-sadomie', + 'title': "Kamila avec vídeo “J'aime sadomie”", + }, + 'playlist_count': 8, + }, + { # Non-standard Vimeo embed 'url': 'https://openclassrooms.com/courses/understanding-the-web', 'md5': '64d86f1c7d369afd9a78b38cbb88d80a', @@ -2844,9 +2853,12 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - kaltura_url = KalturaIE._extract_url(webpage) - if kaltura_url: - return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) + kaltura_urls = KalturaIE._extract_urls(webpage) + if kaltura_urls: + return self.playlist_from_matches( + kaltura_urls, video_id, video_title, + getter=lambda x: smuggle_url(x, {'source_url': url}), + ie=KalturaIE.ie_key()) # Look for EaglePlatform embeds eagleplatform_url = EaglePlatformIE._extract_url(webpage) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py @@ -113,9 +113,14 @@ class KalturaIE(InfoExtractor): @staticmethod def _extract_url(webpage): + urls = KalturaIE._extract_urls(webpage) + return urls[0] if urls else None + + @staticmethod + def _extract_urls(webpage): # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site - mobj = ( - re.search( + finditer = ( + re.finditer( r"""(?xs) kWidget\.(?:thumb)?[Ee]mbed\( \{.*? 
@@ -124,7 +129,7 @@ class KalturaIE(InfoExtractor): (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) """, webpage) - or re.search( + or re.finditer( r'''(?xs) (?P<q1>["']) (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* @@ -138,7 +143,7 @@ class KalturaIE(InfoExtractor): ) (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) ''', webpage) - or re.search( + or re.finditer( r'''(?xs) <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) @@ -148,7 +153,8 @@ class KalturaIE(InfoExtractor): (?P=q1) ''', webpage) ) - if mobj: + urls = [] + for mobj in finditer: embed_info = mobj.groupdict() for k, v in embed_info.items(): if v: @@ -160,7 +166,8 @@ class KalturaIE(InfoExtractor): webpage) if service_mobj: url = smuggle_url(url, {'service_url': service_mobj.group('id')}) - return url + urls.append(url) + return urls def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0]