commit 0685d9727b9657fc8a31c96cb52c4155de29fcfc
parent e06632e3fe25036b804a62469bb18fa4c37e3368
Author: Sergey M․ <dstftw@gmail.com>
Date:   Mon,  9 Jul 2018 23:43:05 +0700

[utils] Share JSON-LD regex

Diffstat:
Myoutube_dl/extractor/common.py | 4++--
Myoutube_dl/utils.py | 1+
2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py @@ -52,6 +52,7 @@ from ..utils import ( GeoUtils, int_or_none, js_to_json, + JSON_LD_RE, mimetype2ext, orderedSet, parse_codecs, @@ -1149,8 +1150,7 @@ class InfoExtractor(object): def _search_json_ld(self, html, video_id, expected_type=None, **kwargs): json_ld = self._search_regex( - r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>', - html, 'JSON-LD', group='json_ld', **kwargs) + JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs) default = kwargs.get('default', NO_DEFAULT) if not json_ld: return default if default is not NO_DEFAULT else {} diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -184,6 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([ ]) PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" +JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>' def preferredencoding():