commit bee6451fe88dbe8d983150cb17565d1c3e9024f7
parent 00d798b7c25f0a03adb252c882df46abc8c23b1b
Author: Sergey M․ <dstftw@gmail.com>
Date:   Mon, 24 Feb 2020 04:47:56 +0700

[pornhd] Fix extraction (closes #24128)

Diffstat:
Myoutube_dl/extractor/pornhd.py | 36++++++++++++++++++++++++------------
1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py @@ -8,6 +8,7 @@ from ..utils import ( ExtractorError, int_or_none, js_to_json, + merge_dicts, urljoin, ) @@ -27,23 +28,22 @@ class PornHdIE(InfoExtractor): 'view_count': int, 'like_count': int, 'age_limit': 18, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, { - # removed video 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', - 'md5': '956b8ca569f7f4d8ec563e2c41598441', + 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de', 'info_dict': { 'id': '1962', 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'ext': 'mp4', - 'title': 'Sierra loves doing laundry', + 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759', 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, 'like_count': int, 'age_limit': 18, }, - 'skip': 'Not available anymore', }] def _real_extract(self, url): @@ -61,7 +61,13 @@ class PornHdIE(InfoExtractor): r"(?s)sources'?\s*[:=]\s*(\{.+?\})", webpage, 'sources', default='{}')), video_id) + info = {} if not sources: + entries = self._parse_html5_media_entries(url, webpage, video_id) + if entries: + info = entries[0] + + if not sources and not info: message = self._html_search_regex( r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1', webpage, 'error message', group='value') @@ -80,23 +86,29 @@ class PornHdIE(InfoExtractor): 'format_id': format_id, 'height': height, }) - self._sort_formats(formats) + if formats: + info['formats'] = formats + self._sort_formats(info['formats']) description = self._html_search_regex( - r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1', - webpage, 'description', fatal=False, group='value') + (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>', + r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'), + webpage, 'description', fatal=False, + group='value') or self._html_search_meta( + 'description', webpage, default=None) or self._og_search_description(webpage) view_count = int_or_none(self._html_search_regex( r'(\d+) views\s*<', webpage, 'view count', fatal=False)) thumbnail = self._search_regex( r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage, - 'thumbnail', fatal=False, group='url') + 'thumbnail', default=None, group='url') like_count = int_or_none(self._search_regex( - (r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes', + (r'(\d+)</span>\s*likes', + r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes', r'class=["\']save-count["\'][^>]*>\s*(\d+)'), webpage, 'like count', fatal=False)) - return { + return merge_dicts(info, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -106,4 +118,4 @@ class PornHdIE(InfoExtractor): 'like_count': like_count, 'formats': formats, 'age_limit': 18, - } + })