commit dbeafce5d58b0d40b7af35e5276f77855ced81a4
parent ed604ce7bcf5bfb02cf6d134eebae705f7c0f381
Author: Sergey M․ <dstftw@gmail.com>
Date:   Tue, 16 Jun 2020 03:13:39 +0700

[youtube] Fix categories and improve tags extraction

Diffstat:
Myoutube_dl/extractor/youtube.py | 10+++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py @@ -2356,17 +2356,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): m_cat_container = self._search_regex( r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', video_webpage, 'categories', default=None) + category = None if m_cat_container: category = self._html_search_regex( r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category', default=None) - video_categories = None if category is None else [category] - else: - video_categories = None + if not category: + category = try_get( + microformat, lambda x: x['category'], compat_str) + video_categories = None if category is None else [category] video_tags = [ unescapeHTML(m.group('content')) for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)] + if not video_tags: + video_tags = try_get(video_details, lambda x: x['keywords'], list) def _extract_count(count_name): return str_to_int(self._search_regex(