commit 64b6a4e91ef735c8d07e93c4e670a01bcb10e7bb
parent b3d39be2393b3e5199d1abc3000d96ded3c4e4d7
Author: Remita Amine <remitamine@gmail.com>
Date:   Fri,  9 Aug 2019 08:16:53 +0100

[youtube] fix annotations extraction (closes #22045)

Diffstat:
M youtube_dl/extractor/youtube.py | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -1595,17 +1595,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             video_id = mobj.group(2)
         return video_id
 
-    def _extract_annotations(self, video_id):
-        return self._download_webpage(
-            'https://www.youtube.com/annotations_invideo', video_id,
-            note='Downloading annotations',
-            errnote='Unable to download video annotations', fatal=False,
-            query={
-                'features': 1,
-                'legacy': 1,
-                'video_id': video_id,
-            })
-
     @staticmethod
     def _extract_chapters(description, duration):
         if not description:
@@ -2277,7 +2266,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         # annotations
         video_annotations = None
         if self._downloader.params.get('writeannotations', False):
-            video_annotations = self._extract_annotations(video_id)
+            xsrf_token = self._search_regex(
+                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
+                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+            invideo_url = try_get(
+                player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
+            if xsrf_token and invideo_url:
+                xsrf_field_name = self._search_regex(
+                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+                    video_webpage, 'xsrf field name',
+                    group='xsrf_field_name', default='session_token')
+                video_annotations = self._download_webpage(
+                    self._proto_relative_url(invideo_url),
+                    video_id, note='Downloading annotations',
+                    errnote='Unable to download video annotations', fatal=False,
+                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))
 
         chapters = self._extract_chapters(description_original, video_duration)