commit fca6dba8b80286ae6d3ca0a60c4799c220a52650
parent e2f8bf5888274b95513b430e0f20261120699b4b
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 29 Feb 2020 19:08:44 +0700

[YoutubeDL] Force redirect URL to unicode on python 2

Diffstat:
M youtube_dl/YoutubeDL.py | 4 +++-
M youtube_dl/utils.py | 9 +++++++++
2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
@@ -92,6 +92,7 @@ from .utils import (
     YoutubeDLCookieJar,
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
+    YoutubeDLRedirectHandler,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -2343,6 +2344,7 @@ class YoutubeDL(object):
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+        redirect_handler = YoutubeDLRedirectHandler()
         data_handler = compat_urllib_request_DataHandler()
 
         # When passing our own FileHandler instance, build_opener won't add the
@@ -2356,7 +2358,7 @@ class YoutubeDL(object):
         file_handler.file_open = file_open
 
         opener = compat_urllib_request.build_opener(
-            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
+            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
 
         # Delete the default user-agent header, which would otherwise apply in
         # cases where our custom HTTP handler doesn't come into play
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -2795,6 +2795,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
     https_response = http_response
 
 
+class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+    if sys.version_info[0] < 3:
+        def redirect_request(self, req, fp, code, msg, headers, newurl):
+            # On python 2 urlh.geturl() may sometimes return redirect URL
+            # as byte string instead of unicode. This workaround allows
+            # to force it always return unicode.
+            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
+
+
 def extract_timezone(date_str):
     m = re.search(
         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',