commit 95e42d7336d01f505d6551a21df52f3ae234e96b
parent cf0db4d99785532d767d0ca1cc029c73d16bb045
Author: Xiao Di Guan <xdg@puxlit.net>
Date:   Sat,  3 Nov 2018 05:18:20 +1100

[extractor/common] Ensure response handle is not prematurely closed before it can be read if it matches expected_status (resolves #17195, closes #17846, resolves #17447)


Diffstat:
Mtest/helper.py | 10++++++++++
Mtest/test_InfoExtractor.py | 42++++++++++++++++++++++++++++++++++++++++--
Mtest/test_downloader_http.py | 12+-----------
Mtest/test_http.py | 10+---------
Myoutube_dl/extractor/common.py | 5+++++
5 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/test/helper.py b/test/helper.py @@ -7,6 +7,7 @@ import json import os.path import re import types +import ssl import sys import youtube_dl.extractor @@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re): real_warning(w) ydl.report_warning = _report_warning + + +def http_server_port(httpd): + if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): + # In Jython SSLSocket is not a subclass of socket.socket + sock = httpd.socket.sock + else: + sock = httpd.socket + return sock.getsockname()[1] diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py @@ -9,11 +9,30 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value -from youtube_dl.compat import compat_etree_fromstring +from test.helper import FakeYDL, expect_dict, expect_value, http_server_port +from youtube_dl.compat import compat_etree_fromstring, compat_http_server from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError +import threading + + +TEAPOT_RESPONSE_STATUS = 418 +TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>" + + +class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/teapot': + self.send_response(TEAPOT_RESPONSE_STATUS) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + else: + assert False class TestIE(InfoExtractor): @@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ for i in range(len(entries)): expect_dict(self, entries[i], expected_entries[i]) + def test_response_with_expected_status_returns_content(self): + # Checks for mitigations against the effects of + # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which + # manifest as `_download_webpage`, `_download_xml`, `_download_json`, + # or the underlying `_download_webpage_handle` returning no content + # when a response matches `expected_status`. + + httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + port = http_server_port(httpd) + server_thread = threading.Thread(target=httpd.serve_forever) + server_thread.daemon = True + server_thread.start() + + (content, urlh) = self.ie._download_webpage_handle( + 'http://127.0.0.1:%d/teapot' % port, None, + expected_status=TEAPOT_RESPONSE_STATUS) + self.assertEqual(content, TEAPOT_RESPONSE_BODY) + if __name__ == '__main__': unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py @@ -9,26 +9,16 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import try_rm +from test.helper import http_server_port, try_rm from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server from youtube_dl.downloader.http import HttpFD from youtube_dl.utils import encodeFilename -import ssl import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def http_server_port(httpd): - if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): - # In Jython SSLSocket is not a subclass of socket.socket - sock = httpd.socket.sock - else: - sock = httpd.socket - return sock.getsockname()[1] - - TEST_SIZE = 10 * 1024 diff --git a/test/test_http.py b/test/test_http.py @@ -8,6 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import http_server_port from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server, compat_urllib_request import ssl @@ -16,15 +17,6 @@ import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def http_server_port(httpd): - if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): - # In Jython SSLSocket is not a subclass of socket.socket - sock = httpd.socket.sock - else: - sock = httpd.socket - return sock.getsockname()[1] - - class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py @@ -606,6 +606,11 @@ class InfoExtractor(object): except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: if isinstance(err, compat_urllib_error.HTTPError): if self.__can_accept_status_code(err, expected_status): + # Retain reference to error to prevent file object from + # being closed before it can be read. Works around the + # effects of <https://bugs.python.org/issue15002> + # introduced in Python 3.4.1. + err.fp._error = err return err.fp if errnote is False: