[extractor/common] Ensure response handle is not prematurely closed before it can be read if it matches expected_status
author Xiao Di Guan <xdg@puxlit.net>
Fri, 2 Nov 2018 18:18:20 +0000 (05:18 +1100)
committer Sergey M <dstftw@gmail.com>
Fri, 2 Nov 2018 18:18:20 +0000 (01:18 +0700)
test/helper.py
test/test_InfoExtractor.py
test/test_downloader_http.py
test/test_http.py
youtube_dl/extractor/common.py

index dfee217a9b8acb64e426c3ce8fc5c11a9c5a0121..aa9a1c9b2aadcd3a9eaeb1170c2e8d90afabb0b8 100644 (file)
@@ -7,6 +7,7 @@ import json
 import os.path
 import re
 import types
+import ssl
 import sys
 
 import youtube_dl.extractor
@@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
             real_warning(w)
 
     ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+        # In Jython SSLSocket is not a subclass of socket.socket
+        sock = httpd.socket.sock
+    else:
+        sock = httpd.socket
+    return sock.getsockname()[1]
index 4833396a521bf1d7a072db8ad425bed333235248..06be726166c164e29c9af54f4a66590001846e53 100644 (file)
@@ -9,11 +9,30 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL, expect_dict, expect_value
-from youtube_dl.compat import compat_etree_fromstring
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from youtube_dl.compat import compat_etree_fromstring, compat_http_server
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+import threading
+
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
+
+
+class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+    def log_message(self, format, *args):
+        pass
+
+    def do_GET(self):
+        if self.path == '/teapot':
+            self.send_response(TEAPOT_RESPONSE_STATUS)
+            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.end_headers()
+            self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+        else:
+            assert False
 
 
 class TestIE(InfoExtractor):
@@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 for i in range(len(entries)):
                     expect_dict(self, entries[i], expected_entries[i])
 
+    def test_response_with_expected_status_returns_content(self):
+        # Checks for mitigations against the effects of
+        # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
+        # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
+        # or the underlying `_download_webpage_handle` returning no content
+        # when a response matches `expected_status`.
+
+        httpd = compat_http_server.HTTPServer(
+            ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+        port = http_server_port(httpd)
+        server_thread = threading.Thread(target=httpd.serve_forever)
+        server_thread.daemon = True
+        server_thread.start()
+
+        (content, urlh) = self.ie._download_webpage_handle(
+            'http://127.0.0.1:%d/teapot' % port, None,
+            expected_status=TEAPOT_RESPONSE_STATUS)
+        self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+
 
 if __name__ == '__main__':
     unittest.main()
index 5cf2bf1a56212ed1888ee5cb8728c68b01d555fa..7504722810b4e706f6b1143c7a36208ee0478749 100644 (file)
@@ -9,26 +9,16 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import try_rm
+from test.helper import http_server_port, try_rm
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server
 from youtube_dl.downloader.http import HttpFD
 from youtube_dl.utils import encodeFilename
-import ssl
 import threading
 
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
-def http_server_port(httpd):
-    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
-        # In Jython SSLSocket is not a subclass of socket.socket
-        sock = httpd.socket.sock
-    else:
-        sock = httpd.socket
-    return sock.getsockname()[1]
-
-
 TEST_SIZE = 10 * 1024
 
 
index 409fec9c8a377a79f05b86b4472106c237cdb629..3ee0a5dda8df4446f915391e031f6d13da486150 100644 (file)
@@ -8,6 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+from test.helper import http_server_port
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server, compat_urllib_request
 import ssl
@@ -16,15 +17,6 @@ import threading
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
-def http_server_port(httpd):
-    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
-        # In Jython SSLSocket is not a subclass of socket.socket
-        sock = httpd.socket.sock
-    else:
-        sock = httpd.socket
-    return sock.getsockname()[1]
-
-
 class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
     def log_message(self, format, *args):
         pass
index 8452125c8802b1398698c334084c85b673e80964..e5f8136fc1511d573978298393f5aba5d94d7af8 100644 (file)
@@ -606,6 +606,11 @@ class InfoExtractor(object):
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             if isinstance(err, compat_urllib_error.HTTPError):
                 if self.__can_accept_status_code(err, expected_status):
+                    # Retain reference to error to prevent file object from
+                    # being closed before it can be read. Works around the
+                    # effects of <https://bugs.python.org/issue15002>
+                    # introduced in Python 3.4.1.
+                    err.fp._error = err
                     return err.fp
 
             if errnote is False: