Tolerate junk at the end of gzip-compressed content (#1268)
author Philipp Hagemeister <phihag@phihag.de>
Wed, 28 Aug 2013 09:57:13 +0000 (11:57 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Wed, 28 Aug 2013 09:57:13 +0000 (11:57 +0200)
youtube_dl/utils.py

index e6fa634a7f1823a7e5a0f75c2b2eb79b18e9d2e3..be788cf5acce7cdbc6cabc4d25f47c7b4814c437 100644 (file)
@@ -628,8 +628,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         old_resp = resp
         # gzip
         if resp.headers.get('Content-encoding', '') == 'gzip':
-            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
-            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+            content = resp.read()
+            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+            try:
+                uncompressed = io.BytesIO(gz.read())
+            except IOError as original_ioerror:
+                # There may be junk at the end of the file
+                # See http://stackoverflow.com/q/4928560/35070 for details
+                for i in range(1, 1024):
+                    try:
+                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+                        uncompressed = io.BytesIO(gz.read())
+                    except IOError:
+                        continue
+                    break
+                else:
+                    raise original_ioerror
+            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
         # deflate
         if resp.headers.get('Content-encoding', '') == 'deflate':