Merge pull request #736 from rg3/retry
[youtube-dl] / youtube_dl / utils.py
index 532e8c7825066ef822506a7589f9d3f48163f1f8..017f06c42e9a019e18e25480c5e5d8d3aaaef335 100644 (file)
@@ -280,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
             lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
         lines[-1] = lines[-1][:self.result[2][1]]
         return '\n'.join(lines).strip()
+# Hack for https://github.com/rg3/youtube-dl/issues/662
+if sys.version_info < (2, 7, 3):
+    AttrParser.parse_endtag = (lambda self, i:
+        i + len("</scr'+'ipt>")
+        if self.rawdata[i:].startswith("</scr'+'ipt>")
+        else compat_html_parser.HTMLParser.parse_endtag(self, i))
 
 def get_element_by_id(id, html):
     """Return the content of the tag with the specified ID in the passed HTML document"""
@@ -305,7 +311,7 @@ def clean_html(html):
     html = re.sub('<.*?>', '', html)
     # Replace html entities
     html = unescapeHTML(html)
-    return html
+    return html.strip()
 
 
 def sanitize_open(filename, open_mode):
@@ -323,7 +329,7 @@ def sanitize_open(filename, open_mode):
             if sys.platform == 'win32':
                 import msvcrt
                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
-            return (sys.stdout, filename)
+            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
         stream = open(encodeFilename(filename), open_mode)
         return (stream, filename)
     except (IOError, OSError) as err:
@@ -414,6 +420,14 @@ def encodeFilename(s):
             encoding = 'utf-8'
         return s.encode(encoding, 'ignore')
 
+def decodeOption(optval):
+    if optval is None:
+        return optval
+    if isinstance(optval, bytes):
+        optval = optval.decode(preferredencoding())
+
+    assert isinstance(optval, compat_str)
+    return optval
 
 class ExtractorError(Exception):
     """Error during info extraction."""
@@ -421,6 +435,7 @@ class ExtractorError(Exception):
         """ tb, if given, is the original traceback (so that it can be printed out). """
         super(ExtractorError, self).__init__(msg)
         self.traceback = tb
+        self.exc_info = sys.exc_info()  # preserve original exception
 
     def format_traceback(self):
         if self.traceback is None:
@@ -435,7 +450,10 @@ class DownloadError(Exception):
     configured to continue on errors. They will contain the appropriate
     error message.
     """
-    pass
+    def __init__(self, msg, exc_info=None):
+        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
+        super(DownloadError, self).__init__(msg)
+        self.exc_info = exc_info
 
 
 class SameFileError(Exception):