X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=017f06c42e9a019e18e25480c5e5d8d3aaaef335;hb=e3700fc9e44c7820e1c38264c84a315c5f91bb2d;hp=463804e183117b23efd8f0e4b0ad9b132e519ddb;hpb=a7c0f8602e91cc96962c7eade10860b61afc3728;p=youtube-dl

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 463804e18..017f06c42 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -8,6 +8,7 @@ import locale
 import os
 import re
 import sys
+import traceback
 import zlib
 import email.utils
 import json
@@ -154,6 +155,7 @@ std_headers = {
     'Accept-Encoding': 'gzip, deflate',
     'Accept-Language': 'en-us,en;q=0.5',
 }
+
 def preferredencoding():
     """Get preferred encoding.
 
@@ -187,7 +189,6 @@ else:
         with open(fn, 'w', encoding='utf-8') as f:
             json.dump(obj, f)
 
-
 def htmlentity_transform(matchobj):
     """Transforms an HTML entity to a character.
 
@@ -279,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
             lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
         lines[-1] = lines[-1][:self.result[2][1]]
         return '\n'.join(lines).strip()
+# Hack for https://github.com/rg3/youtube-dl/issues/662
+if sys.version_info < (2, 7, 3):
+    AttrParser.parse_endtag = (lambda self, i:
+        i + len("</scr'+'ipt>")
+        if self.rawdata[i:].startswith("</scr'+'ipt>")
+        else compat_html_parser.HTMLParser.parse_endtag(self, i))
 
 def get_element_by_id(id, html):
     """Return the content of the tag with the specified ID in the passed HTML document"""
@@ -304,7 +311,7 @@ def clean_html(html):
     html = re.sub('<.*?>', '', html)
     # Replace html entities
     html = unescapeHTML(html)
-    return html
+    return html.strip()
 
 
 def sanitize_open(filename, open_mode):
@@ -322,7 +329,7 @@ def sanitize_open(filename, open_mode):
             if sys.platform == 'win32':
                 import msvcrt
                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
-            return (sys.stdout, filename)
+            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
         stream = open(encodeFilename(filename), open_mode)
         return (stream, filename)
     except (IOError, OSError) as err:
@@ -408,7 +415,33 @@ def encodeFilename(s):
         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
         return s
     else:
-        return s.encode(sys.getfilesystemencoding(), 'ignore')
+        encoding = sys.getfilesystemencoding()
+        if encoding is None:
+            encoding = 'utf-8'
+        return s.encode(encoding, 'ignore')
+
+def decodeOption(optval):
+    if optval is None:
+        return optval
+    if isinstance(optval, bytes):
+        optval = optval.decode(preferredencoding())
+
+    assert isinstance(optval, compat_str)
+    return optval
+
+class ExtractorError(Exception):
+    """Error during info extraction."""
+    def __init__(self, msg, tb=None):
+        """ tb, if given, is the original traceback (so that it can be printed out). """
+        super(ExtractorError, self).__init__(msg)
+        self.traceback = tb
+        self.exc_info = sys.exc_info()  # preserve original exception
+
+    def format_traceback(self):
+        if self.traceback is None:
+            return None
+        return u''.join(traceback.format_tb(self.traceback))
+
 
 class DownloadError(Exception):
     """Download Error exception.
@@ -417,7 +450,10 @@ class DownloadError(Exception):
     configured to continue on errors. They will contain the appropriate
     error message.
     """
-    pass
+    def __init__(self, msg, exc_info=None):
+        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
+        super(DownloadError, self).__init__(msg)
+        self.exc_info = exc_info
 
 
 class SameFileError(Exception):
@@ -435,7 +471,8 @@ class PostProcessingError(Exception):
     This exception may be raised by PostProcessor's .run() method to
     indicate an error in the postprocessing task.
     """
-    pass
+    def __init__(self, msg):
+        self.msg = msg
 
 class MaxDownloadsReached(Exception):
     """ --max-downloads limit has been reached. """
@@ -500,14 +537,19 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         return ret
 
     def http_request(self, req):
-        for h in std_headers:
+        for h,v in std_headers.items():
             if h in req.headers:
                 del req.headers[h]
-            req.add_header(h, std_headers[h])
+            req.add_header(h, v)
         if 'Youtubedl-no-compression' in req.headers:
             if 'Accept-encoding' in req.headers:
                 del req.headers['Accept-encoding']
             del req.headers['Youtubedl-no-compression']
+        if 'Youtubedl-user-agent' in req.headers:
+            if 'User-agent' in req.headers:
+                del req.headers['User-agent']
+            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
+            del req.headers['Youtubedl-user-agent']
         return req
 
     def http_response(self, req, resp):