X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=1973bd4836a407d3e66fcc4c3a54d052e958ae19;hb=b4a3d461e4a00dfc60047b667aa3136c8b03eda8;hp=6c84bfe0ffc5667d53a88fbae3ce76e76e36d909;hpb=5552c9eb0fece567f7dda13810939fca32d7d65a;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6c84bfe0f..1973bd483 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -36,6 +36,7 @@ import xml.etree.ElementTree import zlib from .compat import ( + compat_HTMLParseError, compat_HTMLParser, compat_basestring, compat_chr, @@ -409,8 +410,12 @@ def extract_attributes(html_element): but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. """ parser = HTMLAttributeParser() - parser.feed(html_element) - parser.close() + try: + parser.feed(html_element) + parser.close() + # Older Python may throw HTMLParseError in case of malformed HTML + except compat_HTMLParseError: + pass return parser.attrs @@ -932,14 +937,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): except zlib.error: return zlib.decompress(data) - @staticmethod - def addinfourl_wrapper(stream, headers, url, code): - if hasattr(compat_urllib_request.addinfourl, 'getcode'): - return compat_urllib_request.addinfourl(stream, headers, url, code) - ret = compat_urllib_request.addinfourl(stream, headers, url) - ret.code = code - return ret - def http_request(self, req): # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not # always respected by websites, some tend to give out URLs with non percent-encoded @@ -991,13 +988,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): break else: raise original_ioerror - resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code) + resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg del resp.headers['Content-encoding'] # deflate if resp.headers.get('Content-encoding', '') == 'deflate': gz = io.BytesIO(self.deflate(resp.read())) - resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) + resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg del resp.headers['Content-encoding'] # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see @@ -1187,7 +1184,7 @@ def unified_timestamp(date_str, day_first=True): if date_str is None: return None - date_str = date_str.replace(',', ' ') + date_str = re.sub(r'[,|]', '', date_str) pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 timezone, date_str = extract_timezone(date_str)