X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=d00b14b86d8ea8d43aef6bd7cf5a575cda5f94af;hb=63b4295d20a5b98a9fca4dc3ce132b26408110d6;hp=89c88a4d305a36c389f137926d6beec30f028878;hpb=578c074575f45ffdfd032d7b84f6fe449614f511;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 89c88a4d3..d00b14b86 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -36,6 +36,7 @@ import zlib from .compat import ( compat_basestring, compat_chr, + compat_etree_fromstring, compat_html_entities, compat_http_client, compat_kwargs, @@ -365,7 +366,7 @@ def sanitize_path(s): if drive_or_unc: norm_path.pop(0) sanitized_path = [ - path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) + path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part) for path_part in norm_path] if drive_or_unc: sanitized_path.insert(0, drive_or_unc + os.path.sep) @@ -395,10 +396,14 @@ def _htmlentity_transform(entity): numstr = '0%s' % numstr else: base = 10 - return compat_chr(int(numstr, base)) + # See https://github.com/rg3/youtube-dl/issues/7518 + try: + return compat_chr(int(numstr, base)) + except ValueError: + pass # Unknown entity in name, return its literal representation - return ('&%s;' % entity) + return '&%s;' % entity def unescapeHTML(s): @@ -910,7 +915,8 @@ def unified_strdate(date_str, day_first=True): timetuple = email.utils.parsedate_tz(date_str) if timetuple: upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') - return upload_date + if upload_date is not None: + return compat_str(upload_date) def determine_ext(url, default_ext='unknown_video'): @@ -1665,29 +1671,6 @@ def encode_dict(d, encoding='utf-8'): return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items()) -try: - etree_iter = xml.etree.ElementTree.Element.iter -except AttributeError: # Python <=2.6 - etree_iter = lambda n: n.findall('.//*') - - -def parse_xml(s): - class TreeBuilder(xml.etree.ElementTree.TreeBuilder): - def doctype(self, name, pubid, system): - pass # Ignore doctypes - - parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder()) - kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {} - tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) - # Fix up XML parser in Python 2.x - if sys.version_info < (3, 0): - for n in etree_iter(tree): - if n.text is not None: - if not isinstance(n.text, compat_str): - n.text = n.text.decode('utf-8') - return tree - - US_RATINGS = { 'G': 0, 'PG': 10, @@ -1988,7 +1971,7 @@ def dfxp2srt(dfxp_data): return out - dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) + dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) out = [] paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')