X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fcompat.py;h=a3e85264acda8dbefe0883ea50c901302d01d29f;hb=5c2266df4b9aeb7881ed8c026a038e2a25e43734;hp=cf10835ca55d11e0d8803c550b41a198ac039827;hpb=36e6f62cd0883f0f486d1666d010e5d9e6d515bd;p=youtube-dl diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index cf10835ca..a3e85264a 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -216,10 +216,19 @@ except ImportError: # Python 2.6 if sys.version_info[0] >= 3: compat_etree_fromstring = xml.etree.ElementTree.fromstring else: - # on python 2.x the the attributes of a node are str objects instead of - # unicode + # python 2.x tries to encode unicode strings with ascii (see the + # XMLParser._fixtext method) etree = xml.etree.ElementTree + try: + _etree_iter = etree.Element.iter + except AttributeError: # Python <=2.6 + def _etree_iter(root): + for el in root.findall('*'): + yield el + for sub in _etree_iter(el): + yield sub + # on 2.6 XML doesn't have a parser argument, function copied from CPython # 2.7 source def _XML(text, parser=None): @@ -231,11 +240,16 @@ else: def _element_factory(*args, **kwargs): el = etree.Element(*args, **kwargs) for k, v in el.items(): - el.set(k, v.decode('utf-8')) + if isinstance(v, bytes): + el.set(k, v.decode('utf-8')) return el def compat_etree_fromstring(text): - return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + for el in _etree_iter(doc): + if el.text is not None and isinstance(el.text, bytes): + el.text = el.text.decode('utf-8') + return doc try: from urllib.parse import parse_qs as compat_parse_qs