X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=d0b486d2a03a5e1dcd0ae0f0703246907d1550a6;hb=4e21b3a94f1ce7ba3757d59d1ac4baf9efaeed84;hp=ca5fbafb2606ff096025d8f6a86bff598410d4d0;hpb=9d5332518c51fb0df69b844f9258e61bd7ecd390;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ca5fbafb2..d0b486d2a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -9,6 +9,7 @@ import sys from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( + compat_etree_fromstring, compat_urllib_parse_unquote, compat_urllib_request, compat_urlparse, @@ -21,7 +22,6 @@ from ..utils import ( HEADRequest, is_html, orderedSet, - parse_xml, smuggle_url, unescapeHTML, unified_strdate, @@ -141,6 +141,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Automatics, robotics and biocybernetics', 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', + 'upload_date': '20130627', 'formats': 'mincount:16', 'subtitles': 'mincount:1', }, @@ -1237,7 +1238,7 @@ class GenericIE(InfoExtractor): # Is it an RSS feed, a SMIL file or a XSPF playlist? try: - doc = parse_xml(webpage) + doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): @@ -1671,8 +1672,8 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P[^']+)',.*?'entry_id'\s*:\s*'(?P[^']+)',", webpage) or - re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P[^\2]+?)\2', webpage)) + mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P[^']+)',", webpage) or + re.search(r'(?s)(?P["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P["\'])(?P.+?)(?P=q2)', webpage)) if mobj is not None: return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')