X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fempflix.py;h=1c498d8c8fa0cc95a2963a19384201a42ec2fa4b;hb=191b7cbba95679b389a509420993af56ef51545d;hp=1290cd9f7028aace78fc104cceb76b33da6752ca;hpb=c7bee2a7254d31b7c478c0ac33bf23bdeba1c53c;p=youtube-dl diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py index 1290cd9f7..1c498d8c8 100644 --- a/youtube_dl/extractor/empflix.py +++ b/youtube_dl/extractor/empflix.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import fix_xml_ampersands class EmpflixIE(InfoExtractor): @@ -35,22 +36,23 @@ class EmpflixIE(InfoExtractor): r'flashvars\.config = escape\("([^"]+)"', webpage, 'flashvars.config') - # XML is malformed - cfg_xml = self._download_webpage( - cfg_url, video_id, note='Downloading metadata') + cfg_xml = self._download_xml( + cfg_url, video_id, note='Downloading metadata', + transform_source=fix_xml_ampersands) formats = [ { - 'url': item[1], - 'format_id': item[0], - } for item in re.findall( - r'\s*([^>]+)\s*([^<]+)\s*', cfg_xml) + 'url': item.find('videoLink').text, + 'format_id': item.find('res').text, + } for item in cfg_xml.findall('./quality/item') ] + thumbnail = cfg_xml.find('./startThumb').text return { 'id': video_id, 'title': video_title, 'description': video_description, + 'thumbnail': thumbnail, 'formats': formats, 'age_limit': age_limit, }