import re
from .common import InfoExtractor
+from ..utils import fix_xml_ampersands
class EmpflixIE(InfoExtractor):
r'flashvars\.config = escape\("([^"]+)"',
webpage, 'flashvars.config')
- # XML is malformed
- cfg_xml = self._download_webpage(
- cfg_url, video_id, note='Downloading metadata')
+ cfg_xml = self._download_xml(
+ cfg_url, video_id, note='Downloading metadata',
+ transform_source=fix_xml_ampersands)
formats = [
{
- 'url': item[1],
- 'format_id': item[0],
- } for item in re.findall(
- r'<item>\s*<res>([^>]+)</res>\s*<videoLink>([^<]+)</videoLink>\s*</item>', cfg_xml)
+ 'url': item.find('videoLink').text,
+ 'format_id': item.find('res').text,
+ } for item in cfg_xml.findall('./quality/item')
]
+ thumbnail = cfg_xml.find('./startThumb').text
return {
'id': video_id,
'title': video_title,
'description': video_description,
+ 'thumbnail': thumbnail,
'formats': formats,
'age_limit': age_limit,
}