Merge remote-tracking branch 'hojel/empflix'
[youtube-dl] / youtube_dl / extractor / empflix.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5     ExtractorError,
6 )
7
class EmpflixIE(InfoExtractor):
    """Information extractor for video pages on www.empflix.com.

    Handles URLs of the form
    ``http(s)://www.empflix.com/videos/<videoid>.html``.
    """

    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<videoid>[^\.]+)\.html'
    _TEST = {
        u'url': u'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        u'file': u'Amateur-Finger-Fuck-33051.flv',
        u'md5': u'5e5cc160f38ca9857f318eb97146e13e',
        u'info_dict': {
            u"title": u"Amateur Finger Fuck",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Extract the video information for an empflix video page.

        The page is downloaded and searched for the title and for the URL
        of a player configuration XML document; the media URL is then read
        from the ``videoLink`` element of that document.

        Args:
            url: Page URL; expected to match ``_VALID_URL``.

        Returns:
            A single-element list holding a dict with the keys ``id``,
            ``url``, ``title``, ``ext`` and ``age_limit``.

        Raises:
            ExtractorError: If the configuration XML has no ``videoLink``
                element or that element carries no text.
        """
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('videoid')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        age_limit = self._rta_search(webpage)

        # Get the video title
        video_title = self._html_search_regex(r'name="title" value="(?P<title>[^"]*)"',
            webpage, u'title').strip()

        # Location of the player configuration XML, as embedded in the page
        cfg_url = self._html_search_regex(r'flashvars\.config = escape\("([^"]+)"',
            webpage, u'flashvars.config').strip()

        cfg_xml = self._download_xml(cfg_url, video_id, note=u'Downloading metadata')

        # find() returns None when the element is absent. Compare against
        # None explicitly: an Element without children is falsy, so a plain
        # truth test would misreport an existing element as missing.
        video_link = cfg_xml.find('videoLink')
        if video_link is None or not video_link.text:
            raise ExtractorError(u'Unable to find video URL in config XML')
        video_url = video_link.text

        info = {'id': video_id,
                'url': video_url,
                'title': video_title,
                'ext': 'flv',
                'age_limit': age_limit}

        return [info]