Add tests and improvements for HotNewHipHop
[youtube-dl] / youtube_dl / extractor / hotnewhiphop.py
1 import re
2 import base64
3
4 from .common import InfoExtractor
5
6
class HotNewHipHopIE(InfoExtractor):
    """Extractor for track pages on www.hotnewhiphop.com.

    A page either embeds a base64-encoded media URL in a ``data-path``
    attribute, or only exposes a ``contentUrl`` meta value, which is handed
    over to the Youtube extractor.
    """

    # Both dots of the host name are escaped so that only the literal
    # "www.hotnewhiphop.com" domain matches; the id is the dot-delimited
    # token right before the ".html" suffix.
    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'

    def _real_extract(self, url):
        """Extract media information from a HotNewHipHop page.

        Args:
            url: Page URL; expected to match ``_VALID_URL``.

        Returns:
            The result of ``self.url_result(...)`` (targeting the Youtube
            extractor) when the page has no ``data-path`` attribute;
            otherwise a one-element list holding a dict with the keys
            ``id``, ``url``, ``title``, ``thumbnail`` and ``ext``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage_src = self._download_webpage(url, video_id)

        # Non-fatal lookup: a missing attribute selects the fallback branch
        # below instead of aborting the extraction.
        video_url_base64 = self._search_regex(
            r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)

        if video_url_base64 is None:
            # No directly embedded media: delegate the linked content URL
            # to the Youtube extractor.
            video_url = self._search_regex(
                r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')
            return self.url_result(video_url, ie='Youtube')

        # The data-path attribute carries the media URL base64-encoded.
        video_url = base64.b64decode(video_url_base64).decode('utf-8')

        video_title = self._html_search_regex(
            r"<title>(.*)</title>", webpage_src, u'title')

        # The thumbnail is optional. The capture is non-greedy so it stops at
        # the closing quote of the content attribute even when further
        # quoted attributes or tags follow on the same line.
        thumbnail = self._html_search_regex(
            r'"og:image" content="(.*?)"', webpage_src, u'thumbnail',
            fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': thumbnail,
            'ext': 'mp3',
        }]