Merge branch 'master' of https://github.com/DarkstaIkers/youtube-dl into DarkstaIkers...
[youtube-dl] / youtube_dl / extractor / youjizz.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4
5
class YouJizzIE(InfoExtractor):
    """Extractor for single-video pages hosted on youjizz.com."""

    # The textual slug in front of the numeric id is optional: the second
    # entry in _TESTS is a URL of the form ``/videos/-<id>.html``.
    _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
    _TESTS = [
        {
            'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
            'md5': '78fc1901148284c69af12640e01c6310',
            'info_dict': {
                'id': '2189178',
                'ext': 'mp4',
                'title': 'Zeichentrick 1',
                'age_limit': 18,
            },
        },
        {
            'url': 'http://www.youjizz.com/videos/-2189178.html',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded, its markup is patched so the HTML5 media
        parser can cope with it, and the first media entry returned by
        ``_parse_html5_media_entries`` is used as the result. That entry
        is then completed with the video id taken from the URL, the text
        of the page's ``<title>`` element and the value returned by
        ``_rta_search``.
        """
        video_id = self._match_id(url)

        # YouJizz's HTML5 player has invalid HTML: the ``controls``
        # attribute is glued to the closing quote of the preceding
        # attribute value, so a separating space is inserted.
        page = self._download_webpage(url, video_id).replace(
            '"controls', '" controls')

        age_limit = self._rta_search(page)
        title = self._html_search_regex(
            r'<title>\s*(.*)\s*</title>', page, 'title')

        # Only the first parsed media entry is used.
        entries = self._parse_html5_media_entries(url, page, video_id)
        result = entries[0]

        result['id'] = video_id
        result['title'] = title
        result['age_limit'] = age_limit
        return result