Merge remote-tracking branch 'rzhxeo/crunchyroll'
[youtube-dl] / youtube_dl / extractor / viddler.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5
6
class ViddlerIE(InfoExtractor):
    """Extractor for videos served from viddler.com.

    URLs of the form ``/v/<id>``, ``/embed/<id>`` and ``/player/<id>`` are
    accepted; the metadata is always read from the ``/embed/<id>`` page of
    the same domain.
    """

    # 'domain' captures scheme + host so the embed URL can be rebuilt on the
    # same host; 'id' is the lowercase alphanumeric video identifier.
    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
    _TEST = {
        u"url": u"http://www.viddler.com/v/43903784",
        u'file': u'43903784.mp4',
        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
        u'info_dict': {
            u"title": u"Video Made Easy",
            u"uploader": u"viddler",
            u"duration": 100.89,
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video addressed by *url*.

        Args:
            url: A URL matching ``_VALID_URL``.

        Returns:
            A dict with the keys ``_type`` (always ``'video'``), ``id``,
            ``title``, ``thumbnail``, ``uploader``, ``duration`` (a float,
            or None when it is missing or not a valid number) and
            ``formats`` (a list of dicts with ``url`` and ``format`` keys).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Whatever URL flavour was given, scrape the embed page.
        embed_url = mobj.group('domain') + u'/embed/' + video_id
        webpage = self._download_webpage(embed_url, video_id)

        # Grab the first "sources: {...}" object literal in the page. The
        # quote swap is what lets json.loads parse it -- presumably the page
        # writes it as single-quoted JavaScript.
        video_sources_code = self._search_regex(
            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
        video_sources = json.loads(video_sources_code.replace("'", '"'))

        # One format entry per item of the sources mapping: the key is used
        # as the URL and the value as the format identifier.
        formats = [{
            'url': video_url,
            'format': format_id,
        } for video_url, format_id in video_sources.items()]

        title = self._html_search_regex(
            r"title\s*:\s*'([^']*)'", webpage, u'title')
        uploader = self._html_search_regex(
            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
        duration_s = self._html_search_regex(
            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
        # The pattern above also accepts text that is not a number (e.g. "."
        # or "1.2.3"). The duration is optional, so a malformed value must
        # not abort the extraction: fall back to None instead.
        try:
            duration = float(duration_s) if duration_s else None
        except ValueError:
            duration = None
        thumbnail = self._html_search_regex(
            r"thumbnail\s*:\s*'([^']*)'",
            webpage, u'thumbnail', fatal=False)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
        }