[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / discoveryvr.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import parse_duration
6
7
class DiscoveryVRIE(InfoExtractor):
    """Extractor for discoveryvr.com ``/watch/<slug>`` pages.

    Video metadata is read from the ``root.DVR.bootstrapData`` JavaScript
    string embedded in the watch page; the entry whose ``slug`` equals the
    path segment following ``/watch/`` describes the requested video.
    """

    _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
        'md5': '32b1929798c464a54356378b7912eca4',
        'info_dict': {
            'id': 'discovery-vr-an-introduction',
            'ext': 'mp4',
            'title': 'Discovery VR - An Introduction',
            'description': 'md5:80d418a10efb8899d9403e61d8790f06',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video identified by ``url``.

        Downloads the watch page, decodes the embedded bootstrap JSON,
        selects the video entry whose ``slug`` matches the id taken from
        the URL, and collects every format URL present on that entry.

        Raises ExtractorError when no entry in the bootstrap data matches
        the requested slug.
        """
        # Function-scope import: only this method needs ExtractorError, and
        # the module-level import of ..utils brings in parse_duration only.
        from ..utils import ExtractorError

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        bootstrap_data = self._search_regex(
            r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
            webpage, 'bootstrap data')
        # The JSON is embedded as an escaped JavaScript string literal, so
        # the backslash escapes have to be resolved before parsing.
        # 'unicode_escape' decodes its input as Latin-1; encoding with UTF-8
        # first would therefore turn every non-ASCII character into mojibake.
        # Latin-1 with 'backslashreplace' keeps U+0080..U+00FF as single
        # bytes and rewrites anything above as a \uXXXX escape, so all code
        # points survive the round trip.
        bootstrap_data = self._parse_json(
            bootstrap_data.encode(
                'latin-1', 'backslashreplace').decode('unicode_escape'),
            display_id)
        # The 'videos' value is itself a JSON document stored as a string.
        videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']
        video_data = next(
            (video for video in videos if video.get('slug') == display_id),
            None)
        if video_data is None:
            # Report a proper extraction failure instead of leaking a bare
            # StopIteration from next().
            raise ExtractorError(
                'Unable to find video data for %s' % display_id)

        series = video_data.get('showTitle')
        title = episode = video_data.get('title') or series
        # Prefix the episode title with the show title unless the two are
        # identical (which is also the case when the title fell back to it).
        if series and series != title:
            title = '%s - %s' % (series, title)

        formats = []
        for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
            f_url = video_data.get(f)
            if not f_url:
                # This variant is not provided for the video.
                continue
            formats.append({
                'format_id': format_id,
                'url': f_url,
            })

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('runTime')),
            'formats': formats,
            'episode': episode,
            'series': series,
        }
58             'series': series,
59         }