[youtube] fix hd720 format position
[youtube-dl] / youtube_dl / extractor / discoverygo.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..compat import compat_str
7 from ..utils import (
8     determine_ext,
9     extract_attributes,
10     ExtractorError,
11     int_or_none,
12     parse_age_limit,
13     remove_end,
14     unescapeHTML,
15 )
16
17
class DiscoveryGoBaseIE(InfoExtractor):
    """Shared base for the Discovery GO family of extractors.

    Holds the URL template common to the supported "*go.com" hosts and the
    logic that turns a parsed video description plus its stream description
    into an info dict.
    """

    # The single %s placeholder is filled by subclasses with the path part
    # that precedes the captured ``id`` group.
    _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocitychannel
        )go\.com/%s(?P<id>[^/?#&]+)'''

    @staticmethod
    def _dict_field(container, key):
        """Return ``container[key]`` when it is a dict, otherwise ``{}``.

        ``container.get(key, {})`` only falls back when the key is absent; a
        key that is present but holds ``None`` (or any non-dict) would make a
        chained ``.get`` raise AttributeError.
        """
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    def _extract_video_info(self, video, stream, display_id):
        """Build the info dict for a single video.

        :param video: dict with the video metadata; ``name`` is mandatory.
        :param stream: dict with stream URLs and optional ``captions``;
            a falsy value means no stream is available.
        :param display_id: identifier used for logging and as fallback id.
        :returns: dict with id, title, formats, subtitles and the optional
            metadata fields read from ``video``.
        :raises ExtractorError: when ``stream`` is falsy.
        :raises KeyError: when ``video`` has no ``name``.
        """
        title = video['name']

        if not stream:
            if video.get('authenticated') is True:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            else:
                raise ExtractorError('Unable to find stream')

        STREAM_URL_SUFFIX = 'streamUrl'
        # str.capitalize() lower-cases everything after the first character
        # ('streamUrl' -> 'Streamurl'), so upper-case only the first letter
        # by hand to obtain the camelCase suffix 'StreamUrl'.
        camel_suffix = STREAM_URL_SUFFIX[:1].upper() + STREAM_URL_SUFFIX[1:]
        # Candidate keys per stream kind, tried in order.
        # NOTE(review): 'hdsStreamUrl' is presumed to be the real key; the
        # previously looked-up 'hdsStreamurl' is kept as a fallback.
        stream_keys = (
            ('', (STREAM_URL_SUFFIX,)),
            ('hds', ('hds' + camel_suffix, 'hds' + STREAM_URL_SUFFIX.capitalize())),
        )

        formats = []
        for stream_kind, keys in stream_keys:
            stream_url = None
            for key in keys:
                stream_url = stream.get(key)
                if stream_url:
                    break
            if not stream_url:
                continue
            if stream_kind == '':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif stream_kind == 'hds':
                formats.extend(self._extract_f4m_formats(
                    stream_url, display_id, f4m_id=stream_kind, fatal=False))
        self._sort_formats(formats)

        video_id = video.get('id') or display_id
        description = self._dict_field(video, 'description').get('detailed')
        duration = int_or_none(video.get('duration'))

        series = self._dict_field(video, 'show').get('name')
        season_number = int_or_none(
            self._dict_field(video, 'season').get('number'))
        episode_number = int_or_none(video.get('episodeNumber'))

        tags = video.get('tags')
        age_limit = parse_age_limit(
            self._dict_field(video, 'parental').get('rating'))

        subtitles = {}
        captions = stream.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                # Ignore malformed entries instead of failing on .get().
                if not isinstance(caption, dict):
                    continue
                subtitle_url = caption.get('fileUrl')
                if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
                        not subtitle_url.startswith('http')):
                    continue
                # A missing or null language falls back to English so that
                # the subtitles dict never gets a None key.
                lang = caption.get('fileLang') or 'en'
                ext = determine_ext(subtitle_url)
                subtitles.setdefault(lang, []).append({
                    'url': subtitle_url,
                    # Captions served with an .xml extension are labelled ttml.
                    'ext': 'ttml' if ext == 'xml' else ext,
                })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'duration': duration,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'tags': tags,
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
        }
97
98
class DiscoveryGoIE(DiscoveryGoBaseIE):
    """Extractor for single video pages of the Discovery GO sites.

    Matches URLs with at least one path segment before the final one, which
    is captured as the display id.
    """
    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
    _GEO_COUNTRIES = ['US']
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
        'info_dict': {
            'id': '58c167d86b66d12f2addeb01',
            'ext': 'mp4',
            'title': 'Reaper Madness',
            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
            'duration': 2519,
            'series': 'Bering Sea Gold',
            'season_number': 8,
            'episode_number': 6,
            'age_limit': 14,
        },
    }

    def _real_extract(self, url):
        """Download the page at ``url`` and return the video's info dict.

        The video metadata is read as JSON from the ``data-video`` (or,
        failing that, ``data-json``) attribute of the player container tag.

        :raises ExtractorError: when the container carries neither attribute.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        container = extract_attributes(
            self._search_regex(
                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
                webpage, 'video container'))

        video_json = container.get('data-video') or container.get('data-json')
        if not video_json:
            # Fail with a clear message rather than handing None to the
            # JSON parser.
            raise ExtractorError('Unable to find video data')

        video = self._parse_json(video_json, display_id)

        stream = video.get('stream')

        return self._extract_video_info(video, stream, display_id)
134
135
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
    """Extractor for show pages that list several episodes.

    Matches URLs whose id is the first path segment; URLs that the
    single-video extractor accepts are rejected here.
    """
    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/',
        'info_dict': {
            'id': 'bering-sea-gold',
            'title': 'Bering Sea Gold',
            'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
        },
        'playlist_mincount': 6,
    }

    @classmethod
    def suitable(cls, url):
        """Return False for URLs claimed by DiscoveryGoIE, else defer to the base check."""
        if DiscoveryGoIE.suitable(url):
            return False
        return super(DiscoveryGoPlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect every episode referenced on the page into a playlist result."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        entries = []
        # Each data-json attribute holds an HTML-escaped JSON object.
        for match in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
            item = self._parse_json(
                match.group('json'), display_id,
                transform_source=unescapeHTML, fatal=False)
            # Only well-formed episode objects with a URL become entries.
            if isinstance(item, dict) and item.get('type') == 'episode':
                link = item.get('socialUrl')
                if link:
                    entries.append(self.url_result(
                        link, ie=DiscoveryGoIE.ie_key(),
                        video_id=item.get('id')))

        # Strip the site name suffix from the page title.
        og_title = self._og_search_title(webpage, fatal=False)
        playlist_title = remove_end(og_title, ' | Discovery GO')
        playlist_description = self._og_search_description(webpage)

        return self.playlist_result(
            entries, display_id, playlist_title, playlist_description)