[youtube] fix extraction for embed restricted live streams(fixes #16433)
[youtube-dl] / youtube_dl / extractor / tbs.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .turner import TurnerBaseIE
7 from ..utils import (
8     float_or_none,
9     int_or_none,
10     strip_or_none,
11 )
12
13
class TBSIE(TurnerBaseIE):
    """Extractor for movie, clip and episode pages on tbs.com and tntdrama.com."""

    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
        'info_dict': {
            'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
            'ext': 'mp4',
            'title': 'Monster',
            'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
            'timestamp': 1508175329,
            'upload_date': '20171016',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
        'only_matching': True,
    }, {
        'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a TBS/TNT video page.

        Downloads the page, reads the first ``turner_playlist`` entry from
        the embedded Drupal settings JSON, then queries medium.ngtv.io for
        the stream description of that entry's ``mediaID``.

        Returns a dict with ``id``, ``title``, ``description``, ``duration``,
        ``timestamp``, ``season_number``, ``episode_number``, ``chapters``,
        ``thumbnails`` and ``formats``.

        ``mediaID`` and ``title`` are read by subscript, so a playlist entry
        lacking either raises ``KeyError``.
        """
        site, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        video_data = self._parse_json(self._search_regex(
            r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
            webpage, 'drupal setting'), display_id)['turner_playlist'][0]

        media_id = video_data['mediaID']
        title = video_data['title']

        streams_data = self._download_json(
            'http://medium.ngtv.io/media/%s/tv' % media_id,
            media_id)['media']['tv']
        duration = None
        chapters = []
        formats = []
        for supported_type in ('unprotected', 'bulkaes'):
            stream_data = streams_data.get(supported_type, {})
            # Prefer the secure URL when the stream entry offers both.
            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
            if not m3u8_url:
                continue
            if stream_data.get('playlistProtection') == 'spe':
                # NOTE(review): helper is inherited from TurnerBaseIE; it
                # presumably returns the URL with an access token attached.
                m3u8_url = self._add_akamai_spe_token(
                    'http://token.vgtf.net/token/token_spe',
                    m3u8_url, media_id, {
                        'url': url,
                        'site_name': site[:3].upper(),
                        'auth_required': video_data.get('authRequired') == '1',
                    })
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))

            duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))

            # Only the first stream type that yields segments provides chapters.
            if not chapters:
                for chapter in stream_data.get('contentSegments', []):
                    start_time = float_or_none(chapter.get('start'))
                    # Use a dedicated name so the per-segment length never
                    # overwrites the whole-video ``duration`` returned below.
                    chapter_duration = float_or_none(chapter.get('duration'))
                    if start_time is None or chapter_duration is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': start_time + chapter_duration,
                    })
        self._sort_formats(formats)

        thumbnails = []
        for image_id, image in video_data.get('images', {}).items():
            image_url = image.get('url')
            # Keep only images typed 'video' that actually carry a URL.
            if not image_url or image.get('type') != 'video':
                continue
            i = {
                'id': image_id,
                'url': image_url,
            }
            # Take the dimensions from a "<width>x<height>" token in the URL.
            mobj = re.search(r'(\d+)x(\d+)', image_url)
            if mobj:
                i.update({
                    'width': int(mobj.group(1)),
                    'height': int(mobj.group(2)),
                })
            thumbnails.append(i)

        return {
            'id': media_id,
            'title': title,
            'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
            'duration': duration,
            'timestamp': int_or_none(video_data.get('created')),
            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episode')),
            'chapters': chapters,
            'thumbnails': thumbnails,
            'formats': formats,
        }