[tbs] fix extraction (fixes #13658)
[youtube-dl] / youtube_dl / extractor / tbs.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .turner import TurnerBaseIE
7 from ..utils import (
8     float_or_none,
9     int_or_none,
10     strip_or_none,
11 )
12
13
class TBSIE(TurnerBaseIE):
    """Extractor for movie, clip and episode pages on tbs.com and tntdrama.com."""

    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
        'info_dict': {
            'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
            'ext': 'mp4',
            'title': 'Monster',
            'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
            'timestamp': 1508175329,
            'upload_date': '20171016',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
        'only_matching': True,
    }, {
        'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at *url*.

        The page's Drupal settings JSON supplies the metadata (first entry
        of 'turner_playlist'); the stream URLs, total runtime and content
        segments come from the medium.ngtv.io media endpoint.
        """
        domain, display_id = re.match(self._VALID_URL, url).groups()
        # 'tbs' stays 'tbs', 'tntdrama' is shortened to 'tnt'.
        site = domain[:3]
        webpage = self._download_webpage(url, display_id)
        video_data = self._parse_json(self._search_regex(
            r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
            webpage, 'drupal setting'), display_id)['turner_playlist'][0]

        media_id = video_data['mediaID']
        title = video_data['title']

        streams_data = self._download_json(
            'http://medium.ngtv.io/media/%s/tv' % media_id,
            media_id)['media']['tv']
        duration = None
        chapters = []
        formats = []
        for supported_type in ('unprotected', 'bulkaes'):
            stream_data = streams_data.get(supported_type, {})
            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
            if not m3u8_url:
                continue
            if stream_data.get('playlistProtection') == 'spe':
                m3u8_url = self._add_akamai_spe_token(
                    'http://www.%s.com/service/token_spe' % site,
                    m3u8_url, media_id, {
                        'url': url,
                        'site_name': site.upper(),
                        'auth_required': video_data.get('authRequired') == '1',
                    })
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))

            duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))

            # Chapters are taken from the first stream type that lists any.
            if not chapters:
                for chapter in stream_data.get('contentSegments', []):
                    start_time = float_or_none(chapter.get('start'))
                    # Kept separate from `duration` so that the overall
                    # video duration computed above is not overwritten.
                    chapter_duration = float_or_none(chapter.get('duration'))
                    if start_time is None or chapter_duration is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': start_time + chapter_duration,
                    })
        self._sort_formats(formats)

        thumbnails = []
        for image_id, image in video_data.get('images', {}).items():
            image_url = image.get('url')
            if not image_url or image.get('type') != 'video':
                continue
            thumbnail = {
                'id': image_id,
                'url': image_url,
            }
            # Dimensions are read from a WIDTHxHEIGHT token in the URL, if any.
            mobj = re.search(r'(\d+)x(\d+)', image_url)
            if mobj:
                thumbnail.update({
                    'width': int(mobj.group(1)),
                    'height': int(mobj.group(2)),
                })
            thumbnails.append(thumbnail)

        return {
            'id': media_id,
            'title': title,
            'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
            'duration': duration,
            'timestamp': int_or_none(video_data.get('created')),
            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episode')),
            'chapters': chapters,
            'thumbnails': thumbnails,
            'formats': formats,
        }