[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / adultswim.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import re
6
7 from .turner import TurnerBaseIE
8 from ..utils import (
9     determine_ext,
10     float_or_none,
11     int_or_none,
12     mimetype2ext,
13     parse_age_limit,
14     parse_iso8601,
15     strip_or_none,
16     try_get,
17 )
18
19
class AdultSwimIE(TurnerBaseIE):
    """Extractor for adultswim.com show pages and individual episode pages.

    A URL with only a show slug yields a playlist of that show's videos; a
    URL with both a show slug and an episode slug yields a single video.
    """

    # ``show_path`` is mandatory; ``episode_path`` is only present for
    # single-video URLs.
    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'

    _TESTS = [{
        'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
        'info_dict': {
            'id': 'rQxZvXQ4ROaSOqq-or2Mow',
            'ext': 'mp4',
            'title': 'Rick and Morty - Pilot',
            'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
            'timestamp': 1543294800,
            'upload_date': '20181127',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
        'info_dict': {
            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
            'ext': 'mp4',
            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
            'upload_date': '20080124',
            'timestamp': 1201150800,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': '404 Not Found',
    }, {
        'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
        'info_dict': {
            'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
            'ext': 'mp4',
            'title': 'Decker - Inside Decker: A New Hero',
            'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
            'timestamp': 1469480460,
            'upload_date': '20160725',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.adultswim.com/videos/attack-on-titan',
        'info_dict': {
            'id': 'attack-on-titan',
            'title': 'Attack on Titan',
            'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://www.adultswim.com/videos/streams/williams-stream',
        'info_dict': {
            'id': 'd8DEBj7QRfetLsRgFnGEyg',
            'ext': 'mp4',
            'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'description': 'original programming',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': '404 Not Found',
    }]

    def _real_extract(self, url):
        """Resolve an adultswim.com URL to a video or a show playlist.

        The show (and, when present, the episode) is looked up with a single
        GraphQL POST to ``https://www.adultswim.com/api/search``.

        :param url: a URL matching ``_VALID_URL``.
        :returns: for an episode URL, an info dict describing that video; for
            a show URL, a playlist result whose entries point back at the
            per-episode URLs handled by this extractor.
        :raises KeyError: if the API response lacks ``data``/``getShowBySlug``
            or, for an episode, ``getVideoBySlug``/``_id``/``title``.
        """
        show_path, episode_path = re.match(self._VALID_URL, url).groups()
        display_id = episode_path or show_path

        # Choose the field selection first, then interpolate everything into
        # the query exactly once.  Running the already-interpolated query
        # through ``%`` a second time would treat any ``%`` in the show slug
        # (e.g. from a percent-encoded URL) as a format directive and raise.
        if episode_path:
            fields = '''title
    getVideoBySlug(slug:"%s") {
      _id
      auth
      description
      duration
      episodeNumber
      launchDate
      mediaID
      seasonNumber
      poster
      title
      tvRating
    }''' % episode_path
        else:
            fields = '''metaDescription
    title
    videos(first:1000,sort:["episode_number"]) {
      edges {
        node {
           _id
           slug
        }
      }
    }'''
        query = '''query {
  getShowBySlug(slug:"%s") {
    %s
  }
}''' % (show_path, fields)

        show_data = self._download_json(
            'https://www.adultswim.com/api/search', display_id,
            data=json.dumps({'query': query}).encode(),
            headers={'Content-Type': 'application/json'})['data']['getShowBySlug']

        if not episode_path:
            # Show page: emit one playlist entry per video that has a slug.
            # try_get tolerates ``videos``/``edges``/``node`` being null or of
            # an unexpected type instead of raising on them.
            entries = []
            edges = try_get(
                show_data, lambda x: x['videos']['edges'], list) or []
            for edge in edges:
                video = try_get(edge, lambda x: x['node'], dict) or {}
                slug = video.get('slug')
                if not slug:
                    continue
                entries.append(self.url_result(
                    'http://adultswim.com/videos/%s/%s' % (show_path, slug),
                    'AdultSwim', video.get('_id')))
            return self.playlist_result(
                entries, show_path, show_data.get('title'),
                strip_or_none(show_data.get('metaDescription')))

        # Episode page.
        video_data = show_data['getVideoBySlug']
        video_id = video_data['_id']
        episode_title = title = video_data['title']
        series = show_data.get('title')
        if series:
            # Prefix the episode title with the show title when it is known.
            title = '%s - %s' % (series, title)
        info = {
            'id': video_id,
            'title': title,
            'description': strip_or_none(video_data.get('description')),
            'duration': float_or_none(video_data.get('duration')),
            'formats': [],
            'subtitles': {},
            'age_limit': parse_age_limit(video_data.get('tvRating')),
            'thumbnail': video_data.get('poster'),
            'timestamp': parse_iso8601(video_data.get('launchDate')),
            'series': series,
            'season_number': int_or_none(video_data.get('seasonNumber')),
            'episode': episode_title,
            'episode_number': int_or_none(video_data.get('episodeNumber')),
        }

        auth = video_data.get('auth')
        media_id = video_data.get('mediaID')
        if media_id:
            # Whatever the inherited helper returns is merged over the
            # defaults above (presumably formats/subtitles; see TurnerBaseIE).
            info.update(self._extract_ngtv_info(media_id, {
                # CDN_TOKEN_APP_ID from:
                # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
                'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
            }, {
                'url': url,
                'site_name': 'AdultSwim',
                'auth_required': auth,
            }))

        if not auth:
            # Videos not flagged ``auth`` additionally get the assets listed
            # by the per-video stream endpoint.  The request is non-fatal, so
            # a failure here simply contributes nothing.
            extract_data = self._download_json(
                'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
                video_id, query={'fields': 'stream'}, fatal=False) or {}
            assets = try_get(
                extract_data,
                lambda x: x['data']['video']['stream']['assets'], list) or []
            for asset in assets:
                asset_url = asset.get('url')
                if not asset_url:
                    continue
                # Prefer the URL's extension; fall back to the MIME type.
                ext = determine_ext(
                    asset_url, mimetype2ext(asset.get('mime_type')))
                if ext == 'm3u8':
                    info['formats'].extend(self._extract_m3u8_formats(
                        asset_url, video_id, 'mp4', m3u8_id='hls',
                        fatal=False))
                elif ext == 'f4m':
                    # f4m (HDS) assets are intentionally not extracted.
                    continue
                elif ext in ('scc', 'ttml', 'vtt'):
                    info['subtitles'].setdefault('en', []).append({
                        'url': asset_url,
                    })
        self._sort_formats(info['formats'])

        return info