[youtube] Skip unsupported adaptive stream type (#18804)
[youtube-dl] / youtube_dl / extractor / vice.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import time
6 import hashlib
7 import json
8 import random
9
10 from .adobepass import AdobePassIE
11 from .youtube import YoutubeIE
12 from .common import InfoExtractor
13 from ..compat import (
14     compat_HTTPError,
15     compat_str,
16 )
17 from ..utils import (
18     ExtractorError,
19     int_or_none,
20     parse_age_limit,
21     str_or_none,
22     try_get,
23 )
24
25
class ViceIE(AdobePassIE):
    # Extractor for single videos addressed through video.vice.com,
    # vms.vice.com or viceland.com URLs. Media resolution is not done here:
    # _real_extract returns a url_transparent result that is handed over to
    # the UplynkPreplay extractor, enriched with metadata read from the
    # embed page and from the preplay JSON.
    IE_NAME = 'vice'
    _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
        'info_dict': {
            'id': '5e647f0125e145c9aef2069412c0cbde',
            'ext': 'mp4',
            'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
            'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1489664942,
            'upload_date': '20170316',
            'age_limit': 14,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        # geo restricted to US
        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
        'info_dict': {
            'id': '930c0ad1f47141cc955087eecaddb0e2',
            'ext': 'mp4',
            'uploader': 'waypoint',
            'title': 'The Signal From Tölva',
            'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
            'uploader_id': '57f7d621e05ca860fa9ccaf9',
            'timestamp': 1477941983,
            'upload_date': '20161031',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
        'info_dict': {
            'id': '581b12b60a0e1f4c0fb6ea2f',
            'ext': 'mp4',
            'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
            'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
            'uploader': 'VICE',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1485368119,
            'upload_date': '20170125',
            'age_limit': 14,
        },
        'params': {
            # AES-encrypted m3u8
            'skip_download': True,
            'proxy': '127.0.0.1:8118',
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
        'only_matching': True,
    }, {
        'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
        'only_matching': True,
    }, {
        'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
        'only_matching': True,
    }, {
        'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
        'only_matching': True,
    }]
    # Host name (without the ".com" suffix) that serves preplay data for
    # videos that are not locked; locked videos use www.viceland instead.
    _PREPLAY_HOST = 'vms.vice'

    @staticmethod
    def _extract_urls(webpage):
        """Return the src of every video.vice.com embed iframe in *webpage*."""
        return re.findall(
            r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)',
            webpage)

    @staticmethod
    def _extract_url(webpage):
        """Return the first embed URL found in *webpage*, or None if there is none."""
        urls = ViceIE._extract_urls(webpage)
        return urls[0] if urls else None

    def _real_extract(self, url):
        """Build a url_transparent result for the video at *url*.

        Raises ExtractorError (expected=True) carrying the server's own
        message when the preplay request is answered with HTTP 400 or 401;
        any other failure propagates unchanged.
        """
        locale, video_id = re.match(self._VALID_URL, url).groups()

        # Metadata is always read from the embed page, whichever URL form
        # was supplied.
        webpage = self._download_webpage(
            'https://video.vice.com/%s/embed/%s' % (locale, video_id),
            video_id)

        video = self._parse_json(
            self._search_regex(
                r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
                'app state'), video_id)['video']
        # Prefer the id reported by the page over the one taken from the URL
        video_id = video.get('vms_id') or video.get('id') or video_id
        title = video['title']
        is_locked = video.get('locked')
        rating = video.get('rating')
        thumbnail = video.get('thumbnail_url')
        duration = int_or_none(video.get('duration'))
        series = try_get(
            video, lambda x: x['episode']['season']['show']['title'],
            compat_str)
        episode_number = try_get(
            video, lambda x: x['episode']['episode_number'])
        season_number = try_get(
            video, lambda x: x['episode']['season']['season_number'])

        query = {}
        if is_locked:
            # Locked videos need a TV provider token, requested for the
            # 'VICELAND' requestor.
            resource = self._get_mvpd_resource(
                'VICELAND', title, video_id, rating)
            query['tvetoken'] = self._extract_mvpd_auth(
                url, video_id, 'VICELAND', resource)

        # signature generation algorithm is reverse engineered from signatureGenerator in
        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
        # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
        # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
        exp = int(time.time()) + 1440

        query.update({
            'exp': exp,
            'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
            '_ad_blocked': None,
            '_ad_unit': '',
            '_debug': '',
            'platform': 'desktop',
            'rn': random.randint(10000, 100000),
            'fbprebidtoken': '',
        })

        try:
            host = 'www.viceland' if is_locked else self._PREPLAY_HOST
            preplay = self._download_json(
                'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id),
                video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
                # The response body of these errors is read as JSON and its
                # message is surfaced to the user.
                error = json.loads(e.cause.read().decode())
                error_message = error.get('error_description') or error['details']
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, error_message), expected=True)
            raise

        video_data = preplay['video']
        uplynk_preplay_url = preplay['preplayURL']
        # Fall back to {} both when a key is missing and when its value is
        # null (None): dict.get(key, {}) only covers a missing key, and
        # calling .get on None would abort extraction over optional metadata.
        base = video_data.get('base') or {}
        episode = video_data.get('episode') or {}
        channel = video_data.get('channel') or {}
        channel_base = channel.get('base') or {}

        subtitles = {}
        cc_url = preplay.get('ccURL')
        if cc_url:
            subtitles['en'] = [{
                'url': cc_url,
            }]

        return {
            '_type': 'url_transparent',
            'url': uplynk_preplay_url,
            'id': video_id,
            'title': title,
            'description': base.get('body') or base.get('display_body'),
            'thumbnail': thumbnail,
            # Preplay values take precedence; embed page values are fallbacks
            'duration': int_or_none(video_data.get('video_duration')) or duration,
            'timestamp': int_or_none(video_data.get('created_at'), 1000),
            'age_limit': parse_age_limit(video_data.get('video_rating')),
            'series': video_data.get('show_title') or series,
            'episode_number': int_or_none(episode.get('episode_number') or episode_number),
            'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
            'season_number': int_or_none(season_number),
            'season_id': str_or_none(episode.get('season_id')),
            'uploader': channel_base.get('title') or channel.get('name') or None,
            'uploader_id': str_or_none(channel.get('id')),
            'subtitles': subtitles,
            'ie_key': 'UplynkPreplay',
        }
206
207
class ViceShowIE(InfoExtractor):
    # Playlist extractor for vice.com show pages: every article title link
    # on the page whose href matches ViceIE._VALID_URL becomes one entry.
    IE_NAME = 'vice:show'
    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'

    _TEST = {
        'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
        'info_dict': {
            'id': 'fuck-thats-delicious-2',
            'title': "Fuck, That's Delicious",
            'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
        },
        'playlist_count': 17,
    }

    def _real_extract(self, url):
        """Return a playlist of the ViceIE videos linked from the show page."""
        show_id = self._match_id(url)
        webpage = self._download_webpage(url, show_id)

        # ViceIE._VALID_URL brings its own capture groups (locale, id), so
        # re.findall would yield tuples whose length depends on that pattern.
        # Reading the link through a named group with finditer keeps this
        # independent of how many groups the embedded pattern defines.
        entries = [
            self.url_result(mobj.group('url'), ViceIE.ie_key())
            for mobj in re.finditer(
                r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(?P<url>%s.*?)"'
                % ViceIE._VALID_URL, webpage)]

        title = self._search_regex(
            r'<title>(.+?)</title>', webpage, 'title', default=None)
        if title:
            # Keep only the part before the last " | suffix" of the page title
            title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
        description = self._html_search_meta(
            'description', webpage, 'description')

        return self.playlist_result(entries, show_id, title, description)
240
241
class ViceArticleIE(InfoExtractor):
    # Extractor for www.vice.com article pages. It does not resolve media
    # itself: it locates the video embedded in the article and returns a
    # url_transparent result pointing at the extractor responsible for it.
    IE_NAME = 'vice:article'
    _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'

    _TESTS = [{
        'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
        'info_dict': {
            'id': '41eae2a47b174a1398357cec55f1f6fc',
            'ext': 'mp4',
            'title': 'Mormon War on Porn ',
            'description': 'md5:6394a8398506581d0346b9ab89093fef',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1491883129,
            'upload_date': '20170411',
            'age_limit': 17,
        },
        'params': {
            # AES-encrypted m3u8
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
        'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
        'info_dict': {
            'id': '3jstaBeXgAs',
            'ext': 'mp4',
            'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
            'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
            'uploader': 'Motherboard',
            'uploader_id': 'MotherboardTV',
            'upload_date': '20140529',
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
        'info_dict': {
            'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
            'ext': 'mp4',
            'title': "Making The World's First Male Sex Doll",
            'description': 'md5:916078ef0e032d76343116208b6cc2c4',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1476919911,
            'upload_date': '20161019',
            'age_limit': 17,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [ViceIE.ie_key()],
    }, {
        'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
        'only_matching': True,
    }, {
        'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the video embedded in the article and delegate to its extractor.

        Candidates are tried in a fixed order and the first hit wins:
        a ViceIE embed iframe in the page, an Ooyala embed code in the
        article body, a YouTube URL in the article body, and finally the
        data-video-url attribute of the page data's embed code.
        """
        display_id = self._match_id(url)

        def _delegate(target_url, extractor_key):
            # Every outcome has the same shape; only the URL and the
            # extractor it is routed to differ.
            result = {
                '_type': 'url_transparent',
                'url': target_url,
                'display_id': display_id,
                'ie_key': extractor_key,
            }
            return result

        webpage = self._download_webpage(url, display_id)

        app_state_json = self._search_regex(
            r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
            webpage, 'app state')
        page_data = self._parse_json(app_state_json, display_id)['pageData']
        article_body = page_data['body']

        # 1. ViceIE embed iframe anywhere in the downloaded page
        embedded_vice_url = ViceIE._extract_url(webpage)
        if embedded_vice_url:
            return _delegate(embedded_vice_url, ViceIE.ie_key())

        # 2. Ooyala embed code inside the article body
        ooyala_code = self._search_regex(
            r'embedCode=([^&\'"]+)', article_body,
            'ooyala embed code', default=None)
        if ooyala_code:
            return _delegate('ooyala:%s' % ooyala_code, 'Ooyala')

        # 3. YouTube URL inside the article body
        embedded_youtube_url = YoutubeIE._extract_url(article_body)
        if embedded_youtube_url:
            return _delegate(embedded_youtube_url, YoutubeIE.ie_key())

        # 4. Last resort: the video URL attribute of the page's embed code
        fallback_url = self._html_search_regex(
            r'data-video-url="([^"]+)"',
            page_data['embed_code'], 'video URL')
        return _delegate(fallback_url, ViceIE.ie_key())