git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/vice.py
[pornhub] Improve locked videos detection (closes #22449, closes #22780)
[youtube-dl] / youtube_dl / extractor / vice.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import time
6 import hashlib
7 import json
8 import random
9
10 from .adobepass import AdobePassIE
11 from .youtube import YoutubeIE
12 from .common import InfoExtractor
13 from ..compat import (
14     compat_HTTPError,
15     compat_str,
16 )
17 from ..utils import (
18     ExtractorError,
19     int_or_none,
20     parse_age_limit,
21     str_or_none,
22     try_get,
23 )
24
25
class ViceIE(AdobePassIE):
    """Extractor for single Vice / Viceland videos.

    Reads the video metadata embedded in the Vice embed page, requests a
    signed "preplay" document from vms.vice.com and returns a
    ``url_transparent`` result that delegates the actual media extraction to
    the ``UplynkPreplay`` extractor.
    """
    IE_NAME = 'vice'
    _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
        'info_dict': {
            'id': '5e647f0125e145c9aef2069412c0cbde',
            'ext': 'mp4',
            'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
            'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1489664942,
            'upload_date': '20170316',
            'age_limit': 14,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        # geo restricted to US
        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
        'info_dict': {
            'id': '930c0ad1f47141cc955087eecaddb0e2',
            'ext': 'mp4',
            'uploader': 'waypoint',
            'title': 'The Signal From Tölva',
            'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
            'uploader_id': '57f7d621e05ca860fa9ccaf9',
            'timestamp': 1477941983,
            'upload_date': '20161031',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
        'info_dict': {
            'id': '581b12b60a0e1f4c0fb6ea2f',
            'ext': 'mp4',
            'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
            'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
            'uploader': 'VICE',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1485368119,
            'upload_date': '20170125',
            'age_limit': 14,
        },
        'params': {
            # AES-encrypted m3u8
            'skip_download': True,
            'proxy': '127.0.0.1:8118',
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
        'only_matching': True,
    }, {
        'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
        'only_matching': True,
    }, {
        'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
        'only_matching': True,
    }, {
        'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every video.vice.com embed iframe URL found in *webpage*."""
        return re.findall(
            r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)',
            webpage)

    @staticmethod
    def _extract_url(webpage):
        """Return the first embed iframe URL in *webpage*, or None if there is none."""
        urls = ViceIE._extract_urls(webpage)
        return urls[0] if urls else None

    @staticmethod
    def _preplay_error_message(http_error):
        """Return the error text carried by a failed preplay response, or None.

        The body is expected to be a JSON object with an 'error_description'
        or 'details' entry. None is returned when the body cannot be decoded
        or parsed, is not an object, or has neither entry.

        This lives in its own function on purpose: the parse failure is then
        handled in a separate frame, so the bare ``raise`` in the caller still
        re-raises the original ExtractorError (on Python 2 an exception handled
        in the same frame would replace it).
        """
        try:
            error = json.loads(http_error.read().decode())
        except ValueError:
            # Covers both invalid JSON and undecodable bytes.
            return None
        if not isinstance(error, dict):
            return None
        return error.get('error_description') or error.get('details')

    def _real_extract(self, url):
        """Build the url_transparent result for the video at *url*.

        Raises ExtractorError (expected=True) with the server supplied message
        when the preplay request is rejected with HTTP 400/401 and the
        response explains why; any other failure propagates unchanged.
        """
        locale, video_id = re.match(self._VALID_URL, url).groups()

        # Metadata is always read from the embed page, whichever URL form was given.
        webpage = self._download_webpage(
            'https://video.vice.com/%s/embed/%s' % (locale, video_id),
            video_id)

        video = self._parse_json(
            self._search_regex(
                r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
                'app state'), video_id)['video']
        # Prefer the ids reported by the page over the one taken from the URL.
        video_id = video.get('vms_id') or video.get('id') or video_id
        title = video['title']
        is_locked = video.get('locked')
        rating = video.get('rating')
        thumbnail = video.get('thumbnail_url')
        duration = int_or_none(video.get('duration'))
        series = try_get(
            video, lambda x: x['episode']['season']['show']['title'],
            compat_str)
        episode_number = try_get(
            video, lambda x: x['episode']['episode_number'])
        season_number = try_get(
            video, lambda x: x['episode']['season']['season_number'])

        query = {}
        if is_locked:
            # Locked videos need a TV provider (Adobe Pass) token.
            resource = self._get_mvpd_resource(
                'VICELAND', title, video_id, rating)
            query['tvetoken'] = self._extract_mvpd_auth(
                url, video_id, 'VICELAND', resource)

        # signature generation algorithm is reverse engineered from signatureGenerator in
        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
        # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
        # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
        exp = int(time.time()) + 1440

        query.update({
            'exp': exp,
            'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
            '_ad_blocked': None,
            '_ad_unit': '',
            '_debug': '',
            'platform': 'desktop',
            'rn': random.randint(10000, 100000),
            'fbprebidtoken': '',
        })

        try:
            preplay = self._download_json(
                'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
                video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
                error_message = self._preplay_error_message(e.cause)
                if error_message:
                    raise ExtractorError('%s said: %s' % (
                        self.IE_NAME, error_message), expected=True)
            # No usable explanation from the server: surface the original error.
            raise

        video_data = preplay['video']
        base = video_data['base']
        uplynk_preplay_url = preplay['preplayURL']
        # `or {}` rather than a .get() default: the key may be present with an
        # explicit null value, which a default would not replace.
        episode = video_data.get('episode') or {}
        channel = video_data.get('channel') or {}
        channel_base = channel.get('base') or {}

        subtitles = {}
        cc_url = preplay.get('ccURL')
        if cc_url:
            subtitles['en'] = [{
                'url': cc_url,
            }]

        return {
            '_type': 'url_transparent',
            'url': uplynk_preplay_url,
            'id': video_id,
            'title': title,
            'description': base.get('body') or base.get('display_body'),
            'thumbnail': thumbnail,
            'duration': int_or_none(video_data.get('video_duration')) or duration,
            'timestamp': int_or_none(video_data.get('created_at'), 1000),
            'age_limit': parse_age_limit(video_data.get('video_rating')),
            'series': video_data.get('show_title') or series,
            'episode_number': int_or_none(episode.get('episode_number') or episode_number),
            'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
            'season_number': int_or_none(season_number),
            'season_id': str_or_none(episode.get('season_id')),
            'uploader': channel_base.get('title') or channel.get('name') or None,
            'uploader_id': str_or_none(channel.get('id')),
            'subtitles': subtitles,
            'ie_key': 'UplynkPreplay',
        }
204
205
class ViceShowIE(InfoExtractor):
    """Extractor for Vice show pages; returns the show's videos as a playlist."""
    IE_NAME = 'vice:show'
    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'

    _TEST = {
        'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
        'info_dict': {
            'id': 'fuck-thats-delicious-2',
            'title': "Fuck, That's Delicious",
            'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
        },
        'playlist_count': 17,
    }

    def _real_extract(self, url):
        """Return a playlist of the ViceIE video links found on the show page."""
        show_id = self._match_id(url)
        webpage = self._download_webpage(url, show_id)

        # ViceIE._VALID_URL brings its own capturing groups (locale, id), so the
        # combined pattern has more than two groups and re.findall() would
        # yield tuples that cannot be unpacked into a fixed pair. Iterate over
        # match objects and take group 1 (the whole href) explicitly instead.
        entries = [
            self.url_result(mobj.group(1), ViceIE.ie_key())
            for mobj in re.finditer(
                r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
                % ViceIE._VALID_URL, webpage)]

        title = self._search_regex(
            r'<title>(.+?)</title>', webpage, 'title', default=None)
        if title:
            # Drop the trailing " | <site name>" part of the page title.
            title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
        description = self._html_search_meta(
            'description', webpage, 'description')

        return self.playlist_result(entries, show_id, title, description)
238
239
class ViceArticleIE(InfoExtractor):
    """Extractor for www.vice.com articles that embed a video.

    The article itself is never downloaded as media; the embedded video is
    located and extraction is delegated to the matching extractor (Vice,
    Ooyala or Youtube) through a ``url_transparent`` result.
    """
    IE_NAME = 'vice:article'
    _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'

    _TESTS = [{
        'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
        'info_dict': {
            'id': '41eae2a47b174a1398357cec55f1f6fc',
            'ext': 'mp4',
            'title': 'Mormon War on Porn ',
            'description': 'md5:6394a8398506581d0346b9ab89093fef',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1491883129,
            'upload_date': '20170411',
            'age_limit': 17,
        },
        'params': {
            # AES-encrypted m3u8
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
        'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
        'info_dict': {
            'id': '3jstaBeXgAs',
            'ext': 'mp4',
            'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
            'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
            'uploader': 'Motherboard',
            'uploader_id': 'MotherboardTV',
            'upload_date': '20140529',
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
        'info_dict': {
            'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
            'ext': 'mp4',
            'title': "Making The World's First Male Sex Doll",
            'description': 'md5:916078ef0e032d76343116208b6cc2c4',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1476919911,
            'upload_date': '20161019',
            'age_limit': 17,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [ViceIE.ie_key()],
    }, {
        'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
        'only_matching': True,
    }, {
        'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the video embedded in the article at *url* and delegate to it.

        Sources are tried in this order: a video.vice.com iframe in the page,
        an Ooyala embed code in the article body, a Youtube embed in the
        article body, and finally the ``data-video-url`` attribute inside the
        page data's ``embed_code`` markup.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        app_state = self._parse_json(
            self._search_regex(
                r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
                webpage, 'app state'),
            display_id)
        page_data = app_state['pageData']
        article_body = page_data['body']

        def delegate(target_url, target_ie):
            # url_transparent: the target extractor supplies the metadata,
            # while the article slug is kept as display_id.
            return {
                'display_id': display_id,
                'ie_key': target_ie,
                'url': target_url,
                '_type': 'url_transparent',
            }

        embedded_vice_url = ViceIE._extract_url(webpage)
        if embedded_vice_url:
            return delegate(embedded_vice_url, ViceIE.ie_key())

        ooyala_code = self._search_regex(
            r'embedCode=([^&\'"]+)', article_body,
            'ooyala embed code', default=None)
        if ooyala_code:
            return delegate('ooyala:%s' % ooyala_code, 'Ooyala')

        embedded_youtube_url = YoutubeIE._extract_url(article_body)
        if embedded_youtube_url:
            return delegate(embedded_youtube_url, YoutubeIE.ie_key())

        # Last resort; the lookup is fatal when the attribute is absent.
        return delegate(
            self._html_search_regex(
                r'data-video-url="([^"]+)"',
                page_data['embed_code'], 'video URL'),
            ViceIE.ie_key())