[turner,nba,cnn,adultswim] add base extractor to parse cvp feeds
[youtube-dl] / youtube_dl / extractor / adultswim.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .turner import TurnerBaseIE
7 from ..utils import ExtractorError
8
9
class AdultSwimIE(TurnerBaseIE):
    """Extractor for adultswim.com video pages.

    Handles both regular episode URLs (``/videos/<show>/<episode>``) and
    playlist URLs (``/videos/playlists/<playlist>/<episode>``).  The video
    metadata is read from the ``bootstrappedData`` JavaScript object embedded
    in the page; the formats of every segment are then resolved through
    ``_extract_cvp_info`` (not defined in this class; presumably provided by
    ``TurnerBaseIE``).
    """
    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'

    _TESTS = [{
        'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
        'playlist': [
            {
                'md5': '247572debc75c7652f253c8daa51a14d',
                'info_dict': {
                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
                    'ext': 'flv',
                    'title': 'Rick and Morty - Pilot Part 1',
                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
                },
            },
            {
                'md5': '77b0e037a4b20ec6b98671c4c379f48d',
                'info_dict': {
                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
                    'ext': 'flv',
                    'title': 'Rick and Morty - Pilot Part 4',
                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
                },
            },
        ],
        'info_dict': {
            'id': 'rQxZvXQ4ROaSOqq-or2Mow',
            'title': 'Rick and Morty - Pilot',
            'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
        },
        'skip': 'This video is only available for registered users',
    }, {
        'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
        'playlist': [
            {
                'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
                'info_dict': {
                    'id': '-t8CamQlQ2aYZ49ItZCFog-0',
                    'ext': 'flv',
                    'title': 'American Dad - Putting Francine Out of Business',
                    'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
                },
            }
        ],
        'info_dict': {
            'id': '-t8CamQlQ2aYZ49ItZCFog',
            'title': 'American Dad - Putting Francine Out of Business',
            'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
        },
    }, {
        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
        'playlist': [
            {
                'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
                'info_dict': {
                    'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
                    'ext': 'mp4',
                    'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
                    'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
                },
            }
        ],
        'info_dict': {
            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # heroMetadata.trailer
        'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
        'info_dict': {
            'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
            'ext': 'mp4',
            'title': 'Decker - Inside Decker: A New Hero',
            'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
            'duration': 249.008,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }]

    @staticmethod
    def find_video_info(collection, slug):
        """Return the video dict in `collection` whose 'slug' equals `slug`.

        Returns None when `collection` is empty/None, has no 'videos' list,
        or contains no video with a matching slug.
        """
        if not collection:
            return None
        # `or []` guards against a missing or null 'videos' entry.
        for video in collection.get('videos') or []:
            if video.get('slug') == slug:
                return video
        return None

    @staticmethod
    def find_collection_by_linkURL(collections, linkURL):
        """Return the first collection whose 'linkURL' equals `linkURL`.

        Returns None when `collections` is empty/None or nothing matches.
        """
        for collection in collections or []:
            if collection.get('linkURL') == linkURL:
                return collection
        return None

    @staticmethod
    def find_collection_containing_video(collections, slug):
        """Locate the video with the given `slug` in any of `collections`.

        Returns a ``(collection, video)`` tuple for the first match, or
        ``(None, None)`` when no collection contains such a video.
        """
        for collection in collections or []:
            # `or []` guards against a missing or null 'videos' entry.
            for video in collection.get('videos') or []:
                if video.get('slug') == slug:
                    return collection, video
        return None, None

    def _real_extract(self, url):
        """Extract the episode at `url` as a playlist of its segments.

        Returns a playlist info dict with one entry per video segment.

        Raises ExtractorError when the video metadata cannot be located in
        the page, or when no playable stream/clip id is available (flagged
        as an expected error when the video requires provider
        authentication).
        """
        mobj = re.match(self._VALID_URL, url)
        show_path = mobj.group('show_path')
        episode_path = mobj.group('episode_path')
        is_playlist = bool(mobj.group('is_playlist'))

        webpage = self._download_webpage(url, episode_path)

        # Extract the value of `bootstrappedData` from the Javascript in the page.
        bootstrapped_data = self._parse_json(self._search_regex(
            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)

        # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
        # NOTE: We are only downloading one video (the current one) not the playlist
        if is_playlist:
            collections = bootstrapped_data['playlists']['collections']
            collection = self.find_collection_by_linkURL(collections, show_path)
            video_info = self.find_video_info(collection, episode_path)
            # Fail with a clear extractor error rather than crashing on None
            # when the playlist or the episode is not present in the page data.
            if not video_info:
                raise ExtractorError('Unable to find video info')

            show_title = video_info['showTitle']
            segment_ids = [video_info['videoPlaybackID']]
        else:
            show = bootstrapped_data['show']
            _, video_info = self.find_collection_containing_video(
                show['collections'], episode_path)
            # Video wasn't found in the collections, let's try `slugged_video`.
            if not video_info:
                slugged_video = bootstrapped_data.get('slugged_video') or {}
                if slugged_video.get('slug') == episode_path:
                    video_info = slugged_video
            # Last resort: the trailer attached to the hero metadata.  Every
            # level may be missing or null, so default each one to a dict.
            if not video_info:
                hero_metadata = bootstrapped_data.get('heroMetadata') or {}
                trailer = hero_metadata.get('trailer') or {}
                video_info = trailer.get('video')
            if not video_info:
                raise ExtractorError('Unable to find video info')

            show_title = show['title']
            stream = video_info.get('stream')
            if stream and stream.get('videoPlaybackID'):
                segment_ids = [stream['videoPlaybackID']]
            elif video_info.get('clips'):
                segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
            elif video_info.get('videoPlaybackID'):
                segment_ids = [video_info['videoPlaybackID']]
            elif video_info.get('auth') is True:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            else:
                raise ExtractorError('Unable to find stream or clips')

        episode_id = video_info['id']
        episode_title = video_info['title']
        # Description and duration are optional metadata; do not fail without them.
        episode_description = video_info.get('description')
        episode_duration = video_info.get('duration')
        full_title = '%s - %s' % (show_title, episode_title)
        multiple_segments = len(segment_ids) > 1

        entries = []
        for part_num, segment_id in enumerate(segment_ids):
            segment_info = self._extract_cvp_info(
                'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
                segment_id, {
                    'secure': {
                        'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
                        'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
                    },
                })
            segment_title = full_title
            # Only number the parts when the episode is split into several segments.
            if multiple_segments:
                segment_title += ' Part %d' % (part_num + 1)
            segment_info.update({
                'id': segment_id,
                'title': segment_title,
                'description': episode_description,
            })
            entries.append(segment_info)

        return {
            '_type': 'playlist',
            'id': episode_id,
            'display_id': episode_path,
            'entries': entries,
            'title': full_title,
            'description': episode_description,
            'duration': episode_duration
        }