[adultswim] Add new extractor
[youtube-dl] / youtube_dl / extractor / adultswim.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7
class AdultSwimIE(InfoExtractor):
    """Information extractor for episode pages on video.adultswim.com.

    The episode page yields an episode id; the episode index XML fetched for
    that id lists the episode's segments, and every segment is returned as
    one entry of a playlist.
    """

    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
    _TEST = {
        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
        'md5': '4a90c63a07537ec9383175b330dfeab4',
        'info_dict': {
            'id': '8a250ba1450996e901453d7e9caf02f3',
            'title': 'Rick and Morty Close Rick-Counters of the Rick Kind',
            'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
        }
    }

    _available_formats = ['150', '640', '3500']

    # File extension looked up by the ``bitrate`` attribute of a <file>
    # element. The attribute is not always numeric ('ipad', 'iphone').
    _video_extensions = {
        '3500': 'flv',
        '640': 'mp4',
        '150': 'mp4',
        'ipad': 'm3u8',
        'iphone': 'm3u8'
    }
    # (width, height) looked up by the same ``bitrate`` attribute; bitrates
    # without an entry here get no dimensions.
    _video_dimensions = {
        '3500': (1280, 720),
        '640': (480, 270),
        '150': (320, 180)
    }

    def _build_segment_formats(self, playlist_doc):
        """Build the format list from the <file> elements of a segment playlist.

        ``playlist_doc`` is the parsed XML document of one segment. Returns a
        list of format dicts (unsorted); <file> elements without a URL are
        skipped.
        """
        formats = []
        for file_el in playlist_doc.findall('.//files/file'):
            file_url = (file_el.text or '').strip()
            if not file_url:
                # An empty <file> element offers nothing to download.
                continue

            bitrate = file_el.attrib.get('bitrate')
            file_type = file_el.attrib.get('type')
            width, height = self._video_dimensions.get(bitrate, (None, None))
            formats.append({
                'format_id': '%s-%s' % (bitrate, file_type),
                'url': file_url,
                'ext': self._video_extensions.get(bitrate, 'mp4'),
                # tbr has to be a number so formats compare numerically; the
                # attribute may instead be a label such as 'iphone'.
                'tbr': int(bitrate) if bitrate and bitrate.isdigit() else None,
                'height': height,
                'width': width
            })
        return formats

    def _real_extract(self, url):
        """Return a playlist info dict with one entry per episode segment.

        ``url`` is matched against ``_VALID_URL``; its ``path`` group is used
        as the display id of the playlist.
        """
        mobj = re.match(self._VALID_URL, url)
        video_path = mobj.group('path')

        webpage = self._download_webpage(url, video_path)
        episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer\.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id')
        title = self._html_search_regex(r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>', webpage, 'title')

        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
        index_doc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')

        episode_el = index_doc.find('.//episode')
        show_title = episode_el.attrib.get('collectionTitle')
        episode_title = episode_el.attrib.get('title')
        episode_thumbnail = episode_el.attrib.get('thumbnailUrl')

        # The description element may be absent or empty; treat both as
        # "no description" instead of failing the whole extraction.
        description_el = episode_el.find('./description')
        description = None
        if description_el is not None and description_el.text:
            description = description_el.text.strip()

        entries = []
        for part_num, segment_el in enumerate(episode_el.findall('./segments/segment')):
            segment_id = segment_el.attrib.get('id')
            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
            # Kept separate from episode_thumbnail so that the playlist keeps
            # the episode's own thumbnail; falls back to it when the segment
            # has none.
            segment_thumbnail = segment_el.attrib.get('thumbnailUrl', episode_thumbnail)

            # The attribute is text; expose the duration as a number, or
            # None when it is missing or not parseable.
            try:
                duration = float(segment_el.attrib.get('duration'))
            except (TypeError, ValueError):
                duration = None

            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
            segment_doc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information')

            formats = self._build_segment_formats(segment_doc)
            self._sort_formats(formats)

            entries.append({
                'id': segment_id,
                'title': segment_title,
                'formats': formats,
                'uploader': show_title,
                'thumbnail': segment_thumbnail,
                'duration': duration,
                'description': description
            })

        return {
            '_type': 'playlist',
            'id': episode_id,
            'display_id': video_path,
            'entries': entries,
            'title': '%s %s' % (show_title, episode_title),
            'description': description,
            'thumbnail': episode_thumbnail
        }