[twitch:past_broadcasts] Fix IE_NAME
[youtube-dl] / youtube_dl / extractor / twitch.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5 import re
6
7 from .common import InfoExtractor
8 from ..compat import (
9     compat_urllib_parse,
10     compat_urllib_request,
11 )
12 from ..utils import (
13     ExtractorError,
14     parse_iso8601,
15 )
16
17
class TwitchBaseIE(InfoExtractor):
    """Base class for the Twitch extractors: API error handling and login."""

    # Scheme is optional-TLS so that both http:// and https:// page URLs match.
    _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'

    _API_BASE = 'https://api.twitch.tv'
    _LOGIN_URL = 'https://secure.twitch.tv/user/login'

    def _handle_error(self, response):
        """Raise ExtractorError when a decoded API response reports an error.

        Anything that is not a dict is ignored. A dict with a truthy
        'error' entry is turned into an expected ExtractorError whose
        message also includes the response's 'message' entry.
        """
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
                expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
        """Download JSON through the parent class and check it for API errors.

        Returns the parent's result unchanged once _handle_error accepted it.
        """
        response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
        self._handle_error(response)
        return response

    def _real_initialize(self):
        self._login()

    @staticmethod
    def _encode_form_values(form):
        """Return a copy of form whose text values are UTF-8 encoded bytes.

        Values that already are bytes, or that have no encode() method
        (e.g. None), are passed through untouched.
        """
        def to_bytes(value):
            if isinstance(value, bytes) or not hasattr(value, 'encode'):
                return value
            return value.encode('utf-8')

        # dict() over a generator rather than a dict comprehension, to stay
        # compatible with old interpreters.
        return dict((key, to_bytes(value)) for key, value in form.items())

    def _login(self):
        """Log in with the configured credentials; do nothing without a username.

        Raises an expected ExtractorError when the page returned by the login
        request contains a login_error_message element.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        authenticity_token = self._search_regex(
            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
            login_page, 'authenticity token')

        login_form = {
            'utf8': '✓'.encode('utf-8'),
            'authenticity_token': authenticity_token,
            'redirect_on_login': '',
            'embed_form': 'false',
            'mp_source_action': '',
            'follow': '',
            'user[login]': username,
            'user[password]': password,
        }

        # urlencode() on Python 2 calls str() on every value, which raises
        # UnicodeEncodeError for non-ASCII text; handing it bytes avoids
        # that and is accepted by Python 3's urlencode() as well.
        request = compat_urllib_request.Request(
            self._LOGIN_URL,
            compat_urllib_parse.urlencode(
                self._encode_form_values(login_form)).encode('utf-8'))
        request.add_header('Referer', self._LOGIN_URL)
        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        m = re.search(
            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
        if m:
            raise ExtractorError(
                'Unable to login: %s' % m.group('msg').strip(), expected=True)
75
76
class TwitchItemBaseIE(TwitchBaseIE):
    """Shared extraction logic for single Twitch items.

    Subclasses supply ``_ITEM_TYPE`` (label interpolated into progress
    notes) and ``_ITEM_SHORTCUT`` (prefix placed before the item id in the
    API URLs built here).
    """

    def _download_info(self, item, item_id):
        """Download the kraken metadata for an item and map it with _extract_info."""
        return self._extract_info(self._download_json(
            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s info JSON' % self._ITEM_TYPE))

    def _extract_media(self, item_id):
        """Build a playlist with one entry per fragment of the item.

        The 'chunks' mapping of the playlist JSON is keyed by quality name;
        each value is a sequence of fragments exposing a 'url'. Every entry
        is a copy of the item metadata with a numbered id and title and the
        formats of that fragment.
        """
        info = self._download_info(self._ITEM_SHORTCUT, item_id)
        response = self._download_json(
            '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id,
            'Downloading %s playlist JSON' % self._ITEM_TYPE)
        entries = []
        chunks = response['chunks']
        # keys() and values() of an unmodified dict iterate in the same
        # order, so qualities[i] names the i-th element of each zipped tuple.
        qualities = list(chunks.keys())
        # zip() groups the n-th fragment of every quality and stops at the
        # shortest quality list.
        for num, fragment in enumerate(zip(*chunks.values()), start=1):
            formats = []
            for fmt_num, fragment_fmt in enumerate(fragment):
                format_id = qualities[fmt_num]
                fmt = {
                    'url': fragment_fmt['url'],
                    'format_id': format_id,
                    'quality': 1 if format_id == 'live' else 0,
                }
                # Quality names such as "720p" carry the frame height.
                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
                if m:
                    fmt['height'] = int(m.group('height'))
                formats.append(fmt)
            self._sort_formats(formats)
            entry = dict(info)
            entry['id'] = '%s_%d' % (entry['id'], num)
            entry['title'] = '%s part %d' % (entry['title'], num)
            entry['formats'] = formats
            entries.append(entry)
        return self.playlist_result(entries, info['id'], info['title'])

    def _extract_info(self, info):
        """Map a kraken video object to an info dict.

        '_id' and 'title' are required and raise KeyError when absent. All
        other fields are optional: a missing key yields None instead of
        aborting the extraction.
        """
        channel = info.get('channel') or {}
        recorded_at = info.get('recorded_at')
        return {
            'id': info['_id'],
            'title': info['title'],
            'description': info.get('description'),
            'duration': info.get('length'),
            'thumbnail': info.get('preview'),
            'uploader': channel.get('display_name'),
            'uploader_id': channel.get('name'),
            # Only parse when a value is present, so a missing date never
            # reaches the parser.
            'timestamp': parse_iso8601(recorded_at) if recorded_at else None,
            'view_count': info.get('views'),
        }

    def _real_extract(self, url):
        return self._extract_media(self._match_id(url))
127
128
class TwitchVideoIE(TwitchItemBaseIE):
    """Extractor for Twitch page URLs of the form <channel>/b/<id>."""

    IE_NAME = 'twitch:video'

    # Read by the TwitchItemBaseIE helpers: the prefix put before the id in
    # API URLs and the label used in progress notes.
    _ITEM_SHORTCUT = 'a'
    _ITEM_TYPE = 'video'

    _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % (TwitchBaseIE._VALID_URL_BASE,)

    _TEST = {
        'url': 'http://www.twitch.tv/riotgames/b/577357806',
        'playlist_mincount': 12,
        'info_dict': {
            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
            'id': 'a577357806',
        },
    }
143
144
class TwitchChapterIE(TwitchItemBaseIE):
    """Extractor for Twitch page URLs of the form <channel>/c/<id>."""

    IE_NAME = 'twitch:chapter'

    # Read by the TwitchItemBaseIE helpers: the prefix put before the id in
    # API URLs and the label used in progress notes.
    _ITEM_SHORTCUT = 'c'
    _ITEM_TYPE = 'chapter'

    _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % (TwitchBaseIE._VALID_URL_BASE,)

    _TEST = {
        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
        'playlist_mincount': 3,
        'info_dict': {
            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
            'id': 'c5285812',
        },
    }
159
160
class TwitchVodIE(TwitchItemBaseIE):
    """Extractor for Twitch page URLs of the form <channel>/v/<id>.

    Unlike the fragment-based item extractors, the formats come from a
    single m3u8 playlist requested with an access token.
    """

    IE_NAME = 'twitch:vod'
    _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
    _ITEM_TYPE = 'vod'
    _ITEM_SHORTCUT = 'v'

    _TEST = {
        'url': 'http://www.twitch.tv/ksptv/v/3622000',
        'info_dict': {
            'id': 'v3622000',
            'ext': 'mp4',
            'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 6951,
            'timestamp': 1419028564,
            'upload_date': '20141219',
            'uploader': 'KSPTV',
            'uploader_id': 'ksptv',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the item metadata with 'formats' taken from the m3u8 playlist.

        Downloads the item info, then an access token, and passes the token
        and its signature as query parameters of the playlist URL.
        """
        item_id = self._match_id(url)
        info = self._download_info(self._ITEM_SHORTCUT, item_id)
        access_token = self._download_json(
            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
            'Downloading %s access token' % self._ITEM_TYPE)
        # The token and signature come from the API response and may hold
        # characters that are reserved in a query string, so they are
        # percent-encoded instead of being pasted in verbatim. They are
        # encoded to UTF-8 bytes first because urlencode() on Python 2
        # cannot handle non-ASCII text. A list of pairs keeps the
        # parameter order fixed.
        query = compat_urllib_parse.urlencode([
            ('nauth', access_token['token'].encode('utf-8')),
            ('nauthsig', access_token['sig'].encode('utf-8')),
        ])
        formats = self._extract_m3u8_formats(
            'http://usher.twitch.tv/vod/%s?%s' % (item_id, query),
            item_id, 'mp4')
        info['formats'] = formats
        return info
199
200
class TwitchPlaylistBaseIE(TwitchBaseIE):
    """Shared logic for extractors that list the videos of a channel.

    Subclasses supply ``_PLAYLIST_TYPE``, the label interpolated into the
    per-page progress note.
    """

    _PAGE_LIMIT = 100
    # Template taking (channel id, offset, limit).
    _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE

    def _real_extract(self, url):
        return self._extract_playlist(self._match_id(url))

    def _extract_playlist(self, channel_id):
        """Collect url results for every video of a channel, page by page.

        Paging stops at the first page whose 'videos' list is empty. The
        playlist title is the channel's 'display_name', or its 'name' when
        the former is missing or empty.
        """
        channel = self._download_json(
            '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
            channel_id, 'Downloading channel info JSON')
        title = channel.get('display_name') or channel.get('name')

        page_size = self._PAGE_LIMIT
        results = []
        for page_num in itertools.count(1):
            # Page numbers start at 1 while offsets start at 0.
            page_offset = (page_num - 1) * page_size
            page = self._download_json(
                self._PLAYLIST_URL % (channel_id, page_offset, page_size),
                channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, page_num))
            page_videos = page['videos']
            if not page_videos:
                break
            for video in page_videos:
                results.append(self.url_result(video['url']))
        return self.playlist_result(results, channel_id, title)
226
227
class TwitchProfileIE(TwitchPlaylistBaseIE):
    """Extractor for Twitch page URLs of the form <channel>/profile."""

    IE_NAME = 'twitch:profile'

    # Label interpolated into the per-page progress note of the base class.
    _PLAYLIST_TYPE = 'profile'

    # A trailing slash and a "#fragment" are tolerated after "profile".
    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % (TwitchBaseIE._VALID_URL_BASE,)

    _TEST = {
        'url': 'http://www.twitch.tv/vanillatv/profile',
        'playlist_mincount': 412,
        'info_dict': {
            'title': 'VanillaTV',
            'id': 'vanillatv',
        },
    }
241
242
class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
    """Extractor for Twitch page URLs of the form <channel>/profile/past_broadcasts."""

    IE_NAME = 'twitch:past_broadcasts'

    # Label interpolated into the per-page progress note of the base class.
    _PLAYLIST_TYPE = 'past broadcasts'

    # Same listing endpoint as the base class, with the broadcasts flag added.
    _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true'

    # A trailing slash and a "#fragment" are tolerated after "past_broadcasts".
    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % (TwitchBaseIE._VALID_URL_BASE,)

    _TEST = {
        'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
        'playlist_mincount': 54,
        'info_dict': {
            'title': 'Spamfish',
            'id': 'spamfish',
        },
    }