2 from __future__ import unicode_literals
9 from .common import InfoExtractor
10 from ..compat import (
15 compat_urllib_parse_urlencode,
16 compat_urllib_parse_urlparse,
# Common base for the Twitch extractors in this file: endpoint constants, an
# API helper that injects the Client-ID header, the login flow, and a helper
# that favours the 'Source' format.
# NOTE(review): this listing is sparse -- the embedded line numbers skip, so
# some statements (guard bodies, `try:` lines, closing brackets) are not
# visible. The comments below describe only what is shown.
35 class TwitchBaseIE(InfoExtractor):
# URL prefix regex: http or https, an optional www./go./m. subdomain, then
# twitch.tv. Subclasses interpolate it into their own _VALID_URL patterns.
36 _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
# Base URL that _call_api prepends to every API path.
38 _API_BASE = 'https://api.twitch.tv'
# Base URL used when building m3u8 playlist URLs.
39 _USHER_BASE = 'https://usher.ttvnw.net'
# Page fetched first during login.
40 _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
# Fallback form target when the login page has no <form action=...>.
41 _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
# Sent as the 'Client-ID' header on API calls and as 'client_id' in the
# login form.
42 _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
43 _NETRC_MACHINE = 'twitch'
# Inspect an API response. Non-dict responses hit the guard below (its body
# is not visible; presumably an early return -- confirm). For dicts the
# 'error' field is read and an error text of the form
# '<IE_NAME> returned error: <error> - <message>' is built.
# NOTE(review): the statement that raises with this text is not shown.
45 def _handle_error(self, response):
46 if not isinstance(response, dict):
48 error = response.get('error')
51 '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
# Download JSON from '<_API_BASE>/<path>' and run it through _handle_error.
# Extra positional and keyword arguments are forwarded to _download_json.
# The 'Client-ID' header is always set to _CLIENT_ID; if the caller supplied
# a 'headers' dict, that same dict object receives the key.
# NOTE(review): callers use the return value, but the return statement is
# not visible here -- presumably `return response`; confirm.
54 def _call_api(self, path, item_id, *args, **kwargs):
55 kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID
56 response = self._download_json(
57 '%s/%s' % (self._API_BASE, path), item_id,
58 *args, **compat_kwargs(kwargs))
59 self._handle_error(response)
# Login flow.
# NOTE(review): the lines between this def and the statements below are not
# visible; the login logic may sit in a separate helper whose def line is
# missing from this excerpt -- confirm against the full file.
62 def _real_initialize(self):
# Credentials come from _get_login_info().
66 username, password = self._get_login_info()
# Message text for a failed login. This is the tail of a call whose head is
# not visible; presumably the body of the `fail` helper used below.
72 'Unable to login. Twitch said: %s' % message, expected=True)
# One step of the login conversation: collect the page's hidden inputs,
# resolve the form target against the page URL, POST the form as JSON and
# follow the redirect from the response.
# NOTE(review): how `note` and `data` are used is not visible in this
# excerpt (presumably `data` is merged into `form`) -- confirm.
74 def login_step(page, urlh, note, data):
75 form = self._hidden_inputs(page)
78 page_url = urlh.geturl()
# Take the form's action attribute, falling back to _LOGIN_POST_URL, then
# make it absolute relative to the page URL.
79 post_url = self._search_regex(
80 r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
81 'post url', default=self._LOGIN_POST_URL, group='url')
82 post_url = urljoin(page_url, post_url)
# The JSON body is sent with a text/plain content type.
87 'Content-Type': 'text/plain;charset=UTF-8'
91 response = self._download_json(
93 data=json.dumps(form).encode(),
# An HTTP 400 carries a JSON error body: parse it and report its
# 'error_description', or 'error_code' when the description is empty.
95 except ExtractorError as e:
96 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
97 response = self._parse_json(
98 e.cause.read().decode('utf-8'), None)
99 fail(response.get('error_description') or response.get('error_code'))
# Success marker in the response message (branch body not visible).
102 if 'Authenticated successfully' in response.get('message', ''):
# Otherwise follow 'redirect', or 'redirect_path' when 'redirect' is empty;
# a response with neither raises KeyError on 'redirect_path'.
105 redirect_url = urljoin(
107 response.get('redirect') or response['redirect_path'])
108 return self._download_webpage_handle(
109 redirect_url, None, 'Downloading login redirect page',
# Main sequence: fetch the login page first.
112 login_page, handle = self._download_webpage_handle(
113 self._LOGIN_FORM_URL, None, 'Downloading login page')
115 # Some TOR nodes and public proxies are blocked completely
116 if 'blacklist_message' in login_page:
117 fail(clean_html(login_page))
# Submit the credentials together with the client id.
119 redirect_page, handle = login_step(
120 login_page, handle, 'Logging in', {
121 'username': username,
122 'password': password,
123 'client_id': self._CLIENT_ID,
# No redirect page (branch body not visible; presumably returns).
127 if not redirect_page:
# A two-factor form on the redirect page triggers a token prompt and a
# second login_step.
130 if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
131 # TODO: Add mechanism to request an SMS or phone call
132 tfa_token = self._get_tfa_info('two-factor authentication token')
133 login_step(redirect_page, handle, 'Submitting TFA token', {
134 'authy_token': tfa_token,
135 'remember_2fa': 'true',
# Set preference 10 on the first format whose format_id is 'Source', if one
# exists, then sort the formats. A format dict without a 'format_id' key
# raises KeyError here. (The `try:` line is not visible in this excerpt.)
138 def _prefer_source(self, formats):
140 source = next(f for f in formats if f['format_id'] == 'Source')
141 source['preference'] = 10
142 except StopIteration:
143 pass # No Source stream present
144 self._sort_formats(formats)
# Base for extractors of a single Twitch item. Reads _ITEM_TYPE and
# _ITEM_SHORTCUT from the concrete class.
# NOTE(review): this listing is sparse (embedded line numbers skip); list
# initialisations, dict openers and some guards are not visible.
147 class TwitchItemBaseIE(TwitchBaseIE):
# Fetch 'kraken/videos/<item><item_id>' and convert the JSON through
# _extract_info.
148 def _download_info(self, item, item_id):
149 return self._extract_info(self._call_api(
150 'kraken/videos/%s%s' % (item, item_id), item_id,
151 'Downloading %s info JSON' % self._ITEM_TYPE))
# Build a playlist with one entry per fragment of the item, from the
# 'api/videos/...' playlist JSON.
153 def _extract_media(self, item_id):
154 info = self._download_info(self._ITEM_SHORTCUT, item_id)
155 response = self._call_api(
156 'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id,
157 'Downloading %s playlist JSON' % self._ITEM_TYPE)
# 'chunks' is indexed by quality name; zip(*values) groups the n-th element
# of every quality together, so each tuple is one fragment in all qualities.
# zip stops at the shortest quality list. `qualities` comes from the same
# unmodified dict, so its order matches the value order.
159 chunks = response['chunks']
160 qualities = list(chunks.keys())
161 for num, fragment in enumerate(zip(*chunks.values()), start=1):
163 for fmt_num, fragment_fmt in enumerate(fragment):
164 format_id = qualities[fmt_num]
166 'url': fragment_fmt['url'],
167 'format_id': format_id,
# The 'live' quality gets quality 1, every other one 0.
168 'quality': 1 if format_id == 'live' else 0,
# Format ids that start with digits followed by P/p carry a height.
170 m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
172 fmt['height'] = int(m.group('height'))
174 self._sort_formats(formats)
# Entry id and title are suffixed with the 1-based fragment number.
# NOTE(review): how `entry` is created is not visible -- presumably a copy
# of `info`; confirm.
176 entry['id'] = '%s_%d' % (entry['id'], num)
177 entry['title'] = '%s part %d' % (entry['title'], num)
178 entry['formats'] = formats
179 entries.append(entry)
180 return self.playlist_result(entries, info['id'], info['title'])
# Map the video JSON onto info-dict fields.
182 def _extract_info(self, info):
# Branches on 'recording' / 'recorded' status (bodies not visible).
183 status = info.get('status')
184 if status == 'recording':
186 elif status == 'recorded':
# A missing or empty title falls back to 'Untitled Broadcast'.
192 'title': info.get('title') or 'Untitled Broadcast',
193 'description': info.get('description'),
194 'duration': int_or_none(info.get('length')),
195 'thumbnail': info.get('preview'),
# NOTE(review): the {} default only covers a missing 'channel' key; a
# 'channel' that is present but null would raise AttributeError here.
196 'uploader': info.get('channel', {}).get('display_name'),
197 'uploader_id': info.get('channel', {}).get('name'),
198 'timestamp': parse_iso8601(info.get('recorded_at')),
199 'view_count': int_or_none(info.get('views')),
# Entry point: match the id in the URL and extract the item's media.
203 def _real_extract(self, url):
204 return self._extract_media(self._match_id(url))
# Extractor for '<channel>/b/<digits>' URLs; all behaviour is inherited from
# TwitchItemBaseIE.
# NOTE(review): the _ITEM_TYPE / _ITEM_SHORTCUT attributes and the line that
# opens the test data are not visible in this excerpt.
207 class TwitchVideoIE(TwitchItemBaseIE):
208 IE_NAME = 'twitch:video'
# Any single path segment, then '/b/', then the numeric id.
209 _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
# Test-case data; the test is marked skipped with a 404 reason.
214 'url': 'http://www.twitch.tv/riotgames/b/577357806',
217 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
219 'playlist_mincount': 12,
220 'skip': 'HTTP Error 404: Not Found',
# Extractor for '<channel>/c/<digits>' URLs; all behaviour is inherited from
# TwitchItemBaseIE.
# NOTE(review): _ITEM_SHORTCUT and the line that opens the test data are not
# visible in this excerpt.
224 class TwitchChapterIE(TwitchItemBaseIE):
225 IE_NAME = 'twitch:chapter'
# Any single path segment, then '/c/', then the numeric id.
226 _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
# Interpolated into the progress notes of the inherited download helpers.
227 _ITEM_TYPE = 'chapter'
# Test-case data: one playlist test (marked skipped with a 404 reason) and
# one URL-matching-only case.
231 'url': 'http://www.twitch.tv/acracingleague/c/5285812',
234 'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
236 'playlist_mincount': 3,
237 'skip': 'HTTP Error 404: Not Found',
239 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
240 'only_matching': True,
# Extractor for VOD URLs. Overrides _real_extract to build HLS formats from
# an access token instead of the chunk playlist used by the base class.
# NOTE(review): this listing is sparse (embedded line numbers skip); the
# head and tail of the verbose regex, _ITEM_TYPE / _ITEM_SHORTCUT, the line
# that opens the test data and several guards are not visible.
244 class TwitchVodIE(TwitchItemBaseIE):
245 IE_NAME = 'twitch:vod'
# Verbose-mode pattern. Visible alternatives: twitch.tv (optional
# www./go./m.) followed by '<channel>/v', '<channel>/video' or 'videos',
# and player.twitch.tv with a 'video=v...' query parameter.
246 _VALID_URL = r'''(?x)
249 (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
250 player\.twitch\.tv/\?.*?\bvideo=v
# Test-case data: a VOD URL with a '?t=' start offset.
258 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
262 'title': 'LCK Summer Split - Week 6 Day 1',
263 'thumbnail': r're:^https?://.*\.jpg$',
265 'timestamp': 1435131709,
266 'upload_date': '20150624',
267 'uploader': 'Riot Games',
268 'uploader_id': 'riotgames',
274 'skip_download': True,
277 # Untitled broadcast (title is None)
278 'url': 'http://www.twitch.tv/belkao_o/v/11230755',
282 'title': 'Untitled Broadcast',
283 'thumbnail': r're:^https?://.*\.jpg$',
285 'timestamp': 1439746708,
286 'upload_date': '20150816',
287 'uploader': 'BelkAO_o',
288 'uploader_id': 'belkao_o',
293 'skip_download': True,
295 'skip': 'HTTP Error 404: Not Found',
# The remaining cases only check that the URL pattern matches.
297 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
298 'only_matching': True,
300 'url': 'https://www.twitch.tv/videos/6528877',
301 'only_matching': True,
303 'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
304 'only_matching': True,
306 'url': 'https://www.twitch.tv/northernlion/video/291940395',
307 'only_matching': True,
# Fetch metadata and an access token, build the HLS formats, then add the
# optional start time and a chat-replay subtitle URL.
310 def _real_extract(self, url):
311 item_id = self._match_id(url)
313 info = self._download_info(self._ITEM_SHORTCUT, item_id)
# The token response supplies the 'token' and 'sig' values used below.
314 access_token = self._call_api(
315 'api/vods/%s/access_token' % item_id, item_id,
316 'Downloading %s access token' % self._ITEM_TYPE)
# The m3u8 URL is formatted from _USHER_BASE, the item id and the
# url-encoded query (the format string itself is not visible here).
318 formats = self._extract_m3u8_formats(
320 self._USHER_BASE, item_id,
321 compat_urllib_parse_urlencode({
322 'allow_source': 'true',
323 'allow_audio_only': 'true',
324 'allow_spectre': 'true',
325 'player': 'twitchweb',
326 'nauth': access_token['token'],
327 'nauthsig': access_token['sig'],
329 item_id, 'mp4', entry_protocol='m3u8_native')
331 self._prefer_source(formats)
332 info['formats'] = formats
# The 't' query parameter of the input URL becomes start_time.
# NOTE(review): the guard that checks 't' is present is not visible in this
# excerpt -- confirm it exists, otherwise query['t'] raises KeyError.
334 parsed_url = compat_urllib_parse_urlparse(url)
335 query = compat_parse_qs(parsed_url.query)
337 info['start_time'] = parse_duration(query['t'][0])
# With a known timestamp, add a subtitle entry pointing at the
# rechat-messages endpoint, keyed by 'v<item_id>' and the start timestamp.
339 if info.get('timestamp') is not None:
340 info['subtitles'] = {
342 'url': update_url_query(
343 'https://rechat.twitch.tv/rechat-messages', {
344 'video_id': 'v%s' % item_id,
345 'start': info['timestamp'],
# Base for extractors that list a channel's videos page by page. Reads
# _PAGE_LIMIT and _PLAYLIST_TYPE from the class.
# NOTE(review): this listing is sparse (embedded line numbers skip); the
# _PAGE_LIMIT definition, the `entries`/`offset` initialisation, the empty
# page guard, `break`/`continue` statements and a `try:` are not visible.
354 class TwitchPlaylistBaseIE(TwitchBaseIE):
# API path template filled with (channel id, offset, limit). Subclasses
# append further query parameters to it.
355 _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d'
# Fetch the channel info for the playlist title, then page through the
# channel's videos and return them as a playlist of URL results.
358 def _extract_playlist(self, channel_id):
359 info = self._call_api(
360 'kraken/channels/%s' % channel_id,
361 channel_id, 'Downloading channel info JSON')
# Playlist title: display name, falling back to the plain name.
362 channel_name = info.get('display_name') or info.get('name')
365 limit = self._PAGE_LIMIT
366 broken_paging_detected = False
367 counter_override = None
# Unbounded page counter; the loop is left by statements inside the body.
368 for counter in itertools.count(1):
369 response = self._call_api(
370 self._PLAYLIST_PATH % (channel_id, offset, limit),
# The progress note shows counter_override in place of the page number once
# it has been set.
372 'Downloading %s JSON page %s'
373 % (self._PLAYLIST_TYPE, counter_override or counter))
374 page_entries = self._extract_playlist_page(response)
377 total = int_or_none(response.get('_total'))
378 # Since the beginning of March 2016 twitch's paging mechanism
379 # is completely broken on the twitch side. It simply ignores
380 # a limit and returns the whole offset number of videos.
381 # Working around by just requesting all videos at once.
382 # Upd: pagination bug was fixed by twitch on 15.03.2016.
# Paging counts as broken when a page returns more entries than `limit`;
# this is flagged at most once.
383 if not broken_paging_detected and total and len(page_entries) > limit:
385 'Twitch pagination is broken on twitch side, requesting all videos at once',
387 broken_paging_detected = True
389 counter_override = '(all at once)'
391 entries.extend(page_entries)
# `and` binds tighter than `or`: stop when paging was broken, or when a
# total is known and this page alone reaches it.
# NOTE(review): the comparison uses the current page length, not the
# accumulated len(entries) -- confirm this is intended.
392 if broken_paging_detected or total and len(page_entries) >= total:
# orderedSet is applied to the collected URLs before they are wrapped.
395 return self.playlist_result(
396 [self._make_url_result(entry) for entry in orderedSet(entries)],
397 channel_id, channel_name)
# Wrap a video URL as a url_result. When TwitchVodIE can match the URL, the
# result is pinned to that extractor with id 'v<id>'; an AssertionError
# from the match falls back to a plain url_result.
399 def _make_url_result(self, url):
401 video_id = 'v%s' % TwitchVodIE._match_id(url)
402 return self.url_result(url, TwitchVodIE.ie_key(), video_id=video_id)
403 except AssertionError:
404 return self.url_result(url)
# Return the 'url' of every item under the response's 'videos' key, or an
# empty list when that key is missing or empty.
406 def _extract_playlist_page(self, response):
407 videos = response.get('videos')
408 return [video['url'] for video in videos] if videos else []
# Entry point: the id matched from the URL is used as the channel id.
410 def _real_extract(self, url):
411 return self._extract_playlist(self._match_id(url))
# Playlist extractor for '<channel>/profile' URLs; uses the inherited
# _PLAYLIST_PATH unchanged.
# NOTE(review): the line that opens the test data is not visible here.
414 class TwitchProfileIE(TwitchPlaylistBaseIE):
415 IE_NAME = 'twitch:profile'
# Channel segment, '/profile', an optional trailing slash and an optional
# '#fragment', anchored at the end of the URL.
416 _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
# Interpolated into the paging progress note.
417 _PLAYLIST_TYPE = 'profile'
# Test-case data: one playlist test and one URL-matching-only case.
420 'url': 'http://www.twitch.tv/vanillatv/profile',
423 'title': 'VanillaTV',
425 'playlist_mincount': 412,
427 'url': 'http://m.twitch.tv/vanillatv/profile',
428 'only_matching': True,
# Shared base for the '<channel>/videos/...' playlist extractors: provides a
# common URL prefix and a path template ending in an open 'broadcast_type='
# parameter that each subclass completes.
432 class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
# Base URL regex plus '/<channel>/videos'; subclasses append their suffix.
433 _VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
# Parent path template with '&broadcast_type=' appended; the value is left
# empty here.
434 _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
# Playlist extractor for '<channel>/videos/all' URLs.
# NOTE(review): the line that opens the test data is not visible here.
437 class TwitchAllVideosIE(TwitchVideosBaseIE):
438 IE_NAME = 'twitch:videos:all'
439 _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
# Completes the open 'broadcast_type=' parameter with all three types.
440 _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
# Interpolated into the paging progress note.
441 _PLAYLIST_TYPE = 'all videos'
# Test-case data: one playlist test and one URL-matching-only case.
444 'url': 'https://www.twitch.tv/spamfish/videos/all',
449 'playlist_mincount': 869,
451 'url': 'https://m.twitch.tv/spamfish/videos/all',
452 'only_matching': True,
# Playlist extractor for '<channel>/videos/uploads' URLs.
# NOTE(review): the line that opens the test data is not visible here.
456 class TwitchUploadsIE(TwitchVideosBaseIE):
457 IE_NAME = 'twitch:videos:uploads'
458 _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
# Completes the open 'broadcast_type=' parameter with 'upload'.
459 _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
# Interpolated into the paging progress note.
460 _PLAYLIST_TYPE = 'uploads'
# Test-case data: one playlist test and one URL-matching-only case.
463 'url': 'https://www.twitch.tv/spamfish/videos/uploads',
468 'playlist_mincount': 0,
470 'url': 'https://m.twitch.tv/spamfish/videos/uploads',
471 'only_matching': True,
# Playlist extractor for '<channel>/videos/past-broadcasts' URLs.
# NOTE(review): the line that opens the test data is not visible here.
475 class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
476 IE_NAME = 'twitch:videos:past-broadcasts'
477 _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
# Completes the open 'broadcast_type=' parameter with 'archive'.
478 _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
# Interpolated into the paging progress note.
479 _PLAYLIST_TYPE = 'past broadcasts'
# Test-case data: one playlist test and one URL-matching-only case.
482 'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
487 'playlist_mincount': 0,
489 'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts',
490 'only_matching': True,
# Playlist extractor for '<channel>/videos/highlights' URLs.
# NOTE(review): the line that opens the test data is not visible here.
494 class TwitchHighlightsIE(TwitchVideosBaseIE):
495 IE_NAME = 'twitch:videos:highlights'
496 _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
# Completes the open 'broadcast_type=' parameter with 'highlight'.
497 _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
# Interpolated into the paging progress note.
498 _PLAYLIST_TYPE = 'highlights'
# Test-case data: one playlist test and one URL-matching-only case.
501 'url': 'https://www.twitch.tv/spamfish/videos/highlights',
506 'playlist_mincount': 805,
508 'url': 'https://m.twitch.tv/spamfish/videos/highlights',
509 'only_matching': True,
# Extractor for a channel's live stream.
# NOTE(review): this listing is sparse (embedded line numbers skip); the
# head and tail of the verbose regex, the line that opens the test data, a
# decorator, the offline guard, dict openers and list initialisations are
# not visible.
513 class TwitchStreamIE(TwitchBaseIE):
514 IE_NAME = 'twitch:stream'
# Verbose-mode pattern. Visible alternatives: twitch.tv (optional
# www./go./m.) followed by the channel, and player.twitch.tv with a
# 'channel=' query parameter.
515 _VALID_URL = r'''(?x)
518 (?:(?:www|go|m)\.)?twitch\.tv/|
519 player\.twitch\.tv/\?.*?\bchannel=
# Test-case data: one live-stream test, then URL-matching-only cases.
525 'url': 'http://www.twitch.tv/shroomztv',
528 'display_id': 'shroomztv',
530 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
531 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
533 'timestamp': 1421928037,
534 'upload_date': '20150122',
535 'uploader': 'ShroomzTV',
536 'uploader_id': 'shroomztv',
541 'skip_download': True,
544 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
545 'only_matching': True,
547 'url': 'https://player.twitch.tv/?channel=lotsofs',
548 'only_matching': True,
550 'url': 'https://go.twitch.tv/food',
551 'only_matching': True,
553 'url': 'https://m.twitch.tv/food',
554 'only_matching': True,
# Defer to the more specific extractors: when any extractor in the tuple
# matches the URL, the first arm of the conditional applies, otherwise the
# inherited check runs.
# NOTE(review): the decorator (presumably @classmethod), the first arm
# (presumably False) and most of the tuple are not visible -- confirm.
558 def suitable(cls, url):
560 if any(ie.suitable(url) for ie in (
567 TwitchPastBroadcastsIE,
569 else super(TwitchStreamIE, cls).suitable(url))
# Fetch the stream JSON and an access token, build the HLS formats, and
# assemble the result from stream and channel metadata.
571 def _real_extract(self, url):
572 channel_id = self._match_id(url)
# Only the 'stream' member of the response is kept.
574 stream = self._call_api(
575 'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
576 'Downloading stream JSON').get('stream')
# Offline error, marked as expected. The guard that triggers it is not
# visible in this excerpt (presumably an empty `stream`).
579 raise ExtractorError('%s is offline' % channel_id, expected=True)
581 # Channel name may be typed if different case than the original channel name
582 # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
583 # an invalid m3u8 URL. Working around by use of original channel name from stream
584 # JSON and fallback to lowercase if it's not available.
# NOTE(review): the {} default only covers a missing 'channel' key; a
# 'channel' that is present but null would raise AttributeError here.
585 channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
587 access_token = self._call_api(
588 'api/channels/%s/access_token' % channel_id, channel_id,
589 'Downloading channel access token')
# Query parameters for the m3u8 URL: 'p' is a fresh random integer on every
# call, and 'sig'/'token' are UTF-8 encoded before url-encoding.
592 'allow_source': 'true',
593 'allow_audio_only': 'true',
594 'allow_spectre': 'true',
595 'p': random.randint(1000000, 10000000),
596 'player': 'twitchweb',
597 'segment_preference': '4',
598 'sig': access_token['sig'].encode('utf-8'),
599 'token': access_token['token'].encode('utf-8'),
601 formats = self._extract_m3u8_formats(
602 '%s/api/channel/hls/%s.m3u8?%s'
603 % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)),
605 self._prefer_source(formats)
607 view_count = stream.get('viewers')
608 timestamp = parse_iso8601(stream.get('created_at'))
# Unlike the lookup above, this requires 'channel' to be present.
610 channel = stream['channel']
611 title = self._live_title(channel.get('display_name') or channel.get('name'))
612 description = channel.get('status')
# Thumbnail dimensions are parsed from a '<width>x<height>.jpg' suffix.
# NOTE(review): the regex runs on the 'preview' key, not on the URL;
# handling of non-matching keys is not visible -- confirm.
615 for thumbnail_key, thumbnail_url in stream['preview'].items():
616 m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
620 'url': thumbnail_url,
621 'width': int(m.group('width')),
622 'height': int(m.group('height')),
# Result fields: the stream's '_id' as a string id, the normalised channel
# name as display id.
626 'id': compat_str(stream['_id']),
627 'display_id': channel_id,
629 'description': description,
630 'thumbnails': thumbnails,
631 'uploader': channel.get('display_name'),
632 'uploader_id': channel.get('name'),
633 'timestamp': timestamp,
634 'view_count': view_count,
# Extractor for clips.twitch.tv URLs.
# NOTE(review): this listing is sparse (embedded line numbers skip) and the
# final dict is cut off at the end of the visible text; the line that opens
# the test data, list initialisations, `continue` statements and guards are
# not visible.
640 class TwitchClipsIE(TwitchBaseIE):
641 IE_NAME = 'twitch:clips'
# The id is the last path segment; any number of leading segments may come
# before it.
642 _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
# Test-case data: one download test and one URL-matching-only case.
645 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
646 'md5': '761769e1eafce0ffebfb4089cb3847cd',
650 'title': 'EA Play 2016 Live from the Novo Theatre',
651 'thumbnail': r're:^https?://.*\.jpg',
652 'timestamp': 1465767393,
653 'upload_date': '20160612',
655 'uploader': 'stereotype_',
656 'uploader_id': '43566419',
660 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
661 'only_matching': True,
# Build formats from the clip status endpoint, then enrich the result with
# metadata from the kraken clips API.
664 def _real_extract(self, url):
665 video_id = self._match_id(url)
# Fetched with _download_json directly, not through _call_api.
667 status = self._download_json(
668 'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id,
# One format per entry of 'quality_options'. Non-dict entries hit the guard
# below (its body is not visible; presumably skipped).
673 for option in status['quality_options']:
674 if not isinstance(option, dict):
676 source = url_or_none(option.get('source'))
# 'quality' is used both as the format id and, via int_or_none, as height.
681 'format_id': option.get('quality'),
682 'height': int_or_none(option.get('quality')),
683 'fps': int_or_none(option.get('frame_rate')),
686 self._sort_formats(formats)
# Metadata request, made with fatal=False and a v5 Accept header.
# NOTE(review): any guard on a failed (non-dict) result before the
# clip.get(...) calls below is not visible here -- confirm one exists.
692 clip = self._call_api(
693 'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
694 'Accept': 'application/vnd.twitchtv.v5+json',
# Thumbnail preference comes from this ordered tuple of size names.
698 quality_key = qualities(('tiny', 'small', 'medium'))
700 thumbnails_dict = clip.get('thumbnails')
701 if isinstance(thumbnails_dict, dict):
702 for thumbnail_id, thumbnail_url in thumbnails_dict.items():
705 'url': thumbnail_url,
706 'preference': quality_key(thumbnail_id),
# Result fields. Id and title both fall back to the id matched from the URL.
710 'id': clip.get('tracking_id') or video_id,
711 'title': clip.get('title') or video_id,
712 'duration': float_or_none(clip.get('duration')),
# NOTE(review): the other extractors in this file emit this count under
# 'view_count'; the 'views' key looks inconsistent -- confirm.
713 'views': int_or_none(clip.get('views')),
714 'timestamp': unified_timestamp(clip.get('created_at')),
715 'thumbnails': thumbnails,
# The creator is the broadcaster; the uploader fields come from the curator.
# try_get is given compat_str as the expected type.
716 'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
717 'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
718 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),