[twitch] Switch streams to GraphQL and refactor (closes #26535)
[youtube-dl] / youtube_dl / extractor / twitch.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import collections
5 import itertools
6 import json
7 import random
8 import re
9
10 from .common import InfoExtractor
11 from ..compat import (
12     compat_kwargs,
13     compat_parse_qs,
14     compat_str,
15     compat_urlparse,
16     compat_urllib_parse_urlencode,
17     compat_urllib_parse_urlparse,
18 )
19 from ..utils import (
20     clean_html,
21     ExtractorError,
22     float_or_none,
23     int_or_none,
24     parse_duration,
25     parse_iso8601,
26     qualities,
27     try_get,
28     unified_timestamp,
29     update_url_query,
30     url_or_none,
31     urljoin,
32 )
33
34
class TwitchBaseIE(InfoExtractor):
    """Common constants and helpers shared by the Twitch extractors.

    Provides the API call wrapper, the login flow run from
    _real_initialize, and the format ranking helper.
    """

    _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'

    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'https://usher.ttvnw.net'
    _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
    _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
    _NETRC_MACHINE = 'twitch'

    def _handle_error(self, response):
        """Raise an expected ExtractorError if a dict response has a truthy 'error'.

        Non-dict responses and responses without an error are ignored.
        """
        error = response.get('error') if isinstance(response, dict) else None
        if not error:
            return
        raise ExtractorError(
            '%s returned error: %s - %s' % (
                self.IE_NAME, error, response.get('message')),
            expected=True)

    def _call_api(self, path, item_id, *args, **kwargs):
        """Download JSON from `path` under _API_BASE and check it for errors.

        Caller supplied headers are copied and extended with the Accept and
        Client-ID headers; HTTP 400 and 410 are passed as expected statuses
        so that their bodies reach _handle_error.
        """
        request_headers = kwargs.get('headers', {}).copy()
        request_headers['Accept'] = 'application/vnd.twitchtv.v5+json; charset=UTF-8'
        request_headers['Client-ID'] = self._CLIENT_ID
        kwargs['headers'] = request_headers
        kwargs['expected_status'] = (400, 410)

        api_url = '%s/%s' % (self._API_BASE, path)
        response = self._download_json(
            api_url, item_id, *args, **compat_kwargs(kwargs))
        self._handle_error(response)
        return response

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials; do nothing when no username is set."""
        username, password = self._get_login_info()
        if username is None:
            return

        def fail(message):
            raise ExtractorError(
                'Unable to login. Twitch said: %s' % message, expected=True)

        def login_step(page, urlh, note, data):
            # Submit the page's hidden inputs merged with `data` as JSON and
            # return the (page, handle) of the redirect target, or
            # (None, None) once the response reports successful authentication.
            payload = self._hidden_inputs(page)
            payload.update(data)

            page_url = urlh.geturl()
            form_action = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
                'post url', default=self._LOGIN_POST_URL, group='url')
            post_url = urljoin(page_url, form_action)

            headers = {
                'Referer': page_url,
                'Origin': page_url,
                'Content-Type': 'text/plain;charset=UTF-8',
            }

            response = self._download_json(
                post_url, None, note, data=json.dumps(payload).encode(),
                headers=headers, expected_status=400)
            error = response.get('error_description') or response.get('error_code')
            if error:
                fail(error)

            if 'Authenticated successfully' in response.get('message', ''):
                return None, None

            redirect_target = response.get('redirect') or response['redirect_path']
            return self._download_webpage_handle(
                urljoin(post_url, redirect_target), None,
                'Downloading login redirect page', headers=headers)

        login_page, handle = self._download_webpage_handle(
            self._LOGIN_FORM_URL, None, 'Downloading login page')

        # Some TOR nodes and public proxies are blocked completely
        if 'blacklist_message' in login_page:
            fail(clean_html(login_page))

        credentials = {
            'username': username,
            'password': password,
            'client_id': self._CLIENT_ID,
        }
        redirect_page, handle = login_step(
            login_page, handle, 'Logging in', credentials)

        # Successful login
        if not redirect_page:
            return

        # Anything other than the two-factor form needs no further step
        if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is None:
            return

        # TODO: Add mechanism to request an SMS or phone call
        tfa_token = self._get_tfa_info('two-factor authentication token')
        login_step(redirect_page, handle, 'Submitting TFA token', {
            'authy_token': tfa_token,
            'remember_2fa': 'true',
        })

    def _prefer_source(self, formats):
        """Give the source rendition quality 10, then sort the formats in place.

        The first format whose format_id is 'Source' is preferred; when there
        is none, every format with '/chunked/' in its URL is marked instead.
        """
        source = next(
            (fmt for fmt in formats if fmt['format_id'] == 'Source'), None)
        if source is not None:
            source['quality'] = 10
        else:
            for fmt in formats:
                if '/chunked/' not in fmt['url']:
                    continue
                fmt.update({
                    'quality': 10,
                    'format_note': 'Source',
                })
        self._sort_formats(formats)

    def _download_access_token(self, channel_name):
        """Fetch the access token JSON for `channel_name` via _call_api."""
        token_path = 'api/channels/%s/access_token' % channel_name
        return self._call_api(
            token_path, channel_name, 'Downloading access token JSON')

    def _extract_channel_id(self, token, channel_name):
        """Parse the JSON `token` string and return its 'channel_id' as a string."""
        token_data = self._parse_json(token, channel_name)
        return compat_str(token_data['channel_id'])
161
162
class TwitchVodIE(TwitchBaseIE):
    """Extractor for a single Twitch VOD addressed by its numeric id."""

    IE_NAME = 'twitch:vod'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
                            player\.twitch\.tv/\?.*?\bvideo=v?
                        )
                        (?P<id>\d+)
                    '''
    _ITEM_TYPE = 'vod'
    _ITEM_SHORTCUT = 'v'

    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
        'info_dict': {
            'id': 'v6528877',
            'ext': 'mp4',
            'title': 'LCK Summer Split - Week 6 Day 1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 17208,
            'timestamp': 1435131709,
            'upload_date': '20150624',
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'view_count': int,
            'start_time': 310,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Untitled broadcast (title is None)
        'url': 'http://www.twitch.tv/belkao_o/v/11230755',
        'info_dict': {
            'id': 'v11230755',
            'ext': 'mp4',
            'title': 'Untitled Broadcast',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1638,
            'timestamp': 1439746708,
            'upload_date': '20150816',
            'uploader': 'BelkAO_o',
            'uploader_id': 'belkao_o',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'HTTP Error 404: Not Found',
    }, {
        'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/6528877',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/northernlion/video/291940395',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?video=480452374',
        'only_matching': True,
    }]

    def _download_info(self, item_id):
        """Download the kraken video JSON for `item_id` and convert it to an info dict."""
        return self._extract_info(
            self._call_api(
                'kraken/videos/%s' % item_id, item_id,
                'Downloading video info JSON'))

    @staticmethod
    def _extract_info(info):
        """Convert a kraken video object into an info dict (without formats).

        Only '_id' is mandatory; every other field is read defensively.
        'is_live' is True for status 'recording', False for 'recorded' and
        None for anything else.
        """
        status = info.get('status')
        if status == 'recording':
            is_live = True
        elif status == 'recorded':
            is_live = False
        else:
            is_live = None

        _QUALITIES = ('small', 'medium', 'large')
        quality_key = qualities(_QUALITIES)
        thumbnails = []
        preview = info.get('preview')
        if isinstance(preview, dict):
            for thumbnail_id, thumbnail_url in preview.items():
                thumbnail_url = url_or_none(thumbnail_url)
                if not thumbnail_url:
                    continue
                # Only the known preview sizes can be ranked
                if thumbnail_id not in _QUALITIES:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'preference': quality_key(thumbnail_id),
                })

        # `info.get('channel', {})` only guards against a missing key; a JSON
        # null (or any other non-dict value) would make the chained .get()
        # raise AttributeError, so normalise to a dict first.
        channel = info.get('channel')
        if not isinstance(channel, dict):
            channel = {}

        return {
            'id': info['_id'],
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('length')),
            'thumbnails': thumbnails,
            'uploader': channel.get('display_name'),
            'uploader_id': channel.get('name'),
            'timestamp': parse_iso8601(info.get('recorded_at')),
            'view_count': int_or_none(info.get('views')),
            'is_live': is_live,
        }

    def _real_extract(self, url):
        """Extract metadata, HLS formats, start time and rechat subtitles for a VOD."""
        vod_id = self._match_id(url)

        info = self._download_info(vod_id)
        access_token = self._call_api(
            'api/vods/%s/access_token' % vod_id, vod_id,
            'Downloading %s access token' % self._ITEM_TYPE)

        formats = self._extract_m3u8_formats(
            '%s/vod/%s.m3u8?%s' % (
                self._USHER_BASE, vod_id,
                compat_urllib_parse_urlencode({
                    'allow_source': 'true',
                    'allow_audio_only': 'true',
                    'allow_spectre': 'true',
                    'player': 'twitchweb',
                    'playlist_include_framerate': 'true',
                    'nauth': access_token['token'],
                    'nauthsig': access_token['sig'],
                })),
            vod_id, 'mp4', entry_protocol='m3u8_native')

        self._prefer_source(formats)
        info['formats'] = formats

        # A `t` query parameter (e.g. ?t=5m10s) selects the start position
        parsed_url = compat_urllib_parse_urlparse(url)
        query = compat_parse_qs(parsed_url.query)
        if 't' in query:
            info['start_time'] = parse_duration(query['t'][0])

        # The comments URL is only attached when the timestamp is known
        if info.get('timestamp') is not None:
            info['subtitles'] = {
                'rechat': [{
                    'url': update_url_query(
                        'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
                            'client_id': self._CLIENT_ID,
                        }),
                    'ext': 'json',
                }],
            }

        return info
317
318
319 def _make_video_result(node):
320     assert isinstance(node, dict)
321     video_id = node.get('id')
322     if not video_id:
323         return
324     return {
325         '_type': 'url_transparent',
326         'ie_key': TwitchVodIE.ie_key(),
327         'id': video_id,
328         'url': 'https://www.twitch.tv/videos/%s' % video_id,
329         'title': node.get('title'),
330         'thumbnail': node.get('previewThumbnailURL'),
331         'duration': float_or_none(node.get('lengthSeconds')),
332         'view_count': int_or_none(node.get('viewCount')),
333     }
334
335
class TwitchGraphQLBaseIE(TwitchBaseIE):
    """Base class for extractors that query https://gql.twitch.tv/gql with persisted queries."""

    _PAGE_LIMIT = 100

    # sha256 hash sent for each supported operation name
    _OPERATION_HASHES = {
        'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
        'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
        'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
        'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
        'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
        'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
        'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
    }

    def _download_gql(self, video_id, ops, note, fatal=True):
        """POST the operations in `ops` as one JSON batch and return the parsed reply.

        Each operation dict is modified in place: an 'extensions' entry with
        the persisted query hash for its 'operationName' is attached. An
        operation name missing from _OPERATION_HASHES raises KeyError.
        """
        for operation in ops:
            query_hash = self._OPERATION_HASHES[operation['operationName']]
            operation['extensions'] = {
                'persistedQuery': {
                    'version': 1,
                    'sha256Hash': query_hash,
                }
            }
        request_headers = {
            'Content-Type': 'text/plain;charset=UTF-8',
            'Client-ID': self._CLIENT_ID,
        }
        return self._download_json(
            'https://gql.twitch.tv/gql', video_id, note,
            data=json.dumps(ops).encode(),
            headers=request_headers, fatal=fatal)
364
365
class TwitchCollectionIE(TwitchGraphQLBaseIE):
    """Extractor that turns a Twitch collection into a playlist of its videos."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
        'info_dict': {
            'id': 'wlDCoH0zEBZZbQ',
            'title': 'Overthrow Nook, capitalism for children',
        },
        'playlist_mincount': 13,
    }]

    _OPERATION_NAME = 'CollectionSideBar'

    def _real_extract(self, url):
        """Query the collection over GraphQL and return its videos as a playlist."""
        collection_id = self._match_id(url)
        response = self._download_gql(
            collection_id, [{
                'operationName': self._OPERATION_NAME,
                'variables': {'collectionID': collection_id},
            }],
            'Downloading collection GraphQL')
        collection = response[0]['data']['collection']
        title = collection.get('title')

        entries = []
        for edge in collection['items']['edges']:
            # Skip malformed edges and nodes instead of failing the playlist
            node = edge.get('node') if isinstance(edge, dict) else None
            if not isinstance(node, dict):
                continue
            video = _make_video_result(node)
            if video:
                entries.append(video)

        return self.playlist_result(
            entries, playlist_id=collection_id, playlist_title=title)
401
402
class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE):
    """Base class for paginated channel listings.

    _entries reads _OPERATION_NAME, _ENTRY_KIND, _EDGE_KIND and _NODE_KIND
    from the instance and calls self._make_variables(channel_name, *args)
    and self._extract_entry(node); none of these are defined here.
    """

    def _entries(self, channel_name, *args):
        """Yield playlist entries for `channel_name`, following GraphQL cursors.

        Pagination stops when a page fails to download, has no edges, or
        yields no usable cursor.
        """
        cursor = None
        variables_common = self._make_variables(channel_name, *args)
        entries_key = '%ss' % self._ENTRY_KIND
        for page_num in itertools.count(1):
            variables = variables_common.copy()
            variables['limit'] = self._PAGE_LIMIT
            if cursor:
                variables['cursor'] = cursor
            page = self._download_gql(
                channel_name, [{
                    'operationName': self._OPERATION_NAME,
                    'variables': variables,
                }],
                'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
                fatal=False)
            if not page:
                break
            edges = try_get(
                page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
            if not edges:
                break
            # Forget the cursor that produced this page: if none of its edges
            # yields an entry, keeping the old value would request the very
            # same page again, forever.
            cursor = None
            for edge in edges:
                if not isinstance(edge, dict):
                    continue
                if edge.get('__typename') != self._EDGE_KIND:
                    continue
                node = edge.get('node')
                if not isinstance(node, dict):
                    continue
                if node.get('__typename') != self._NODE_KIND:
                    continue
                entry = self._extract_entry(node)
                if entry:
                    # The cursor of the last extracted entry addresses the next page
                    cursor = edge.get('cursor')
                    yield entry
            if not cursor or not isinstance(cursor, compat_str):
                break

    # Deprecated kraken v5 API
    def _entries_kraken(self, channel_name, broadcast_type, sort):
        """Yield url_transparent video entries using offset based kraken paging.

        Paging stops when a page has no videos or when the offset reaches the
        reported '_total'.
        """
        access_token = self._download_access_token(channel_name)
        channel_id = self._extract_channel_id(access_token['token'], channel_name)
        offset = 0
        for counter in itertools.count(1):
            response = self._call_api(
                'kraken/channels/%s/videos/' % channel_id,
                channel_id,
                'Downloading video JSON page %s' % counter,
                query={
                    'offset': offset,
                    'limit': self._PAGE_LIMIT,
                    'broadcast_type': broadcast_type,
                    'sort': sort,
                })
            videos = response.get('videos')
            # An empty page must end the loop as well: with '_total' missing
            # or 0 the offset check below never fires.
            if not videos or not isinstance(videos, list):
                break
            for video in videos:
                if not isinstance(video, dict):
                    continue
                video_url = url_or_none(video.get('url'))
                if not video_url:
                    continue
                yield {
                    '_type': 'url_transparent',
                    'ie_key': TwitchVodIE.ie_key(),
                    'id': video.get('_id'),
                    'url': video_url,
                    'title': video.get('title'),
                    'description': video.get('description'),
                    'timestamp': unified_timestamp(video.get('published_at')),
                    'duration': float_or_none(video.get('length')),
                    'view_count': int_or_none(video.get('views')),
                    'language': video.get('language'),
                }
            offset += self._PAGE_LIMIT
            total = int_or_none(response.get('_total'))
            if total and offset >= total:
                break
485
486
class TwitchVideosIE(TwitchPlaylistBaseIE):
    """Playlist extractor for a channel's videos page, filtered and sorted via the query string."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'

    _TESTS = [{
        # All Videos sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Date',
        },
        'playlist_mincount': 924,
    }, {
        # All Videos sorted by Popular
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Popular',
        },
        'playlist_mincount': 931,
    }, {
        # Past Broadcasts sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Broadcasts sorted by Date',
        },
        'playlist_mincount': 27,
    }, {
        # Highlights sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Highlights sorted by Date',
        },
        'playlist_mincount': 901,
    }, {
        # Uploads sorted by Date
        'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
        'info_dict': {
            'id': 'esl_csgo',
            'title': 'esl_csgo - Uploads sorted by Date',
        },
        'playlist_mincount': 5,
    }, {
        # Past Premieres sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Premieres sorted by Date',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos',
        'only_matching': True,
    }]

    # Pairs the GraphQL broadcast type with the label used in playlist titles
    Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])

    _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
    # Maps the `filter` query value to its Broadcast
    _BROADCASTS = {
        'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
        'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
        'uploads': Broadcast('UPLOAD', 'Uploads'),
        'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
        'all': _DEFAULT_BROADCAST,
    }

    _DEFAULT_SORTED_BY = 'Date'
    # Maps the `sort` query value to the label used in playlist titles
    _SORTED_BY = {
        'time': _DEFAULT_SORTED_BY,
        'views': 'Popular',
    }

    _OPERATION_NAME = 'FilterableVideoTower_Videos'
    _ENTRY_KIND = 'video'
    _EDGE_KIND = 'VideoEdge'
    _NODE_KIND = 'Video'

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the clips or collections listing extractors."""
        more_specific = (TwitchVideosClipsIE, TwitchVideosCollectionsIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(TwitchVideosIE, cls).suitable(url)

    @staticmethod
    def _make_variables(channel_name, broadcast_type, sort):
        """Build the GraphQL variables for the videos query; `sort` is upper-cased."""
        return {
            'channelOwnerLogin': channel_name,
            'broadcastType': broadcast_type,
            'videoSort': sort.upper(),
        }

    @staticmethod
    def _extract_entry(node):
        return _make_video_result(node)

    def _real_extract(self, url):
        """Return a playlist of the channel's videos for the requested filter and sort."""
        channel_name = self._match_id(url)
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        filter_name = query.get('filter', ['all'])[0]
        sort = query.get('sort', ['time'])[0]

        # Unknown filter or sort values fall back to the defaults
        broadcast = self._BROADCASTS.get(filter_name, self._DEFAULT_BROADCAST)
        sort_label = self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)
        playlist_title = '%s - %s sorted by %s' % (
            channel_name, broadcast.label, sort_label)

        return self.playlist_result(
            self._entries(channel_name, broadcast.type, sort),
            playlist_id=channel_name,
            playlist_title=playlist_title)
603
604
class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
    """Playlist extractor for a channel's clips, selected by the `range` query value."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'

    _TESTS = [{
        # Clips
        'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
        'info_dict': {
            'id': 'vanillatv',
            'title': 'vanillatv - Clips Top All',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
        'only_matching': True,
    }]

    # Pairs the GraphQL criteria filter with the label used in playlist titles
    Clip = collections.namedtuple('Clip', ['filter', 'label'])

    _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
    # Maps the `range` query value to its Clip
    _RANGE = {
        '24hr': Clip('LAST_DAY', 'Top 24H'),
        '7d': _DEFAULT_CLIP,
        '30d': Clip('LAST_MONTH', 'Top 30D'),
        'all': Clip('ALL_TIME', 'Top All'),
    }

    # NB: values other than 20 result in skipped videos
    _PAGE_LIMIT = 20

    _OPERATION_NAME = 'ClipsCards__User'
    _ENTRY_KIND = 'clip'
    _EDGE_KIND = 'ClipEdge'
    _NODE_KIND = 'Clip'

    @staticmethod
    def _make_variables(channel_name, filter):
        """Build the GraphQL variables for the clips query."""
        criteria = {'filter': filter}
        return {
            'login': channel_name,
            'criteria': criteria,
        }

    @staticmethod
    def _extract_entry(node):
        """Turn a clip node into a url_transparent result for TwitchClipsIE.

        Returns None when the node has no valid 'url'.
        """
        assert isinstance(node, dict)
        clip_url = url_or_none(node.get('url'))
        if clip_url:
            return {
                '_type': 'url_transparent',
                'ie_key': TwitchClipsIE.ie_key(),
                'id': node.get('id'),
                'url': clip_url,
                'title': node.get('title'),
                'thumbnail': node.get('thumbnailURL'),
                'duration': float_or_none(node.get('durationSeconds')),
                'timestamp': unified_timestamp(node.get('createdAt')),
                'view_count': int_or_none(node.get('viewCount')),
                'language': node.get('language'),
            }
        return None

    def _real_extract(self, url):
        """Return a playlist of the channel's clips for the requested range."""
        channel_name = self._match_id(url)
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        range_name = query.get('range', ['7d'])[0]
        # Unknown ranges fall back to the default clip range
        clip = self._RANGE.get(range_name, self._DEFAULT_CLIP)
        playlist_title = '%s - Clips %s' % (channel_name, clip.label)
        return self.playlist_result(
            self._entries(channel_name, clip.filter),
            playlist_id=channel_name,
            playlist_title=playlist_title)
676
677
class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
    """Playlist extractor for the collections listed on a channel's videos page."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'

    _TESTS = [{
        # Collections
        'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Collections',
        },
        'playlist_mincount': 3,
    }]

    _OPERATION_NAME = 'ChannelCollectionsContent'
    _ENTRY_KIND = 'collection'
    _EDGE_KIND = 'CollectionsItemEdge'
    _NODE_KIND = 'Collection'

    @staticmethod
    def _make_variables(channel_name):
        """Build the GraphQL variables for the collections query."""
        return {'ownerLogin': channel_name}

    @staticmethod
    def _extract_entry(node):
        """Turn a collection node into a url_transparent result for TwitchCollectionIE.

        Returns None when the node has no truthy 'id'.
        """
        assert isinstance(node, dict)
        collection_id = node.get('id')
        if collection_id:
            collection_url = 'https://www.twitch.tv/collections/%s' % collection_id
            return {
                '_type': 'url_transparent',
                'ie_key': TwitchCollectionIE.ie_key(),
                'id': collection_id,
                'url': collection_url,
                'title': node.get('title'),
                'thumbnail': node.get('thumbnailURL'),
                'duration': float_or_none(node.get('lengthSeconds')),
                'timestamp': unified_timestamp(node.get('updatedAt')),
                'view_count': int_or_none(node.get('viewCount')),
            }
        return None

    def _real_extract(self, url):
        """Return a playlist of the channel's collections."""
        channel_name = self._match_id(url)
        playlist_title = '%s - Collections' % channel_name
        return self.playlist_result(
            self._entries(channel_name), playlist_id=channel_name,
            playlist_title=playlist_title)
725
726
class TwitchStreamIE(TwitchGraphQLBaseIE):
    """Extractor for a channel's currently running stream."""

    IE_NAME = 'twitch:stream'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/|
                            player\.twitch\.tv/\?.*?\bchannel=
                        )
                        (?P<id>[^/#?]+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/shroomztv',
        'info_dict': {
            'id': '12772022048',
            'display_id': 'shroomztv',
            'ext': 'mp4',
            'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
            'is_live': True,
            'timestamp': 1421928037,
            'upload_date': '20150122',
            'uploader': 'ShroomzTV',
            'uploader_id': 'shroomztv',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.twitch.tv/miracle_doto#profile-0',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?channel=lotsofs',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/food',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/food',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that any of the more specific Twitch extractors accepts."""
        more_specific = (
            TwitchVodIE,
            TwitchCollectionIE,
            TwitchVideosIE,
            TwitchVideosClipsIE,
            TwitchVideosCollectionsIE,
            TwitchClipsIE,
        )
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(TwitchStreamIE, cls).suitable(url)

    def _real_extract(self, url):
        """Extract the running stream of a channel.

        Raises an expected ExtractorError when the channel does not exist or
        is offline.
        """
        channel_name = self._match_id(url).lower()

        # One batched request; the replies are read by position below
        operations = [{
            'operationName': 'StreamMetadata',
            'variables': {'channelLogin': channel_name},
        }, {
            'operationName': 'ComscoreStreamingQuery',
            'variables': {
                'channel': channel_name,
                'clipSlug': '',
                'isClip': False,
                'isLive': True,
                'isVodOrCollection': False,
                'vodID': '',
            },
        }, {
            'operationName': 'VideoPreviewOverlay',
            'variables': {'login': channel_name},
        }]
        gql = self._download_gql(
            channel_name, operations, 'Downloading stream GraphQL')

        user = gql[0]['data']['user']
        if not user:
            raise ExtractorError(
                '%s does not exist' % channel_name, expected=True)

        stream = user['stream']
        if not stream:
            raise ExtractorError('%s is offline' % channel_name, expected=True)

        access_token = self._download_access_token(channel_name)
        token = access_token['token']

        stream_id = stream.get('id') or channel_name
        playlist_url = '%s/api/channel/hls/%s.m3u8' % (
            self._USHER_BASE, channel_name)
        formats = self._extract_m3u8_formats(
            playlist_url, stream_id, 'mp4', query={
                'allow_source': 'true',
                'allow_audio_only': 'true',
                'allow_spectre': 'true',
                'p': random.randint(1000000, 10000000),
                'player': 'twitchweb',
                'playlist_include_framerate': 'true',
                'segment_preference': '4',
                'sig': access_token['sig'].encode('utf-8'),
                'token': token.encode('utf-8'),
            })
        self._prefer_source(formats)

        view_count = stream.get('viewers')
        timestamp = unified_timestamp(stream.get('createdAt'))

        # The second and third replies are optional; fall back to empty values
        sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
        uploader = sq_user.get('displayName')
        description = try_get(
            sq_user, lambda x: x['broadcastSettings']['title'], compat_str)

        preview_url = try_get(
            gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
            compat_str)
        thumbnail = url_or_none(preview_url)

        stream_type = stream.get('type')
        title = uploader or channel_name
        if stream_type in ('rerun', 'live'):
            title += ' (%s)' % stream_type

        return {
            'id': stream_id,
            'display_id': channel_name,
            'title': self._live_title(title),
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': channel_name,
            'timestamp': timestamp,
            'view_count': view_count,
            'formats': formats,
            'is_live': stream_type == 'live',
        }
867
868
class TwitchClipsIE(TwitchBaseIE):
    """Extractor for Twitch clips.

    Matches clips.twitch.tv URLs (including the embed?clip= form) and
    <channel>/clip/<slug> URLs on the www, go and m twitch.tv hosts.
    """
    IE_NAME = 'twitch:clips'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
                            (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
                        )
                        (?P<id>[^/?#&]+)
                    '''

    _TESTS = [{
        'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
        'md5': '761769e1eafce0ffebfb4089cb3847cd',
        'info_dict': {
            'id': '42850523',
            'ext': 'mp4',
            'title': 'EA Play 2016 Live from the Novo Theatre',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1465767393,
            'upload_date': '20160612',
            'creator': 'EA',
            'uploader': 'stereotype_',
            'uploader_id': '43566419',
        },
    }, {
        # multiple formats
        'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
        'only_matching': True,
    }, {
        'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a single clip identified by the slug in the URL.

        The clip metadata and its direct video URLs are requested with one
        GraphQL query. Raises ExtractorError (expected) when the API returns
        no clip for the slug.
        """
        video_id = self._match_id(url)

        # NOTE(review): the slug is interpolated into the query unescaped and
        # _VALID_URL does not exclude quote characters - confirm whether
        # escaping is needed here.
        clip = self._download_json(
            'https://gql.twitch.tv/gql', video_id, data=json.dumps({
                'query': '''{
  clip(slug: "%s") {
    broadcaster {
      displayName
    }
    createdAt
    curator {
      displayName
      id
    }
    durationSeconds
    id
    tiny: thumbnailURL(width: 86, height: 45)
    small: thumbnailURL(width: 260, height: 147)
    medium: thumbnailURL(width: 480, height: 272)
    title
    videoQualities {
      frameRate
      quality
      sourceURL
    }
    viewCount
  }
}''' % video_id,
            }).encode(), headers={
                'Client-ID': self._CLIENT_ID,
            })['data']['clip']

        if not clip:
            raise ExtractorError(
                'This clip is no longer available', expected=True)

        formats = []
        # `or []` also covers an explicit null value, which a .get() default
        # would not replace
        for option in clip.get('videoQualities') or []:
            if not isinstance(option, dict):
                continue
            source = url_or_none(option.get('sourceURL'))
            if not source:
                continue
            formats.append({
                'url': source,
                'format_id': option.get('quality'),
                # The quality label doubles as the height when it is numeric
                'height': int_or_none(option.get('quality')),
                'fps': int_or_none(option.get('frameRate')),
            })
        self._sort_formats(formats)

        thumbnails = []
        # These ids are the aliases given to thumbnailURL in the query above
        for thumbnail_id in ('tiny', 'small', 'medium'):
            thumbnail_url = clip.get(thumbnail_id)
            if not thumbnail_url:
                continue
            thumb = {
                'id': thumbnail_id,
                'url': thumbnail_url,
            }
            # Take the dimensions from a "-<width>x<height>." URL fragment
            # when there is one
            mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
            if mobj:
                thumb.update({
                    'height': int(mobj.group(2)),
                    'width': int(mobj.group(1)),
                })
            thumbnails.append(thumb)

        return {
            'id': clip.get('id') or video_id,
            'title': clip.get('title') or video_id,
            'formats': formats,
            'duration': int_or_none(clip.get('durationSeconds')),
            # Same key as the stream extractor in this file uses ('views' is
            # not used anywhere else here)
            'view_count': int_or_none(clip.get('viewCount')),
            'timestamp': unified_timestamp(clip.get('createdAt')),
            'thumbnails': thumbnails,
            'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
            'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
            'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
        }