_ Git - youtube-dl/blob - youtube_dl/extractor/twitch.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import collections
   5 import itertools
   6 import json
   7 import random
   8 import re
   9
  10 from .common import InfoExtractor
  11 from ..compat import (
  12     compat_kwargs,
  13     compat_parse_qs,
  14     compat_str,
  15     compat_urlparse,
  16     compat_urllib_parse_urlencode,
  17     compat_urllib_parse_urlparse,
  18 )
  19 from ..utils import (
  20     clean_html,
  21     ExtractorError,
  22     float_or_none,
  23     int_or_none,
  24     parse_duration,
  25     parse_iso8601,
  26     qualities,
  27     str_or_none,
  28     try_get,
  29     unified_timestamp,
  30     update_url_query,
  31     url_or_none,
  32     urljoin,
  33 )
  34
  35
  36 class TwitchBaseIE(InfoExtractor):
  37     _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
  38
  39     _API_BASE = 'https://api.twitch.tv'
  40     _USHER_BASE = 'https://usher.ttvnw.net'
  41     _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
  42     _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
  43     _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
  44     _NETRC_MACHINE = 'twitch'
  45
  46     def _handle_error(self, response):
  47         if not isinstance(response, dict):
  48             return
  49         error = response.get('error')
  50         if error:
  51             raise ExtractorError(
  52                 '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
  53                 expected=True)
  54
  55     def _call_api(self, path, item_id, *args, **kwargs):
  56         headers = kwargs.get('headers', {}).copy()
  57         headers.update({
  58             'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8',
  59             'Client-ID': self._CLIENT_ID,
  60         })
  61         kwargs.update({
  62             'headers': headers,
  63             'expected_status': (400, 410),
  64         })
  65         response = self._download_json(
  66             '%s/%s' % (self._API_BASE, path), item_id,
  67             *args, **compat_kwargs(kwargs))
  68         self._handle_error(response)
  69         return response
  70
  71     def _real_initialize(self):
  72         self._login()
  73
  74     def _login(self):
  75         username, password = self._get_login_info()
  76         if username is None:
  77             return
  78
  79         def fail(message):
  80             raise ExtractorError(
  81                 'Unable to login. Twitch said: %s' % message, expected=True)
  82
  83         def login_step(page, urlh, note, data):
  84             form = self._hidden_inputs(page)
  85             form.update(data)
  86
  87             page_url = urlh.geturl()
  88             post_url = self._search_regex(
  89                 r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
  90                 'post url', default=self._LOGIN_POST_URL, group='url')
  91             post_url = urljoin(page_url, post_url)
  92
  93             headers = {
  94                 'Referer': page_url,
  95                 'Origin': page_url,
  96                 'Content-Type': 'text/plain;charset=UTF-8',
  97             }
  98
  99             response = self._download_json(
 100                 post_url, None, note, data=json.dumps(form).encode(),
 101                 headers=headers, expected_status=400)
 102             error = response.get('error_description') or response.get('error_code')
 103             if error:
 104                 fail(error)
 105
 106             if 'Authenticated successfully' in response.get('message', ''):
 107                 return None, None
 108
 109             redirect_url = urljoin(
 110                 post_url,
 111                 response.get('redirect') or response['redirect_path'])
 112             return self._download_webpage_handle(
 113                 redirect_url, None, 'Downloading login redirect page',
 114                 headers=headers)
 115
 116         login_page, handle = self._download_webpage_handle(
 117             self._LOGIN_FORM_URL, None, 'Downloading login page')
 118
 119         # Some TOR nodes and public proxies are blocked completely
 120         if 'blacklist_message' in login_page:
 121             fail(clean_html(login_page))
 122
 123         redirect_page, handle = login_step(
 124             login_page, handle, 'Logging in', {
 125                 'username': username,
 126                 'password': password,
 127                 'client_id': self._CLIENT_ID,
 128             })
 129
 130         # Successful login
 131         if not redirect_page:
 132             return
 133
 134         if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
 135             # TODO: Add mechanism to request an SMS or phone call
 136             tfa_token = self._get_tfa_info('two-factor authentication token')
 137             login_step(redirect_page, handle, 'Submitting TFA token', {
 138                 'authy_token': tfa_token,
 139                 'remember_2fa': 'true',
 140             })
 141
 142     def _prefer_source(self, formats):
 143         try:
 144             source = next(f for f in formats if f['format_id'] == 'Source')
 145             source['quality'] = 10
 146         except StopIteration:
 147             for f in formats:
 148                 if '/chunked/' in f['url']:
 149                     f.update({
 150                         'quality': 10,
 151                         'format_note': 'Source',
 152                     })
 153         self._sort_formats(formats)
 154
 155     def _download_access_token(self, channel_name):
 156         return self._call_api(
 157             'api/channels/%s/access_token' % channel_name, channel_name,
 158             'Downloading access token JSON')
 159
 160     def _extract_channel_id(self, token, channel_name):
 161         return compat_str(self._parse_json(token, channel_name)['channel_id'])
 162
 163
 164 class TwitchVodIE(TwitchBaseIE):
 165     IE_NAME = 'twitch:vod'
 166     _VALID_URL = r'''(?x)
 167                     https?://
 168                         (?:
 169                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
 170                             player\.twitch\.tv/\?.*?\bvideo=v?
 171                         )
 172                         (?P<id>\d+)
 173                     '''
 174     _ITEM_TYPE = 'vod'
 175     _ITEM_SHORTCUT = 'v'
 176
 177     _TESTS = [{
 178         'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
 179         'info_dict': {
 180             'id': 'v6528877',
 181             'ext': 'mp4',
 182             'title': 'LCK Summer Split - Week 6 Day 1',
 183             'thumbnail': r're:^https?://.*\.jpg$',
 184             'duration': 17208,
 185             'timestamp': 1435131709,
 186             'upload_date': '20150624',
 187             'uploader': 'Riot Games',
 188             'uploader_id': 'riotgames',
 189             'view_count': int,
 190             'start_time': 310,
 191         },
 192         'params': {
 193             # m3u8 download
 194             'skip_download': True,
 195         },
 196     }, {
 197         # Untitled broadcast (title is None)
 198         'url': 'http://www.twitch.tv/belkao_o/v/11230755',
 199         'info_dict': {
 200             'id': 'v11230755',
 201             'ext': 'mp4',
 202             'title': 'Untitled Broadcast',
 203             'thumbnail': r're:^https?://.*\.jpg$',
 204             'duration': 1638,
 205             'timestamp': 1439746708,
 206             'upload_date': '20150816',
 207             'uploader': 'BelkAO_o',
 208             'uploader_id': 'belkao_o',
 209             'view_count': int,
 210         },
 211         'params': {
 212             # m3u8 download
 213             'skip_download': True,
 214         },
 215         'skip': 'HTTP Error 404: Not Found',
 216     }, {
 217         'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
 218         'only_matching': True,
 219     }, {
 220         'url': 'https://www.twitch.tv/videos/6528877',
 221         'only_matching': True,
 222     }, {
 223         'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
 224         'only_matching': True,
 225     }, {
 226         'url': 'https://www.twitch.tv/northernlion/video/291940395',
 227         'only_matching': True,
 228     }, {
 229         'url': 'https://player.twitch.tv/?video=480452374',
 230         'only_matching': True,
 231     }]
 232
 233     def _download_info(self, item_id):
 234         return self._extract_info(
 235             self._call_api(
 236                 'kraken/videos/%s' % item_id, item_id,
 237                 'Downloading video info JSON'))
 238
 239     @staticmethod
 240     def _extract_info(info):
 241         status = info.get('status')
 242         if status == 'recording':
 243             is_live = True
 244         elif status == 'recorded':
 245             is_live = False
 246         else:
 247             is_live = None
 248         _QUALITIES = ('small', 'medium', 'large')
 249         quality_key = qualities(_QUALITIES)
 250         thumbnails = []
 251         preview = info.get('preview')
 252         if isinstance(preview, dict):
 253             for thumbnail_id, thumbnail_url in preview.items():
 254                 thumbnail_url = url_or_none(thumbnail_url)
 255                 if not thumbnail_url:
 256                     continue
 257                 if thumbnail_id not in _QUALITIES:
 258                     continue
 259                 thumbnails.append({
 260                     'url': thumbnail_url,
 261                     'preference': quality_key(thumbnail_id),
 262                 })
 263         return {
 264             'id': info['_id'],
 265             'title': info.get('title') or 'Untitled Broadcast',
 266             'description': info.get('description'),
 267             'duration': int_or_none(info.get('length')),
 268             'thumbnails': thumbnails,
 269             'uploader': info.get('channel', {}).get('display_name'),
 270             'uploader_id': info.get('channel', {}).get('name'),
 271             'timestamp': parse_iso8601(info.get('recorded_at')),
 272             'view_count': int_or_none(info.get('views')),
 273             'is_live': is_live,
 274         }
 275
 276     def _real_extract(self, url):
 277         vod_id = self._match_id(url)
 278
 279         info = self._download_info(vod_id)
 280         access_token = self._call_api(
 281             'api/vods/%s/access_token' % vod_id, vod_id,
 282             'Downloading %s access token' % self._ITEM_TYPE)
 283
 284         formats = self._extract_m3u8_formats(
 285             '%s/vod/%s.m3u8?%s' % (
 286                 self._USHER_BASE, vod_id,
 287                 compat_urllib_parse_urlencode({
 288                     'allow_source': 'true',
 289                     'allow_audio_only': 'true',
 290                     'allow_spectre': 'true',
 291                     'player': 'twitchweb',
 292                     'playlist_include_framerate': 'true',
 293                     'nauth': access_token['token'],
 294                     'nauthsig': access_token['sig'],
 295                 })),
 296             vod_id, 'mp4', entry_protocol='m3u8_native')
 297
 298         self._prefer_source(formats)
 299         info['formats'] = formats
 300
 301         parsed_url = compat_urllib_parse_urlparse(url)
 302         query = compat_parse_qs(parsed_url.query)
 303         if 't' in query:
 304             info['start_time'] = parse_duration(query['t'][0])
 305
 306         if info.get('timestamp') is not None:
 307             info['subtitles'] = {
 308                 'rechat': [{
 309                     'url': update_url_query(
 310                         'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
 311                             'client_id': self._CLIENT_ID,
 312                         }),
 313                     'ext': 'json',
 314                 }],
 315             }
 316
 317         return info
 318
 319
 320 def _make_video_result(node):
 321     assert isinstance(node, dict)
 322     video_id = node.get('id')
 323     if not video_id:
 324         return
 325     return {
 326         '_type': 'url_transparent',
 327         'ie_key': TwitchVodIE.ie_key(),
 328         'id': video_id,
 329         'url': 'https://www.twitch.tv/videos/%s' % video_id,
 330         'title': node.get('title'),
 331         'thumbnail': node.get('previewThumbnailURL'),
 332         'duration': float_or_none(node.get('lengthSeconds')),
 333         'view_count': int_or_none(node.get('viewCount')),
 334     }
 335
 336
 337 class TwitchGraphQLBaseIE(TwitchBaseIE):
 338     _PAGE_LIMIT = 100
 339
 340     def _download_gql(self, video_id, op, variables, sha256_hash, note, fatal=True):
 341         return self._download_json(
 342             'https://gql.twitch.tv/gql', video_id, note,
 343             data=json.dumps({
 344                 'operationName': op,
 345                 'variables': variables,
 346                 'extensions': {
 347                     'persistedQuery': {
 348                         'version': 1,
 349                         'sha256Hash': sha256_hash,
 350                     }
 351                 }
 352             }).encode(),
 353             headers={
 354                 'Content-Type': 'text/plain;charset=UTF-8',
 355                 'Client-ID': self._CLIENT_ID,
 356             }, fatal=fatal)
 357
 358
 359 class TwitchCollectionIE(TwitchGraphQLBaseIE):
 360     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
 361
 362     _TESTS = [{
 363         'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
 364         'info_dict': {
 365             'id': 'wlDCoH0zEBZZbQ',
 366             'title': 'Overthrow Nook, capitalism for children',
 367         },
 368         'playlist_mincount': 13,
 369     }]
 370
 371     _OPERATION_NAME = 'CollectionSideBar'
 372     _SHA256_HASH = '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14'
 373
 374     def _real_extract(self, url):
 375         collection_id = self._match_id(url)
 376         collection = self._download_gql(
 377             collection_id, self._OPERATION_NAME,
 378             {'collectionID': collection_id}, self._SHA256_HASH,
 379             'Downloading collection GraphQL')['data']['collection']
 380         title = collection.get('title')
 381         entries = []
 382         for edge in collection['items']['edges']:
 383             if not isinstance(edge, dict):
 384                 continue
 385             node = edge.get('node')
 386             if not isinstance(node, dict):
 387                 continue
 388             video = _make_video_result(node)
 389             if video:
 390                 entries.append(video)
 391         return self.playlist_result(
 392             entries, playlist_id=collection_id, playlist_title=title)
 393
 394
 395 class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE):
 396     def _entries(self, channel_name, *args):
 397         cursor = None
 398         variables_common = self._make_variables(channel_name, *args)
 399         entries_key = '%ss' % self._ENTRY_KIND
 400         for page_num in itertools.count(1):
 401             variables = variables_common.copy()
 402             variables['limit'] = self._PAGE_LIMIT
 403             if cursor:
 404                 variables['cursor'] = cursor
 405             page = self._download_gql(
 406                 channel_name, self._OPERATION_NAME, variables,
 407                 self._SHA256_HASH,
 408                 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
 409                 fatal=False)
 410             if not page:
 411                 break
 412             edges = try_get(
 413                 page, lambda x: x['data']['user'][entries_key]['edges'], list)
 414             if not edges:
 415                 break
 416             for edge in edges:
 417                 if not isinstance(edge, dict):
 418                     continue
 419                 if edge.get('__typename') != self._EDGE_KIND:
 420                     continue
 421                 node = edge.get('node')
 422                 if not isinstance(node, dict):
 423                     continue
 424                 if node.get('__typename') != self._NODE_KIND:
 425                     continue
 426                 entry = self._extract_entry(node)
 427                 if entry:
 428                     cursor = edge.get('cursor')
 429                     yield entry
 430             if not cursor or not isinstance(cursor, compat_str):
 431                 break
 432
 433     # Deprecated kraken v5 API
 434     def _entries_kraken(self, channel_name, broadcast_type, sort):
 435         access_token = self._download_access_token(channel_name)
 436         channel_id = self._extract_channel_id(access_token['token'], channel_name)
 437         offset = 0
 438         counter_override = None
 439         for counter in itertools.count(1):
 440             response = self._call_api(
 441                 'kraken/channels/%s/videos/' % channel_id,
 442                 channel_id,
 443                 'Downloading video JSON page %s' % (counter_override or counter),
 444                 query={
 445                     'offset': offset,
 446                     'limit': self._PAGE_LIMIT,
 447                     'broadcast_type': broadcast_type,
 448                     'sort': sort,
 449                 })
 450             videos = response.get('videos')
 451             if not isinstance(videos, list):
 452                 break
 453             for video in videos:
 454                 if not isinstance(video, dict):
 455                     continue
 456                 video_url = url_or_none(video.get('url'))
 457                 if not video_url:
 458                     continue
 459                 yield {
 460                     '_type': 'url_transparent',
 461                     'ie_key': TwitchVodIE.ie_key(),
 462                     'id': video.get('_id'),
 463                     'url': video_url,
 464                     'title': video.get('title'),
 465                     'description': video.get('description'),
 466                     'timestamp': unified_timestamp(video.get('published_at')),
 467                     'duration': float_or_none(video.get('length')),
 468                     'view_count': int_or_none(video.get('views')),
 469                     'language': video.get('language'),
 470                 }
 471             offset += self._PAGE_LIMIT
 472             total = int_or_none(response.get('_total'))
 473             if total and offset >= total:
 474                 break
 475
 476
 477 class TwitchVideosIE(TwitchPlaylistBaseIE):
 478     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
 479
 480     _TESTS = [{
 481         # All Videos sorted by Date
 482         'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
 483         'info_dict': {
 484             'id': 'spamfish',
 485             'title': 'spamfish - All Videos sorted by Date',
 486         },
 487         'playlist_mincount': 924,
 488     }, {
 489         # All Videos sorted by Popular
 490         'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
 491         'info_dict': {
 492             'id': 'spamfish',
 493             'title': 'spamfish - All Videos sorted by Popular',
 494         },
 495         'playlist_mincount': 931,
 496     }, {
 497         # Past Broadcasts sorted by Date
 498         'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
 499         'info_dict': {
 500             'id': 'spamfish',
 501             'title': 'spamfish - Past Broadcasts sorted by Date',
 502         },
 503         'playlist_mincount': 27,
 504     }, {
 505         # Highlights sorted by Date
 506         'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
 507         'info_dict': {
 508             'id': 'spamfish',
 509             'title': 'spamfish - Highlights sorted by Date',
 510         },
 511         'playlist_mincount': 901,
 512     }, {
 513         # Uploads sorted by Date
 514         'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
 515         'info_dict': {
 516             'id': 'esl_csgo',
 517             'title': 'esl_csgo - Uploads sorted by Date',
 518         },
 519         'playlist_mincount': 5,
 520     }, {
 521         # Past Premieres sorted by Date
 522         'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
 523         'info_dict': {
 524             'id': 'spamfish',
 525             'title': 'spamfish - Past Premieres sorted by Date',
 526         },
 527         'playlist_mincount': 1,
 528     }, {
 529         'url': 'https://www.twitch.tv/spamfish/videos/all',
 530         'only_matching': True,
 531     }, {
 532         'url': 'https://m.twitch.tv/spamfish/videos/all',
 533         'only_matching': True,
 534     }, {
 535         'url': 'https://www.twitch.tv/spamfish/videos',
 536         'only_matching': True,
 537     }]
 538
 539     Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])
 540
 541     _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
 542     _BROADCASTS = {
 543         'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
 544         'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
 545         'uploads': Broadcast('UPLOAD', 'Uploads'),
 546         'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
 547         'all': _DEFAULT_BROADCAST,
 548     }
 549
 550     _DEFAULT_SORTED_BY = 'Date'
 551     _SORTED_BY = {
 552         'time': _DEFAULT_SORTED_BY,
 553         'views': 'Popular',
 554     }
 555
 556     _SHA256_HASH = 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb'
 557     _OPERATION_NAME = 'FilterableVideoTower_Videos'
 558     _ENTRY_KIND = 'video'
 559     _EDGE_KIND = 'VideoEdge'
 560     _NODE_KIND = 'Video'
 561
 562     @classmethod
 563     def suitable(cls, url):
 564         return (False
 565                 if any(ie.suitable(url) for ie in (
 566                     TwitchVideosClipsIE,
 567                     TwitchVideosCollectionsIE))
 568                 else super(TwitchVideosIE, cls).suitable(url))
 569
 570     @staticmethod
 571     def _make_variables(channel_name, broadcast_type, sort):
 572         return {
 573             'channelOwnerLogin': channel_name,
 574             'broadcastType': broadcast_type,
 575             'videoSort': sort.upper(),
 576         }
 577
 578     @staticmethod
 579     def _extract_entry(node):
 580         return _make_video_result(node)
 581
 582     def _real_extract(self, url):
 583         channel_name = self._match_id(url)
 584         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
 585         filter = qs.get('filter', ['all'])[0]
 586         sort = qs.get('sort', ['time'])[0]
 587         broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
 588         return self.playlist_result(
 589             self._entries(channel_name, broadcast.type, sort),
 590             playlist_id=channel_name,
 591             playlist_title='%s - %s sorted by %s'
 592             % (channel_name, broadcast.label,
 593                self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)))
 594
 595
 596 class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
 597     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'
 598
 599     _TESTS = [{
 600         # Clips
 601         'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
 602         'info_dict': {
 603             'id': 'vanillatv',
 604             'title': 'vanillatv - Clips Top All',
 605         },
 606         'playlist_mincount': 1,
 607     }, {
 608         'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
 609         'only_matching': True,
 610     }]
 611
 612     Clip = collections.namedtuple('Clip', ['filter', 'label'])
 613
 614     _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
 615     _RANGE = {
 616         '24hr': Clip('LAST_DAY', 'Top 24H'),
 617         '7d': _DEFAULT_CLIP,
 618         '30d': Clip('LAST_MONTH', 'Top 30D'),
 619         'all': Clip('ALL_TIME', 'Top All'),
 620     }
 621
 622     # NB: values other than 20 result in skipped videos
 623     _PAGE_LIMIT = 20
 624
 625     _SHA256_HASH = 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777'
 626     _OPERATION_NAME = 'ClipsCards__User'
 627     _ENTRY_KIND = 'clip'
 628     _EDGE_KIND = 'ClipEdge'
 629     _NODE_KIND = 'Clip'
 630
 631     @staticmethod
 632     def _make_variables(channel_name, filter):
 633         return {
 634             'login': channel_name,
 635             'criteria': {
 636                 'filter': filter,
 637             },
 638         }
 639
 640     @staticmethod
 641     def _extract_entry(node):
 642         assert isinstance(node, dict)
 643         clip_url = url_or_none(node.get('url'))
 644         if not clip_url:
 645             return
 646         return {
 647             '_type': 'url_transparent',
 648             'ie_key': TwitchClipsIE.ie_key(),
 649             'id': node.get('id'),
 650             'url': clip_url,
 651             'title': node.get('title'),
 652             'thumbnail': node.get('thumbnailURL'),
 653             'duration': float_or_none(node.get('durationSeconds')),
 654             'timestamp': unified_timestamp(node.get('createdAt')),
 655             'view_count': int_or_none(node.get('viewCount')),
 656             'language': node.get('language'),
 657         }
 658
 659     def _real_extract(self, url):
 660         channel_name = self._match_id(url)
 661         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
 662         range = qs.get('range', ['7d'])[0]
 663         clip = self._RANGE.get(range, self._DEFAULT_CLIP)
 664         return self.playlist_result(
 665             self._entries(channel_name, clip.filter),
 666             playlist_id=channel_name,
 667             playlist_title='%s - Clips %s' % (channel_name, clip.label))
 668
 669
 670 class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
 671     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'
 672
 673     _TESTS = [{
 674         # Collections
 675         'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
 676         'info_dict': {
 677             'id': 'spamfish',
 678             'title': 'spamfish - Collections',
 679         },
 680         'playlist_mincount': 3,
 681     }]
 682
 683     _SHA256_HASH = '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84'
 684     _OPERATION_NAME = 'ChannelCollectionsContent'
 685     _ENTRY_KIND = 'collection'
 686     _EDGE_KIND = 'CollectionsItemEdge'
 687     _NODE_KIND = 'Collection'
 688
 689     @staticmethod
 690     def _make_variables(channel_name):
 691         return {
 692             'ownerLogin': channel_name,
 693         }
 694
 695     @staticmethod
 696     def _extract_entry(node):
 697         assert isinstance(node, dict)
 698         collection_id = node.get('id')
 699         if not collection_id:
 700             return
 701         return {
 702             '_type': 'url_transparent',
 703             'ie_key': TwitchCollectionIE.ie_key(),
 704             'id': collection_id,
 705             'url': 'https://www.twitch.tv/collections/%s' % collection_id,
 706             'title': node.get('title'),
 707             'thumbnail': node.get('thumbnailURL'),
 708             'duration': float_or_none(node.get('lengthSeconds')),
 709             'timestamp': unified_timestamp(node.get('updatedAt')),
 710             'view_count': int_or_none(node.get('viewCount')),
 711         }
 712
 713     def _real_extract(self, url):
 714         channel_name = self._match_id(url)
 715         return self.playlist_result(
 716             self._entries(channel_name), playlist_id=channel_name,
 717             playlist_title='%s - Collections' % channel_name)
 718
 719
 720 class TwitchStreamIE(TwitchBaseIE):
 721     IE_NAME = 'twitch:stream'
 722     _VALID_URL = r'''(?x)
 723                     https?://
 724                         (?:
 725                             (?:(?:www|go|m)\.)?twitch\.tv/|
 726                             player\.twitch\.tv/\?.*?\bchannel=
 727                         )
 728                         (?P<id>[^/#?]+)
 729                     '''
 730
 731     _TESTS = [{
 732         'url': 'http://www.twitch.tv/shroomztv',
 733         'info_dict': {
 734             'id': '12772022048',
 735             'display_id': 'shroomztv',
 736             'ext': 'mp4',
 737             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 738             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
 739             'is_live': True,
 740             'timestamp': 1421928037,
 741             'upload_date': '20150122',
 742             'uploader': 'ShroomzTV',
 743             'uploader_id': 'shroomztv',
 744             'view_count': int,
 745         },
 746         'params': {
 747             # m3u8 download
 748             'skip_download': True,
 749         },
 750     }, {
 751         'url': 'http://www.twitch.tv/miracle_doto#profile-0',
 752         'only_matching': True,
 753     }, {
 754         'url': 'https://player.twitch.tv/?channel=lotsofs',
 755         'only_matching': True,
 756     }, {
 757         'url': 'https://go.twitch.tv/food',
 758         'only_matching': True,
 759     }, {
 760         'url': 'https://m.twitch.tv/food',
 761         'only_matching': True,
 762     }]
 763
 764     @classmethod
 765     def suitable(cls, url):
 766         return (False
 767                 if any(ie.suitable(url) for ie in (
 768                     TwitchVodIE,
 769                     TwitchCollectionIE,
 770                     TwitchVideosIE,
 771                     TwitchVideosClipsIE,
 772                     TwitchVideosCollectionsIE,
 773                     TwitchClipsIE))
 774                 else super(TwitchStreamIE, cls).suitable(url))
 775
 776     def _real_extract(self, url):
 777         channel_name = self._match_id(url)
 778
 779         access_token = self._download_access_token(channel_name)
 780
 781         token = access_token['token']
 782         channel_id = self._extract_channel_id(token, channel_name)
 783
 784         stream = self._call_api(
 785             'kraken/streams/%s?stream_type=all' % channel_id,
 786             channel_id, 'Downloading stream JSON').get('stream')
 787
 788         if not stream:
 789             raise ExtractorError('%s is offline' % channel_id, expected=True)
 790
 791         # Channel name may be typed if different case than the original channel name
 792         # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
 793         # an invalid m3u8 URL. Working around by use of original channel name from stream
 794         # JSON and fallback to lowercase if it's not available.
 795         channel_name = try_get(
 796             stream, lambda x: x['channel']['name'],
 797             compat_str) or channel_name.lower()
 798
 799         query = {
 800             'allow_source': 'true',
 801             'allow_audio_only': 'true',
 802             'allow_spectre': 'true',
 803             'p': random.randint(1000000, 10000000),
 804             'player': 'twitchweb',
 805             'playlist_include_framerate': 'true',
 806             'segment_preference': '4',
 807             'sig': access_token['sig'].encode('utf-8'),
 808             'token': token.encode('utf-8'),
 809         }
 810         formats = self._extract_m3u8_formats(
 811             '%s/api/channel/hls/%s.m3u8?%s'
 812             % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)),
 813             channel_id, 'mp4')
 814         self._prefer_source(formats)
 815
 816         view_count = stream.get('viewers')
 817         timestamp = parse_iso8601(stream.get('created_at'))
 818
 819         channel = stream['channel']
 820         title = self._live_title(channel.get('display_name') or channel.get('name'))
 821         description = channel.get('status')
 822
 823         thumbnails = []
 824         for thumbnail_key, thumbnail_url in stream['preview'].items():
 825             m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
 826             if not m:
 827                 continue
 828             thumbnails.append({
 829                 'url': thumbnail_url,
 830                 'width': int(m.group('width')),
 831                 'height': int(m.group('height')),
 832             })
 833
 834         return {
 835             'id': str_or_none(stream.get('_id')) or channel_id,
 836             'display_id': channel_name,
 837             'title': title,
 838             'description': description,
 839             'thumbnails': thumbnails,
 840             'uploader': channel.get('display_name'),
 841             'uploader_id': channel.get('name'),
 842             'timestamp': timestamp,
 843             'view_count': view_count,
 844             'formats': formats,
 845             'is_live': True,
 846         }
 847
 848
 849 class TwitchClipsIE(TwitchBaseIE):
 850     IE_NAME = 'twitch:clips'
 851     _VALID_URL = r'''(?x)
 852                     https?://
 853                         (?:
 854                             clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
 855                             (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
 856                         )
 857                         (?P<id>[^/?#&]+)
 858                     '''
 859
 860     _TESTS = [{
 861         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
 862         'md5': '761769e1eafce0ffebfb4089cb3847cd',
 863         'info_dict': {
 864             'id': '42850523',
 865             'ext': 'mp4',
 866             'title': 'EA Play 2016 Live from the Novo Theatre',
 867             'thumbnail': r're:^https?://.*\.jpg',
 868             'timestamp': 1465767393,
 869             'upload_date': '20160612',
 870             'creator': 'EA',
 871             'uploader': 'stereotype_',
 872             'uploader_id': '43566419',
 873         },
 874     }, {
 875         # multiple formats
 876         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
 877         'only_matching': True,
 878     }, {
 879         'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
 880         'only_matching': True,
 881     }, {
 882         'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
 883         'only_matching': True,
 884     }, {
 885         'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
 886         'only_matching': True,
 887     }, {
 888         'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
 889         'only_matching': True,
 890     }]
 891
 892     def _real_extract(self, url):
 893         video_id = self._match_id(url)
 894
 895         clip = self._download_json(
 896             'https://gql.twitch.tv/gql', video_id, data=json.dumps({
 897                 'query': '''{
 898   clip(slug: "%s") {
 899     broadcaster {
 900       displayName
 901     }
 902     createdAt
 903     curator {
 904       displayName
 905       id
 906     }
 907     durationSeconds
 908     id
 909     tiny: thumbnailURL(width: 86, height: 45)
 910     small: thumbnailURL(width: 260, height: 147)
 911     medium: thumbnailURL(width: 480, height: 272)
 912     title
 913     videoQualities {
 914       frameRate
 915       quality
 916       sourceURL
 917     }
 918     viewCount
 919   }
 920 }''' % video_id,
 921             }).encode(), headers={
 922                 'Client-ID': self._CLIENT_ID,
 923             })['data']['clip']
 924
 925         if not clip:
 926             raise ExtractorError(
 927                 'This clip is no longer available', expected=True)
 928
 929         formats = []
 930         for option in clip.get('videoQualities', []):
 931             if not isinstance(option, dict):
 932                 continue
 933             source = url_or_none(option.get('sourceURL'))
 934             if not source:
 935                 continue
 936             formats.append({
 937                 'url': source,
 938                 'format_id': option.get('quality'),
 939                 'height': int_or_none(option.get('quality')),
 940                 'fps': int_or_none(option.get('frameRate')),
 941             })
 942         self._sort_formats(formats)
 943
 944         thumbnails = []
 945         for thumbnail_id in ('tiny', 'small', 'medium'):
 946             thumbnail_url = clip.get(thumbnail_id)
 947             if not thumbnail_url:
 948                 continue
 949             thumb = {
 950                 'id': thumbnail_id,
 951                 'url': thumbnail_url,
 952             }
 953             mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
 954             if mobj:
 955                 thumb.update({
 956                     'height': int(mobj.group(2)),
 957                     'width': int(mobj.group(1)),
 958                 })
 959             thumbnails.append(thumb)
 960
 961         return {
 962             'id': clip.get('id') or video_id,
 963             'title': clip.get('title') or video_id,
 964             'formats': formats,
 965             'duration': int_or_none(clip.get('durationSeconds')),
 966             'views': int_or_none(clip.get('viewCount')),
 967             'timestamp': unified_timestamp(clip.get('createdAt')),
 968             'thumbnails': thumbnails,
 969             'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
 970             'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
 971             'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
 972         }