X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftwitch.py;h=36ee1adff2288570fc39936640bacd3abafe9ed2;hb=d6712378e73951bede475569c887a1ac73f660a9;hp=891499a1f370d6b2484f75db07842afcc83431cc;hpb=fbd9f6ea804328d536aafd2b20a8afb72968e351;p=youtube-dl diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 891499a1f..36ee1adff 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -9,17 +9,18 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, - compat_urllib_request, compat_urlparse, ) from ..utils import ( - encode_dict, ExtractorError, int_or_none, + orderedSet, parse_duration, parse_iso8601, + sanitized_Request, + urlencode_postdata, ) @@ -48,7 +49,7 @@ class TwitchBaseIE(InfoExtractor): for cookie in self._downloader.cookiejar: if cookie.name == 'api_token': headers['Twitch-Api-Token'] = cookie.value - request = compat_urllib_request.Request(url, headers=headers) + request = sanitized_Request(url, headers=headers) response = super(TwitchBaseIE, self)._download_json(request, video_id, note) self._handle_error(response) return response @@ -80,8 +81,8 @@ class TwitchBaseIE(InfoExtractor): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(redirect_url, post_url) - request = compat_urllib_request.Request( - post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8')) + request = sanitized_Request( + post_url, urlencode_postdata(login_form)) request.add_header('Referer', redirect_url) response = self._download_webpage( request, None, 'Logging in as %s' % username) @@ -240,14 +241,25 @@ class TwitchVodIE(TwitchItemBaseIE): def _real_extract(self, url): item_id = self._match_id(url) + info = self._download_info(self._ITEM_SHORTCUT, item_id) access_token = self._download_json( '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id, 'Downloading %s access token' % self._ITEM_TYPE) + formats = self._extract_m3u8_formats( - '%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true' - % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']), + '%s/vod/%s?%s' % ( + self._USHER_BASE, item_id, + compat_urllib_parse_urlencode({ + 'allow_source': 'true', + 'allow_audio_only': 'true', + 'allow_spectre': 'true', + 'player': 'twitchweb', + 'nauth': access_token['token'], + 'nauthsig': access_token['sig'], + })), item_id, 'mp4') + self._prefer_source(formats) info['formats'] = formats @@ -271,17 +283,37 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): entries = [] offset = 0 limit = self._PAGE_LIMIT + broken_paging_detected = False + counter_override = None for counter in itertools.count(1): response = self._download_json( self._PLAYLIST_URL % (channel_id, offset, limit), - channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter)) + channel_id, + 'Downloading %s videos JSON page %s' + % (self._PLAYLIST_TYPE, counter_override or counter)) page_entries = self._extract_playlist_page(response) if not page_entries: break + total = int_or_none(response.get('_total')) + # Since the beginning of March 2016 twitch's paging mechanism + # is completely broken on the twitch side. It simply ignores + # a limit and returns the whole offset number of videos. + # Working around by just requesting all videos at once. + # Upd: pagination bug was fixed by twitch on 15.03.2016. + if not broken_paging_detected and total and len(page_entries) > limit: + self.report_warning( + 'Twitch pagination is broken on twitch side, requesting all videos at once', + channel_id) + broken_paging_detected = True + offset = total + counter_override = '(all at once)' + continue entries.extend(page_entries) + if broken_paging_detected or total and len(page_entries) >= total: + break offset += limit return self.playlist_result( - [self.url_result(entry) for entry in set(entries)], + [self.url_result(entry) for entry in orderedSet(entries)], channel_id, channel_name) def _extract_playlist_page(self, response): @@ -401,6 +433,7 @@ class TwitchStreamIE(TwitchBaseIE): query = { 'allow_source': 'true', + 'allow_audio_only': 'true', 'p': random.randint(1000000, 10000000), 'player': 'twitchweb', 'segment_preference': '4', @@ -409,7 +442,7 @@ class TwitchStreamIE(TwitchBaseIE): } formats = self._extract_m3u8_formats( '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)), + % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)), channel_id, 'mp4') self._prefer_source(formats)