X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=3d3d43491c293c79334ec2c1a8a42fec93063796;hb=1b40dc92eb27b2a3f299157f83bfc8e95ca42268;hp=b7b91f354dbf07f0aa4e639d75d57fd51fe37687;hpb=8940b8608e567dba09b3ea146b89b297190ec6d6;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b7b91f354..3d3d43491 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -25,6 +25,7 @@ from ..compat import ( from ..utils import ( clean_html, ExtractorError, + float_or_none, get_element_by_attribute, get_element_by_id, int_or_none, @@ -540,26 +541,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) + download_note = ( + 'Downloading player %s' % player_url + if self._downloader.params.get('verbose') else + 'Downloading %s player %s' % (player_type, player_id) + ) if player_type == 'js': code = self._download_webpage( player_url, video_id, - note='Downloading %s player %s' % (player_type, player_id), + note=download_note, errnote='Download of %s failed' % player_url) res = self._parse_sig_js(code) elif player_type == 'swf': urlh = self._request_webpage( player_url, video_id, - note='Downloading %s player %s' % (player_type, player_id), + note=download_note, errnote='Download of %s failed' % player_url) code = urlh.read() res = self._parse_sig_swf(code) else: assert False, 'Invalid player type %r' % player_type - if cache_spec is None: - test_string = ''.join(map(compat_chr, range(len(example_sig)))) - cache_res = res(test_string) - cache_spec = [ord(c) for c in cache_res] + test_string = ''.join(map(compat_chr, range(len(example_sig)))) + cache_res = res(test_string) + cache_spec = [ord(c) for c in cache_res] self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) return res @@ -780,8 +785,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): fo for fo in formats if fo['format_id'] == format_id) except StopIteration: - f.update(self._formats.get(format_id, {}).items()) - formats.append(f) + full_info = self._formats.get(format_id, {}).copy() + full_info.update(f) + formats.append(full_info) else: existing_format.update(f) return formats @@ -809,6 +815,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): player_url = None # Get video info + embed_webpage = None if re.search(r'player-age-gate-content">', video_webpage) is not None: age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} @@ -1016,10 +1023,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url += '&signature=' + url_data['sig'][0] elif 's' in url_data: encrypted_sig = url_data['s'][0] + ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' jsplayer_url_json = self._search_regex( - r'"assets":.+?"js":\s*("[^"]+")', - embed_webpage if age_gate else video_webpage, 'JS player URL') + ASSETS_RE, + embed_webpage if age_gate else video_webpage, + 'JS player URL (1)', default=None) + if not jsplayer_url_json and not age_gate: + # We need the embed website after all + if embed_webpage is None: + embed_url = proto + '://www.youtube.com/embed/%s' % video_id + embed_webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed webpage') + jsplayer_url_json = self._search_regex( + ASSETS_RE, embed_webpage, 'JS player URL') + player_url = json.loads(jsplayer_url_json) if player_url is None: player_url_json = self._search_regex( @@ -1111,6 +1129,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, + 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]), 'formats': formats, } @@ -1148,6 +1167,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx', 'info_dict': { + 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx', 'title': 'YDL_Empty_List', }, 'playlist_count': 0, @@ -1156,6 +1176,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'info_dict': { 'title': '29C3: Not my department', + 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', }, 'playlist_count': 95, }, { @@ -1163,6 +1184,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'url': 'PLBB231211A4F62143', 'info_dict': { 'title': '[OLD]Team Fortress 2 (Class-based LP)', + 'id': 'PLBB231211A4F62143', }, 'playlist_mincount': 26, }, { @@ -1170,12 +1192,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', 'info_dict': { 'title': 'Uploads from Cauchemar', + 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', }, 'playlist_mincount': 799, }, { 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', 'info_dict': { 'title': 'YDL_safe_search', + 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', }, 'playlist_count': 2, }, { @@ -1184,6 +1208,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'playlist_count': 4, 'info_dict': { 'title': 'JODA15', + 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', } }, { 'note': 'Embedded SWF player', @@ -1191,12 +1216,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'playlist_count': 4, 'info_dict': { 'title': 'JODA7', + 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ', } }, { 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 'info_dict': { - 'title': 'Uploads from Interstellar Movie', + 'title': 'Uploads from Interstellar Movie', + 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', }, 'playlist_mincout': 21, }] @@ -1302,6 +1329,9 @@ class YoutubeChannelIE(InfoExtractor): 'note': 'paginated channel', 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'playlist_mincount': 91, + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + } }] def extract_videos_from_page(self, page): @@ -1688,6 +1718,7 @@ class YoutubeTruncatedURLIE(InfoExtractor): feature=[a-z_]+| annotation_id=annotation_[^&]+| x-yt-cl=[0-9]+| + hl=[^&]*| )? | attribution_link\?a=[^&]+ @@ -1707,6 +1738,9 @@ class YoutubeTruncatedURLIE(InfoExtractor): }, { 'url': 'https://www.youtube.com/watch?feature=foo', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/watch?hl=en-GB', + 'only_matching': True, }] def _real_extract(self, url):