X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=8aa7dfc413a7141cd56328ec3c0eaa56941b4171;hb=0b68de3cc1f99ce8c49a497245c02d4d03201aa8;hp=6c9f77d95370e740676faef9e3d5cf1231629af6;hpb=4315f74fa8e97ca1fdd1fe919f777b3942da2028;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6c9f77d95..8aa7dfc41 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'youtube_include_dash_manifest': True, 'format': '141', }, + 'skip': 'format 141 not served anymore', }, # DASH manifest with encrypted signature { @@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'youtube_include_dash_manifest': True, - 'format': '141', + 'format': '141/bestaudio[ext=m4a]', }, }, # JS player signature function name containing $ @@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'youtube_include_dash_manifest': True, - 'format': '141', + 'format': '141/bestaudio[ext=m4a]', }, }, # Controversy video @@ -618,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic', 'license': 'Standard YouTube License', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', - 'uploader': 'Olympics', + 'uploader': 'Olympic', 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', }, 'params': { @@ -671,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 'uploader': 'dorappi2000', 'license': 'Standard YouTube License', - 'formats': 'mincount:33', + 'formats': 'mincount:32', }, }, # DASH manifest with segment_list @@ -691,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'youtube_include_dash_manifest': True, 'format': '135', # bestvideo - } + }, + 'skip': 'This live event has ended.', }, { # Multifeed videos (multiple cameras), URL is for Main Camera @@ -762,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', }, 'playlist_count': 2, + 'skip': 'Not multifeed anymore', }, { 'url': 'http://vid.plus/FlRa-iH7PGw', @@ -814,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video does not exist.', }, { # Video licensed under Creative Commons @@ -1331,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:[a-zA-Z-]+="[^"]*"\s+)*? (?:title|href)="([^"]+)"\s+ (?:[a-zA-Z-]+="[^"]*"\s+)*? - class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*> + class="[^"]*"[^>]*> [^<]+\.{3}\s* ''', r'\1', video_description) @@ -1726,6 +1730,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } +class YoutubeSharedVideoIE(InfoExtractor): + _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P[0-9A-Za-z_-]{11})' + IE_NAME = 'youtube:shared' + + _TEST = { + 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU', + 'info_dict': { + 'id': 'uPDB5I9wfp8', + 'ext': 'webm', + 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3', + 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d', + 'upload_date': '20160219', + 'uploader': 'Pocoyo - Português (BR)', + 'uploader_id': 'PocoyoBrazil', + }, + 'add_ie': ['Youtube'], + 'params': { + # There are already too many Youtube downloads + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + real_video_id = self._html_search_meta( + 'videoId', webpage, 'YouTube video id', fatal=True) + + return self.url_result(real_video_id, YoutubeIE.ie_key()) + + class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: @@ -1941,10 +1978,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)) + def _build_template_url(self, url, channel_id): + return self._TEMPLATE_URL % channel_id + def _real_extract(self, url): channel_id = self._match_id(url) - url = self._TEMPLATE_URL % channel_id + url = self._build_template_url(url, channel_id) # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778) # Workaround by extracting as a playlist if managed to obtain channel playlist URL @@ -1958,9 +1998,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): channel_playlist_id = self._html_search_meta( 'channelId', channel_page, 'channel id', default=None) if not channel_playlist_id: - channel_playlist_id = self._search_regex( - r'data-(?:channel-external-|yt)id="([^"]+)"', - channel_page, 'channel id', default=None) + channel_url = self._html_search_meta( + ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'), + channel_page, 'channel url', default=None) + if channel_url: + channel_playlist_id = self._search_regex( + r'vnd\.youtube://user/([0-9A-Za-z_-]+)', + channel_url, 'channel id', default=None) if channel_playlist_id and channel_playlist_id.startswith('UC'): playlist_id = 'UU' + channel_playlist_id[2:] return self.url_result( @@ -1983,24 +2027,53 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): for video_id, video_title in self.extract_videos_from_page(channel_page)] return self.playlist_result(entries, channel_id) + try: + next(self._entries(channel_page, channel_id)) + except StopIteration: + alert_message = self._html_search_regex( + r'(?s)]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P[^<]+)', + channel_page, 'alert', default=None, group='alert') + if alert_message: + raise ExtractorError('Youtube said: %s' % alert_message, expected=True) + return self.playlist_result(self._entries(channel_page, channel_id), channel_id) class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P[A-Za-z0-9_-]+)' - _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?Puser|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P[A-Za-z0-9_-]+)' + _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos' IE_NAME = 'youtube:user' _TESTS = [{ 'url': 'https://www.youtube.com/user/TheLinuxFoundation', 'playlist_mincount': 320, 'info_dict': { - 'title': 'TheLinuxFoundation', + 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ', + 'title': 'Uploads from The Linux Foundation', + } + }, { + # Only available via https://www.youtube.com/c/12minuteathlete/videos + # but not https://www.youtube.com/user/12minuteathlete/videos + 'url': 'https://www.youtube.com/c/12minuteathlete/videos', + 'playlist_mincount': 249, + 'info_dict': { + 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ', + 'title': 'Uploads from 12 Minute Athlete', } }, { 'url': 'ytuser:phihag', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/gametrailers', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/gametrailers', + 'only_matching': True, + }, { + # This channel is not available. + 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos', + 'only_matching': True, }] @classmethod @@ -2013,6 +2086,10 @@ class YoutubeUserIE(YoutubeChannelIE): else: return super(YoutubeUserIE, cls).suitable(url) + def _build_template_url(self, url, channel_id): + mobj = re.match(self._VALID_URL, url) + return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id')) + class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com live streams'