X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=ca40de522bc5e341f2ac269db997a0e73914c127;hb=4f06c1c9fcbfbc74b81b5fa89a616914b5ce5aad;hp=f2f75110445a6bad461fcd92d4ae3d26dd456d69;hpb=a22b2fd19bd8c08d50f884d1903486d4f00f76ec;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f2f751104..ca40de522 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -34,6 +34,7 @@ from ..utils import ( int_or_none, mimetype2ext, orderedSet, + parse_codecs, parse_duration, remove_quotes, remove_start, @@ -46,7 +47,6 @@ from ..utils import ( unsmuggle_url, uppercase_escape, urlencode_postdata, - ISO3166Utils, ) @@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False + _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}' + def _set_language(self): self._set_cookie( '.youtube.com', 'PREF', 'f1=50000000&hl=en', @@ -265,9 +267,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) )? # all until now is optional -> you can pass the naked ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID - (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE + (?!.*?\blist= + (?: + %(playlist_id)s| # combined list/video URLs are handled by the playlist IE + WL # WL are handled by the watch later IE + ) + ) (?(1).+)? # if we found the ID, everything can follow - $""" + $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _formats = { '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, @@ -329,6 +336,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, + '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'}, + '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, @@ -368,6 +377,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } _SUBTITLE_FORMATS = ('ttml', 'vtt') + _GEO_BYPASS = False + IE_NAME = 'youtube' _TESTS = [ { @@ -914,7 +925,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # itag 212 'url': '1t24XAntNCY', 'only_matching': True, - } + }, + { + # geo restricted to JP + 'url': 'sJL6WA-aGkQ', + 'only_matching': True, + }, + { + 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): @@ -1373,11 +1393,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'token' not in video_info: if 'reason' in video_info: if 'The uploader has not made this video available in your country.' in video_info['reason']: - regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None) - if regions_allowed: - raise ExtractorError('YouTube said: This video is available in %s only' % ( - ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))), - expected=True) + regions_allowed = self._html_search_meta( + 'regionsAllowed', video_webpage, default=None) + countries = regions_allowed.split(',') if regions_allowed else None + self.raise_geo_restricted( + msg=video_info['reason'][0], countries=countries) raise ExtractorError( 'YouTube said: %s' % video_info['reason'][0], expected=True, video_id=video_id) @@ -1445,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: - raise ExtractorError('"rental" videos not supported') + raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True) # Start extracting information self.report_information_extraction(video_id) @@ -1694,15 +1714,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): codecs = mobj.group('val') break if codecs: - codecs = codecs.split(',') - if len(codecs) == 2: - acodec, vcodec = codecs[1], codecs[0] - else: - acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0]) - dct.update({ - 'acodec': acodec, - 'vcodec': vcodec, - }) + dct.update(parse_codecs(codecs)) formats.append(dct) elif video_info.get('hlsvp'): manifest_url = video_info['hlsvp'][0] @@ -1850,7 +1862,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (?: youtube\.com/ (?: - (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) + (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11})) \? (?:.*?[&;])*? (?:p|a|list)= | p/ )| @@ -1863,8 +1875,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): ) .* | - ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}) - )""" + (%(playlist_id)s) + )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})&[^"]*?index=(?P\d+)(?:[^>]+>(?P[^<]+))?' IE_NAME = 'youtube:playlist' @@ -1923,6 +1935,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': 'JODA15', 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', } + }, { + 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + 'playlist_mincount': 485, + 'info_dict': { + 'title': '2017 華語最新單曲 (2/24更新)', + 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + } }, { 'note': 'Embedded SWF player', 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', @@ -2071,7 +2090,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # Check if it's a video-specific URL query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) video_id = query_dict.get('v', [None])[0] or self._search_regex( - r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url, + r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url, 'video id', default=None) if video_id: if self._downloader.params.get('noplaylist'): @@ -2231,7 +2250,7 @@ class YoutubeUserIE(YoutubeChannelIE): 'url': 'https://www.youtube.com/gametrailers', 'only_matching': True, }, { - # This channel is not available. + # This channel is not available, geo restricted to JP 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos', 'only_matching': True, }]