X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=9d542f893fc247a9b8a92a0010da02c66adb4fda;hb=a61ce71468cb222338ccd8039dc631f3619dc585;hp=438eb5aa7d371f0d0fd9c70b9c8b7d15a8d44737;hpb=5caabd3c701a484271d197f7006ecf831e38136b;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 438eb5aa7..9d542f893 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,7 @@ from ..compat import ( ) from ..utils import ( clean_html, + dict_get, error_to_compat_str, ExtractorError, float_or_none, @@ -908,6 +909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 'track': 'Dark Walk - Position Music', 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', + 'album': 'Position Music - Production Music Vol. 143 - Dark Walk', }, 'params': { 'skip_download': True, @@ -1088,7 +1090,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { - # artist and track fields should return non-null, per issue #20599 + # Youtube Music Auto-generated description 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', 'info_dict': { 'id': 'MgNrAu2pzNs', @@ -1109,11 +1111,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { + # Youtube Music Auto-generated description # Retrieve 'artist' field from 'Artist:' in video description # when it is present on youtube music video - # Some videos have release_date and no release_year - - # (release_year should be extracted from release_date) - # https://github.com/ytdl-org/youtube-dl/pull/20742#issuecomment-485740932 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY', 'info_dict': { 'id': 'k0jLE7tTwjY', @@ -1134,6 +1134,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { + # Youtube Music Auto-generated description # handle multiple artists on youtube music video 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA', 'info_dict': { @@ -1155,6 +1156,7 @@ 
class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { + # Youtube Music Auto-generated description # handle youtube music video with release_year and no release_date 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M', 'info_dict': { @@ -1651,6 +1653,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + def extract_token(v_info): + return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token')) + player_response = {} # Get video info @@ -1710,7 +1715,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # The general idea is to take a union of itags of both DASH manifests (for example # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093) self.report_video_info_webpage_download(video_id) - for el in ('info', 'embedded', 'detailpage', 'vevo', ''): + for el in ('embedded', 'detailpage', 'vevo', ''): query = { 'video_id': video_id, 'ps': 'default', @@ -1740,7 +1745,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): view_count = extract_view_count(get_video_info) if not video_info: video_info = get_video_info - get_token = get_video_info.get('token') or get_video_info.get('account_playback_token') + get_token = extract_token(get_video_info) if get_token: # Different get_video_info requests may report different results, e.g. # some may report video unavailability, but some may serve it without @@ -1751,7 +1756,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # due to YouTube measures against IP ranges of hosting providers. # Working around by preferring the first succeeded video_info containing # the token if no such video_info yet was found. 
- token = video_info.get('token') or video_info.get('account_playback_token') + token = extract_token(video_info) if not token: video_info = get_video_info break @@ -1768,28 +1773,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError( 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) - token = video_info.get('token') or video_info.get('account_playback_token') - if not token: - if 'reason' in video_info: - if 'The uploader has not made this video available in your country.' in video_info['reason']: - regions_allowed = self._html_search_meta( - 'regionsAllowed', video_webpage, default=None) - countries = regions_allowed.split(',') if regions_allowed else None - self.raise_geo_restricted( - msg=video_info['reason'][0], countries=countries) - reason = video_info['reason'][0] - if 'Invalid parameters' in reason: - unavailable_message = extract_unavailable_message() - if unavailable_message: - reason = unavailable_message - raise ExtractorError( - 'YouTube said: %s' % reason, - expected=True, video_id=video_id) - else: - raise ExtractorError( - '"token" parameter not in video info for unknown reason', - video_id=video_id) - if video_info.get('license_info'): raise ExtractorError('This video is DRM protected.', expected=True) @@ -2161,36 +2144,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor): track = extract_meta('Song') artist = extract_meta('Artist') - album = None - release_date = None - release_year = None - - description_info = video_description.split('\n\n') - # If the description of the video has the youtube music auto-generated format, extract additional info - if len(description_info) >= 5 and description_info[-1] == 'Auto-generated by YouTube.': - track_artist = description_info[1].split(' · ') - if len(track_artist) >= 2: - if track is None: - track = track_artist[0] - if artist is None: - artist = re.search(r'Artist: ([^\n]+)', description_info[-2]) - if artist: - artist = artist.group(1) - if artist is None: - artist = 
track_artist[1] - # handle multiple artists - if len(track_artist) > 2: - for i in range(2, len(track_artist)): - artist += ', %s' % track_artist[i] - release_year = re.search(r'℗ ([0-9]+)', video_description) - if release_year: - release_year = int_or_none(release_year.group(1)) - album = description_info[2] - if description_info[4].startswith('Released on: '): - release_date = description_info[4].split(': ')[1].replace('-', '') - # extract release_year from release_date if necessary - if release_year is None: - release_year = int_or_none(release_date[0:4]) + album = extract_meta('Album') + + # Youtube Music Auto-generated description + release_date = release_year = None + if video_description: + mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description) + if mobj: + if not track: + track = mobj.group('track').strip() + if not artist: + artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')) + if not album: + album = mobj.group('album'.strip()) + release_year = mobj.group('release_year') + release_date = mobj.group('release_date') + if release_date: + release_date = release_date.replace('-', '') + if not release_year: + release_year = int(release_date[:4]) + if release_year: + release_year = int(release_year) m_episode = re.search( r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', @@ -2304,6 +2278,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if f.get('vcodec') != 'none': f['stretched_ratio'] = ratio + if not formats: + token = extract_token(video_info) + if not token: + if 'reason' in video_info: + if 'The uploader has not made this video available in your country.'
in video_info['reason']: + regions_allowed = self._html_search_meta( + 'regionsAllowed', video_webpage, default=None) + countries = regions_allowed.split(',') if regions_allowed else None + self.raise_geo_restricted( + msg=video_info['reason'][0], countries=countries) + reason = video_info['reason'][0] + if 'Invalid parameters' in reason: + unavailable_message = extract_unavailable_message() + if unavailable_message: + reason = unavailable_message + raise ExtractorError( + 'YouTube said: %s' % reason, + expected=True, video_id=video_id) + else: + raise ExtractorError( + '"token" parameter not in video info for unknown reason', + video_id=video_id) + self._sort_formats(formats) self.mark_watched(video_id, video_info, player_response)