[youtube] Remove info el for get_video_info request
[youtube-dl] / youtube_dl / extractor / youtube.py
index 438eb5aa7d371f0d0fd9c70b9c8b7d15a8d44737..9d542f893fc247a9b8a92a0010da02c66adb4fda 100644 (file)
@@ -27,6 +27,7 @@ from ..compat import (
 )
 from ..utils import (
     clean_html,
+    dict_get,
     error_to_compat_str,
     ExtractorError,
     float_or_none,
@@ -908,6 +909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                 'track': 'Dark Walk - Position Music',
                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
+                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
             },
             'params': {
                 'skip_download': True,
@@ -1088,7 +1090,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
         },
         {
-            # artist and track fields should return non-null, per issue #20599
+            # YouTube Music auto-generated description
             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
             'info_dict': {
                 'id': 'MgNrAu2pzNs',
@@ -1109,11 +1111,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
         },
         {
+            # YouTube Music auto-generated description
             # Retrieve 'artist' field from 'Artist:' in video description
             # when it is present on youtube music video
-            # Some videos have release_date and no release_year -
-            # (release_year should be extracted from release_date)
-            # https://github.com/ytdl-org/youtube-dl/pull/20742#issuecomment-485740932
             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
             'info_dict': {
                 'id': 'k0jLE7tTwjY',
@@ -1134,6 +1134,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
         },
         {
+            # YouTube Music auto-generated description
             # handle multiple artists on youtube music video
             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
             'info_dict': {
@@ -1155,6 +1156,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
         },
         {
+            # YouTube Music auto-generated description
             # handle youtube music video with release_year and no release_date
             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
             'info_dict': {
@@ -1651,6 +1653,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         def extract_view_count(v_info):
             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
 
+        def extract_token(v_info):  # look up the playback token under any of its known key spellings
+            return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
+
         player_response = {}
 
         # Get video info
@@ -1710,7 +1715,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 # The general idea is to take a union of itags of both DASH manifests (for example
                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
                 self.report_video_info_webpage_download(video_id)
-                for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
+                for el in ('embedded', 'detailpage', 'vevo', ''):
                     query = {
                         'video_id': video_id,
                         'ps': 'default',
@@ -1740,7 +1745,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         view_count = extract_view_count(get_video_info)
                     if not video_info:
                         video_info = get_video_info
-                    get_token = get_video_info.get('token') or get_video_info.get('account_playback_token')
+                    get_token = extract_token(get_video_info)
                     if get_token:
                         # Different get_video_info requests may report different results, e.g.
                         # some may report video unavailability, but some may serve it without
@@ -1751,7 +1756,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         # due to YouTube measures against IP ranges of hosting providers.
                         # Working around by preferring the first succeeded video_info containing
                         # the token if no such video_info yet was found.
-                        token = video_info.get('token') or video_info.get('account_playback_token')
+                        token = extract_token(video_info)
                         if not token:
                             video_info = get_video_info
                         break
@@ -1768,28 +1773,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError(
                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
 
-        token = video_info.get('token') or video_info.get('account_playback_token')
-        if not token:
-            if 'reason' in video_info:
-                if 'The uploader has not made this video available in your country.' in video_info['reason']:
-                    regions_allowed = self._html_search_meta(
-                        'regionsAllowed', video_webpage, default=None)
-                    countries = regions_allowed.split(',') if regions_allowed else None
-                    self.raise_geo_restricted(
-                        msg=video_info['reason'][0], countries=countries)
-                reason = video_info['reason'][0]
-                if 'Invalid parameters' in reason:
-                    unavailable_message = extract_unavailable_message()
-                    if unavailable_message:
-                        reason = unavailable_message
-                raise ExtractorError(
-                    'YouTube said: %s' % reason,
-                    expected=True, video_id=video_id)
-            else:
-                raise ExtractorError(
-                    '"token" parameter not in video info for unknown reason',
-                    video_id=video_id)
-
         if video_info.get('license_info'):
             raise ExtractorError('This video is DRM protected.', expected=True)
 
@@ -2161,36 +2144,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         track = extract_meta('Song')
         artist = extract_meta('Artist')
-        album = None
-        release_date = None
-        release_year = None
-
-        description_info = video_description.split('\n\n')
-        # If the description of the video has the youtube music auto-generated format, extract additional info
-        if len(description_info) >= 5 and description_info[-1] == 'Auto-generated by YouTube.':
-            track_artist = description_info[1].split(' · ')
-            if len(track_artist) >= 2:
-                if track is None:
-                    track = track_artist[0]
-                if artist is None:
-                    artist = re.search(r'Artist: ([^\n]+)', description_info[-2])
-                    if artist:
-                        artist = artist.group(1)
-                    if artist is None:
-                        artist = track_artist[1]
-                        # handle multiple artists
-                        if len(track_artist) > 2:
-                            for i in range(2, len(track_artist)):
-                                artist += ', %s' % track_artist[i]
-            release_year = re.search(r'℗ ([0-9]+)', video_description)
-            if release_year:
-                release_year = int_or_none(release_year.group(1))
-            album = description_info[2]
-            if description_info[4].startswith('Released on: '):
-                release_date = description_info[4].split(': ')[1].replace('-', '')
-                # extract release_year from release_date if necessary
-                if release_year is None:
-                    release_year = int_or_none(release_date[0:4])
+        album = extract_meta('Album')
+
+        # YouTube Music auto-generated description
+        release_date = release_year = None
+        if video_description:
+            mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
+            if mobj:
+                if not track:
+                    track = mobj.group('track').strip()
+                if not artist:
+                    artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
+                if not album:
+                    album = mobj.group('album').strip()  # strip the captured text, not the group name
+                release_year = mobj.group('release_year')
+                release_date = mobj.group('release_date')
+                if release_date:
+                    release_date = release_date.replace('-', '')  # YYYY-MM-DD -> YYYYMMDD
+                    if not release_year:
+                        release_year = int(release_date[:4])  # fall back to the year of the release date
+                if release_year:
+                    release_year = int(release_year)
 
         m_episode = re.search(
             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
@@ -2304,6 +2278,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     if f.get('vcodec') != 'none':
                         f['stretched_ratio'] = ratio
 
+        if not formats:
+            token = extract_token(video_info)
+            if not token:
+                if 'reason' in video_info:
+                    if 'The uploader has not made this video available in your country.' in video_info['reason']:
+                        regions_allowed = self._html_search_meta(
+                            'regionsAllowed', video_webpage, default=None)
+                        countries = regions_allowed.split(',') if regions_allowed else None
+                        self.raise_geo_restricted(
+                            msg=video_info['reason'][0], countries=countries)
+                    reason = video_info['reason'][0]
+                    if 'Invalid parameters' in reason:
+                        unavailable_message = extract_unavailable_message()
+                        if unavailable_message:
+                            reason = unavailable_message
+                    raise ExtractorError(
+                        'YouTube said: %s' % reason,
+                        expected=True, video_id=video_id)
+                else:
+                    raise ExtractorError(
+                        '"token" parameter not in video info for unknown reason',
+                        video_id=video_id)
+
         self._sort_formats(formats)
 
         self.mark_watched(video_id, video_info, player_response)