Start moving to ytdl-org
[youtube-dl] / youtube_dl / extractor / yandexmusic.py
index 22050add3e87ce1660ca07c3f2ed8807d7d9c71c..456f95f698662d03bef59429ccc671b73b6a8913 100644 (file)
@@ -10,8 +10,6 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     float_or_none,
-    sanitized_Request,
-    urlencode_postdata,
 )
 
 
@@ -22,18 +20,24 @@ class YandexMusicBaseIE(InfoExtractor):
             error = response.get('error')
             if error:
                 raise ExtractorError(error, expected=True)
+            if response.get('type') == 'captcha' or 'captcha' in response:
+                YandexMusicBaseIE._raise_captcha()
 
-    def _download_webpage(self, *args, **kwargs):
-        webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
+    @staticmethod
+    def _raise_captcha():
+        raise ExtractorError(
+            'YandexMusic has considered youtube-dl requests automated and '
+            'asks you to solve a CAPTCHA. You can either wait for some '
+            'time until unblocked and optionally use --sleep-interval '
+            'in future or alternatively you can go to https://music.yandex.ru/ '
+            'solve CAPTCHA, then export cookies and pass cookie file to '
+            'youtube-dl with --cookies',
+            expected=True)
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
-        if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
+        if webpage and 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage[0]:  # (content, urlh) or False
-            raise ExtractorError(
-                'YandexMusic has considered youtube-dl requests automated and '
-                'asks you to solve a CAPTCHA. You can either wait for some '
-                'time until unblocked and optionally use --sleep-interval '
-                'in future or alternatively you can go to https://music.yandex.ru/ '
-                'solve CAPTCHA, then export cookies and pass cookie file to '
-                'youtube-dl with --cookies',
-                expected=True)
+            self._raise_captcha()
         return webpage
 
     def _download_json(self, *args, **kwargs):
@@ -53,14 +57,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
         'info_dict': {
             'id': '4878838',
             'ext': 'mp3',
-            'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1',
+            'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
             'filesize': 4628061,
             'duration': 193.04,
             'track': 'Gypsy Eyes 1',
             'album': 'Gypsy Soul',
             'album_artist': 'Carlo Ambrosio',
-            'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
-            'release_year': '2009',
+            'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari',
+            'release_year': 2009,
         },
         'skip': 'Travis CI servers blocked by YandexMusic',
     }
@@ -71,6 +75,12 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
             % storage_dir,
             track_id, 'Downloading track location JSON')
 
+        # Each string is now wrapped in a list. This is probably only temporary,
+        # so both forms are supported (see https://github.com/ytdl-org/youtube-dl/issues/10193)
+        for k, v in data.items():
+            if v and isinstance(v, list):
+                data[k] = v[0]
+
         key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
         storage = storage_dir.split('.')
 
@@ -110,7 +120,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
                 track_info.update({
                     'album': album.get('title'),
                     'album_artist': extract_artist(album.get('artists')),
-                    'release_year': compat_str(year) if year else None,
+                    'release_year': int_or_none(year),
                 })
 
         track_artist = extract_artist(track.get('artists'))
@@ -190,13 +200,13 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
         'skip': 'Travis CI servers blocked by YandexMusic',
     }, {
         # playlist exceeding the limit of 150 tracks shipped with webpage (see
-        # https://github.com/rg3/youtube-dl/issues/6666)
+        # https://github.com/ytdl-org/youtube-dl/issues/6666)
         'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
         'info_dict': {
             'id': '1036',
             'title': 'Музыка 90-х',
         },
-        'playlist_count': 310,
+        'playlist_mincount': 300,
         'skip': 'Travis CI servers blocked by YandexMusic',
     }]
 
@@ -224,31 +234,37 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
                 'overembed': 'false',
             })['playlist']
 
-        tracks, track_ids = playlist['tracks'], playlist['trackIds']
+        tracks = playlist['tracks']
+        track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
 
         # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
         # missing tracks should be retrieved manually.
         if len(tracks) < len(track_ids):
-            present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
-            missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
-            request = sanitized_Request(
-                'https://music.yandex.ru/handlers/track-entries.jsx',
-                urlencode_postdata({
+            present_track_ids = set([
+                compat_str(track['id'])
+                for track in tracks if track.get('id')])
+            missing_track_ids = [
+                track_id for track_id in track_ids
+                if track_id not in present_track_ids]
+            missing_tracks = self._download_json(
+                'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
+                playlist_id, 'Downloading missing tracks JSON',
+                fatal=False,
+                headers={
+                    'Referer': url,
+                    'X-Requested-With': 'XMLHttpRequest',
+                },
+                query={
                     'entries': ','.join(missing_track_ids),
                     'lang': tld,
                     'external-domain': 'music.yandex.%s' % tld,
                     'overembed': 'false',
                     'strict': 'true',
-                }))
-            request.add_header('Referer', url)
-            request.add_header('X-Requested-With', 'XMLHttpRequest')
-
-            missing_tracks = self._download_json(
-                request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
+                })
             if missing_tracks:
                 tracks.extend(missing_tracks)
 
         return self.playlist_result(
             self._build_playlist(tracks),
             compat_str(playlist_id),
-            playlist['title'], playlist.get('description'))
+            playlist.get('title'), playlist.get('description'))