Start moving to ytdl-org

[youtube-dl] / youtube_dl / extractor / yandexmusic.py
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py

index 22050add3e87ce1660ca07c3f2ed8807d7d9c71c..456f95f698662d03bef59429ccc671b73b6a8913 100644 (file)
--- a/youtube_dl/extractor/yandexmusic.py
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -10,8 +10,6 @@ from ..utils import (
      ExtractorError,
      int_or_none,
      float_or_none,
-    sanitized_Request,
-    urlencode_postdata,
  )
  
  
@@ -22,18 +20,24 @@ class YandexMusicBaseIE(InfoExtractor):
              error = response.get('error')
              if error:
                  raise ExtractorError(error, expected=True)
+            if response.get('type') == 'captcha' or 'captcha' in response:
+                YandexMusicBaseIE._raise_captcha()
  
-    def _download_webpage(self, *args, **kwargs):
-        webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
+    @staticmethod
+    def _raise_captcha():
+        raise ExtractorError(
+            'YandexMusic has considered youtube-dl requests automated and '
+            'asks you to solve a CAPTCHA. You can either wait for some '
+            'time until unblocked and optionally use --sleep-interval '
+            'in future or alternatively you can go to https://music.yandex.ru/ '
+            'solve CAPTCHA, then export cookies and pass cookie file to '
+            'youtube-dl with --cookies',
+            expected=True)
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
          if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
-            raise ExtractorError(
-                'YandexMusic has considered youtube-dl requests automated and '
-                'asks you to solve a CAPTCHA. You can either wait for some '
-                'time until unblocked and optionally use --sleep-interval '
-                'in future or alternatively you can go to https://music.yandex.ru/ '
-                'solve CAPTCHA, then export cookies and pass cookie file to '
-                'youtube-dl with --cookies',
-                expected=True)
+            self._raise_captcha()
          return webpage
  
      def _download_json(self, *args, **kwargs):
@@ -53,14 +57,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
          'info_dict': {
              'id': '4878838',
              'ext': 'mp3',
-            'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1',
+            'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
              'filesize': 4628061,
              'duration': 193.04,
              'track': 'Gypsy Eyes 1',
              'album': 'Gypsy Soul',
              'album_artist': 'Carlo Ambrosio',
-            'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
-            'release_year': '2009',
+            'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari',
+            'release_year': 2009,
          },
          'skip': 'Travis CI servers blocked by YandexMusic',
      }
@@ -71,6 +75,12 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
              % storage_dir,
              track_id, 'Downloading track location JSON')
  
+        # Each string is now wrapped in a list. This is probably only temporary, so
+        # both scenarios are supported (see https://github.com/ytdl-org/youtube-dl/issues/10193)
+        for k, v in data.items():
+            if v and isinstance(v, list):
+                data[k] = v[0]
+
          key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
          storage = storage_dir.split('.')
  
@@ -110,7 +120,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
                  track_info.update({
                      'album': album.get('title'),
                      'album_artist': extract_artist(album.get('artists')),
-                    'release_year': compat_str(year) if year else None,
+                    'release_year': int_or_none(year),
                  })
  
          track_artist = extract_artist(track.get('artists'))
@@ -190,13 +200,13 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
          'skip': 'Travis CI servers blocked by YandexMusic',
      }, {
          # playlist exceeding the limit of 150 tracks shipped with webpage (see
-        # https://github.com/rg3/youtube-dl/issues/6666)
+        # https://github.com/ytdl-org/youtube-dl/issues/6666)
          'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
          'info_dict': {
              'id': '1036',
              'title': 'Музыка 90-х',
          },
-        'playlist_count': 310,
+        'playlist_mincount': 300,
          'skip': 'Travis CI servers blocked by YandexMusic',
      }]
  
@@ -224,31 +234,37 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
                  'overembed': 'false',
              })['playlist']
  
-        tracks, track_ids = playlist['tracks'], playlist['trackIds']
+        tracks = playlist['tracks']
+        track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
  
          # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
          # missing tracks should be retrieved manually.
          if len(tracks) < len(track_ids):
-            present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
-            missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
-            request = sanitized_Request(
-                'https://music.yandex.ru/handlers/track-entries.jsx',
-                urlencode_postdata({
+            present_track_ids = set([
+                compat_str(track['id'])
+                for track in tracks if track.get('id')])
+            missing_track_ids = [
+                track_id for track_id in track_ids
+                if track_id not in present_track_ids]
+            missing_tracks = self._download_json(
+                'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
+                playlist_id, 'Downloading missing tracks JSON',
+                fatal=False,
+                headers={
+                    'Referer': url,
+                    'X-Requested-With': 'XMLHttpRequest',
+                },
+                query={
                      'entries': ','.join(missing_track_ids),
                      'lang': tld,
                      'external-domain': 'music.yandex.%s' % tld,
                      'overembed': 'false',
                      'strict': 'true',
-                }))
-            request.add_header('Referer', url)
-            request.add_header('X-Requested-With', 'XMLHttpRequest')
-
-            missing_tracks = self._download_json(
-                request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
+                })
              if missing_tracks:
                  tracks.extend(missing_tracks)
  
          return self.playlist_result(
              self._build_playlist(tracks),
              compat_str(playlist_id),
-            playlist['title'], playlist.get('description'))
+            playlist.get('title'), playlist.get('description'))