[vevo:playlist] Add extractor (Closes #9334, closes #9364)

[youtube-dl] / youtube_dl / extractor / vevo.py
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py

index 9c955c8957525a5657d838283de6db2b097aeee8..4ad1e87e4c6b4b3d8f74c893bf93328ff498c577 100644 (file)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..compat import compat_etree_fromstring
+from ..compat import (
+    compat_etree_fromstring,
+    compat_urlparse,
+)
  from ..utils import (
      ExtractorError,
      int_or_none,
@@ -18,7 +21,7 @@ class VevoIE(InfoExtractor):
      (currently used by MTVIE and MySpaceIE)
      '''
      _VALID_URL = r'''(?x)
-        (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
+        (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
             https?://cache\.vevo\.com/m/html/embed\.html\?video=|
             https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
             vevo:)
@@ -26,67 +29,52 @@ class VevoIE(InfoExtractor):
  
      _TESTS = [{
          'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
-        'md5': '2dbc7e9fd4f1c60436c9aa73a5406193',
+        'md5': '95ee28ee45e70130e3ab02b0f579ae23',
          'info_dict': {
-            'id': 'Pt1kc_FniKM',
+            'id': 'GB1101300280',
              'ext': 'mp4',
-            'title': 'Hurts - Somebody to Die For',
-            'description': 'md5:13e925b89af6b01c7e417332bd23c4bf',
-            'uploader_id': 'HurtsVEVO',
-            'uploader': 'HurtsVEVO',
+            'title': 'Somebody to Die For',
              'upload_date': '20130624',
-            'duration': 230,
+            'uploader': 'Hurts',
+            'timestamp': 1372057200,
          },
-        'add_ie': ['Youtube'],
      }, {
          'note': 'v3 SMIL format',
          'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
-        'md5': '13d5204f520af905eeffa675040b8e76',
+        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
          'info_dict': {
-            'id': 'ByGmQn1uxJw',
+            'id': 'USUV71302923',
              'ext': 'mp4',
-            'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
-            'description': 'md5:5e9721c92ef117a6f69d00e9b42ceba7',
-            'uploader_id': 'CassadeeVEVO',
-            'uploader': 'CassadeeVEVO',
+            'title': 'I Wish I Could Break Your Heart',
              'upload_date': '20140219',
-            'duration': 226,
-            'age_limit': 0,
+            'uploader': 'Cassadee Pope',
+            'timestamp': 1392796919,
          },
-        'add_ie': ['Youtube'],
      }, {
          'note': 'Age-limited video',
          'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
          'info_dict': {
-            'id': '07FYdnEawAQ',
+            'id': 'USRV81300282',
              'ext': 'mp4',
-            'age_limit': 18,
-            'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
-            'description': 'md5:64249768eec3bc4276236606ea996373',
-            'uploader_id': 'justintimberlakeVEVO',
-            'uploader': 'justintimberlakeVEVO',
+            'title': 'Tunnel Vision (Explicit)',
              'upload_date': '20130703',
-            'duration': 419,
-        },
-        'params': {
-            'skip_download': 'true',
+            'age_limit': 18,
+            'uploader': 'Justin Timberlake',
+            'timestamp': 1372888800,
          },
-        'add_ie': ['Youtube'],
      }, {
          'note': 'No video_info',
          'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
-        'md5': 'a8b84d1d1957cd01046441b701b270fb',
+        'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
          'info_dict': {
-            'id': 'Lad2jHtJCqY',
+            'id': 'USUV71503000',
              'ext': 'mp4',
-            'title': 'K Camp - Till I Die ft. T.I.',
-            'description': 'md5:0694920ededdee4a14cfc39695cc8ec3',
-            'uploader_id': 'KCampVEVO',
-            'uploader': 'KCampVEVO',
+            'title': 'Till I Die',
              'upload_date': '20151207',
-            'duration': 193,
+            'age_limit': 18,
+            'uploader': 'K Camp',
+            'timestamp': 1449468000,
          },
-        'add_ie': ['Youtube'],
      }]
      _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
      _SOURCE_TYPES = {
@@ -146,7 +134,7 @@ class VevoIE(InfoExtractor):
              })
          return formats
  
-    def _initialize_api(self, video_url, video_id):
+    def _initialize_api(self, video_id):
          req = sanitized_Request(
              'http://www.vevo.com/auth', data=b'')
          webpage = self._download_webpage(
@@ -155,7 +143,8 @@ class VevoIE(InfoExtractor):
              errnote='Unable to retrieve oauth token')
  
          if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
-            raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
+            raise ExtractorError(
+                '%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
  
          auth_info = self._parse_json(webpage, video_id)
          self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
@@ -166,8 +155,9 @@ class VevoIE(InfoExtractor):
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
-        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
-        response = self._download_json(json_url, video_id, 'Downloading video info', 'Unable to download info')
+        json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
+        response = self._download_json(
+            json_url, video_id, 'Downloading video info', 'Unable to download info')
          video_info = response.get('video') or {}
          video_versions = video_info.get('videoVersions')
          uploader = None
@@ -176,30 +166,23 @@ class VevoIE(InfoExtractor):
          formats = []
  
          if not video_info:
-            ytid = response.get('errorInfo', {}).get('ytid')
-            if ytid:
-                return self.url_result(ytid, 'Youtube', ytid)
-
              if response.get('statusCode') != 909:
+                ytid = response.get('errorInfo', {}).get('ytid')
+                if ytid:
+                    self.report_warning(
+                        'Video is geoblocked, trying with the YouTube video %s' % ytid)
+                    return self.url_result(ytid, 'Youtube', ytid)
+
                  if 'statusMessage' in response:
                      raise ExtractorError('%s said: %s' % (
                          self.IE_NAME, response['statusMessage']), expected=True)
                  raise ExtractorError('Unable to extract videos')
  
-            if url.startswith('vevo:'):
-                raise ExtractorError(
-                    'Please specify full Vevo URL for downloading', expected=True)
-
-            self._initialize_api(url, video_id)
+            self._initialize_api(video_id)
              video_info = self._call_api(
                  'video/%s' % video_id, video_id, 'Downloading api video info',
                  'Failed to download video info')
  
-            ytid = video_info.get('youTubeId')
-            if ytid:
-                return self.url_result(
-                    ytid, 'Youtube', ytid)
-
              video_versions = self._call_api(
                  'video/%s/streams' % video_id, video_id,
                  'Downloading video versions info',
@@ -215,10 +198,16 @@ class VevoIE(InfoExtractor):
                  version = self._VERSIONS.get(video_version['version'])
                  version_url = video_version.get('url')
                  if not version_url:
-                        continue
+                    continue
  
-                if '.mpd' in version_url or '.ism' in version_url:
+                if '.ism' in version_url:
                      continue
+                elif '.mpd' in version_url:
+                    formats.extend(self._extract_mpd_formats(
+                        version_url, video_id, mpd_id='dash-%s' % version,
+                        note='Downloading %s MPD information' % version,
+                        errnote='Failed to download %s MPD information' % version,
+                        fatal=False))
                  elif '.m3u8' in version_url:
                      formats.extend(self._extract_m3u8_formats(
                          version_url, video_id, 'mp4', 'm3u8_native',
@@ -261,8 +250,7 @@ class VevoIE(InfoExtractor):
              for video_version in video_info['videoVersions']:
                  version = self._VERSIONS.get(video_version['version'])
                  if version == 'youtube':
-                    return self.url_result(
-                        video_version['id'], 'Youtube', video_version['id'])
+                    continue
                  else:
                      source_type = self._SOURCE_TYPES.get(video_version['sourceType'])
                      renditions = compat_etree_fromstring(video_version['data'])
@@ -287,7 +275,7 @@ class VevoIE(InfoExtractor):
                              note='Downloading %s m3u8 information' % version,
                              errnote='Failed to download %s m3u8 information' % version,
                              fatal=False))
-                    elif source_type == 'smil' and not smil_parsed:
+                    elif source_type == 'smil' and version == 'level3' and not smil_parsed:
                          formats.extend(self._extract_smil_formats(
                              renditions.find('rendition').attrib['url'], video_id, False))
                          smil_parsed = True
@@ -316,3 +304,70 @@ class VevoIE(InfoExtractor):
              'view_count': view_count,
              'age_limit': age_limit,
          }
+
+
+class VevoPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.vevo\.com/watch/(?:playlist|genre)/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
+        'info_dict': {
+            'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
+            'title': 'Best-Of: Birdman',
+        },
+        'playlist_count': 10,
+        'params': {
+            'proxy': '52.53.186.253:8083',
+            'no_check_certificate': True,
+        },
+    }, {
+        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
+        'md5': '32dcdfddddf9ec6917fc88ca26d36282',
+        'info_dict': {
+            'id': 'USCMV1100073',
+            'ext': 'mp4',
+            'title': 'Y.U. MAD',
+            'timestamp': 1323417600,
+            'upload_date': '20111209',
+            'uploader': 'Birdman',
+        },
+        'expected_warnings': ['Unable to download SMIL file'],
+        'params': {
+            'proxy': '52.53.186.253:8083',
+            'no_check_certificate': True,
+        },
+    }, {
+        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        index = qs.get('index', [None])[0]
+
+        if index:
+            video_id = self._search_regex(
+                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
+                webpage, 'video id', default=None, group='id')
+            if video_id:
+                return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
+
+        playlists = self._parse_json(
+            self._search_regex(
+                r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
+                webpage, 'initial store'),
+            playlist_id)['default']['playlists']
+
+        playlist = list(playlists.values())[0]
+
+        entries = [
+            self.url_result('vevo:%s' % src, VevoIE.ie_key())
+            for src in playlist['isrcs']]
+
+        return self.playlist_result(
+            entries, playlist.get('playlistId'),
+            playlist.get('name'), playlist.get('description'))