[extractor/common] Improve mpd base URL extraction (closes #10909, closes #11079)
[youtube-dl] / youtube_dl / extractor / vimeo.py
index ea8fc59082ec8e463cd1d5095fff581af9b0d8de..51c69a80c216889315a4c5fe070572100c13dd36 100644 (file)
@@ -322,6 +322,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
             },
             'expected_warnings': ['Unable to download JSON metadata'],
         },
+        {
+            # redirects to ondemand extractor and should be passed through it
+            # for successful extraction
+            'url': 'https://vimeo.com/73445910',
+            'info_dict': {
+                'id': '73445910',
+                'ext': 'mp4',
+                'title': 'The Reluctant Revolutionary',
+                'uploader': '10Ft Films',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
+                'uploader_id': 'tenfootfilms',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
             'only_matching': True,
@@ -359,7 +375,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
         urls = []
         # Look for embedded (iframe) Vimeo player
         for mobj in re.finditer(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage):
+                r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
+                webpage):
             urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
         PLAIN_EMBED_RE = (
             # Look for embedded (swf embed) Vimeo player
@@ -413,7 +430,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
         # Retrieve video webpage to extract further information
         request = sanitized_Request(url, headers=headers)
         try:
-            webpage = self._download_webpage(request, video_id)
+            webpage, urlh = self._download_webpage_handle(request, video_id)
+            # Some URLs redirect to ondemand and can't be extracted with
+            # this extractor right away, so they should be passed through
+            # the ondemand extractor (e.g. https://vimeo.com/73445910)
+            if VimeoOndemandIE.suitable(urlh.geturl()):
+                return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key())
         except ExtractorError as ee:
             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                 errmsg = ee.cause.read()
@@ -836,6 +858,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
         'params': {
             'videopassword': 'holygrail',
         },
+        'skip': 'video gone',
     }]
 
     def _real_initialize(self):
@@ -843,9 +866,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
 
     def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
         webpage = self._download_webpage(webpage_url, video_id)
-        config_url = self._html_search_regex(
-            r'data-config-url="([^"]+)"', webpage, 'config URL',
-            default=NO_DEFAULT if video_password_verified else None)
+        data = self._parse_json(self._search_regex(
+            r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
+            default=NO_DEFAULT if video_password_verified else '{}'), video_id)
+        config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
         if config_url is None:
             self._verify_video_password(webpage_url, video_id, webpage)
             config_url = self._get_config_url(