Merge branch 'pr-democracynow' of https://github.com/atomicdryad/youtube-dl into...

[youtube-dl] / youtube_dl / extractor / vimeo.py
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index fa1b2204946a25f9cd2fb80ee4c7e2cf90779048..2437ae1eb2cfafc34441d00a6a5b725e78f7cbb3 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -133,7 +133,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                  'uploader_id': 'user18948128',
                  'uploader': 'Jaime Marquínez Ferrándiz',
                  'duration': 10,
-                'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.',
+                'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026',
              },
              'params': {
                  'videopassword': 'youtube-dl',
@@ -181,6 +181,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
                  'uploader_id': 'user28849593',
              },
          },
+        {
+            'url': 'https://vimeo.com/109815029',
+            'note': 'Video not completely processed, "failed" seed status',
+            'only_matching': True,
+        },
      ]
  
      @staticmethod
@@ -273,20 +278,30 @@ class VimeoIE(VimeoBaseInfoExtractor):
          self.report_extraction(video_id)
  
          vimeo_config = self._search_regex(
-            r'vimeo\.config\s*=\s*({.+?});', webpage,
+            r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
              'vimeo config', default=None)
          if vimeo_config:
              seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
              if seed_status.get('state') == 'failed':
                  raise ExtractorError(
-                    '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+                    '%s said: %s' % (self.IE_NAME, seed_status['title']),
                      expected=True)
  
          # Extract the config JSON
          try:
              try:
                  config_url = self._html_search_regex(
-                    r' data-config-url="(.+?)"', webpage, 'config URL')
+                    r' data-config-url="(.+?)"', webpage,
+                    'config URL', default=None)
+                if not config_url:
+                    # Sometimes new react-based page is served instead of old one that require
+                    # different config URL extraction approach (see
+                    # https://github.com/rg3/youtube-dl/pull/7209)
+                    vimeo_clip_page_config = self._search_regex(
+                        r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
+                        'vimeo clip page config')
+                    config_url = self._parse_json(
+                        vimeo_clip_page_config, video_id)['player']['config_url']
                  config_json = self._download_webpage(config_url, video_id)
                  config = json.loads(config_json)
              except RegexNotFoundError: