[youtube] Use 'node is None' when checking if the video has automatic captions

[youtube-dl] / youtube_dl / extractor / vimeo.py
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index dee4175ef382c7e6caa0a7b0162fabb760e4135e..2de56ac814462e3c3536ccac34b980b3e9a8bfb5 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -11,13 +11,14 @@ from ..utils import (
      get_element_by_attribute,
      ExtractorError,
      std_headers,
+    unsmuggle_url,
  )
  
  class VimeoIE(InfoExtractor):
      """Information extractor for vimeo.com."""
  
      # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$'
      _NETRC_MACHINE = 'vimeo'
      IE_NAME = u'vimeo'
      _TESTS = [
@@ -53,7 +54,7 @@ class VimeoIE(InfoExtractor):
                  u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
                  u'uploader': u'The BLN & Business of Software',
              },
-        },
+        }
      ]
  
      def _login(self):
@@ -98,6 +99,12 @@ class VimeoIE(InfoExtractor):
          self._login()
  
      def _real_extract(self, url, new_video=True):
+        url, data = unsmuggle_url(url)
+        headers = std_headers
+        if data is not None:
+            headers = headers.copy()
+            headers.update(data)
+
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
@@ -112,7 +119,7 @@ class VimeoIE(InfoExtractor):
              url = 'https://vimeo.com/' + video_id
  
          # Retrieve video webpage to extract further information
-        request = compat_urllib_request.Request(url, None, std_headers)
+        request = compat_urllib_request.Request(url, None, headers)
          webpage = self._download_webpage(request, video_id)
  
          # Now we begin extracting as much information as we can from what we
@@ -148,9 +155,17 @@ class VimeoIE(InfoExtractor):
              _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
  
          # Extract video description
-        video_description = get_element_by_attribute("itemprop", "description", webpage)
-        if video_description: video_description = clean_html(video_description)
-        else: video_description = u''
+        video_description = None
+        try:
+            video_description = get_element_by_attribute("itemprop", "description", webpage)
+            if video_description: video_description = clean_html(video_description)
+        except AssertionError as err:
+            # On some pages (e.g. http://player.vimeo.com/video/54469442) the
+            # HTML tags are not closed, which Python 2.6 cannot handle
+            if err.args[0] == 'we should not get here!':
+                pass
+            else:
+                raise
  
          # Extract upload date
          video_upload_date = None