Add the 'webpage_url' field to info_dict

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Sun, 3 Nov 2013 11:11:13 +0000 (12:11 +0100)

committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Sun, 3 Nov 2013 11:11:13 +0000 (12:11 +0100)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sun, 3 Nov 2013 11:11:13 +0000 (12:11 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sun, 3 Nov 2013 11:11:13 +0000 (12:11 +0100)
diff --git a/test/test_download.py b/test/test_download.py

index dfb04d010a0814037a1da4aac52ff1f65c2b1ab6..d6cc9ec3306c66bfb5e7ba76bb2d373138ab8632 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -148,6 +148,9 @@ def generator(test_case):
                  # Check for the presence of mandatory fields
                  for key in ('id', 'url', 'title', 'ext'):
                      self.assertTrue(key in info_dict.keys() and info_dict[key])
+                # Check for mandatory fields that are automatically set by YoutubeDL
+                for key in ['webpage_url', 'extractor']:
+                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
          finally:
              try_rm_tcs_files()
  
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index a3e0a700f54ac03c7cf32654f9265f1ebd74f695..8938a2cd30f566c2c9d86e93da5ce6483ef44f0e 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -354,8 +354,11 @@ class YoutubeDL(object):
                          '_type': 'compat_list',
                          'entries': ie_result,
                      }
-                if 'extractor' not in ie_result:
-                    ie_result['extractor'] = ie.IE_NAME
+                self.add_extra_info(ie_result,
+                    {
+                        'extractor': ie.IE_NAME,
+                        'webpage_url': url
+                    })
                  return self.process_ie_result(ie_result, download, extra_info)
              except ExtractorError as de: # An error we somewhat expected
                  self.report_error(compat_str(de), de.format_traceback())
@@ -417,6 +420,7 @@ class YoutubeDL(object):
                      'playlist': playlist,
                      'playlist_index': i + playliststart,
                      'extractor': ie_result['extractor'],
+                    'webpage_url': ie_result['webpage_url'],
                  }
                  entry_result = self.process_ie_result(entry,
                                                        download=download,
@@ -427,7 +431,10 @@ class YoutubeDL(object):
          elif result_type == 'compat_list':
              def _fixup(r):
                  self.add_extra_info(r,
-                    {'extractor': ie_result['extractor']})
+                    {
+                        'extractor': ie_result['extractor'],
+                        'webpage_url': ie_result['webpage_url'],
+                    })
                  return r
              ie_result['entries'] = [
                  self.process_ie_result(_fixup(r), download, extra_info)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index cef4dce856fe9cc7339d28c18a9b23cc87dbfc8d..e0ccba533534709b6ce65af518ca16fadee0c4ea 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -71,6 +71,9 @@ class InfoExtractor(object):
                                  ("3D" or "DASH video")
                      * width     Width of the video, if known
                      * height    Height of the video, if known
+    webpage_url:    The url to the video webpage, if given to youtube-dl it
+                    should allow to get the same result again. (It will be set
+                    by YoutubeDL if it's missing)
  
      Unless mentioned otherwise, the fields should be Unicode strings.
  
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index c7d864a2b6de2e121393bccb0fa50ffca7c2fde2..62273fd33814901ca3b3c799e0589e6c14985fc3 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
      """Information extractor for vimeo.com."""
  
      # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
      _NETRC_MACHINE = 'vimeo'
      IE_NAME = u'vimeo'
      _TESTS = [
@@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          video_id = mobj.group('id')
-        if not mobj.group('proto'):
-            url = 'https://' + url
-        elif mobj.group('pro'):
+        if mobj.group('pro') or mobj.group('player'):
              url = 'http://player.vimeo.com/video/' + video_id
-        elif mobj.group('direct_link'):
+        else:
              url = 'https://vimeo.com/' + video_id
  
          # Retrieve video webpage to extract further information
@@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):
          if len(formats) == 0:
              raise ExtractorError(u'No known codec found')
  
-        return [{
+        return {
              'id':       video_id,
              'uploader': video_uploader,
              'uploader_id': video_uploader_id,
@@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):
              'thumbnail':    video_thumbnail,
              'description':  video_description,
              'formats': formats,
-        }]
+            'webpage_url': url,
+        }
  
  
  class VimeoChannelIE(InfoExtractor):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index a19abe1f0a2ad72f7d6f606278a4ae4577902f39..6ddd6ef06791d52de1245dc1d1744f32be4d96f5 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'subtitles':    video_subtitles,
                  'duration':     video_duration,
                  'age_limit':    18 if age_gate else 0,
-                'annotations':  video_annotations
+                'annotations':  video_annotations,
+                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
              })
          return results
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Sun, 3 Nov 2013 11:11:13 +0000 (12:11 +0100)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Sun, 3 Nov 2013 11:11:13 +0000 (12:11 +0100)
test/test_download.py		patch | blob | history
youtube_dl/YoutubeDL.py		patch | blob | history
youtube_dl/extractor/common.py		patch | blob | history
youtube_dl/extractor/vimeo.py		patch | blob | history
youtube_dl/extractor/youtube.py		patch | blob | history