Merge pull request #801 from expleo/add_referer_support

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index d5876ad3463b71a841b00552f0b97737b4264996..3450f0d17e19d95d67645a50c79f0680b05379cc 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -114,8 +114,8 @@ class InfoExtractor(object):
      def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns the response handle """
          if note is None:
-            note = u'Downloading video webpage'
-        if note is not False:
+            self.report_download_webpage(video_id)
+        elif note is not False:
              self.to_screen(u'%s: %s' % (video_id, note))
          try:
              return compat_urllib_request.urlopen(url_or_request)
@@ -152,6 +152,10 @@ class InfoExtractor(object):
          """Report information extraction."""
          self.to_screen(u'%s: Extracting information' % id_or_name)
  
+    def report_download_webpage(self, video_id):
+        """Report webpage download."""
+        self.to_screen(u'%s: Downloading webpage' % video_id)
+
      def report_age_confirmation(self):
          """Report attempt to confirm age."""
          self.to_screen(u'Confirming age')
@@ -684,17 +688,10 @@ class MetacafeIE(InfoExtractor):
      _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
      IE_NAME = u'metacafe'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_disclaimer(self):
          """Report disclaimer retrieval."""
          self.to_screen(u'Retrieving disclaimer')
  
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
-
      def _real_initialize(self):
          # Retrieve disclaimer
          request = compat_urllib_request.Request(self._DISCLAIMER)
@@ -795,9 +792,6 @@ class DailymotionIE(InfoExtractor):
      IE_NAME = u'dailymotion'
      _WORKING = False
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def _real_extract(self, url):
          # Extract id and simplified title from URL
          mobj = re.match(self._VALID_URL, url)
@@ -879,13 +873,6 @@ class PhotobucketIE(InfoExtractor):
      _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
      IE_NAME = u'photobucket'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
-
      def _real_extract(self, url):
          # Extract id from URL
          mobj = re.match(self._VALID_URL, url)
@@ -944,13 +931,6 @@ class YahooIE(InfoExtractor):
      _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
      IE_NAME = u'video.yahoo'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
-
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
@@ -1080,13 +1060,6 @@ class VimeoIE(InfoExtractor):
      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
      IE_NAME = u'vimeo'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
-
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
@@ -1120,7 +1093,10 @@ class VimeoIE(InfoExtractor):
              config = webpage.split(' = {config:')[1].split(',assets:')[0]
              config = json.loads(config)
          except:
-            self._downloader.report_error(u'unable to extract info section')
+            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
+                self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option')
+            else:
+                self._downloader.report_error(u'unable to extract info section')
              return
  
          # Extract title
@@ -1197,13 +1173,6 @@ class ArteTvIE(InfoExtractor):
  
      IE_NAME = u'arte.tv'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
-
      def fetch_webpage(self, url):
          request = compat_urllib_request.Request(url)
          try:
@@ -1327,14 +1296,11 @@ class GenericIE(InfoExtractor):
      _VALID_URL = r'.*'
      IE_NAME = u'generic'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_webpage(self, video_id):
          """Report webpage download."""
          if not self._downloader.params.get('test', False):
              self._downloader.report_warning(u'Falling back on generic information extractor.')
-        self.to_screen(u'%s: Downloading webpage' % video_id)
+        super(GenericIE, self).report_download_webpage(video_id)
  
      def report_following_redirect(self, new_url):
          """Report information extraction."""
@@ -1469,9 +1435,6 @@ class YoutubeSearchIE(InfoExtractor):
      _max_youtube_results = 1000
      IE_NAME = u'youtube:search'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, query, pagenum):
          """Report attempt to download search page with given number."""
          query = query.decode(preferredencoding())
@@ -1546,9 +1509,6 @@ class GoogleSearchIE(InfoExtractor):
      _max_google_results = 1000
      IE_NAME = u'video.google:search'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, query, pagenum):
          """Report attempt to download playlist page with given number."""
          query = query.decode(preferredencoding())
@@ -1630,9 +1590,6 @@ class YahooSearchIE(InfoExtractor):
      _max_yahoo_results = 1000
      IE_NAME = u'video.yahoo:search'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, query, pagenum):
          """Report attempt to download playlist page with given number."""
          query = query.decode(preferredencoding())
@@ -1726,9 +1683,6 @@ class YoutubePlaylistIE(InfoExtractor):
      _MAX_RESULTS = 50
      IE_NAME = u'youtube:playlist'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
@@ -1873,9 +1827,6 @@ class YoutubeUserIE(InfoExtractor):
      _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
      IE_NAME = u'youtube:user'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, username, start_index):
          """Report attempt to download user page."""
          self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
@@ -1942,9 +1893,6 @@ class BlipTVUserIE(InfoExtractor):
      _PAGE_SIZE = 12
      IE_NAME = u'blip.tv:user'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, username, pagenum):
          """Report attempt to download user page."""
          self.to_screen(u'user %s: Downloading video ids from page %d' %
@@ -2020,10 +1968,6 @@ class DepositFilesIE(InfoExtractor):
  
      _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
  
-    def report_download_webpage(self, file_id):
-        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % file_id)
-
      def _real_extract(self, url):
          file_id = url.split('/')[-1]
          # Rebuild url in english locale
@@ -2274,9 +2218,6 @@ class MyVideoIE(InfoExtractor):
      _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
      IE_NAME = u'myvideo'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def _real_extract(self,url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
@@ -2712,9 +2653,6 @@ class SoundcloudIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
      IE_NAME = u'soundcloud'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_resolve(self, video_id):
          """Report information extraction."""
          self.to_screen(u'%s: Resolving id' % video_id)
@@ -2781,9 +2719,6 @@ class SoundcloudSetIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
      IE_NAME = u'soundcloud'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_resolve(self, video_id):
          """Report information extraction."""
          self.to_screen(u'%s: Resolving id' % video_id)
@@ -2861,7 +2796,7 @@ class InfoQIE(InfoExtractor):
          self.report_extraction(url)
  
          # Extract video URL
-        mobj = re.search(r"jsclassref='([^']*)'", webpage)
+        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
          if mobj is None:
              self._downloader.report_error(u'unable to extract video url')
              return
@@ -2904,9 +2839,6 @@ class MixcloudIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
      IE_NAME = u'mixcloud'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_json(self, file_id):
          """Report JSON download."""
          self.to_screen(u'Downloading json')
@@ -3014,10 +2946,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
      IE_NAME = u'stanfordoc'
  
-    def report_download_webpage(self, objid):
-        """Report information extraction."""
-        self.to_screen(u'%s: Downloading webpage' % objid)
-
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
@@ -3196,10 +3124,6 @@ class MTVIE(InfoExtractor):
  class YoukuIE(InfoExtractor):
      _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
  
-    def report_download_webpage(self, file_id):
-        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % file_id)
-
      def _gen_sid(self):
          nowTime = int(time.time() * 1000)
          random1 = random.randint(1000,1998)
@@ -3309,10 +3233,6 @@ class XNXXIE(InfoExtractor):
      VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
      VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
  
-    def report_webpage(self, video_id):
-        """Report information extraction"""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
-
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
@@ -3320,7 +3240,7 @@ class XNXXIE(InfoExtractor):
              return
          video_id = mobj.group(1)
  
-        self.report_webpage(video_id)
+        self.report_download_webpage(video_id)
  
          # Get webpage content
          try:
@@ -3366,9 +3286,6 @@ class GooglePlusIE(InfoExtractor):
      _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
      IE_NAME = u'plus.google'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_extract_entry(self, url):
          """Report downloading extry"""
          self.to_screen(u'Downloading entry: %s' % url)