Merge pull request #801 from expleo/add_referer_support
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 26 Apr 2013 17:25:17 +0000 (19:25 +0200)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 26 Apr 2013 17:34:32 +0000 (19:34 +0200)
1  2 
README.md
youtube_dl/InfoExtractors.py
youtube_dl/__init__.py

diff --combined README.md
index 0ab4b660cee8eec837cff27038d58fd09bdd95fd,ffb321ff92d2793c26fbec510589767a07502d64..d3073e641c3260900c313f59b92995d4d38647a2
+++ b/README.md
@@@ -27,6 -27,7 +27,8 @@@ which means you can modify it, redistri
                                 from an initial value of SIZE.
      --dump-user-agent          display the current browser identification
      --user-agent UA            specify a custom user agent
 -    --referer REF              specify a custom referer
++    --referer REF              specify a custom referer, use if the video access
++                               is restricted to one domain
      --list-extractors          List all supported extractors and the URLs they
                                 would handle
  
diff --combined youtube_dl/InfoExtractors.py
index 4d145dfa101b0118ccc15a6d81d4098568ab8276,d5876ad3463b71a841b00552f0b97737b4264996..3450f0d17e19d95d67645a50c79f0680b05379cc
@@@ -114,8 -114,8 +114,8 @@@ class InfoExtractor(object)
      def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns the response handle """
          if note is None:
 -            note = u'Downloading video webpage'
 -        if note is not False:
 +            self.report_download_webpage(video_id)
 +        elif note is not False:
              self.to_screen(u'%s: %s' % (video_id, note))
          try:
              return compat_urllib_request.urlopen(url_or_request)
          """Report information extraction."""
          self.to_screen(u'%s: Extracting information' % id_or_name)
  
 +    def report_download_webpage(self, video_id):
 +        """Report webpage download."""
 +        self.to_screen(u'%s: Downloading webpage' % video_id)
 +
      def report_age_confirmation(self):
          """Report attempt to confirm age."""
          self.to_screen(u'Confirming age')
@@@ -688,10 -684,17 +688,10 @@@ class MetacafeIE(InfoExtractor)
      _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
      IE_NAME = u'metacafe'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_disclaimer(self):
          """Report disclaimer retrieval."""
          self.to_screen(u'Retrieving disclaimer')
  
 -    def report_download_webpage(self, video_id):
 -        """Report webpage download."""
 -        self.to_screen(u'%s: Downloading webpage' % video_id)
 -
      def _real_initialize(self):
          # Retrieve disclaimer
          request = compat_urllib_request.Request(self._DISCLAIMER)
@@@ -792,6 -795,9 +792,6 @@@ class DailymotionIE(InfoExtractor)
      IE_NAME = u'dailymotion'
      _WORKING = False
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def _real_extract(self, url):
          # Extract id and simplified title from URL
          mobj = re.match(self._VALID_URL, url)
@@@ -873,6 -879,13 +873,6 @@@ class PhotobucketIE(InfoExtractor)
      _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
      IE_NAME = u'photobucket'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
 -    def report_download_webpage(self, video_id):
 -        """Report webpage download."""
 -        self.to_screen(u'%s: Downloading webpage' % video_id)
 -
      def _real_extract(self, url):
          # Extract id from URL
          mobj = re.match(self._VALID_URL, url)
@@@ -931,6 -944,13 +931,6 @@@ class YahooIE(InfoExtractor)
      _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
      IE_NAME = u'video.yahoo'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
 -    def report_download_webpage(self, video_id):
 -        """Report webpage download."""
 -        self.to_screen(u'%s: Downloading webpage' % video_id)
 -
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
@@@ -1060,6 -1080,13 +1060,6 @@@ class VimeoIE(InfoExtractor)
      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
      IE_NAME = u'vimeo'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
 -    def report_download_webpage(self, video_id):
 -        """Report webpage download."""
 -        self.to_screen(u'%s: Downloading webpage' % video_id)
 -
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
              config = webpage.split(' = {config:')[1].split(',assets:')[0]
              config = json.loads(config)
          except:
--            self._downloader.report_error(u'unable to extract info section')
++            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
++                self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option')
++            else:
++                self._downloader.report_error(u'unable to extract info section')
              return
  
          # Extract title
@@@ -1170,6 -1197,13 +1173,6 @@@ class ArteTvIE(InfoExtractor)
  
      IE_NAME = u'arte.tv'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
 -    def report_download_webpage(self, video_id):
 -        """Report webpage download."""
 -        self.to_screen(u'%s: Downloading webpage' % video_id)
 -
      def fetch_webpage(self, url):
          request = compat_urllib_request.Request(url)
          try:
@@@ -1293,11 -1327,14 +1296,11 @@@ class GenericIE(InfoExtractor)
      _VALID_URL = r'.*'
      IE_NAME = u'generic'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_download_webpage(self, video_id):
          """Report webpage download."""
          if not self._downloader.params.get('test', False):
              self._downloader.report_warning(u'Falling back on generic information extractor.')
 -        self.to_screen(u'%s: Downloading webpage' % video_id)
 +        super(GenericIE, self).report_download_webpage(video_id)
  
      def report_following_redirect(self, new_url):
          """Report information extraction."""
@@@ -1432,6 -1469,9 +1435,6 @@@ class YoutubeSearchIE(InfoExtractor)
      _max_youtube_results = 1000
      IE_NAME = u'youtube:search'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_download_page(self, query, pagenum):
          """Report attempt to download search page with given number."""
          query = query.decode(preferredencoding())
@@@ -1506,6 -1546,9 +1509,6 @@@ class GoogleSearchIE(InfoExtractor)
      _max_google_results = 1000
      IE_NAME = u'video.google:search'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_download_page(self, query, pagenum):
          """Report attempt to download playlist page with given number."""
          query = query.decode(preferredencoding())
@@@ -1587,6 -1630,9 +1590,6 @@@ class YahooSearchIE(InfoExtractor)
      _max_yahoo_results = 1000
      IE_NAME = u'video.yahoo:search'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_download_page(self, query, pagenum):
          """Report attempt to download playlist page with given number."""
          query = query.decode(preferredencoding())
@@@ -1680,6 -1726,9 +1683,6 @@@ class YoutubePlaylistIE(InfoExtractor)
      _MAX_RESULTS = 50
      IE_NAME = u'youtube:playlist'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
@@@ -1824,6 -1873,9 +1827,6 @@@ class YoutubeUserIE(InfoExtractor)
      _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
      IE_NAME = u'youtube:user'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_download_page(self, username, start_index):
          """Report attempt to download user page."""
          self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
@@@ -1890,6 -1942,9 +1893,6 @@@ class BlipTVUserIE(InfoExtractor)
      _PAGE_SIZE = 12
      IE_NAME = u'blip.tv:user'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_download_page(self, username, pagenum):
          """Report attempt to download user page."""
          self.to_screen(u'user %s: Downloading video ids from page %d' %
@@@ -1965,6 -2020,10 +1968,6 @@@ class DepositFilesIE(InfoExtractor)
  
      _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
  
 -    def report_download_webpage(self, file_id):
 -        """Report webpage download."""
 -        self.to_screen(u'%s: Downloading webpage' % file_id)
 -
      def _real_extract(self, url):
          file_id = url.split('/')[-1]
          # Rebuild url in english locale
@@@ -2215,6 -2274,9 +2218,6 @@@ class MyVideoIE(InfoExtractor)
      _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
      IE_NAME = u'myvideo'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def _real_extract(self,url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
@@@ -2650,6 -2712,9 +2653,6 @@@ class SoundcloudIE(InfoExtractor)
      _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
      IE_NAME = u'soundcloud'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_resolve(self, video_id):
          """Report information extraction."""
          self.to_screen(u'%s: Resolving id' % video_id)
@@@ -2716,6 -2781,9 +2719,6 @@@ class SoundcloudSetIE(InfoExtractor)
      _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
      IE_NAME = u'soundcloud'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_resolve(self, video_id):
          """Report information extraction."""
          self.to_screen(u'%s: Resolving id' % video_id)
@@@ -2793,7 -2861,7 +2796,7 @@@ class InfoQIE(InfoExtractor)
          self.report_extraction(url)
  
          # Extract video URL
 -        mobj = re.search(r"jsclassref='([^']*)'", webpage)
 +        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
          if mobj is None:
              self._downloader.report_error(u'unable to extract video url')
              return
@@@ -2836,6 -2904,9 +2839,6 @@@ class MixcloudIE(InfoExtractor)
      _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
      IE_NAME = u'mixcloud'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_download_json(self, file_id):
          """Report JSON download."""
          self.to_screen(u'Downloading json')
@@@ -2943,6 -3014,10 +2946,6 @@@ class StanfordOpenClassroomIE(InfoExtra
      _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
      IE_NAME = u'stanfordoc'
  
 -    def report_download_webpage(self, objid):
 -        """Report information extraction."""
 -        self.to_screen(u'%s: Downloading webpage' % objid)
 -
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
@@@ -3121,6 -3196,10 +3124,6 @@@ class MTVIE(InfoExtractor)
  class YoukuIE(InfoExtractor):
      _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
  
 -    def report_download_webpage(self, file_id):
 -        """Report webpage download."""
 -        self.to_screen(u'%s: Downloading webpage' % file_id)
 -
      def _gen_sid(self):
          nowTime = int(time.time() * 1000)
          random1 = random.randint(1000,1998)
@@@ -3230,6 -3309,10 +3233,6 @@@ class XNXXIE(InfoExtractor)
      VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
      VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
  
 -    def report_webpage(self, video_id):
 -        """Report information extraction"""
 -        self.to_screen(u'%s: Downloading webpage' % video_id)
 -
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
              return
          video_id = mobj.group(1)
  
 -        self.report_webpage(video_id)
 +        self.report_download_webpage(video_id)
  
          # Get webpage content
          try:
@@@ -3283,6 -3366,9 +3286,6 @@@ class GooglePlusIE(InfoExtractor)
      _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
      IE_NAME = u'plus.google'
  
 -    def __init__(self, downloader=None):
 -        InfoExtractor.__init__(self, downloader)
 -
      def report_extract_entry(self, url):
          """Report downloading extry"""
          self.to_screen(u'Downloading entry: %s' % url)
diff --combined youtube_dl/__init__.py
index 74375175dd1ebca987de37ceef7af30b34ca5de0,16e74785f6f0996cef56ff5d6d628f57e3790394..d491402c6a9702c57dbbd4122924b22f2ede2177
@@@ -140,6 -140,8 +140,9 @@@ def parseOpts(overrideArguments=None)
              help='display the current browser identification', default=False)
      general.add_option('--user-agent',
              dest='user_agent', help='specify a custom user agent', metavar='UA')
 -            dest='referer', help='specify a custom referer', metavar='REF', default=None)
+     general.add_option('--referer',
++            dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
++            metavar='REF', default=None)
      general.add_option('--list-extractors',
              action='store_true', dest='list_extractors',
              help='List all supported extractors and the URLs they would handle', default=False)
@@@ -342,6 -344,10 +345,10 @@@ def _real_main(argv=None)
      # Set user agent
      if opts.user_agent is not None:
          std_headers['User-Agent'] = opts.user_agent
+     
+     # Set referer
+     if opts.referer is not None:
+         std_headers['Referer'] = opts.referer
  
      # Dump user agent
      if opts.dump_user_agent: