Merge pull request #801 from expleo/add_referer_support

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Fri, 26 Apr 2013 17:25:17 +0000 (19:25 +0200)

committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Fri, 26 Apr 2013 17:34:32 +0000 (19:34 +0200)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 26 Apr 2013 17:25:17 +0000 (19:25 +0200)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 26 Apr 2013 17:34:32 +0000 (19:34 +0200)
diff --combined README.md

index 0ab4b660cee8eec837cff27038d58fd09bdd95fd,ffb321ff92d2793c26fbec510589767a07502d64..d3073e641c3260900c313f59b92995d4d38647a2
--- 1/README.md
--- 2/README.md
+++ b/README.md
@@@ -27,6 -27,7 +27,8 @@@ which means you can modify it, redistri
                                  from an initial value of SIZE.
       --dump-user-agent          display the current browser identification
       --user-agent UA            specify a custom user agent
- -    --referer REF              specify a custom referer
++    --referer REF              specify a custom referer, use if the video access
++                               is restricted to one domain
       --list-extractors          List all supported extractors and the URLs they
                                  would handle
   
diff --combined youtube_dl/InfoExtractors.py

index 4d145dfa101b0118ccc15a6d81d4098568ab8276,d5876ad3463b71a841b00552f0b97737b4264996..3450f0d17e19d95d67645a50c79f0680b05379cc
--- 1/youtube_dl/InfoExtractors.py
--- 2/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@@ -114,8 -114,8 +114,8 @@@ class InfoExtractor(object)
       def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
           """ Returns the response handle """
           if note is None:
- -            note = u'Downloading video webpage'
- -        if note is not False:
+ +            self.report_download_webpage(video_id)
+ +        elif note is not False:
               self.to_screen(u'%s: %s' % (video_id, note))
           try:
               return compat_urllib_request.urlopen(url_or_request)
@@@ -152,10 -152,6 +152,10 @@@
           """Report information extraction."""
           self.to_screen(u'%s: Extracting information' % id_or_name)
   
+ +    def report_download_webpage(self, video_id):
+ +        """Report webpage download."""
+ +        self.to_screen(u'%s: Downloading webpage' % video_id)
+ +
       def report_age_confirmation(self):
           """Report attempt to confirm age."""
           self.to_screen(u'Confirming age')
@@@ -688,10 -684,17 +688,10 @@@ class MetacafeIE(InfoExtractor)
       _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
       IE_NAME = u'metacafe'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_disclaimer(self):
           """Report disclaimer retrieval."""
           self.to_screen(u'Retrieving disclaimer')
   
- -    def report_download_webpage(self, video_id):
- -        """Report webpage download."""
- -        self.to_screen(u'%s: Downloading webpage' % video_id)
- -
       def _real_initialize(self):
           # Retrieve disclaimer
           request = compat_urllib_request.Request(self._DISCLAIMER)
@@@ -792,6 -795,9 +792,6 @@@ class DailymotionIE(InfoExtractor)
       IE_NAME = u'dailymotion'
       _WORKING = False
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def _real_extract(self, url):
           # Extract id and simplified title from URL
           mobj = re.match(self._VALID_URL, url)
@@@ -873,6 -879,13 +873,6 @@@ class PhotobucketIE(InfoExtractor)
       _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
       IE_NAME = u'photobucket'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
- -    def report_download_webpage(self, video_id):
- -        """Report webpage download."""
- -        self.to_screen(u'%s: Downloading webpage' % video_id)
- -
       def _real_extract(self, url):
           # Extract id from URL
           mobj = re.match(self._VALID_URL, url)
@@@ -931,6 -944,13 +931,6 @@@ class YahooIE(InfoExtractor)
       _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
       IE_NAME = u'video.yahoo'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
- -    def report_download_webpage(self, video_id):
- -        """Report webpage download."""
- -        self.to_screen(u'%s: Downloading webpage' % video_id)
- -
       def _real_extract(self, url, new_video=True):
           # Extract ID from URL
           mobj = re.match(self._VALID_URL, url)
@@@ -1060,6 -1080,13 +1060,6 @@@ class VimeoIE(InfoExtractor)
       _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
       IE_NAME = u'vimeo'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
- -    def report_download_webpage(self, video_id):
- -        """Report webpage download."""
- -        self.to_screen(u'%s: Downloading webpage' % video_id)
- -
       def _real_extract(self, url, new_video=True):
           # Extract ID from URL
           mobj = re.match(self._VALID_URL, url)
@@@ -1093,7 -1120,7 +1093,10 @@@
               config = webpage.split(' = {config:')[1].split(',assets:')[0]
               config = json.loads(config)
           except:
--            self._downloader.report_error(u'unable to extract info section')
++            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
++                self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option')
++            else:
++                self._downloader.report_error(u'unable to extract info section')
               return
   
           # Extract title
@@@ -1170,6 -1197,13 +1173,6 @@@ class ArteTvIE(InfoExtractor)
   
       IE_NAME = u'arte.tv'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
- -    def report_download_webpage(self, video_id):
- -        """Report webpage download."""
- -        self.to_screen(u'%s: Downloading webpage' % video_id)
- -
       def fetch_webpage(self, url):
           request = compat_urllib_request.Request(url)
           try:
@@@ -1293,11 -1327,14 +1296,11 @@@ class GenericIE(InfoExtractor)
       _VALID_URL = r'.*'
       IE_NAME = u'generic'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_download_webpage(self, video_id):
           """Report webpage download."""
           if not self._downloader.params.get('test', False):
               self._downloader.report_warning(u'Falling back on generic information extractor.')
- -        self.to_screen(u'%s: Downloading webpage' % video_id)
+ +        super(GenericIE, self).report_download_webpage(video_id)
   
       def report_following_redirect(self, new_url):
           """Report information extraction."""
@@@ -1432,6 -1469,9 +1435,6 @@@ class YoutubeSearchIE(InfoExtractor)
       _max_youtube_results = 1000
       IE_NAME = u'youtube:search'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_download_page(self, query, pagenum):
           """Report attempt to download search page with given number."""
           query = query.decode(preferredencoding())
@@@ -1506,6 -1546,9 +1509,6 @@@ class GoogleSearchIE(InfoExtractor)
       _max_google_results = 1000
       IE_NAME = u'video.google:search'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_download_page(self, query, pagenum):
           """Report attempt to download playlist page with given number."""
           query = query.decode(preferredencoding())
@@@ -1587,6 -1630,9 +1590,6 @@@ class YahooSearchIE(InfoExtractor)
       _max_yahoo_results = 1000
       IE_NAME = u'video.yahoo:search'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_download_page(self, query, pagenum):
           """Report attempt to download playlist page with given number."""
           query = query.decode(preferredencoding())
@@@ -1680,6 -1726,9 +1683,6 @@@ class YoutubePlaylistIE(InfoExtractor)
       _MAX_RESULTS = 50
       IE_NAME = u'youtube:playlist'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       @classmethod
       def suitable(cls, url):
           """Receives a URL and returns True if suitable for this IE."""
@@@ -1824,6 -1873,9 +1827,6 @@@ class YoutubeUserIE(InfoExtractor)
       _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
       IE_NAME = u'youtube:user'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_download_page(self, username, start_index):
           """Report attempt to download user page."""
           self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
@@@ -1890,6 -1942,9 +1893,6 @@@ class BlipTVUserIE(InfoExtractor)
       _PAGE_SIZE = 12
       IE_NAME = u'blip.tv:user'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_download_page(self, username, pagenum):
           """Report attempt to download user page."""
           self.to_screen(u'user %s: Downloading video ids from page %d' %
@@@ -1965,6 -2020,10 +1968,6 @@@ class DepositFilesIE(InfoExtractor)
   
       _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
   
- -    def report_download_webpage(self, file_id):
- -        """Report webpage download."""
- -        self.to_screen(u'%s: Downloading webpage' % file_id)
- -
       def _real_extract(self, url):
           file_id = url.split('/')[-1]
           # Rebuild url in english locale
@@@ -2215,6 -2274,9 +2218,6 @@@ class MyVideoIE(InfoExtractor)
       _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
       IE_NAME = u'myvideo'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def _real_extract(self,url):
           mobj = re.match(self._VALID_URL, url)
           if mobj is None:
@@@ -2650,6 -2712,9 +2653,6 @@@ class SoundcloudIE(InfoExtractor)
       _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
       IE_NAME = u'soundcloud'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_resolve(self, video_id):
           """Report information extraction."""
           self.to_screen(u'%s: Resolving id' % video_id)
@@@ -2716,6 -2781,9 +2719,6 @@@ class SoundcloudSetIE(InfoExtractor)
       _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
       IE_NAME = u'soundcloud'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_resolve(self, video_id):
           """Report information extraction."""
           self.to_screen(u'%s: Resolving id' % video_id)
@@@ -2793,7 -2861,7 +2796,7 @@@ class InfoQIE(InfoExtractor)
           self.report_extraction(url)
   
           # Extract video URL
- -        mobj = re.search(r"jsclassref='([^']*)'", webpage)
+ +        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
           if mobj is None:
               self._downloader.report_error(u'unable to extract video url')
               return
@@@ -2836,6 -2904,9 +2839,6 @@@ class MixcloudIE(InfoExtractor)
       _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
       IE_NAME = u'mixcloud'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_download_json(self, file_id):
           """Report JSON download."""
           self.to_screen(u'Downloading json')
@@@ -2943,6 -3014,10 +2946,6 @@@ class StanfordOpenClassroomIE(InfoExtra
       _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
       IE_NAME = u'stanfordoc'
   
- -    def report_download_webpage(self, objid):
- -        """Report information extraction."""
- -        self.to_screen(u'%s: Downloading webpage' % objid)
- -
       def _real_extract(self, url):
           mobj = re.match(self._VALID_URL, url)
           if mobj is None:
@@@ -3121,6 -3196,10 +3124,6 @@@ class MTVIE(InfoExtractor)
   class YoukuIE(InfoExtractor):
       _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
   
- -    def report_download_webpage(self, file_id):
- -        """Report webpage download."""
- -        self.to_screen(u'%s: Downloading webpage' % file_id)
- -
       def _gen_sid(self):
           nowTime = int(time.time() * 1000)
           random1 = random.randint(1000,1998)
@@@ -3230,6 -3309,10 +3233,6 @@@ class XNXXIE(InfoExtractor)
       VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
       VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
   
- -    def report_webpage(self, video_id):
- -        """Report information extraction"""
- -        self.to_screen(u'%s: Downloading webpage' % video_id)
- -
       def _real_extract(self, url):
           mobj = re.match(self._VALID_URL, url)
           if mobj is None:
@@@ -3237,7 -3320,7 +3240,7 @@@
               return
           video_id = mobj.group(1)
   
- -        self.report_webpage(video_id)
+ +        self.report_download_webpage(video_id)
   
           # Get webpage content
           try:
@@@ -3283,6 -3366,9 +3286,6 @@@ class GooglePlusIE(InfoExtractor)
       _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
       IE_NAME = u'plus.google'
   
- -    def __init__(self, downloader=None):
- -        InfoExtractor.__init__(self, downloader)
- -
       def report_extract_entry(self, url):
           """Report downloading extry"""
           self.to_screen(u'Downloading entry: %s' % url)
diff --combined youtube_dl/__init__.py

index 74375175dd1ebca987de37ceef7af30b34ca5de0,16e74785f6f0996cef56ff5d6d628f57e3790394..d491402c6a9702c57dbbd4122924b22f2ede2177
--- 1/youtube_dl/__init__.py
--- 2/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@@ -140,6 -140,8 +140,9 @@@ def parseOpts(overrideArguments=None)
               help='display the current browser identification', default=False)
       general.add_option('--user-agent',
               dest='user_agent', help='specify a custom user agent', metavar='UA')
- -            dest='referer', help='specify a custom referer', metavar='REF', default=None)
+     general.add_option('--referer',
++            dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
++            metavar='REF', default=None)
       general.add_option('--list-extractors',
               action='store_true', dest='list_extractors',
               help='List all supported extractors and the URLs they would handle', default=False)
@@@ -342,6 -344,10 +345,10 @@@ def _real_main(argv=None)
       # Set user agent
       if opts.user_agent is not None:
           std_headers['User-Agent'] = opts.user_agent
+     
+     # Set referer
+     if opts.referer is not None:
+         std_headers['Referer'] = opts.referer
   
       # Dump user agent
       if opts.dump_user_agent:
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Fri, 26 Apr 2013 17:25:17 +0000 (19:25 +0200)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Fri, 26 Apr 2013 17:34:32 +0000 (19:34 +0200)
		1	2
README.md	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/InfoExtractors.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/__init__.py	patch \|	diff1 \|	diff2 \|	blob \| history