From: Jaime Marquínez Ferrándiz Date: Fri, 26 Apr 2013 17:25:17 +0000 (+0200) Subject: Merge pull request #801 from expleo/add_referer_support X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=3820df0106d6065f50cc1eb90823906410dc9543;hp=-c;p=youtube-dl Merge pull request #801 from expleo/add_referer_support --- 3820df0106d6065f50cc1eb90823906410dc9543 diff --combined README.md index 0ab4b660c,ffb321ff9..d3073e641 --- a/README.md +++ b/README.md @@@ -27,6 -27,7 +27,8 @@@ which means you can modify it, redistri from an initial value of SIZE. --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent - --referer REF specify a custom referer ++ --referer REF specify a custom referer, use if the video access ++ is restricted to one domain --list-extractors List all supported extractors and the URLs they would handle diff --combined youtube_dl/InfoExtractors.py index 4d145dfa1,d5876ad34..3450f0d17 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@@ -114,8 -114,8 +114,8 @@@ class InfoExtractor(object) def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): """ Returns the response handle """ if note is None: - note = u'Downloading video webpage' - if note is not False: + self.report_download_webpage(video_id) + elif note is not False: self.to_screen(u'%s: %s' % (video_id, note)) try: return compat_urllib_request.urlopen(url_or_request) @@@ -152,10 -152,6 +152,10 @@@ """Report information extraction.""" self.to_screen(u'%s: Extracting information' % id_or_name) + def report_download_webpage(self, video_id): + """Report webpage download.""" + self.to_screen(u'%s: Downloading webpage' % video_id) + def report_age_confirmation(self): """Report attempt to confirm age.""" self.to_screen(u'Confirming age') @@@ -688,10 -684,17 +688,10 @@@ class MetacafeIE(InfoExtractor) _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = u'metacafe' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_disclaimer(self): """Report disclaimer retrieval.""" self.to_screen(u'Retrieving disclaimer') - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_initialize(self): # Retrieve disclaimer request = compat_urllib_request.Request(self._DISCLAIMER) @@@ -792,6 -795,9 +792,6 @@@ class DailymotionIE(InfoExtractor) IE_NAME = u'dailymotion' _WORKING = False - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) @@@ -873,6 -879,13 +873,6 @@@ class PhotobucketIE(InfoExtractor) _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' IE_NAME = u'photobucket' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url): # Extract id from URL mobj = re.match(self._VALID_URL, url) @@@ -931,6 -944,13 +931,6 @@@ class YahooIE(InfoExtractor) _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' IE_NAME = u'video.yahoo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@@ -1060,6 -1080,13 +1060,6 @@@ class VimeoIE(InfoExtractor) _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)' IE_NAME = u'vimeo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@@ -1093,7 -1120,7 +1093,10 @@@ config = webpage.split(' = {config:')[1].split(',assets:')[0] config = json.loads(config) except: -- self._downloader.report_error(u'unable to extract info section') ++ if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): ++ self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option') ++ else: ++ self._downloader.report_error(u'unable to extract info section') return # Extract title @@@ -1170,6 -1197,13 +1173,6 @@@ class ArteTvIE(InfoExtractor) IE_NAME = u'arte.tv' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def fetch_webpage(self, url): request = compat_urllib_request.Request(url) try: @@@ -1293,11 -1327,14 +1296,11 @@@ class GenericIE(InfoExtractor) _VALID_URL = r'.*' IE_NAME = u'generic' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_webpage(self, video_id): """Report webpage download.""" if not self._downloader.params.get('test', False): self._downloader.report_warning(u'Falling back on generic information extractor.') - self.to_screen(u'%s: Downloading webpage' % video_id) + super(GenericIE, self).report_download_webpage(video_id) def report_following_redirect(self, new_url): """Report information extraction.""" @@@ -1432,6 -1469,9 +1435,6 @@@ class YoutubeSearchIE(InfoExtractor) _max_youtube_results = 1000 IE_NAME = u'youtube:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download search page with given number.""" query = query.decode(preferredencoding()) @@@ -1506,6 -1546,9 +1509,6 @@@ class GoogleSearchIE(InfoExtractor) _max_google_results = 1000 IE_NAME = u'video.google:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@@ -1587,6 -1630,9 +1590,6 @@@ class YahooSearchIE(InfoExtractor) _max_yahoo_results = 1000 IE_NAME = u'video.yahoo:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@@ -1680,6 -1726,9 +1683,6 @@@ class YoutubePlaylistIE(InfoExtractor) _MAX_RESULTS = 50 IE_NAME = u'youtube:playlist' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - @classmethod def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" @@@ -1824,6 -1873,9 +1827,6 @@@ class YoutubeUserIE(InfoExtractor) _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]' IE_NAME = u'youtube:user' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, username, start_index): """Report attempt to download user page.""" self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % @@@ -1890,6 -1942,9 +1893,6 @@@ class BlipTVUserIE(InfoExtractor) _PAGE_SIZE = 12 IE_NAME = u'blip.tv:user' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, username, pagenum): """Report attempt to download user page.""" self.to_screen(u'user %s: Downloading video ids from page %d' % @@@ -1965,6 -2020,10 +1968,6 @@@ class DepositFilesIE(InfoExtractor) _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' - def report_download_webpage(self, file_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % file_id) - def _real_extract(self, url): file_id = url.split('/')[-1] # Rebuild url in english locale @@@ -2215,6 -2274,9 +2218,6 @@@ class MyVideoIE(InfoExtractor) _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@@ -2650,6 -2712,9 +2653,6 @@@ class SoundcloudIE(InfoExtractor) _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'soundcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_resolve(self, video_id): """Report information extraction.""" self.to_screen(u'%s: Resolving id' % video_id) @@@ -2716,6 -2781,9 +2719,6 @@@ class SoundcloudSetIE(InfoExtractor) _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' IE_NAME = u'soundcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_resolve(self, video_id): """Report information extraction.""" self.to_screen(u'%s: Resolving id' % video_id) @@@ -2793,7 -2861,7 +2796,7 @@@ class InfoQIE(InfoExtractor) self.report_extraction(url) # Extract video URL - mobj = re.search(r"jsclassref='([^']*)'", webpage) + mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage) if mobj is None: self._downloader.report_error(u'unable to extract video url') return @@@ -2836,6 -2904,9 +2839,6 @@@ class MixcloudIE(InfoExtractor) _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'mixcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_json(self, file_id): """Report JSON download.""" self.to_screen(u'Downloading json') @@@ -2943,6 -3014,10 +2946,6 @@@ class StanfordOpenClassroomIE(InfoExtra _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P