_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+ _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_NETRC_MACHINE = 'youtube'
# Listed in order of quality
_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
return
def _real_extract(self, url):
+ # Extract original video URL from URL with redirection, like age verification, using next_url parameter
+ mobj = re.search(self._NEXT_URL_RE, url)
+ if mobj:
+ url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
+
# Extract video id from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
"""Report information extraction."""
self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
+ def report_following_redirect(self, new_url):
+ """Report information extraction."""
+ self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
+
+ def _test_redirect(self, url):
+ """Check if it is a redirect, like url shorteners, in case restart chain."""
+ class HeadRequest(urllib2.Request):
+ def get_method(self):
+ return "HEAD"
+
+ class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
+ """
+ Subclass the HTTPRedirectHandler to make it use our
+ HeadRequest also on the redirected URL
+ """
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ if code in (301, 302, 303, 307):
+ newurl = newurl.replace(' ', '%20')
+ newheaders = dict((k,v) for k,v in req.headers.items()
+ if k.lower() not in ("content-length", "content-type"))
+ return HeadRequest(newurl,
+ headers=newheaders,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True)
+ else:
+ raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+ class HTTPMethodFallback(urllib2.BaseHandler):
+ """
+ Fallback to GET if HEAD is not allowed (405 HTTP error)
+ """
+ def http_error_405(self, req, fp, code, msg, headers):
+ fp.read()
+ fp.close()
+
+ newheaders = dict((k,v) for k,v in req.headers.items()
+ if k.lower() not in ("content-length", "content-type"))
+ return self.parent.open(urllib2.Request(req.get_full_url(),
+ headers=newheaders,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True))
+
+ # Build our opener
+ opener = urllib2.OpenerDirector()
+ for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+ HTTPMethodFallback, HEADRedirectHandler,
+ urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
+ opener.add_handler(handler())
+
+ response = opener.open(HeadRequest(url))
+ new_url = response.geturl()
+
+ if url == new_url: return False
+
+ self.report_following_redirect(new_url)
+ self._downloader.download([new_url])
+ return True
+
def _real_extract(self, url):
+ if self._test_redirect(url): return
+
# At this point we have a new video
self._downloader.increment_downloads()
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
+ all_urls = map(lambda url: url.strip(), all_urls)
# General configuration
cookie_processor = urllib2.HTTPCookieProcessor(jar)
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+ _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_NETRC_MACHINE = 'youtube'
# Listed in order of quality
_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
return
def _real_extract(self, url):
+ # Extract original video URL from URL with redirection, like age verification, using next_url parameter
+ mobj = re.search(self._NEXT_URL_RE, url)
+ if mobj:
+ url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
+
# Extract video id from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
"""Report information extraction."""
self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
+ def report_following_redirect(self, new_url):
+ """Report information extraction."""
+ self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
+
+ def _test_redirect(self, url):
+ """Check if it is a redirect, like url shorteners, in case restart chain."""
+ class HeadRequest(urllib2.Request):
+ def get_method(self):
+ return "HEAD"
+
+ class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
+ """
+ Subclass the HTTPRedirectHandler to make it use our
+ HeadRequest also on the redirected URL
+ """
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ if code in (301, 302, 303, 307):
+ newurl = newurl.replace(' ', '%20')
+ newheaders = dict((k,v) for k,v in req.headers.items()
+ if k.lower() not in ("content-length", "content-type"))
+ return HeadRequest(newurl,
+ headers=newheaders,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True)
+ else:
+ raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+ class HTTPMethodFallback(urllib2.BaseHandler):
+ """
+ Fallback to GET if HEAD is not allowed (405 HTTP error)
+ """
+ def http_error_405(self, req, fp, code, msg, headers):
+ fp.read()
+ fp.close()
+
+ newheaders = dict((k,v) for k,v in req.headers.items()
+ if k.lower() not in ("content-length", "content-type"))
+ return self.parent.open(urllib2.Request(req.get_full_url(),
+ headers=newheaders,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True))
+
+ # Build our opener
+ opener = urllib2.OpenerDirector()
+ for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+ HTTPMethodFallback, HEADRedirectHandler,
+ urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
+ opener.add_handler(handler())
+
+ response = opener.open(HeadRequest(url))
+ new_url = response.geturl()
+
+ if url == new_url: return False
+
+ self.report_following_redirect(new_url)
+ self._downloader.download([new_url])
+ return True
+
def _real_extract(self, url):
+ if self._test_redirect(url): return
+
# At this point we have a new video
self._downloader.increment_downloads()
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
+ all_urls = map(lambda url: url.strip(), all_urls)
# General configuration
cookie_processor = urllib2.HTTPCookieProcessor(jar)