git.bitcoin.ninja Git - youtube-dl/commitdiff
Merge branch 'next-url'
author: Filippo Valsorda <filippo.valsorda@gmail.com>
Sun, 25 Mar 2012 00:07:47 +0000 (01:07 +0100)
committer: Filippo Valsorda <filippo.valsorda@gmail.com>
Sun, 25 Mar 2012 00:07:47 +0000 (01:07 +0100)
1  2 
youtube-dl
youtube_dl/__init__.py

diff --combined youtube-dl
index 5a595901ccc1e8d36a9b24c00966104e05515ebf,595cce497ca677b8f7de61a02a01c30c0755c9e0..8d0d1cc3381afab236486af52f9712c110cfa311
@@@ -1176,6 -1176,7 +1176,7 @@@ class YoutubeIE(InfoExtractor)
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+       _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
        _NETRC_MACHINE = 'youtube'
        # Listed in order of quality
        _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
                        return
  
        def _real_extract(self, url):
+               # Extract original video URL from URL with redirection, like age verification, using next_url parameter
+               mobj = re.search(self._NEXT_URL_RE, url)
+               if mobj:
+                       url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
                # Extract video id from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
@@@ -2241,67 -2247,7 +2247,67 @@@ class GenericIE(InfoExtractor)
                """Report information extraction."""
                self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
  
 +      def report_following_redirect(self, new_url):
 +              """Report that a redirect to new_url is being followed."""
 +              self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
 +              
 +      def _test_redirect(self, url):
 +              """Check whether url redirects (e.g. a URL shortener); if so, restart the download chain on the target and return True, otherwise return False."""
 +              class HeadRequest(urllib2.Request):
 +                      def get_method(self):
 +                              return "HEAD"
 +
 +              class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
 +                      """
 +                      HTTPRedirectHandler subclass that also uses our HeadRequest for the
 +                      redirected URL (301, 302, 303, 307); other codes raise HTTPError
 +                      """
 +                      def redirect_request(self, req, fp, code, msg, headers, newurl): 
 +                              if code in (301, 302, 303, 307):
 +                                  newurl = newurl.replace(' ', '%20') 
 +                                  newheaders = dict((k,v) for k,v in req.headers.items()
 +                                                    if k.lower() not in ("content-length", "content-type"))
 +                                  return HeadRequest(newurl, 
 +                                                     headers=newheaders,
 +                                                     origin_req_host=req.get_origin_req_host(), 
 +                                                     unverifiable=True) 
 +                              else: 
 +                                  raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) 
 +                                  
 +              class HTTPMethodFallback(urllib2.BaseHandler):
 +                      """
 +                      Fall back to GET if HEAD is not allowed (405 HTTP error)
 +                      """
 +                      def http_error_405(self, req, fp, code, msg, headers): 
 +                              fp.read()
 +                              fp.close()
 +
 +                              newheaders = dict((k,v) for k,v in req.headers.items()
 +                                                if k.lower() not in ("content-length", "content-type"))
 +                              return self.parent.open(urllib2.Request(req.get_full_url(), 
 +                                                               headers=newheaders, 
 +                                                               origin_req_host=req.get_origin_req_host(), 
 +                                                               unverifiable=True))
 +
 +              # Build our opener from a bare OpenerDirector with just the handlers listed below
 +              opener = urllib2.OpenerDirector() 
 +              for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
 +                                      HTTPMethodFallback, HEADRedirectHandler,
 +                                      urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
 +                      opener.add_handler(handler())
 +
 +              response = opener.open(HeadRequest(url))  # NOTE(review): response is never explicitly closed in this method
 +              new_url = response.geturl()
 +              
 +              if url == new_url: return False  # final URL unchanged: not a redirect
 +              
 +              self.report_following_redirect(new_url)
 +              self._downloader.download([new_url])  # hand the resolved URL back to the downloader
 +              return True
 +
        def _real_extract(self, url):
 +              if self._test_redirect(url): return
 +              
                # At this point we have a new video
                self._downloader.increment_downloads()
  
@@@ -4624,6 -4570,7 +4630,7 @@@ def _real_main()
                except IOError:
                        sys.exit(u'ERROR: batch file could not be read')
        all_urls = batchurls + args
+       all_urls = map(lambda url: url.strip(), all_urls)
  
        # General configuration
        cookie_processor = urllib2.HTTPCookieProcessor(jar)
diff --combined youtube_dl/__init__.py
index 5a595901ccc1e8d36a9b24c00966104e05515ebf,595cce497ca677b8f7de61a02a01c30c0755c9e0..8d0d1cc3381afab236486af52f9712c110cfa311
@@@ -1176,6 -1176,7 +1176,7 @@@ class YoutubeIE(InfoExtractor)
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+       _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
        _NETRC_MACHINE = 'youtube'
        # Listed in order of quality
        _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
                        return
  
        def _real_extract(self, url):
+               # Extract original video URL from URL with redirection, like age verification, using next_url parameter
+               mobj = re.search(self._NEXT_URL_RE, url)
+               if mobj:
+                       url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
                # Extract video id from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
@@@ -2241,67 -2247,7 +2247,67 @@@ class GenericIE(InfoExtractor)
                """Report information extraction."""
                self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
  
 +      def report_following_redirect(self, new_url):
 +              """Report that a redirect to new_url is being followed."""
 +              self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
 +              
 +      def _test_redirect(self, url):
 +              """Check whether url redirects (e.g. a URL shortener); if so, restart the download chain on the target and return True, otherwise return False."""
 +              class HeadRequest(urllib2.Request):
 +                      def get_method(self):
 +                              return "HEAD"
 +
 +              class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
 +                      """
 +                      HTTPRedirectHandler subclass that also uses our HeadRequest for the
 +                      redirected URL (301, 302, 303, 307); other codes raise HTTPError
 +                      """
 +                      def redirect_request(self, req, fp, code, msg, headers, newurl): 
 +                              if code in (301, 302, 303, 307):
 +                                  newurl = newurl.replace(' ', '%20') 
 +                                  newheaders = dict((k,v) for k,v in req.headers.items()
 +                                                    if k.lower() not in ("content-length", "content-type"))
 +                                  return HeadRequest(newurl, 
 +                                                     headers=newheaders,
 +                                                     origin_req_host=req.get_origin_req_host(), 
 +                                                     unverifiable=True) 
 +                              else: 
 +                                  raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) 
 +                                  
 +              class HTTPMethodFallback(urllib2.BaseHandler):
 +                      """
 +                      Fall back to GET if HEAD is not allowed (405 HTTP error)
 +                      """
 +                      def http_error_405(self, req, fp, code, msg, headers): 
 +                              fp.read()
 +                              fp.close()
 +
 +                              newheaders = dict((k,v) for k,v in req.headers.items()
 +                                                if k.lower() not in ("content-length", "content-type"))
 +                              return self.parent.open(urllib2.Request(req.get_full_url(), 
 +                                                               headers=newheaders, 
 +                                                               origin_req_host=req.get_origin_req_host(), 
 +                                                               unverifiable=True))
 +
 +              # Build our opener from a bare OpenerDirector with just the handlers listed below
 +              opener = urllib2.OpenerDirector() 
 +              for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
 +                                      HTTPMethodFallback, HEADRedirectHandler,
 +                                      urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
 +                      opener.add_handler(handler())
 +
 +              response = opener.open(HeadRequest(url))  # NOTE(review): response is never explicitly closed in this method
 +              new_url = response.geturl()
 +              
 +              if url == new_url: return False  # final URL unchanged: not a redirect
 +              
 +              self.report_following_redirect(new_url)
 +              self._downloader.download([new_url])  # hand the resolved URL back to the downloader
 +              return True
 +
        def _real_extract(self, url):
 +              if self._test_redirect(url): return
 +              
                # At this point we have a new video
                self._downloader.increment_downloads()
  
@@@ -4624,6 -4570,7 +4630,7 @@@ def _real_main()
                except IOError:
                        sys.exit(u'ERROR: batch file could not be read')
        all_urls = batchurls + args
+       all_urls = map(lambda url: url.strip(), all_urls)
  
        # General configuration
        cookie_processor = urllib2.HTTPCookieProcessor(jar)