transplant ceba827e9aab563ae7c7190fc236ec1aa358ee59, d891ff9fd9952b2829a47b508acf40d9...

[youtube-dl] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index 689427f5baee153f1ff033792567cdb7da0a3c74..595cce497ca677b8f7de61a02a01c30c0755c9e0 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -15,6 +15,7 @@ __authors__  = (
         'Kevin Ngo',
         'Ori Avtalion',
         'shizeeg',
+       'Filippo Valsorda',
         )
  
  __license__ = 'Public Domain'
@@ -491,6 +492,7 @@ class FileDownloader(object):
         writedescription: Write the video description to a .description file
         writeinfojson:    Write the video description to a .info.json file
         writesubtitles:   Write the video subtitles to a .srt file
+       subtitleslang:    Language of the subtitles to download
         """
  
         params = None
@@ -1174,6 +1176,7 @@ class YoutubeIE(InfoExtractor):
         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+       _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
         _NETRC_MACHINE = 'youtube'
         # Listed in order of quality
         _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
@@ -1334,6 +1337,11 @@ class YoutubeIE(InfoExtractor):
                         return
  
         def _real_extract(self, url):
+               # Resolve redirect URLs (e.g. age verification) to the original video URL carried in the next_url parameter
+               mobj = re.search(self._NEXT_URL_RE, url)
+               if mobj:
+                       url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
+
                 # Extract video id from URL
                 mobj = re.match(self._VALID_URL, url)
                 if mobj is None:
@@ -1443,17 +1451,24 @@ class YoutubeIE(InfoExtractor):
                         else:
                                 srt_lang_list = re.findall(r'lang_code="([\w\-]+)"', srt_list)
                                 if srt_lang_list:
-                                       if 'en' in srt_lang_list: srt_lang = 'en'
-                                       else: srt_lang = srt_lang_list[0] # TODO choose better and provide an override
-                                       request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
-                                       try:
-                                               srt_xml = urllib2.urlopen(request).read()
-                                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                                               self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+                                       if self._downloader.params.get('subtitleslang', False):
+                                               srt_lang = self._downloader.params.get('subtitleslang')
+                                       elif 'en' in srt_lang_list:
+                                               srt_lang = 'en'
+                                       else:
+                                               srt_lang = srt_lang_list[0]
+                                       if not srt_lang in srt_lang_list:
+                                               self._downloader.trouble(u'WARNING: no closed captions found in the specified language')
                                         else:
-                                               video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
+                                               request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
+                                               try:
+                                                       srt_xml = urllib2.urlopen(request).read()
+                                               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                                                       self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+                                               else:
+                                                       video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
                                 else:
-                                       self._downloader.trouble(u'WARNING: video has no subtitles')
+                                       self._downloader.trouble(u'WARNING: video has no closed captions')
  
                 # token
                 video_token = urllib.unquote_plus(video_info['token'][0])
@@ -4385,6 +4400,12 @@ def parseOpts():
                         action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
         video_format.add_option('-F', '--list-formats',
                         action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
+       video_format.add_option('--write-srt',
+                       action='store_true', dest='writesubtitles',
+                       help='write video closed captions to a .srt file (currently youtube only)', default=False)
+       video_format.add_option('--srt-lang',
+                       action='store', dest='subtitleslang', metavar='LANG',
+                       help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
  
  
         verbosity.add_option('-q', '--quiet',
@@ -4449,9 +4470,6 @@ def parseOpts():
         filesystem.add_option('--write-info-json',
                         action='store_true', dest='writeinfojson',
                         help='write video metadata to a .info.json file', default=False)
-       filesystem.add_option('--write-srt',
-                       action='store_true', dest='writesubtitles',
-                       help='write video subtitles to a .srt file', default=False)
  
  
         postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
@@ -4552,6 +4570,7 @@ def _real_main():
                 except IOError:
                         sys.exit(u'ERROR: batch file could not be read')
         all_urls = batchurls + args
+       all_urls = map(lambda url: url.strip(), all_urls)
  
         # General configuration
         cookie_processor = urllib2.HTTPCookieProcessor(jar)
@@ -4653,6 +4672,7 @@ def _real_main():
                 'writedescription': opts.writedescription,
                 'writeinfojson': opts.writeinfojson,
                 'writesubtitles': opts.writesubtitles,
+               'subtitleslang': opts.subtitleslang,
                 'matchtitle': opts.matchtitle,
                 'rejecttitle': opts.rejecttitle,
                 'max_downloads': opts.max_downloads,