'Kevin Ngo',
'Ori Avtalion',
'shizeeg',
+ 'Filippo Valsorda',
)
__license__ = 'Public Domain'
writedescription: Write the video description to a .description file
writeinfojson: Write the video metadata to a .info.json file
writesubtitles: Write the video subtitles to a .srt file
+ subtitleslang: Language of the subtitles to download
"""
params = None
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+ _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_NETRC_MACHINE = 'youtube'
# Listed in order of quality
_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
return
def _real_extract(self, url):
+ # If the URL is a redirection page (e.g. age verification), recover the original video URL from its next_url parameter
+ mobj = re.search(self._NEXT_URL_RE, url)
+ if mobj:
+ url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
+
# Extract video id from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
else:
srt_lang_list = re.findall(r'lang_code="([\w\-]+)"', srt_list)
if srt_lang_list:
- if 'en' in srt_lang_list: srt_lang = 'en'
- else: srt_lang = srt_lang_list[0] # TODO choose better and provide an override
- request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
- try:
- srt_xml = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ if self._downloader.params.get('subtitleslang', False):
+ srt_lang = self._downloader.params.get('subtitleslang')
+ elif 'en' in srt_lang_list:
+ srt_lang = 'en'
+ else:
+ srt_lang = srt_lang_list[0]
+ if not srt_lang in srt_lang_list:
+ self._downloader.trouble(u'WARNING: no closed captions found in the specified language')
else:
- video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
+ request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
+ try:
+ srt_xml = urllib2.urlopen(request).read()
+ except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ else:
+ video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
else:
- self._downloader.trouble(u'WARNING: video has no subtitles')
+ self._downloader.trouble(u'WARNING: video has no closed captions')
# token
video_token = urllib.unquote_plus(video_info['token'][0])
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
video_format.add_option('-F', '--list-formats',
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
+ video_format.add_option('--write-srt',
+ action='store_true', dest='writesubtitles',
+ help='write video closed captions to a .srt file (currently youtube only)', default=False)
+ video_format.add_option('--srt-lang',
+ action='store', dest='subtitleslang', metavar='LANG',
+ help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
verbosity.add_option('-q', '--quiet',
filesystem.add_option('--write-info-json',
action='store_true', dest='writeinfojson',
help='write video metadata to a .info.json file', default=False)
- filesystem.add_option('--write-srt',
- action='store_true', dest='writesubtitles',
- help='write video subtitles to a .srt file', default=False)
postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
+ all_urls = map(lambda url: url.strip(), all_urls)
# General configuration
cookie_processor = urllib2.HTTPCookieProcessor(jar)
'writedescription': opts.writedescription,
'writeinfojson': opts.writeinfojson,
'writesubtitles': opts.writesubtitles,
+ 'subtitleslang': opts.subtitleslang,
'matchtitle': opts.matchtitle,
'rejecttitle': opts.rejecttitle,
'max_downloads': opts.max_downloads,