'Kevin Ngo',
'Ori Avtalion',
'shizeeg',
+ 'Filippo Valsorda',
)
__license__ = 'Public Domain'
-__version__ = '2012.01.08'
+__version__ = '2012.02.27'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
+
import cookielib
import datetime
+import getpass
import gzip
import htmlentitydefs
import HTMLParser
import locale
import math
import netrc
+import optparse
import os
import os.path
import re
+import shlex
import socket
import string
import subprocess
"""
assert type(s) == type(u'')
- return s.encode(sys.getfilesystemencoding(), 'ignore')
+
+ if sys.platform == 'win32' and sys.getwindowsversion().major >= 5:
+ # Pass u'' directly to use Unicode APIs on Windows 2000 and up
+ # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+ # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+ return s
+ else:
+ return s.encode(sys.getfilesystemencoding(), 'ignore')
class DownloadError(Exception):
"""Download Error exception.
updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
+ writesubtitles: Write the video subtitles to a .srt file
+ subtitleslang: Language of the subtitles to download
"""
params = None
""" Report that the description file is being written """
self.to_screen(u'[info] Writing video description to: ' + descfn)
+ def report_writesubtitles(self, srtfn):
+ """ Report that the subtitles file srtfn is being written """
+ self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
+
def report_writeinfojson(self, infofn):
""" Report that the metadata file has been written """
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
except (OSError, IOError):
self.trouble(u'ERROR: Cannot write description file ' + descfn)
return
+
+ if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+ # Subtitle download errors are already reported as troubles by the relevant IE,
+ # so an IE without subtitle support simply leaves nothing to write here.
+ try:
+ # Drop the text after the last dot of filename and append .srt
+ srtfn = filename.rsplit('.', 1)[0] + u'.srt'
+ self.report_writesubtitles(srtfn)
+ srtfile = open(_encodeFilename(srtfn), 'wb')
+ try:
+ srtfile.write(info_dict['subtitles'].encode('utf-8'))
+ finally:
+ srtfile.close()
+ except (OSError, IOError):
+ # Report the subtitles path; descfn names the description file, not this one
+ self.trouble(u'ERROR: Cannot write subtitles file ' + srtfn)
+ return
if self.params.get('writeinfojson', False):
infofn = filename + u'.info.json'
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
- retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
+ args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
+ if self.params.get('verbose', False):
+ try:
+ import pipes
+ shell_quote = lambda args: ' '.join(map(pipes.quote, args))
+ except ImportError:
+ shell_quote = repr
+ self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
+ retval = subprocess.call(args)
while retval == 2 or retval == 1:
prevsize = os.path.getsize(_encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+ _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_NETRC_MACHINE = 'youtube'
# Listed in order of quality
_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
"""Report attempt to download video info webpage."""
self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
+ def report_video_subtitles_download(self, video_id):
+ """Report attempt to download video subtitles."""
+ self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id)
+
def report_information_extraction(self, video_id):
"""Report attempt to extract video information."""
self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
"""Indicate the download will use the RTMP protocol."""
self._downloader.to_screen(u'[youtube] RTMP download detected')
+ def _closed_captions_xml_to_srt(self, xml_string):
+ """Convert closed captions XML into SubRip (.srt) text.
+
+ Every <text start="S" dur="D">caption</text> element found in xml_string
+ becomes one numbered cue; when dur is absent the cue lasts 4 seconds.
+ Returns the concatenated cues, or '' when no element matches.
+ """
+ srt = ''
+ texts = re.findall(r'<text start="([\d\.]+)"( dur="([\d\.]+)")?>([^<]+)</text>', xml_string, re.MULTILINE)
+ # TODO parse xml instead of regex
+ for n, (start, dur_tag, dur, caption) in enumerate(texts):
+ if not dur: dur = '4'
+ start = float(start)
+ end = start + float(dur)
+ # Format both times as HH:MM:SS,mmm
+ start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000)
+ end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000)
+ caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption)
+ caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption) # double cycle, intentional
+ # SubRip cue counters start at 1 while enumerate() starts at 0
+ srt += str(n + 1) + '\n'
+ srt += start + ' --> ' + end + '\n'
+ srt += caption + '\n\n'
+ return srt
+
def _print_formats(self, formats):
print 'Available formats:'
for x in formats:
return
def _real_extract(self, url):
+ # Extract original video URL from URL with redirection, like age verification, using next_url parameter
+ mobj = re.search(self._NEXT_URL_RE, url)
+ if mobj:
+ url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
+
# Extract video id from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
lxml.etree
except NameError:
video_description = u'No description available.'
- if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
- mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
- if mobj is not None:
- video_description = mobj.group(1).decode('utf-8')
+ mobj = re.search(r'<meta name="description" content="(.*?)">', video_webpage)
+ if mobj is not None:
+ video_description = mobj.group(1).decode('utf-8')
else:
html_parser = lxml.etree.HTMLParser(encoding='utf-8')
vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
# TODO use another parser
+
+ # closed captions
+ video_subtitles = None
+ if self._downloader.params.get('writesubtitles', False):
+ self.report_video_subtitles_download(video_id)
+ request = urllib2.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+ try:
+ srt_list = urllib2.urlopen(request).read()
+ except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ else:
+ srt_lang_list = re.findall(r'lang_code="([\w\-]+)"', srt_list)
+ if srt_lang_list:
+ if self._downloader.params.get('subtitleslang', False):
+ srt_lang = self._downloader.params.get('subtitleslang')
+ elif 'en' in srt_lang_list:
+ srt_lang = 'en'
+ else:
+ srt_lang = srt_lang_list[0]
+ if not srt_lang in srt_lang_list:
+ self._downloader.trouble(u'WARNING: no closed captions found in the specified language')
+ else:
+ request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
+ try:
+ srt_xml = urllib2.urlopen(request).read()
+ except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ else:
+ video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
+ else:
+ self._downloader.trouble(u'WARNING: video has no closed captions')
# token
video_token = urllib.unquote_plus(video_info['token'][0])
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description,
'player_url': player_url,
+ 'subtitles': video_subtitles
})
except UnavailableVideoError, err:
self._downloader.trouble(u'\nERROR: unable to download video')
video_id = mobj.group(1)
# Retrieve video webpage to extract further information
- request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
+ request = urllib2.Request(url, None, std_headers)
try:
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
# and latter we extract those that are Vimeo specific.
self.report_extraction(video_id)
- # Extract title
- mobj = re.search(r'<caption>(.*?)</caption>', webpage)
- if mobj is None:
- self._downloader.trouble(u'ERROR: unable to extract video title')
+ # Extract the config JSON. The split stays inside the try block because a
+ # page without the ' = {config:' marker makes the [1] lookup raise
+ # IndexError, which is reported the same way as unparsable JSON.
+ try:
+ config = webpage.split(' = {config:')[1].split(',assets:')[0]
+ config = json.loads(config)
+ except (IndexError, ValueError):
+ self._downloader.trouble(u'ERROR: unable to extract info section')
return
- video_title = mobj.group(1).decode('utf-8')
+
+ # Extract title
+ video_title = config["video"]["title"]
simple_title = _simplify_title(video_title)
# Extract uploader
- mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
- if mobj is None:
- self._downloader.trouble(u'ERROR: unable to extract video uploader')
- return
- video_uploader = mobj.group(1).decode('utf-8')
+ video_uploader = config["video"]["owner"]["name"]
# Extract video thumbnail
- mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
- if mobj is None:
- self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
- return
- video_thumbnail = mobj.group(1).decode('utf-8')
+ video_thumbnail = config["video"]["thumbnail"]
- # # Extract video description
- # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
- # if mobj is None:
- # self._downloader.trouble(u'ERROR: unable to extract video description')
- # return
- # video_description = mobj.group(1).decode('utf-8')
- # if not video_description: video_description = 'No description available.'
- video_description = 'Foo.'
-
- # Vimeo specific: extract request signature
- mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
- if mobj is None:
- self._downloader.trouble(u'ERROR: unable to extract request signature')
- return
- sig = mobj.group(1).decode('utf-8')
-
- # Vimeo specific: extract video quality information
- mobj = re.search(r'<isHD>(\d+)</isHD>', webpage)
- if mobj is None:
- self._downloader.trouble(u'ERROR: unable to extract video quality information')
- return
- quality = mobj.group(1).decode('utf-8')
-
- if int(quality) == 1:
- quality = 'hd'
+ # Extract video description
+ try:
+ lxml.etree
+ except NameError:
+ video_description = u'No description available.'
+ mobj = re.search(r'<meta name="description" content="(.*?)" />', webpage, re.MULTILINE)
+ if mobj is not None:
+ video_description = mobj.group(1)
else:
- quality = 'sd'
+ html_parser = lxml.etree.HTMLParser()
+ vwebpage_doc = lxml.etree.parse(StringIO.StringIO(webpage), html_parser)
+ video_description = u''.join(vwebpage_doc.xpath('id("description")//text()')).strip()
+ # TODO use another parser
- # Vimeo specific: Extract request signature expiration
- mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
- if mobj is None:
- self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
+ # Extract upload date
+ video_upload_date = u'NA'
+ mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
+ if mobj is not None:
+ video_upload_date = mobj.group(1)
+
+ # Vimeo specific: extract request signature and timestamp
+ sig = config['request']['signature']
+ timestamp = config['request']['timestamp']
+
+ # Vimeo specific: extract video codec and quality information
+ # TODO bind to format param
+ codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
+ for codec in codecs:
+ if codec[0] in config["video"]["files"]:
+ video_codec = codec[0]
+ video_extension = codec[1]
+ if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
+ else: quality = 'sd'
+ break
+ else:
+ self._downloader.trouble(u'ERROR: no known codec found')
return
- sig_exp = mobj.group(1).decode('utf-8')
- video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality)
+ video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
+ %(video_id, sig, timestamp, quality, video_codec.upper())
try:
# Process video information
self._downloader.process_info({
- 'id': video_id.decode('utf-8'),
+ 'id': video_id,
'url': video_url,
'uploader': video_uploader,
- 'upload_date': u'NA',
+ 'upload_date': video_upload_date,
'title': video_title,
'stitle': simple_title,
- 'ext': u'mp4',
- 'thumbnail': video_thumbnail.decode('utf-8'),
- 'description': video_description,
+ 'ext': video_extension,
'thumbnail': video_thumbnail,
'description': video_description,
'player_url': None,
"""Report information extraction."""
self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
+ def report_following_redirect(self, new_url):
+ """Report that a redirect to new_url is being followed."""
+ self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
+
+ def _test_redirect(self, url):
+ """Check whether url redirects elsewhere (e.g. a URL shortener).
+
+ Opens url with a HEAD request, following redirects. If the final URL
+ equals url, returns False. Otherwise reports the redirect, passes the
+ final URL to the downloader so processing restarts on it, and returns True.
+ """
+ # Request subclass that reports HEAD as its HTTP method
+ class HeadRequest(urllib2.Request):
+ def get_method(self):
+ return "HEAD"
+
+ class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
+ """
+ Subclass the HTTPRedirectHandler to make it use our
+ HeadRequest also on the redirected URL
+ """
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ if code in (301, 302, 303, 307):
+ newurl = newurl.replace(' ', '%20')
+ # Copy the request headers except the body-related ones
+ newheaders = dict((k,v) for k,v in req.headers.items()
+ if k.lower() not in ("content-length", "content-type"))
+ return HeadRequest(newurl,
+ headers=newheaders,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True)
+ else:
+ raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+ class HTTPMethodFallback(urllib2.BaseHandler):
+ """
+ Fallback to GET if HEAD is not allowed (405 HTTP error)
+ """
+ def http_error_405(self, req, fp, code, msg, headers):
+ # Consume and close the 405 response before retrying
+ fp.read()
+ fp.close()
+
+ newheaders = dict((k,v) for k,v in req.headers.items()
+ if k.lower() not in ("content-length", "content-type"))
+ # Retry with a plain urllib2.Request instead of HeadRequest
+ return self.parent.open(urllib2.Request(req.get_full_url(),
+ headers=newheaders,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True))
+
+ # Build our opener
+ opener = urllib2.OpenerDirector()
+ for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+ HTTPMethodFallback, HEADRedirectHandler,
+ urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
+ opener.add_handler(handler())
+
+ response = opener.open(HeadRequest(url))
+ new_url = response.geturl()
+
+ # Final URL unchanged: nothing redirected, let the caller continue
+ if url == new_url: return False
+
+ self.report_following_redirect(new_url)
+ # Hand the redirect target back to the downloader
+ self._downloader.download([new_url])
+ return True
+
def _real_extract(self, url):
+ if self._test_redirect(url): return
+
# At this point we have a new video
self._downloader.increment_downloads()
class YoutubeSearchIE(InfoExtractor):
"""Information Extractor for YouTube search queries."""
_VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
- _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
- _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
- _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
+ _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
_youtube_ie = None
_max_youtube_results = 1000
IE_NAME = u'youtube:search'
"""Downloads a specified number of results for a query"""
video_ids = []
- already_seen = set()
- pagenum = 1
+ pagenum = 0
+ limit = n
- while True:
- self.report_download_page(query, pagenum)
- result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
+ while (50 * pagenum) < limit:
+ self.report_download_page(query, pagenum+1)
+ result_url = self._API_URL % (urllib.quote_plus(query), (50*pagenum)+1)
request = urllib2.Request(result_url)
try:
- page = urllib2.urlopen(request).read()
+ data = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err))
return
+ api_response = json.loads(data)['data']
- # Extract video identifiers
- for mobj in re.finditer(self._VIDEO_INDICATOR, page):
- video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
- if video_id not in already_seen:
- video_ids.append(video_id)
- already_seen.add(video_id)
- if len(video_ids) == n:
- # Specified n videos reached
- for id in video_ids:
- self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
- return
+ new_ids = list(video['id'] for video in api_response['items'])
+ video_ids += new_ids
- if re.search(self._MORE_PAGES_INDICATOR, page) is None:
- for id in video_ids:
- self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
- return
+ limit = min(n, api_response['totalItems'])
+ pagenum += 1
- pagenum = pagenum + 1
+ if len(video_ids) > n:
+ video_ids = video_ids[:n]
+ for id in video_ids:
+ self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+ return
class GoogleSearchIE(InfoExtractor):
"""Information Extractor for Google Video search queries."""
_VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
- _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
- _MORE_PAGES_INDICATOR = r'<span>Next</span>'
+ _VIDEO_INDICATOR = r'<a href="http://video\.google\.com/videoplay\?docid=([^"\&]+)'
+ _MORE_PAGES_INDICATOR = r'class="pn" id="pnnext"'
_google_ie = None
_max_google_results = 1000
IE_NAME = u'video.google:search'
"""Downloads a specified number of results for a query"""
video_ids = []
- already_seen = set()
- pagenum = 1
+ pagenum = 0
while True:
self.report_download_page(query, pagenum)
- result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
+ result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum*10)
request = urllib2.Request(result_url)
try:
page = urllib2.urlopen(request).read()
# Extract video identifiers
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
video_id = mobj.group(1)
- if video_id not in already_seen:
+ if video_id not in video_ids:
video_ids.append(video_id)
- already_seen.add(video_id)
if len(video_ids) == n:
# Specified n videos reached
for id in video_ids:
_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
- _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
+ _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=PL%s&'
_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
_youtube_ie = None
IE_NAME = u'youtube:playlist'
# Extract video identifiers
ids_in_page = []
- for mobj in re.finditer(self._VIDEO_INDICATOR, page):
+ for mobj in re.finditer(self._VIDEO_INDICATOR_TEMPLATE % playlist_id, page):
if mobj.group(1) not in ids_in_page:
ids_in_page.append(mobj.group(1))
video_ids.extend(ids_in_page)
playliststart = self._downloader.params.get('playliststart', 1) - 1
playlistend = self._downloader.params.get('playlistend', -1)
- video_ids = video_ids[playliststart:playlistend]
+ if playlistend == -1:
+ video_ids = video_ids[playliststart:]
+ else:
+ video_ids = video_ids[playliststart:playlistend]
for id in video_ids:
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
else:
video_ids = video_ids[playliststart:playlistend]
- self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
+ self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
(username, all_ids_count, len(video_ids)))
for video_id in video_ids:
if not os.access(filename, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % filename)
- downloader.to_screen('Updating to latest version...')
+ downloader.to_screen(u'Updating to latest version...')
try:
try:
vmatch = re.search("__version__ = '([^']+)'", newcontent)
if vmatch is not None and vmatch.group(1) == __version__:
- downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
+ downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
return
finally:
urlh.close()
except (IOError, OSError), err:
sys.exit('ERROR: unable to overwrite current version')
- downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
+ downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
def parseOpts():
- # Deferred imports
- import getpass
- import optparse
- import shlex
-
def _readOptions(filename_bytes):
try:
optionf = open(filename_bytes)
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
video_format.add_option('-F', '--list-formats',
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
+ video_format.add_option('--write-srt',
+ action='store_true', dest='writesubtitles',
+ help='write video closed captions to a .srt file (currently youtube only)', default=False)
+ video_format.add_option('--srt-lang',
+ action='store', dest='subtitleslang', metavar='LANG',
+ help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
verbosity.add_option('-q', '--quiet',
verbosity.add_option('--console-title',
action='store_true', dest='consoletitle',
help='display progress in console titlebar', default=False)
+ verbosity.add_option('-v', '--verbose',
+ action='store_true', dest='verbose', help='print various debugging information', default=False)
filesystem.add_option('-t', '--title',
filesystem.add_option('-w', '--no-overwrites',
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue',
- action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
+ action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
filesystem.add_option('--no-continue',
action='store_false', dest='continue_dl',
help='do not resume partially downloaded files (restart from beginning)')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
+ all_urls = map(lambda url: url.strip(), all_urls)
# General configuration
cookie_processor = urllib2.HTTPCookieProcessor(jar)
- opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
+ proxy_handler = urllib2.ProxyHandler()
+ opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
urllib2.install_opener(opener)
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
+ if opts.verbose:
+ print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
+
extractors = gen_extractors()
if opts.list_extractors:
'updatetime': opts.updatetime,
'writedescription': opts.writedescription,
'writeinfojson': opts.writeinfojson,
+ 'writesubtitles': opts.writesubtitles,
+ 'subtitleslang': opts.subtitleslang,
'matchtitle': opts.matchtitle,
'rejecttitle': opts.rejecttitle,
'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats,
+ 'verbose': opts.verbose,
})
for extractor in extractors:
fd.add_info_extractor(extractor)