X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FFileDownloader.py;h=868023db9f2d1233ed96ac680a8b0966336f27ef;hb=89fb51dd2d4d7464b919f17b9d5d24a448319dfc;hp=14e872a98a922606b8a3f3ea15a9d3d61ef87274;hpb=1b91a2e2cfa3b9277205eb9652e5a2f0b40a0016;p=youtube-dl

diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 14e872a98..868023db9 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -9,11 +9,10 @@ import socket
 import subprocess
 import sys
 import time
-import urllib2
 
 if os.name == 'nt':
 	import ctypes
-	
+
 from utils import *
 
 
@@ -44,37 +43,40 @@ class FileDownloader(object):
 
 	Available options:
 
-	username:         Username for authentication purposes.
-	password:         Password for authentication purposes.
-	usenetrc:         Use netrc for authentication instead.
-	quiet:            Do not print messages to stdout.
-	forceurl:         Force printing final URL.
-	forcetitle:       Force printing title.
-	forcethumbnail:   Force printing thumbnail URL.
-	forcedescription: Force printing description.
-	forcefilename:    Force printing final filename.
-	simulate:         Do not download the video files.
-	format:           Video format code.
-	format_limit:     Highest quality format to try.
-	outtmpl:          Template for output names.
-	ignoreerrors:     Do not stop on download errors.
-	ratelimit:        Download speed limit, in bytes/sec.
-	nooverwrites:     Prevent overwriting files.
-	retries:          Number of times to retry for HTTP error 5xx
-	continuedl:       Try to continue downloads if possible.
-	noprogress:       Do not print the progress bar.
-	playliststart:    Playlist item to start at.
-	playlistend:      Playlist item to end at.
-	matchtitle:       Download only matching titles.
-	rejecttitle:      Reject downloads for matching titles.
-	logtostderr:      Log messages to stderr instead of stdout.
-	consoletitle:     Display progress in console window's titlebar.
-	nopart:           Do not use temporary .part files.
-	updatetime:       Use the Last-modified header to set output file timestamps.
-	writedescription: Write the video description to a .description file
-	writeinfojson:    Write the video description to a .info.json file
-	writesubtitles:   Write the video subtitles to a .srt file
-	subtitleslang:    Language of the subtitles to download
+	username:          Username for authentication purposes.
+	password:          Password for authentication purposes.
+	usenetrc:          Use netrc for authentication instead.
+	quiet:             Do not print messages to stdout.
+	forceurl:          Force printing final URL.
+	forcetitle:        Force printing title.
+	forcethumbnail:    Force printing thumbnail URL.
+	forcedescription:  Force printing description.
+	forcefilename:     Force printing final filename.
+	simulate:          Do not download the video files.
+	format:            Video format code.
+	format_limit:      Highest quality format to try.
+	outtmpl:           Template for output names.
+	restrictfilenames: Do not allow "&" and spaces in file names
+	ignoreerrors:      Do not stop on download errors.
+	ratelimit:         Download speed limit, in bytes/sec.
+	nooverwrites:      Prevent overwriting files.
+	retries:           Number of times to retry for HTTP error 5xx
+	buffersize:        Size of download buffer in bytes.
+	noresizebuffer:    Do not automatically resize the download buffer.
+	continuedl:        Try to continue downloads if possible.
+	noprogress:        Do not print the progress bar.
+	playliststart:     Playlist item to start at.
+	playlistend:       Playlist item to end at.
+	matchtitle:        Download only matching titles.
+	rejecttitle:       Reject downloads for matching titles.
+	logtostderr:       Log messages to stderr instead of stdout.
+	consoletitle:      Display progress in console window's titlebar.
+	nopart:            Do not use temporary .part files.
+	updatetime:        Use the Last-modified header to set output file timestamps.
+	writedescription:  Write the video description to a .description file
+	writeinfojson:     Write the video metadata to a .info.json file
+	writesubtitles:    Write the video subtitles to a .srt file
+	subtitleslang:     Language of the subtitles to download
 	"""
 
 	params = None
@@ -93,6 +95,9 @@ class FileDownloader(object):
 		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 		self.params = params
 
+		if '%(stitle)s' in self.params['outtmpl']:
+			self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+
 	@staticmethod
 	def format_bytes(bytes):
 		if bytes is None:
@@ -102,7 +107,7 @@ class FileDownloader(object):
 		if bytes == 0.0:
 			exponent = 0
 		else:
-			exponent = long(math.log(bytes, 1024.0))
+			exponent = int(math.log(bytes, 1024.0))
 		suffix = 'bkMGTPEZY'[exponent]
 		converted = float(bytes) / float(1024 ** exponent)
 		return '%.2f%s' % (converted, suffix)
@@ -121,7 +126,7 @@ class FileDownloader(object):
 		if current == 0 or dif < 0.001: # One millisecond
 			return '--:--'
 		rate = float(current) / dif
-		eta = long((float(total) - float(current)) / rate)
+		eta = int((float(total) - float(current)) / rate)
 		(eta_mins, eta_secs) = divmod(eta, 60)
 		if eta_mins > 99:
 			return '--:--'
@@ -139,23 +144,23 @@ class FileDownloader(object):
 		new_min = max(bytes / 2.0, 1.0)
 		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 		if elapsed_time < 0.001:
-			return long(new_max)
+			return int(new_max)
 		rate = bytes / elapsed_time
 		if rate > new_max:
-			return long(new_max)
+			return int(new_max)
 		if rate < new_min:
-			return long(new_min)
-		return long(rate)
+			return int(new_min)
+		return int(rate)
 
 	@staticmethod
 	def parse_bytes(bytestr):
-		"""Parse a string indicating a byte quantity into a long integer."""
+		"""Parse a string indicating a byte quantity into an integer."""
 		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 		if matchobj is None:
 			return None
 		number = float(matchobj.group(1))
 		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
-		return long(round(number * multiplier))
+		return int(round(number * multiplier))
 
 	def add_info_extractor(self, ie):
 		"""Add an InfoExtractor object to the end of the list."""
@@ -173,15 +178,15 @@ class FileDownloader(object):
 		if not self.params.get('quiet', False):
 			terminator = [u'\n', u''][skip_eol]
 			output = message + terminator
-
-			if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+			if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 				output = output.encode(preferredencoding(), 'ignore')
 			self._screen_file.write(output)
 			self._screen_file.flush()
 
 	def to_stderr(self, message):
 		"""Print message to stderr."""
-		print >>sys.stderr, message.encode(preferredencoding())
+		assert type(message) == type(u'')
+		sys.stderr.write((message + u'\n').encode(preferredencoding()))
 
 	def to_cons_title(self, message):
 		"""Set console/terminal window title to message."""
@@ -196,7 +201,7 @@ class FileDownloader(object):
 
 	def fixed_template(self):
 		"""Checks if the output template is fixed."""
-		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
+		return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 
 	def trouble(self, message=None):
 		"""Determine action to take when a download problem appears.
@@ -241,7 +246,7 @@ class FileDownloader(object):
 			if old_filename == new_filename:
 				return
 			os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
-		except (IOError, OSError), err:
+		except (IOError, OSError) as err:
 			self.trouble(u'ERROR: unable to rename file')
 
 	def try_utime(self, filename, last_modified_hdr):
@@ -299,7 +304,7 @@ class FileDownloader(object):
 		"""Report file has already been fully downloaded."""
 		try:
 			self.to_screen(u'[download] %s has already been downloaded' % file_name)
-		except (UnicodeEncodeError), err:
+		except (UnicodeEncodeError) as err:
 			self.to_screen(u'[download] The file has already been downloaded')
 
 	def report_unable_to_resume(self):
@@ -321,11 +326,16 @@ class FileDownloader(object):
 		"""Generate the output filename."""
 		try:
 			template_dict = dict(info_dict)
-			template_dict['epoch'] = unicode(long(time.time()))
-			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
+
+			template_dict['epoch'] = int(time.time())
+			template_dict['autonumber'] = u'%05d' % self._num_downloads
+
+			template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
+			template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
+
 			filename = self.params['outtmpl'] % template_dict
 			return filename
-		except (ValueError, KeyError), err:
+		except (ValueError, KeyError) as err:
 			self.trouble(u'ERROR: invalid system charset or erroneous output template')
 			return None
 
@@ -334,17 +344,25 @@ class FileDownloader(object):
 
 		title = info_dict['title']
 		matchtitle = self.params.get('matchtitle', False)
-		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
-			return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+		if matchtitle:
+			matchtitle = matchtitle.decode('utf8')
+			if not re.search(matchtitle, title, re.IGNORECASE):
+				return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 		rejecttitle = self.params.get('rejecttitle', False)
-		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
-			return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+		if rejecttitle:
+			rejecttitle = rejecttitle.decode('utf8')
+			if re.search(rejecttitle, title, re.IGNORECASE):
+				return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 		return None
 
 	def process_info(self, info_dict):
 		"""Process a single dictionary returned by an InfoExtractor."""
 
-		info_dict['stitle'] = sanitize_filename(info_dict['title'])
+		# Keep for backwards compatibility
+		info_dict['stitle'] = info_dict['title']
+
+		if not 'format' in info_dict:
+			info_dict['format'] = info_dict['ext']
 
 		reason = self._match_entry(info_dict)
 		if reason is not None:
@@ -357,20 +375,20 @@ class FileDownloader(object):
 				raise MaxDownloadsReached()
 
 		filename = self.prepare_filename(info_dict)
-		
+
 		# Forced printings
 		if self.params.get('forcetitle', False):
-			print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
+			print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
 		if self.params.get('forceurl', False):
-			print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
+			print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
 		if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
-			print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
+			print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
 		if self.params.get('forcedescription', False) and 'description' in info_dict:
-			print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+			print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
 		if self.params.get('forcefilename', False) and filename is not None:
-			print filename.encode(preferredencoding(), 'xmlcharrefreplace')
+			print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
 		if self.params.get('forceformat', False):
-			print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
+			print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
 
 		# Do nothing else if in simulate mode
 		if self.params.get('simulate', False):
@@ -383,8 +401,8 @@ class FileDownloader(object):
 			dn = os.path.dirname(encodeFilename(filename))
 			if dn != '' and not os.path.exists(dn): # dn is already encoded
 				os.makedirs(dn)
-		except (OSError, IOError), err:
-			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
+		except (OSError, IOError) as err:
+			self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
 			return
 
 		if self.params.get('writedescription', False):
@@ -399,10 +417,10 @@ class FileDownloader(object):
 			except (OSError, IOError):
 				self.trouble(u'ERROR: Cannot write description file ' + descfn)
 				return
-				
+
 		if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 			# subtitles download errors are already managed as troubles in relevant IE
-			# that way it will silently go on when used with unsupporting IE 
+			# that way it will silently go on when used with unsupporting IE
 			try:
 				srtfn = filename.rsplit('.', 1)[0] + u'.srt'
 				self.report_writesubtitles(srtfn)
@@ -440,19 +458,19 @@ class FileDownloader(object):
 			else:
 				try:
 					success = self._do_download(filename, info_dict)
-				except (OSError, IOError), err:
+				except (OSError, IOError) as err:
 					raise UnavailableVideoError
-				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+				except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
 					self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 					return
-				except (ContentTooShortError, ), err:
+				except (ContentTooShortError, ) as err:
 					self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 					return
-	
+
 			if success:
 				try:
 					self.post_process(filename, info_dict)
-				except (PostProcessingError), err:
+				except (PostProcessingError) as err:
 					self.trouble(u'ERROR: postprocessing: %s' % str(err))
 					return
 
@@ -468,12 +486,18 @@ class FileDownloader(object):
 				if not ie.suitable(url):
 					continue
 
+				# Warn if the _WORKING attribute is False
+				if not ie.working():
+					self.trouble(u'WARNING: the program functionality for this site has been marked as broken, '
+						         u'and will probably not work. If you want to go on, use the -i option.')
+
 				# Suitable InfoExtractor found
 				suitable_found = True
 
 				# Extract information from URL and process it
 				videos = ie.extract(url)
 				for video in videos or []:
+					video['extractor'] = ie.IE_NAME
 					try:
 						self.increment_downloads()
 						self.process_info(video)
@@ -560,8 +584,8 @@ class FileDownloader(object):
 
 		# Do not include the Accept-Encoding header
 		headers = {'Youtubedl-no-compression': 'True'}
-		basic_request = urllib2.Request(url, None, headers)
-		request = urllib2.Request(url, None, headers)
+		basic_request = compat_urllib_request.Request(url, None, headers)
+		request = compat_urllib_request.Request(url, None, headers)
 
 		# Establish possible resume length
 		if os.path.isfile(encodeFilename(tmpfilename)):
@@ -585,9 +609,9 @@ class FileDownloader(object):
 			try:
 				if count == 0 and 'urlhandle' in info_dict:
 					data = info_dict['urlhandle']
-				data = urllib2.urlopen(request)
+				data = compat_urllib_request.urlopen(request)
 				break
-			except (urllib2.HTTPError, ), err:
+			except (compat_urllib_error.HTTPError, ) as err:
 				if (err.code < 500 or err.code >= 600) and err.code != 416:
 					# Unexpected HTTP error
 					raise
@@ -595,15 +619,15 @@ class FileDownloader(object):
 					# Unable to resume (requested range not satisfiable)
 					try:
 						# Open the connection again without the range header
-						data = urllib2.urlopen(basic_request)
+						data = compat_urllib_request.urlopen(basic_request)
 						content_length = data.info()['Content-Length']
-					except (urllib2.HTTPError, ), err:
+					except (compat_urllib_error.HTTPError, ) as err:
 						if err.code < 500 or err.code >= 600:
 							raise
 					else:
 						# Examine the reported length
 						if (content_length is not None and
-								(resume_len - 100 < long(content_length) < resume_len + 100)):
+								(resume_len - 100 < int(content_length) < resume_len + 100)):
 							# The file had already been fully downloaded.
 							# Explanation to the above condition: in issue #175 it was revealed that
 							# YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -630,10 +654,10 @@ class FileDownloader(object):
 
 		data_len = data.info().get('Content-length', None)
 		if data_len is not None:
-			data_len = long(data_len) + resume_len
+			data_len = int(data_len) + resume_len
 		data_len_str = self.format_bytes(data_len)
 		byte_counter = 0 + resume_len
-		block_size = 1024
+		block_size = self.params.get('buffersize', 1024)
 		start = time.time()
 		while True:
 			# Download and write
@@ -651,15 +675,16 @@ class FileDownloader(object):
 					assert stream is not None
 					filename = self.undo_temp_name(tmpfilename)
 					self.report_destination(filename)
-				except (OSError, IOError), err:
+				except (OSError, IOError) as err:
 					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 					return False
 			try:
 				stream.write(data_block)
-			except (IOError, OSError), err:
+			except (IOError, OSError) as err:
 				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 				return False
-			block_size = self.best_block_size(after - before, len(data_block))
+			if not self.params.get('noresizebuffer', False):
+				block_size = self.best_block_size(after - before, len(data_block))
 
 			# Progress message
 			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
@@ -679,7 +704,7 @@ class FileDownloader(object):
 		stream.close()
 		self.report_finish()
 		if data_len is not None and byte_counter != data_len:
-			raise ContentTooShortError(byte_counter, long(data_len))
+			raise ContentTooShortError(byte_counter, int(data_len))
 		self.try_rename(tmpfilename, filename)
 
 		# Update file modification time