X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FFileDownloader.py;h=868023db9f2d1233ed96ac680a8b0966336f27ef;hb=89fb51dd2d4d7464b919f17b9d5d24a448319dfc;hp=14e872a98a922606b8a3f3ea15a9d3d61ef87274;hpb=1b91a2e2cfa3b9277205eb9652e5a2f0b40a0016;p=youtube-dl diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 14e872a98..868023db9 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -9,11 +9,10 @@ import socket import subprocess import sys import time -import urllib2 if os.name == 'nt': import ctypes - + from utils import * @@ -44,37 +43,40 @@ class FileDownloader(object): Available options: - username: Username for authentication purposes. - password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. - quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - forcethumbnail: Force printing thumbnail URL. - forcedescription: Force printing description. - forcefilename: Force printing final filename. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - ignoreerrors: Do not stop on download errors. - ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. - retries: Number of times to retry for HTTP error 5xx - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - playliststart: Playlist item to start at. - playlistend: Playlist item to end at. - matchtitle: Download only matching titles. - rejecttitle: Reject downloads for matching titles. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - writedescription: Write the video description to a .description file - writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a .srt file - subtitleslang: Language of the subtitles to download + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + forcethumbnail: Force printing thumbnail URL. + forcedescription: Force printing description. + forcefilename: Force printing final filename. + simulate: Do not download the video files. + format: Video format code. + format_limit: Highest quality format to try. + outtmpl: Template for output names. + restrictfilenames: Do not allow "&" and spaces in file names + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + playliststart: Playlist item to start at. + playlistend: Playlist item to end at. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file + writesubtitles: Write the video subtitles to a .srt file + subtitleslang: Language of the subtitles to download """ params = None @@ -93,6 +95,9 @@ class FileDownloader(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params + if '%(stitle)s' in self.params['outtmpl']: + self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') + @staticmethod def format_bytes(bytes): if bytes is None: @@ -102,7 +107,7 @@ class FileDownloader(object): if bytes == 0.0: exponent = 0 else: - exponent = long(math.log(bytes, 1024.0)) + exponent = int(math.log(bytes, 1024.0)) suffix = 'bkMGTPEZY'[exponent] converted = float(bytes) / float(1024 ** exponent) return '%.2f%s' % (converted, suffix) @@ -121,7 +126,7 @@ class FileDownloader(object): if current == 0 or dif < 0.001: # One millisecond return '--:--' rate = float(current) / dif - eta = long((float(total) - float(current)) / rate) + eta = int((float(total) - float(current)) / rate) (eta_mins, eta_secs) = divmod(eta, 60) if eta_mins > 99: return '--:--' @@ -139,23 +144,23 @@ class FileDownloader(object): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: - return long(new_max) + return int(new_max) rate = bytes / elapsed_time if rate > new_max: - return long(new_max) + return int(new_max) if rate < new_min: - return long(new_min) - return long(rate) + return int(new_min) + return int(rate) @staticmethod def parse_bytes(bytestr): - """Parse a string indicating a byte quantity into a long integer.""" + """Parse a string indicating a byte quantity into an integer.""" matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return long(round(number * multiplier)) + return int(round(number * multiplier)) def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -173,15 +178,15 @@ class FileDownloader(object): if not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator - - if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr + if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr output = output.encode(preferredencoding(), 'ignore') self._screen_file.write(output) self._screen_file.flush() def to_stderr(self, message): """Print message to stderr.""" - print >>sys.stderr, message.encode(preferredencoding()) + assert type(message) == type(u'') + sys.stderr.write((message + u'\n').encode(preferredencoding())) def to_cons_title(self, message): """Set console/terminal window title to message.""" @@ -196,7 +201,7 @@ class FileDownloader(object): def fixed_template(self): """Checks if the output template is fixed.""" - return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None) + return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) def trouble(self, message=None): """Determine action to take when a download problem appears. @@ -241,7 +246,7 @@ class FileDownloader(object): if old_filename == new_filename: return os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) - except (IOError, OSError), err: + except (IOError, OSError) as err: self.trouble(u'ERROR: unable to rename file') def try_utime(self, filename, last_modified_hdr): @@ -299,7 +304,7 @@ class FileDownloader(object): """Report file has already been fully downloaded.""" try: self.to_screen(u'[download] %s has already been downloaded' % file_name) - except (UnicodeEncodeError), err: + except (UnicodeEncodeError) as err: self.to_screen(u'[download] The file has already been downloaded') def report_unable_to_resume(self): @@ -321,11 +326,16 @@ class FileDownloader(object): """Generate the output filename.""" try: template_dict = dict(info_dict) - template_dict['epoch'] = unicode(long(time.time())) - template_dict['autonumber'] = unicode('%05d' % self._num_downloads) + + template_dict['epoch'] = int(time.time()) + template_dict['autonumber'] = u'%05d' % self._num_downloads + + template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items()) + template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items()) + filename = self.params['outtmpl'] % template_dict return filename - except (ValueError, KeyError), err: + except (ValueError, KeyError) as err: self.trouble(u'ERROR: invalid system charset or erroneous output template') return None @@ -334,17 +344,25 @@ class FileDownloader(object): title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) - if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): - return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' + if matchtitle: + matchtitle = matchtitle.decode('utf8') + if not re.search(matchtitle, title, re.IGNORECASE): + return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) - if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): - return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + if rejecttitle: + rejecttitle = rejecttitle.decode('utf8') + if re.search(rejecttitle, title, re.IGNORECASE): + return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - info_dict['stitle'] = sanitize_filename(info_dict['title']) + # Keep for backwards compatibility + info_dict['stitle'] = info_dict['title'] + + if not 'format' in info_dict: + info_dict['format'] = info_dict['ext'] reason = self._match_entry(info_dict) if reason is not None: @@ -357,20 +375,20 @@ class FileDownloader(object): raise MaxDownloadsReached() filename = self.prepare_filename(info_dict) - + # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceurl', False): - print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: - print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcedescription', False) and 'description' in info_dict: - print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcefilename', False) and filename is not None: - print filename.encode(preferredencoding(), 'xmlcharrefreplace') + print(filename.encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceformat', False): - print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')) # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -383,8 +401,8 @@ class FileDownloader(object): dn = os.path.dirname(encodeFilename(filename)) if dn != '' and not os.path.exists(dn): # dn is already encoded os.makedirs(dn) - except (OSError, IOError), err: - self.trouble(u'ERROR: unable to create directory ' + unicode(err)) + except (OSError, IOError) as err: + self.trouble(u'ERROR: unable to create directory ' + compat_str(err)) return if self.params.get('writedescription', False): @@ -399,10 +417,10 @@ class FileDownloader(object): except (OSError, IOError): self.trouble(u'ERROR: Cannot write description file ' + descfn) return - + if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE - # that way it will silently go on when used with unsupporting IE + # that way it will silently go on when used with unsupporting IE try: srtfn = filename.rsplit('.', 1)[0] + u'.srt' self.report_writesubtitles(srtfn) @@ -440,19 +458,19 @@ class FileDownloader(object): else: try: success = self._do_download(filename, info_dict) - except (OSError, IOError), err: + except (OSError, IOError) as err: raise UnavailableVideoError - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err: self.trouble(u'ERROR: unable to download video data: %s' % str(err)) return - except (ContentTooShortError, ), err: + except (ContentTooShortError, ) as err: self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - + if success: try: self.post_process(filename, info_dict) - except (PostProcessingError), err: + except (PostProcessingError) as err: self.trouble(u'ERROR: postprocessing: %s' % str(err)) return @@ -468,12 +486,18 @@ class FileDownloader(object): if not ie.suitable(url): continue + # Warn if the _WORKING attribute is False + if not ie.working(): + self.trouble(u'WARNING: the program functionality for this site has been marked as broken, ' + u'and will probably not work. If you want to go on, use the -i option.') + # Suitable InfoExtractor found suitable_found = True # Extract information from URL and process it videos = ie.extract(url) for video in videos or []: + video['extractor'] = ie.IE_NAME try: self.increment_downloads() self.process_info(video) @@ -560,8 +584,8 @@ class FileDownloader(object): # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} - basic_request = urllib2.Request(url, None, headers) - request = urllib2.Request(url, None, headers) + basic_request = compat_urllib_request.Request(url, None, headers) + request = compat_urllib_request.Request(url, None, headers) # Establish possible resume length if os.path.isfile(encodeFilename(tmpfilename)): @@ -585,9 +609,9 @@ class FileDownloader(object): try: if count == 0 and 'urlhandle' in info_dict: data = info_dict['urlhandle'] - data = urllib2.urlopen(request) + data = compat_urllib_request.urlopen(request) break - except (urllib2.HTTPError, ), err: + except (compat_urllib_error.HTTPError, ) as err: if (err.code < 500 or err.code >= 600) and err.code != 416: # Unexpected HTTP error raise @@ -595,15 +619,15 @@ class FileDownloader(object): # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header - data = urllib2.urlopen(basic_request) + data = compat_urllib_request.urlopen(basic_request) content_length = data.info()['Content-Length'] - except (urllib2.HTTPError, ), err: + except (compat_urllib_error.HTTPError, ) as err: if err.code < 500 or err.code >= 600: raise else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < long(content_length) < resume_len + 100)): + (resume_len - 100 < int(content_length) < resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -630,10 +654,10 @@ class FileDownloader(object): data_len = data.info().get('Content-length', None) if data_len is not None: - data_len = long(data_len) + resume_len + data_len = int(data_len) + resume_len data_len_str = self.format_bytes(data_len) byte_counter = 0 + resume_len - block_size = 1024 + block_size = self.params.get('buffersize', 1024) start = time.time() while True: # Download and write @@ -651,15 +675,16 @@ class FileDownloader(object): assert stream is not None filename = self.undo_temp_name(tmpfilename) self.report_destination(filename) - except (OSError, IOError), err: + except (OSError, IOError) as err: self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) return False try: stream.write(data_block) - except (IOError, OSError), err: + except (IOError, OSError) as err: self.trouble(u'\nERROR: unable to write data: %s' % str(err)) return False - block_size = self.best_block_size(after - before, len(data_block)) + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) # Progress message speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) @@ -679,7 +704,7 @@ class FileDownloader(object): stream.close() self.report_finish() if data_len is not None and byte_counter != data_len: - raise ContentTooShortError(byte_counter, long(data_len)) + raise ContentTooShortError(byte_counter, int(data_len)) self.try_rename(tmpfilename, filename) # Update file modification time