Encode the entire filename
[youtube-dl] / youtube_dl / FileDownloader.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import httplib
5 import math
6 import os
7 import re
8 import socket
9 import subprocess
10 import sys
11 import time
12 import urllib2
13
14 if os.name == 'nt':
15         import ctypes
16
17 from utils import *
18
19
20 class FileDownloader(object):
21         """File Downloader class.
22
23         File downloader objects are the ones responsible for downloading the
24         actual video file and writing it to disk if the user has requested
25         it, among some other tasks. In most cases there should be one per
26         program. As, given a video URL, the downloader doesn't know how to
27         extract all the needed information, task that InfoExtractors do, it
28         has to pass the URL to one of them.
29
30         For this, file downloader objects have a method that allows
31         InfoExtractors to be registered in a given order. When it is passed
32         a URL, the file downloader hands it to the first InfoExtractor it
33         finds that reports being able to handle it. The InfoExtractor extracts
34         all the information about the video or videos the URL refers to, and
35         asks the FileDownloader to process the video information, possibly
36         downloading the video.
37
38         File downloaders accept a lot of parameters. In order not to saturate
39         the object constructor with arguments, it receives a dictionary of
40         options instead. These options are available through the params
41         attribute for the InfoExtractors to use. The FileDownloader also
42         registers itself as the downloader in charge for the InfoExtractors
43         that are added to it, so this is a "mutual registration".
44
45         Available options:
46
47         username:          Username for authentication purposes.
48         password:          Password for authentication purposes.
49         usenetrc:          Use netrc for authentication instead.
50         quiet:             Do not print messages to stdout.
51         forceurl:          Force printing final URL.
52         forcetitle:        Force printing title.
53         forcethumbnail:    Force printing thumbnail URL.
54         forcedescription:  Force printing description.
55         forcefilename:     Force printing final filename.
56         simulate:          Do not download the video files.
57         format:            Video format code.
58         format_limit:      Highest quality format to try.
59         outtmpl:           Template for output names.
60         restrictfilenames: Do not allow "&" and spaces in file names
61         ignoreerrors:      Do not stop on download errors.
62         ratelimit:         Download speed limit, in bytes/sec.
63         nooverwrites:      Prevent overwriting files.
64         retries:           Number of times to retry for HTTP error 5xx
65         continuedl:        Try to continue downloads if possible.
66         noprogress:        Do not print the progress bar.
67         playliststart:     Playlist item to start at.
68         playlistend:       Playlist item to end at.
69         matchtitle:        Download only matching titles.
70         rejecttitle:       Reject downloads for matching titles.
71         logtostderr:       Log messages to stderr instead of stdout.
72         consoletitle:      Display progress in console window's titlebar.
73         nopart:            Do not use temporary .part files.
74         updatetime:        Use the Last-modified header to set output file timestamps.
75         writedescription:  Write the video description to a .description file
76         writeinfojson:     Write the video description to a .info.json file
77         writesubtitles:    Write the video subtitles to a .srt file
78         subtitleslang:     Language of the subtitles to download
79         """
80
81         params = None
82         _ies = []
83         _pps = []
84         _download_retcode = None
85         _num_downloads = None
86         _screen_file = None
87
88         def __init__(self, params):
89                 """Create a FileDownloader object with the given options."""
90                 self._ies = []
91                 self._pps = []
92                 self._download_retcode = 0
93                 self._num_downloads = 0
94                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
95                 self.params = params
96
97         @staticmethod
98         def format_bytes(bytes):
99                 if bytes is None:
100                         return 'N/A'
101                 if type(bytes) is str:
102                         bytes = float(bytes)
103                 if bytes == 0.0:
104                         exponent = 0
105                 else:
106                         exponent = long(math.log(bytes, 1024.0))
107                 suffix = 'bkMGTPEZY'[exponent]
108                 converted = float(bytes) / float(1024 ** exponent)
109                 return '%.2f%s' % (converted, suffix)
110
111         @staticmethod
112         def calc_percent(byte_counter, data_len):
113                 if data_len is None:
114                         return '---.-%'
115                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
116
117         @staticmethod
118         def calc_eta(start, now, total, current):
119                 if total is None:
120                         return '--:--'
121                 dif = now - start
122                 if current == 0 or dif < 0.001: # One millisecond
123                         return '--:--'
124                 rate = float(current) / dif
125                 eta = long((float(total) - float(current)) / rate)
126                 (eta_mins, eta_secs) = divmod(eta, 60)
127                 if eta_mins > 99:
128                         return '--:--'
129                 return '%02d:%02d' % (eta_mins, eta_secs)
130
131         @staticmethod
132         def calc_speed(start, now, bytes):
133                 dif = now - start
134                 if bytes == 0 or dif < 0.001: # One millisecond
135                         return '%10s' % '---b/s'
136                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
137
138         @staticmethod
139         def best_block_size(elapsed_time, bytes):
140                 new_min = max(bytes / 2.0, 1.0)
141                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
142                 if elapsed_time < 0.001:
143                         return int(new_max)
144                 rate = bytes / elapsed_time
145                 if rate > new_max:
146                         return int(new_max)
147                 if rate < new_min:
148                         return int(new_min)
149                 return int(rate)
150
151         @staticmethod
152         def parse_bytes(bytestr):
153                 """Parse a string indicating a byte quantity into an integer."""
154                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
155                 if matchobj is None:
156                         return None
157                 number = float(matchobj.group(1))
158                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
159                 return int(round(number * multiplier))
160
161         def add_info_extractor(self, ie):
162                 """Add an InfoExtractor object to the end of the list."""
163                 self._ies.append(ie)
164                 ie.set_downloader(self)
165
166         def add_post_processor(self, pp):
167                 """Add a PostProcessor object to the end of the chain."""
168                 self._pps.append(pp)
169                 pp.set_downloader(self)
170
171         def to_screen(self, message, skip_eol=False):
172                 """Print message to stdout if not in quiet mode."""
173                 assert type(message) == type(u'')
174                 if not self.params.get('quiet', False):
175                         terminator = [u'\n', u''][skip_eol]
176                         output = message + terminator
177                         if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
178                                 output = output.encode(preferredencoding(), 'ignore')
179                         self._screen_file.write(output)
180                         self._screen_file.flush()
181
182         def to_stderr(self, message):
183                 """Print message to stderr."""
184                 assert type(message) == type(u'')
185                 sys.stderr.write((message + u'\n').encode(preferredencoding()))
186
187         def to_cons_title(self, message):
188                 """Set console/terminal window title to message."""
189                 if not self.params.get('consoletitle', False):
190                         return
191                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
192                         # c_wchar_p() might not be necessary if `message` is
193                         # already of type unicode()
194                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
195                 elif 'TERM' in os.environ:
196                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
197
198         def fixed_template(self):
199                 """Checks if the output template is fixed."""
200                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
201
202         def trouble(self, message=None):
203                 """Determine action to take when a download problem appears.
204
205                 Depending on if the downloader has been configured to ignore
206                 download errors or not, this method may throw an exception or
207                 not when errors are found, after printing the message.
208                 """
209                 if message is not None:
210                         self.to_stderr(message)
211                 if not self.params.get('ignoreerrors', False):
212                         raise DownloadError(message)
213                 self._download_retcode = 1
214
215         def slow_down(self, start_time, byte_counter):
216                 """Sleep if the download speed is over the rate limit."""
217                 rate_limit = self.params.get('ratelimit', None)
218                 if rate_limit is None or byte_counter == 0:
219                         return
220                 now = time.time()
221                 elapsed = now - start_time
222                 if elapsed <= 0.0:
223                         return
224                 speed = float(byte_counter) / elapsed
225                 if speed > rate_limit:
226                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
227
228         def temp_name(self, filename):
229                 """Returns a temporary filename for the given filename."""
230                 if self.params.get('nopart', False) or filename == u'-' or \
231                                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
232                         return filename
233                 return filename + u'.part'
234
235         def undo_temp_name(self, filename):
236                 if filename.endswith(u'.part'):
237                         return filename[:-len(u'.part')]
238                 return filename
239
240         def try_rename(self, old_filename, new_filename):
241                 try:
242                         if old_filename == new_filename:
243                                 return
244                         os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
245                 except (IOError, OSError), err:
246                         self.trouble(u'ERROR: unable to rename file')
247
248         def try_utime(self, filename, last_modified_hdr):
249                 """Try to set the last-modified time of the given file."""
250                 if last_modified_hdr is None:
251                         return
252                 if not os.path.isfile(encodeFilename(filename)):
253                         return
254                 timestr = last_modified_hdr
255                 if timestr is None:
256                         return
257                 filetime = timeconvert(timestr)
258                 if filetime is None:
259                         return filetime
260                 try:
261                         os.utime(filename, (time.time(), filetime))
262                 except:
263                         pass
264                 return filetime
265
266         def report_writedescription(self, descfn):
267                 """ Report that the description file is being written """
268                 self.to_screen(u'[info] Writing video description to: ' + descfn)
269
270         def report_writesubtitles(self, srtfn):
271                 """ Report that the subtitles file is being written """
272                 self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
273
274         def report_writeinfojson(self, infofn):
275                 """ Report that the metadata file has been written """
276                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
277
278         def report_destination(self, filename):
279                 """Report destination filename."""
280                 self.to_screen(u'[download] Destination: ' + filename)
281
282         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
283                 """Report download progress."""
284                 if self.params.get('noprogress', False):
285                         return
286                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
287                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
288                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
289                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
290
291         def report_resuming_byte(self, resume_len):
292                 """Report attempt to resume at given byte."""
293                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
294
295         def report_retry(self, count, retries):
296                 """Report retry in case of HTTP error 5xx"""
297                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
298
299         def report_file_already_downloaded(self, file_name):
300                 """Report file has already been fully downloaded."""
301                 try:
302                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
303                 except (UnicodeEncodeError), err:
304                         self.to_screen(u'[download] The file has already been downloaded')
305
306         def report_unable_to_resume(self):
307                 """Report it was impossible to resume download."""
308                 self.to_screen(u'[download] Unable to resume')
309
310         def report_finish(self):
311                 """Report download finished."""
312                 if self.params.get('noprogress', False):
313                         self.to_screen(u'[download] Download completed')
314                 else:
315                         self.to_screen(u'')
316
317         def increment_downloads(self):
318                 """Increment the ordinal that assigns a number to each file."""
319                 self._num_downloads += 1
320
321         def prepare_filename(self, info_dict):
322                 """Generate the output filename."""
323                 try:
324                         template_dict = dict(info_dict)
325                         template_dict['epoch'] = unicode(int(time.time()))
326                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
327                         filename = self.params['outtmpl'] % template_dict
328                         return filename
329                 except (ValueError, KeyError), err:
330                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
331                         return None
332
333         def _match_entry(self, info_dict):
334                 """ Returns None iff the file should be downloaded """
335
336                 title = info_dict['title']
337                 matchtitle = self.params.get('matchtitle', False)
338                 if matchtitle:
339                         matchtitle = matchtitle.decode('utf8')
340                         if not re.search(matchtitle, title, re.IGNORECASE):
341                                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
342                 rejecttitle = self.params.get('rejecttitle', False)
343                 if rejecttitle:
344                         rejecttitle = rejecttitle.decode('utf8')
345                         if re.search(rejecttitle, title, re.IGNORECASE):
346                                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
347                 return None
348
349         def process_info(self, info_dict):
350                 """Process a single dictionary returned by an InfoExtractor."""
351
352                 # Keep for backwards compatibility
353                 info_dict['stitle'] = info_dict['title']
354
355                 reason = self._match_entry(info_dict)
356                 if reason is not None:
357                         self.to_screen(u'[download] ' + reason)
358                         return
359
360                 max_downloads = self.params.get('max_downloads')
361                 if max_downloads is not None:
362                         if self._num_downloads > int(max_downloads):
363                                 raise MaxDownloadsReached()
364
365                 filename = self.prepare_filename(info_dict)
366                 filename = sanitize_filename(filename, self.params.get('restrictfilenames'))
367
368                 # Forced printings
369                 if self.params.get('forcetitle', False):
370                         print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
371                 if self.params.get('forceurl', False):
372                         print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
373                 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
374                         print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
375                 if self.params.get('forcedescription', False) and 'description' in info_dict:
376                         print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
377                 if self.params.get('forcefilename', False) and filename is not None:
378                         print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
379                 if self.params.get('forceformat', False):
380                         print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
381
382                 # Do nothing else if in simulate mode
383                 if self.params.get('simulate', False):
384                         return
385
386                 if filename is None:
387                         return
388
389                 try:
390                         dn = os.path.dirname(encodeFilename(filename))
391                         if dn != '' and not os.path.exists(dn): # dn is already encoded
392                                 os.makedirs(dn)
393                 except (OSError, IOError), err:
394                         self.trouble(u'ERROR: unable to create directory ' + unicode(err))
395                         return
396
397                 if self.params.get('writedescription', False):
398                         try:
399                                 descfn = filename + u'.description'
400                                 self.report_writedescription(descfn)
401                                 descfile = open(encodeFilename(descfn), 'wb')
402                                 try:
403                                         descfile.write(info_dict['description'].encode('utf-8'))
404                                 finally:
405                                         descfile.close()
406                         except (OSError, IOError):
407                                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
408                                 return
409
410                 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
411                         # subtitles download errors are already managed as troubles in relevant IE
412                         # that way it will silently go on when used with unsupporting IE
413                         try:
414                                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
415                                 self.report_writesubtitles(srtfn)
416                                 srtfile = open(encodeFilename(srtfn), 'wb')
417                                 try:
418                                         srtfile.write(info_dict['subtitles'].encode('utf-8'))
419                                 finally:
420                                         srtfile.close()
421                         except (OSError, IOError):
422                                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
423                                 return
424
425                 if self.params.get('writeinfojson', False):
426                         infofn = filename + u'.info.json'
427                         self.report_writeinfojson(infofn)
428                         try:
429                                 json.dump
430                         except (NameError,AttributeError):
431                                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
432                                 return
433                         try:
434                                 infof = open(encodeFilename(infofn), 'wb')
435                                 try:
436                                         json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
437                                         json.dump(json_info_dict, infof)
438                                 finally:
439                                         infof.close()
440                         except (OSError, IOError):
441                                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
442                                 return
443
444                 if not self.params.get('skip_download', False):
445                         if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
446                                 success = True
447                         else:
448                                 try:
449                                         success = self._do_download(filename, info_dict)
450                                 except (OSError, IOError), err:
451                                         raise UnavailableVideoError
452                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
453                                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
454                                         return
455                                 except (ContentTooShortError, ), err:
456                                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
457                                         return
458
459                         if success:
460                                 try:
461                                         self.post_process(filename, info_dict)
462                                 except (PostProcessingError), err:
463                                         self.trouble(u'ERROR: postprocessing: %s' % str(err))
464                                         return
465
466         def download(self, url_list):
467                 """Download a given list of URLs."""
468                 if len(url_list) > 1 and self.fixed_template():
469                         raise SameFileError(self.params['outtmpl'])
470
471                 for url in url_list:
472                         suitable_found = False
473                         for ie in self._ies:
474                                 # Go to next InfoExtractor if not suitable
475                                 if not ie.suitable(url):
476                                         continue
477
478                                 # Suitable InfoExtractor found
479                                 suitable_found = True
480
481                                 # Extract information from URL and process it
482                                 videos = ie.extract(url)
483                                 for video in videos or []:
484                                         video['extractor'] = ie.IE_NAME
485                                         try:
486                                                 self.increment_downloads()
487                                                 self.process_info(video)
488                                         except UnavailableVideoError:
489                                                 self.trouble(u'\nERROR: unable to download video')
490
491                                 # Suitable InfoExtractor had been found; go to next URL
492                                 break
493
494                         if not suitable_found:
495                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
496
497                 return self._download_retcode
498
499         def post_process(self, filename, ie_info):
500                 """Run the postprocessing chain on the given file."""
501                 info = dict(ie_info)
502                 info['filepath'] = filename
503                 for pp in self._pps:
504                         info = pp.run(info)
505                         if info is None:
506                                 break
507
508         def _download_with_rtmpdump(self, filename, url, player_url):
509                 self.report_destination(filename)
510                 tmpfilename = self.temp_name(filename)
511
512                 # Check for rtmpdump first
513                 try:
514                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
515                 except (OSError, IOError):
516                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
517                         return False
518
519                 # Download using rtmpdump. rtmpdump returns exit code 2 when
520                 # the connection was interrumpted and resuming appears to be
521                 # possible. This is part of rtmpdump's normal usage, AFAIK.
522                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
523                 args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
524                 if self.params.get('verbose', False):
525                         try:
526                                 import pipes
527                                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
528                         except ImportError:
529                                 shell_quote = repr
530                         self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
531                 retval = subprocess.call(args)
532                 while retval == 2 or retval == 1:
533                         prevsize = os.path.getsize(encodeFilename(tmpfilename))
534                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
535                         time.sleep(5.0) # This seems to be needed
536                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
537                         cursize = os.path.getsize(encodeFilename(tmpfilename))
538                         if prevsize == cursize and retval == 1:
539                                 break
540                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
541                         if prevsize == cursize and retval == 2 and cursize > 1024:
542                                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
543                                 retval = 0
544                                 break
545                 if retval == 0:
546                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
547                         self.try_rename(tmpfilename, filename)
548                         return True
549                 else:
550                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
551                         return False
552
553         def _do_download(self, filename, info_dict):
554                 url = info_dict['url']
555                 player_url = info_dict.get('player_url', None)
556
557                 # Check file already present
558                 if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
559                         self.report_file_already_downloaded(filename)
560                         return True
561
562                 # Attempt to download using rtmpdump
563                 if url.startswith('rtmp'):
564                         return self._download_with_rtmpdump(filename, url, player_url)
565
566                 tmpfilename = self.temp_name(filename)
567                 stream = None
568
569                 # Do not include the Accept-Encoding header
570                 headers = {'Youtubedl-no-compression': 'True'}
571                 basic_request = urllib2.Request(url, None, headers)
572                 request = urllib2.Request(url, None, headers)
573
574                 # Establish possible resume length
575                 if os.path.isfile(encodeFilename(tmpfilename)):
576                         resume_len = os.path.getsize(encodeFilename(tmpfilename))
577                 else:
578                         resume_len = 0
579
580                 open_mode = 'wb'
581                 if resume_len != 0:
582                         if self.params.get('continuedl', False):
583                                 self.report_resuming_byte(resume_len)
584                                 request.add_header('Range','bytes=%d-' % resume_len)
585                                 open_mode = 'ab'
586                         else:
587                                 resume_len = 0
588
589                 count = 0
590                 retries = self.params.get('retries', 0)
591                 while count <= retries:
592                         # Establish connection
593                         try:
594                                 if count == 0 and 'urlhandle' in info_dict:
595                                         data = info_dict['urlhandle']
596                                 data = urllib2.urlopen(request)
597                                 break
598                         except (urllib2.HTTPError, ), err:
599                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
600                                         # Unexpected HTTP error
601                                         raise
602                                 elif err.code == 416:
603                                         # Unable to resume (requested range not satisfiable)
604                                         try:
605                                                 # Open the connection again without the range header
606                                                 data = urllib2.urlopen(basic_request)
607                                                 content_length = data.info()['Content-Length']
608                                         except (urllib2.HTTPError, ), err:
609                                                 if err.code < 500 or err.code >= 600:
610                                                         raise
611                                         else:
612                                                 # Examine the reported length
613                                                 if (content_length is not None and
614                                                                 (resume_len - 100 < long(content_length) < resume_len + 100)):
615                                                         # The file had already been fully downloaded.
616                                                         # Explanation to the above condition: in issue #175 it was revealed that
617                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
618                                                         # changing the file size slightly and causing problems for some users. So
619                                                         # I decided to implement a suggested change and consider the file
620                                                         # completely downloaded if the file size differs less than 100 bytes from
621                                                         # the one in the hard drive.
622                                                         self.report_file_already_downloaded(filename)
623                                                         self.try_rename(tmpfilename, filename)
624                                                         return True
625                                                 else:
626                                                         # The length does not match, we start the download over
627                                                         self.report_unable_to_resume()
628                                                         open_mode = 'wb'
629                                                         break
630                         # Retry
631                         count += 1
632                         if count <= retries:
633                                 self.report_retry(count, retries)
634
635                 if count > retries:
636                         self.trouble(u'ERROR: giving up after %s retries' % retries)
637                         return False
638
639                 data_len = data.info().get('Content-length', None)
640                 if data_len is not None:
641                         data_len = long(data_len) + resume_len
642                 data_len_str = self.format_bytes(data_len)
643                 byte_counter = 0 + resume_len
644                 block_size = 1024
645                 start = time.time()
646                 while True:
647                         # Download and write
648                         before = time.time()
649                         data_block = data.read(block_size)
650                         after = time.time()
651                         if len(data_block) == 0:
652                                 break
653                         byte_counter += len(data_block)
654
655                         # Open file just in time
656                         if stream is None:
657                                 try:
658                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
659                                         assert stream is not None
660                                         filename = self.undo_temp_name(tmpfilename)
661                                         self.report_destination(filename)
662                                 except (OSError, IOError), err:
663                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
664                                         return False
665                         try:
666                                 stream.write(data_block)
667                         except (IOError, OSError), err:
668                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
669                                 return False
670                         block_size = self.best_block_size(after - before, len(data_block))
671
672                         # Progress message
673                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
674                         if data_len is None:
675                                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
676                         else:
677                                 percent_str = self.calc_percent(byte_counter, data_len)
678                                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
679                                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
680
681                         # Apply rate limit
682                         self.slow_down(start, byte_counter - resume_len)
683
684                 if stream is None:
685                         self.trouble(u'\nERROR: Did not get any data blocks')
686                         return False
687                 stream.close()
688                 self.report_finish()
689                 if data_len is not None and byte_counter != data_len:
690                         raise ContentTooShortError(byte_counter, long(data_len))
691                 self.try_rename(tmpfilename, filename)
692
693                 # Update file modification time
694                 if self.params.get('updatetime', True):
695                         info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
696
697                 return True