_ Git - youtube-dl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import math
   7 import io
   8 import os
   9 import re
  10 import socket
  11 import subprocess
  12 import sys
  13 import time
  14 import traceback
  15
  16 if os.name == 'nt':
  17     import ctypes
  18
  19 from .utils import *
  20
  21
  22 class FileDownloader(object):
  23     """File Downloader class.
  24
  25     File downloader objects are the ones responsible for downloading the
  26     actual video file and writing it to disk if the user has requested
  27     it, among some other tasks. In most cases there should be one per
  28     program. As, given a video URL, the downloader doesn't know how to
  29     extract all the needed information, task that InfoExtractors do, it
  30     has to pass the URL to one of them.
  31
  32     For this, file downloader objects have a method that allows
  33     InfoExtractors to be registered in a given order. When it is passed
  34     a URL, the file downloader hands it to the first InfoExtractor it
  35     finds that reports being able to handle it. The InfoExtractor extracts
  36     all the information about the video or videos the URL refers to, and
  37     asks the FileDownloader to process the video information, possibly
  38     downloading the video.
  39
  40     File downloaders accept a lot of parameters. In order not to saturate
  41     the object constructor with arguments, it receives a dictionary of
  42     options instead. These options are available through the params
  43     attribute for the InfoExtractors to use. The FileDownloader also
  44     registers itself as the downloader in charge for the InfoExtractors
  45     that are added to it, so this is a "mutual registration".
  46
  47     Available options:
  48
  49     username:          Username for authentication purposes.
  50     password:          Password for authentication purposes.
  51     usenetrc:          Use netrc for authentication instead.
  52     quiet:             Do not print messages to stdout.
  53     forceurl:          Force printing final URL.
  54     forcetitle:        Force printing title.
  55     forcethumbnail:    Force printing thumbnail URL.
  56     forcedescription:  Force printing description.
  57     forcefilename:     Force printing final filename.
  58     simulate:          Do not download the video files.
  59     format:            Video format code.
  60     format_limit:      Highest quality format to try.
  61     outtmpl:           Template for output names.
  62     restrictfilenames: Do not allow "&" and spaces in file names
  63     ignoreerrors:      Do not stop on download errors.
  64     ratelimit:         Download speed limit, in bytes/sec.
  65     nooverwrites:      Prevent overwriting files.
  66     retries:           Number of times to retry for HTTP error 5xx
  67     buffersize:        Size of download buffer in bytes.
  68     noresizebuffer:    Do not automatically resize the download buffer.
  69     continuedl:        Try to continue downloads if possible.
  70     noprogress:        Do not print the progress bar.
  71     playliststart:     Playlist item to start at.
  72     playlistend:       Playlist item to end at.
  73     matchtitle:        Download only matching titles.
  74     rejecttitle:       Reject downloads for matching titles.
  75     logtostderr:       Log messages to stderr instead of stdout.
  76     consoletitle:      Display progress in console window's titlebar.
  77     nopart:            Do not use temporary .part files.
  78     updatetime:        Use the Last-modified header to set output file timestamps.
  79     writedescription:  Write the video description to a .description file
  80     writeinfojson:     Write the video metadata to a .info.json file
  81     writesubtitles:    Write the video subtitles to a .srt file
  82     subtitleslang:     Language of the subtitles to download
  83     test:              Download only first bytes to test the downloader.
  84     keepvideo:         Keep the video file after post-processing
  85     """
  86
  87     params = None
  88     _ies = []
  89     _pps = []
  90     _download_retcode = None
  91     _num_downloads = None
  92     _screen_file = None
  93
  94     def __init__(self, params):
  95         """Create a FileDownloader object with the given options."""
  96         self._ies = []
  97         self._pps = []
  98         self._progress_hooks = []
  99         self._download_retcode = 0
 100         self._num_downloads = 0
 101         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 102         self.params = params
 103
 104         if '%(stitle)s' in self.params['outtmpl']:
 105             self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 106
 107     @staticmethod
 108     def format_bytes(bytes):
 109         if bytes is None:
 110             return 'N/A'
 111         if type(bytes) is str:
 112             bytes = float(bytes)
 113         if bytes == 0.0:
 114             exponent = 0
 115         else:
 116             exponent = int(math.log(bytes, 1024.0))
 117         suffix = 'bkMGTPEZY'[exponent]
 118         converted = float(bytes) / float(1024 ** exponent)
 119         return '%.2f%s' % (converted, suffix)
 120
 121     @staticmethod
 122     def calc_percent(byte_counter, data_len):
 123         if data_len is None:
 124             return '---.-%'
 125         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 126
 127     @staticmethod
 128     def calc_eta(start, now, total, current):
 129         if total is None:
 130             return '--:--'
 131         dif = now - start
 132         if current == 0 or dif < 0.001: # One millisecond
 133             return '--:--'
 134         rate = float(current) / dif
 135         eta = int((float(total) - float(current)) / rate)
 136         (eta_mins, eta_secs) = divmod(eta, 60)
 137         if eta_mins > 99:
 138             return '--:--'
 139         return '%02d:%02d' % (eta_mins, eta_secs)
 140
 141     @staticmethod
 142     def calc_speed(start, now, bytes):
 143         dif = now - start
 144         if bytes == 0 or dif < 0.001: # One millisecond
 145             return '%10s' % '---b/s'
 146         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 147
 148     @staticmethod
 149     def best_block_size(elapsed_time, bytes):
 150         new_min = max(bytes / 2.0, 1.0)
 151         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 152         if elapsed_time < 0.001:
 153             return int(new_max)
 154         rate = bytes / elapsed_time
 155         if rate > new_max:
 156             return int(new_max)
 157         if rate < new_min:
 158             return int(new_min)
 159         return int(rate)
 160
 161     @staticmethod
 162     def parse_bytes(bytestr):
 163         """Parse a string indicating a byte quantity into an integer."""
 164         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 165         if matchobj is None:
 166             return None
 167         number = float(matchobj.group(1))
 168         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 169         return int(round(number * multiplier))
 170
 171     def add_info_extractor(self, ie):
 172         """Add an InfoExtractor object to the end of the list."""
 173         self._ies.append(ie)
 174         ie.set_downloader(self)
 175
 176     def add_post_processor(self, pp):
 177         """Add a PostProcessor object to the end of the chain."""
 178         self._pps.append(pp)
 179         pp.set_downloader(self)
 180
 181     def to_screen(self, message, skip_eol=False):
 182         """Print message to stdout if not in quiet mode."""
 183         assert type(message) == type(u'')
 184         if not self.params.get('quiet', False):
 185             terminator = [u'\n', u''][skip_eol]
 186             output = message + terminator
 187             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 188                 output = output.encode(preferredencoding(), 'ignore')
 189             self._screen_file.write(output)
 190             self._screen_file.flush()
 191
 192     def to_stderr(self, message):
 193         """Print message to stderr."""
 194         assert type(message) == type(u'')
 195         output = message + u'\n'
 196         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 197             output = output.encode(preferredencoding())
 198         sys.stderr.write(output)
 199
 200     def to_cons_title(self, message):
 201         """Set console/terminal window title to message."""
 202         if not self.params.get('consoletitle', False):
 203             return
 204         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 205             # c_wchar_p() might not be necessary if `message` is
 206             # already of type unicode()
 207             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 208         elif 'TERM' in os.environ:
 209             sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 210
 211     def fixed_template(self):
 212         """Checks if the output template is fixed."""
 213         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 214
 215     def trouble(self, message=None, tb=None):
 216         """Determine action to take when a download problem appears.
 217
 218         Depending on if the downloader has been configured to ignore
 219         download errors or not, this method may throw an exception or
 220         not when errors are found, after printing the message.
 221
 222         tb, if given, is additional traceback information.
 223         """
 224         if message is not None:
 225             self.to_stderr(message)
 226         if self.params.get('verbose'):
 227             if tb is None:
 228                 tb_data = traceback.format_list(traceback.extract_stack())
 229                 tb = u''.join(tb_data)
 230             self.to_stderr(tb)
 231         if not self.params.get('ignoreerrors', False):
 232             raise DownloadError(message)
 233         self._download_retcode = 1
 234
 235     def slow_down(self, start_time, byte_counter):
 236         """Sleep if the download speed is over the rate limit."""
 237         rate_limit = self.params.get('ratelimit', None)
 238         if rate_limit is None or byte_counter == 0:
 239             return
 240         now = time.time()
 241         elapsed = now - start_time
 242         if elapsed <= 0.0:
 243             return
 244         speed = float(byte_counter) / elapsed
 245         if speed > rate_limit:
 246             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 247
 248     def temp_name(self, filename):
 249         """Returns a temporary filename for the given filename."""
 250         if self.params.get('nopart', False) or filename == u'-' or \
 251                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 252             return filename
 253         return filename + u'.part'
 254
 255     def undo_temp_name(self, filename):
 256         if filename.endswith(u'.part'):
 257             return filename[:-len(u'.part')]
 258         return filename
 259
 260     def try_rename(self, old_filename, new_filename):
 261         try:
 262             if old_filename == new_filename:
 263                 return
 264             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 265         except (IOError, OSError) as err:
 266             self.trouble(u'ERROR: unable to rename file')
 267
 268     def try_utime(self, filename, last_modified_hdr):
 269         """Try to set the last-modified time of the given file."""
 270         if last_modified_hdr is None:
 271             return
 272         if not os.path.isfile(encodeFilename(filename)):
 273             return
 274         timestr = last_modified_hdr
 275         if timestr is None:
 276             return
 277         filetime = timeconvert(timestr)
 278         if filetime is None:
 279             return filetime
 280         try:
 281             os.utime(filename, (time.time(), filetime))
 282         except:
 283             pass
 284         return filetime
 285
 286     def report_writedescription(self, descfn):
 287         """ Report that the description file is being written """
 288         self.to_screen(u'[info] Writing video description to: ' + descfn)
 289
 290     def report_writesubtitles(self, srtfn):
 291         """ Report that the subtitles file is being written """
 292         self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
 293
 294     def report_writeinfojson(self, infofn):
 295         """ Report that the metadata file has been written """
 296         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 297
 298     def report_destination(self, filename):
 299         """Report destination filename."""
 300         self.to_screen(u'[download] Destination: ' + filename)
 301
 302     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 303         """Report download progress."""
 304         if self.params.get('noprogress', False):
 305             return
 306         self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 307                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 308         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 309                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 310
 311     def report_resuming_byte(self, resume_len):
 312         """Report attempt to resume at given byte."""
 313         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 314
 315     def report_retry(self, count, retries):
 316         """Report retry in case of HTTP error 5xx"""
 317         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 318
 319     def report_file_already_downloaded(self, file_name):
 320         """Report file has already been fully downloaded."""
 321         try:
 322             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 323         except (UnicodeEncodeError) as err:
 324             self.to_screen(u'[download] The file has already been downloaded')
 325
 326     def report_unable_to_resume(self):
 327         """Report it was impossible to resume download."""
 328         self.to_screen(u'[download] Unable to resume')
 329
 330     def report_finish(self):
 331         """Report download finished."""
 332         if self.params.get('noprogress', False):
 333             self.to_screen(u'[download] Download completed')
 334         else:
 335             self.to_screen(u'')
 336
 337     def increment_downloads(self):
 338         """Increment the ordinal that assigns a number to each file."""
 339         self._num_downloads += 1
 340
 341     def prepare_filename(self, info_dict):
 342         """Generate the output filename."""
 343         try:
 344             template_dict = dict(info_dict)
 345
 346             template_dict['epoch'] = int(time.time())
 347             template_dict['autonumber'] = u'%05d' % self._num_downloads
 348
 349             sanitize = lambda k,v: sanitize_filename(
 350                 u'NA' if v is None else compat_str(v),
 351                 restricted=self.params.get('restrictfilenames'),
 352                 is_id=(k==u'id'))
 353             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 354
 355             filename = self.params['outtmpl'] % template_dict
 356             return filename
 357         except (ValueError, KeyError) as err:
 358             self.trouble(u'ERROR: invalid system charset or erroneous output template')
 359             return None
 360
 361     def _match_entry(self, info_dict):
 362         """ Returns None iff the file should be downloaded """
 363
 364         title = info_dict['title']
 365         matchtitle = self.params.get('matchtitle', False)
 366         if matchtitle:
 367             matchtitle = matchtitle.decode('utf8')
 368             if not re.search(matchtitle, title, re.IGNORECASE):
 369                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 370         rejecttitle = self.params.get('rejecttitle', False)
 371         if rejecttitle:
 372             rejecttitle = rejecttitle.decode('utf8')
 373             if re.search(rejecttitle, title, re.IGNORECASE):
 374                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 375         return None
 376
 377     def process_info(self, info_dict):
 378         """Process a single dictionary returned by an InfoExtractor."""
 379
 380         # Keep for backwards compatibility
 381         info_dict['stitle'] = info_dict['title']
 382
 383         if not 'format' in info_dict:
 384             info_dict['format'] = info_dict['ext']
 385
 386         reason = self._match_entry(info_dict)
 387         if reason is not None:
 388             self.to_screen(u'[download] ' + reason)
 389             return
 390
 391         max_downloads = self.params.get('max_downloads')
 392         if max_downloads is not None:
 393             if self._num_downloads > int(max_downloads):
 394                 raise MaxDownloadsReached()
 395
 396         filename = self.prepare_filename(info_dict)
 397
 398         # Forced printings
 399         if self.params.get('forcetitle', False):
 400             compat_print(info_dict['title'])
 401         if self.params.get('forceurl', False):
 402             compat_print(info_dict['url'])
 403         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 404             compat_print(info_dict['thumbnail'])
 405         if self.params.get('forcedescription', False) and 'description' in info_dict:
 406             compat_print(info_dict['description'])
 407         if self.params.get('forcefilename', False) and filename is not None:
 408             compat_print(filename)
 409         if self.params.get('forceformat', False):
 410             compat_print(info_dict['format'])
 411
 412         # Do nothing else if in simulate mode
 413         if self.params.get('simulate', False):
 414             return
 415
 416         if filename is None:
 417             return
 418
 419         try:
 420             dn = os.path.dirname(encodeFilename(filename))
 421             if dn != '' and not os.path.exists(dn): # dn is already encoded
 422                 os.makedirs(dn)
 423         except (OSError, IOError) as err:
 424             self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
 425             return
 426
 427         if self.params.get('writedescription', False):
 428             try:
 429                 descfn = filename + u'.description'
 430                 self.report_writedescription(descfn)
 431                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 432                     descfile.write(info_dict['description'])
 433             except (OSError, IOError):
 434                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
 435                 return
 436
 437         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 438             # subtitles download errors are already managed as troubles in relevant IE
 439             # that way it will silently go on when used with unsupporting IE
 440             try:
 441                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
 442                 self.report_writesubtitles(srtfn)
 443                 with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
 444                     srtfile.write(info_dict['subtitles'])
 445             except (OSError, IOError):
 446                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 447                 return
 448
 449         if self.params.get('writeinfojson', False):
 450             infofn = filename + u'.info.json'
 451             self.report_writeinfojson(infofn)
 452             try:
 453                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
 454                 write_json_file(json_info_dict, encodeFilename(infofn))
 455             except (OSError, IOError):
 456                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 457                 return
 458
 459         if not self.params.get('skip_download', False):
 460             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 461                 success = True
 462             else:
 463                 try:
 464                     success = self._do_download(filename, info_dict)
 465                 except (OSError, IOError) as err:
 466                     raise UnavailableVideoError()
 467                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 468                     self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 469                     return
 470                 except (ContentTooShortError, ) as err:
 471                     self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 472                     return
 473
 474             if success:
 475                 try:
 476                     self.post_process(filename, info_dict)
 477                 except (PostProcessingError) as err:
 478                     self.trouble(u'ERROR: postprocessing: %s' % str(err))
 479                     return
 480
    def download(self, url_list):
        """Download a given list of URLs.

        Each URL is given to the first registered InfoExtractor whose
        suitable() accepts it; every video it extracts is then passed to
        process_info(). Problems are reported through trouble().

        Raises SameFileError when more than one URL, or more than one video
        from a single URL, would be written to a template without fields.
        Returns the accumulated download return code.
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                # Go to next InfoExtractor if not suitable
                if not ie.suitable(url):
                    continue

                # Warn if the extractor reports that it is not working
                if not ie.working():
                    self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
                                   u'and will probably not work. If you want to go on, use the -i option.')

                # Suitable InfoExtractor found
                suitable_found = True

                # Extract information from URL and process it
                try:
                    videos = ie.extract(url)
                except ExtractorError as de: # An error we somewhat expected
                    self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
                    break
                except Exception as e:
                    # Unexpected errors are only reported when the user asked
                    # to ignore errors; otherwise they propagate unchanged
                    if self.params.get('ignoreerrors', False):
                        self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
                        break
                    else:
                        raise

                # A falsy result from extract() is treated as "no videos"
                if len(videos or []) > 1 and self.fixed_template():
                    raise SameFileError(self.params['outtmpl'])

                for video in videos or []:
                    video['extractor'] = ie.IE_NAME
                    try:
                        # The counter is advanced before processing, so it
                        # also counts videos that end up skipped or failing
                        self.increment_downloads()
                        self.process_info(video)
                    except UnavailableVideoError:
                        self.trouble(u'\nERROR: unable to download video')

                # Suitable InfoExtractor had been found; go to next URL
                break

            if not suitable_found:
                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

        return self._download_retcode
 532
 533     def post_process(self, filename, ie_info):
 534         """Run all the postprocessors on the given file."""
 535         info = dict(ie_info)
 536         info['filepath'] = filename
 537         keep_video = None
 538         for pp in self._pps:
 539             try:
 540                 keep_video_wish,new_info = pp.run(info)
 541                 if keep_video_wish is not None:
 542                     if keep_video_wish:
 543                         keep_video = keep_video_wish
 544                     elif keep_video is None:
 545                         # No clear decision yet, let IE decide
 546                         keep_video = keep_video_wish
 547             except PostProcessingError as e:
 548                 self.to_stderr(u'ERROR: ' + e.msg)
 549         if keep_video is False and not self.params.get('keepvideo', False):
 550             try:
 551                 self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
 552                 os.remove(encodeFilename(filename))
 553             except (IOError, OSError):
 554                 self.to_stderr(u'WARNING: Unable to remove downloaded video file')
 555
 556     def _download_with_rtmpdump(self, filename, url, player_url, page_url):
 557         self.report_destination(filename)
 558         tmpfilename = self.temp_name(filename)
 559
 560         # Check for rtmpdump first
 561         try:
 562             subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 563         except (OSError, IOError):
 564             self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 565             return False
 566
 567         # Download using rtmpdump. rtmpdump returns exit code 2 when
 568         # the connection was interrumpted and resuming appears to be
 569         # possible. This is part of rtmpdump's normal usage, AFAIK.
 570         basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
 571         if player_url is not None:
 572             basic_args += ['-W', player_url]
 573         if page_url is not None:
 574             basic_args += ['--pageUrl', page_url]
 575         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 576         if self.params.get('verbose', False):
 577             try:
 578                 import pipes
 579                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 580             except ImportError:
 581                 shell_quote = repr
 582             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 583         retval = subprocess.call(args)
 584         while retval == 2 or retval == 1:
 585             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 586             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 587             time.sleep(5.0) # This seems to be needed
 588             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 589             cursize = os.path.getsize(encodeFilename(tmpfilename))
 590             if prevsize == cursize and retval == 1:
 591                 break
 592              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 593             if prevsize == cursize and retval == 2 and cursize > 1024:
 594                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 595                 retval = 0
 596                 break
 597         if retval == 0:
 598             fsize = os.path.getsize(encodeFilename(tmpfilename))
 599             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
 600             self.try_rename(tmpfilename, filename)
 601             self._hook_progress({
 602                 'downloaded_bytes': fsize,
 603                 'total_bytes': fsize,
 604                 'filename': filename,
 605                 'status': 'finished',
 606             })
 607             return True
 608         else:
 609             self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 610             return False
 611
 612     def _do_download(self, filename, info_dict):
 613         url = info_dict['url']
 614
 615         # Check file already present
 616         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 617             self.report_file_already_downloaded(filename)
 618             self._hook_progress({
 619                 'filename': filename,
 620                 'status': 'finished',
 621             })
 622             return True
 623
 624         # Attempt to download using rtmpdump
 625         if url.startswith('rtmp'):
 626             return self._download_with_rtmpdump(filename, url,
 627                                                 info_dict.get('player_url', None),
 628                                                 info_dict.get('page_url', None))
 629
 630         tmpfilename = self.temp_name(filename)
 631         stream = None
 632
 633         # Do not include the Accept-Encoding header
 634         headers = {'Youtubedl-no-compression': 'True'}
 635         if 'user_agent' in info_dict:
 636             headers['Youtubedl-user-agent'] = info_dict['user_agent']
 637         basic_request = compat_urllib_request.Request(url, None, headers)
 638         request = compat_urllib_request.Request(url, None, headers)
 639
 640         if self.params.get('test', False):
 641             request.add_header('Range','bytes=0-10240')
 642
 643         # Establish possible resume length
 644         if os.path.isfile(encodeFilename(tmpfilename)):
 645             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 646         else:
 647             resume_len = 0
 648
 649         open_mode = 'wb'
 650         if resume_len != 0:
 651             if self.params.get('continuedl', False):
 652                 self.report_resuming_byte(resume_len)
 653                 request.add_header('Range','bytes=%d-' % resume_len)
 654                 open_mode = 'ab'
 655             else:
 656                 resume_len = 0
 657
 658         count = 0
 659         retries = self.params.get('retries', 0)
 660         while count <= retries:
 661             # Establish connection
 662             try:
 663                 if count == 0 and 'urlhandle' in info_dict:
 664                     data = info_dict['urlhandle']
 665                 data = compat_urllib_request.urlopen(request)
 666                 break
 667             except (compat_urllib_error.HTTPError, ) as err:
 668                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 669                     # Unexpected HTTP error
 670                     raise
 671                 elif err.code == 416:
 672                     # Unable to resume (requested range not satisfiable)
 673                     try:
 674                         # Open the connection again without the range header
 675                         data = compat_urllib_request.urlopen(basic_request)
 676                         content_length = data.info()['Content-Length']
 677                     except (compat_urllib_error.HTTPError, ) as err:
 678                         if err.code < 500 or err.code >= 600:
 679                             raise
 680                     else:
 681                         # Examine the reported length
 682                         if (content_length is not None and
 683                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 684                             # The file had already been fully downloaded.
 685                             # Explanation to the above condition: in issue #175 it was revealed that
 686                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 687                             # changing the file size slightly and causing problems for some users. So
 688                             # I decided to implement a suggested change and consider the file
 689                             # completely downloaded if the file size differs less than 100 bytes from
 690                             # the one in the hard drive.
 691                             self.report_file_already_downloaded(filename)
 692                             self.try_rename(tmpfilename, filename)
 693                             self._hook_progress({
 694                                 'filename': filename,
 695                                 'status': 'finished',
 696                             })
 697                             return True
 698                         else:
 699                             # The length does not match, we start the download over
 700                             self.report_unable_to_resume()
 701                             open_mode = 'wb'
 702                             break
 703             # Retry
 704             count += 1
 705             if count <= retries:
 706                 self.report_retry(count, retries)
 707
 708         if count > retries:
 709             self.trouble(u'ERROR: giving up after %s retries' % retries)
 710             return False
 711
 712         data_len = data.info().get('Content-length', None)
 713         if data_len is not None:
 714             data_len = int(data_len) + resume_len
 715         data_len_str = self.format_bytes(data_len)
 716         byte_counter = 0 + resume_len
 717         block_size = self.params.get('buffersize', 1024)
 718         start = time.time()
 719         while True:
 720             # Download and write
 721             before = time.time()
 722             data_block = data.read(block_size)
 723             after = time.time()
 724             if len(data_block) == 0:
 725                 break
 726             byte_counter += len(data_block)
 727
 728             # Open file just in time
 729             if stream is None:
 730                 try:
 731                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 732                     assert stream is not None
 733                     filename = self.undo_temp_name(tmpfilename)
 734                     self.report_destination(filename)
 735                 except (OSError, IOError) as err:
 736                     self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 737                     return False
 738             try:
 739                 stream.write(data_block)
 740             except (IOError, OSError) as err:
 741                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 742                 return False
 743             if not self.params.get('noresizebuffer', False):
 744                 block_size = self.best_block_size(after - before, len(data_block))
 745
 746             # Progress message
 747             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 748             if data_len is None:
 749                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 750             else:
 751                 percent_str = self.calc_percent(byte_counter, data_len)
 752                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 753                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 754
 755             self._hook_progress({
 756                 'downloaded_bytes': byte_counter,
 757                 'total_bytes': data_len,
 758                 'tmpfilename': tmpfilename,
 759                 'filename': filename,
 760                 'status': 'downloading',
 761             })
 762
 763             # Apply rate limit
 764             self.slow_down(start, byte_counter - resume_len)
 765
 766         if stream is None:
 767             self.trouble(u'\nERROR: Did not get any data blocks')
 768             return False
 769         stream.close()
 770         self.report_finish()
 771         if data_len is not None and byte_counter != data_len:
 772             raise ContentTooShortError(byte_counter, int(data_len))
 773         self.try_rename(tmpfilename, filename)
 774
 775         # Update file modification time
 776         if self.params.get('updatetime', True):
 777             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 778
 779         self._hook_progress({
 780             'downloaded_bytes': byte_counter,
 781             'total_bytes': byte_counter,
 782             'filename': filename,
 783             'status': 'finished',
 784         })
 785
 786         return True
 787
 788     def _hook_progress(self, status):
 789         for ph in self._progress_hooks:
 790             ph(status)
 791
 792     def add_progress_hook(self, ph):
 793         """ ph gets called on download progress, with a dictionary with the entries
 794         * filename: The final filename
 795         * status: One of "downloading" and "finished"
 796
 797         It can also have some of the following entries:
 798
 799         * downloaded_bytes: Bytes on disks
 800         * total_bytes: Total bytes, None if unknown
 801         * tmpfilename: The filename we're currently writing to
 802
 803         Hooks are guaranteed to be called at least once (with status "finished")
 804         if the download is successful.
 805         """
 806         self._progress_hooks.append(ph)