_ Git - youtube-dl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import math
   7 import io
   8 import os
   9 import re
  10 import socket
  11 import subprocess
  12 import sys
  13 import time
  14 import traceback
  15
  16 if os.name == 'nt':
  17     import ctypes
  18
  19 from .utils import *
  20
  21
  22 class FileDownloader(object):
  23     """File Downloader class.
  24
   25     File downloader objects are the ones responsible for downloading the
  26     actual video file and writing it to disk if the user has requested
  27     it, among some other tasks. In most cases there should be one per
  28     program. As, given a video URL, the downloader doesn't know how to
  29     extract all the needed information, task that InfoExtractors do, it
  30     has to pass the URL to one of them.
  31
  32     For this, file downloader objects have a method that allows
  33     InfoExtractors to be registered in a given order. When it is passed
   34     a URL, the file downloader hands it to the first InfoExtractor it
  35     finds that reports being able to handle it. The InfoExtractor extracts
  36     all the information about the video or videos the URL refers to, and
  37     asks the FileDownloader to process the video information, possibly
  38     downloading the video.
  39
  40     File downloaders accept a lot of parameters. In order not to saturate
  41     the object constructor with arguments, it receives a dictionary of
  42     options instead. These options are available through the params
  43     attribute for the InfoExtractors to use. The FileDownloader also
  44     registers itself as the downloader in charge for the InfoExtractors
  45     that are added to it, so this is a "mutual registration".
  46
  47     Available options:
  48
  49     username:          Username for authentication purposes.
  50     password:          Password for authentication purposes.
  51     usenetrc:          Use netrc for authentication instead.
  52     quiet:             Do not print messages to stdout.
  53     forceurl:          Force printing final URL.
  54     forcetitle:        Force printing title.
  55     forcethumbnail:    Force printing thumbnail URL.
  56     forcedescription:  Force printing description.
  57     forcefilename:     Force printing final filename.
  58     simulate:          Do not download the video files.
  59     format:            Video format code.
  60     format_limit:      Highest quality format to try.
  61     outtmpl:           Template for output names.
  62     restrictfilenames: Do not allow "&" and spaces in file names
  63     ignoreerrors:      Do not stop on download errors.
  64     ratelimit:         Download speed limit, in bytes/sec.
  65     nooverwrites:      Prevent overwriting files.
  66     retries:           Number of times to retry for HTTP error 5xx
  67     buffersize:        Size of download buffer in bytes.
  68     noresizebuffer:    Do not automatically resize the download buffer.
  69     continuedl:        Try to continue downloads if possible.
  70     noprogress:        Do not print the progress bar.
  71     playliststart:     Playlist item to start at.
  72     playlistend:       Playlist item to end at.
  73     matchtitle:        Download only matching titles.
  74     rejecttitle:       Reject downloads for matching titles.
  75     logtostderr:       Log messages to stderr instead of stdout.
  76     consoletitle:      Display progress in console window's titlebar.
  77     nopart:            Do not use temporary .part files.
  78     updatetime:        Use the Last-modified header to set output file timestamps.
  79     writedescription:  Write the video description to a .description file
   80     writeinfojson:     Write the video metadata to a .info.json file
  81     writesubtitles:    Write the video subtitles to a .srt file
  82     subtitleslang:     Language of the subtitles to download
  83     test:              Download only first bytes to test the downloader.
  84     keepvideo:         Keep the video file after post-processing
  85     """
  86
  87     params = None
  88     _ies = []
  89     _pps = []
  90     _download_retcode = None
  91     _num_downloads = None
  92     _screen_file = None
  93
  94     def __init__(self, params):
  95         """Create a FileDownloader object with the given options."""
  96         self._ies = []
  97         self._pps = []
  98         self._download_retcode = 0
  99         self._num_downloads = 0
 100         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 101         self.params = params
 102
 103         if '%(stitle)s' in self.params['outtmpl']:
 104             self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 105
 106     @staticmethod
 107     def format_bytes(bytes):
 108         if bytes is None:
 109             return 'N/A'
 110         if type(bytes) is str:
 111             bytes = float(bytes)
 112         if bytes == 0.0:
 113             exponent = 0
 114         else:
 115             exponent = int(math.log(bytes, 1024.0))
 116         suffix = 'bkMGTPEZY'[exponent]
 117         converted = float(bytes) / float(1024 ** exponent)
 118         return '%.2f%s' % (converted, suffix)
 119
 120     @staticmethod
 121     def calc_percent(byte_counter, data_len):
 122         if data_len is None:
 123             return '---.-%'
 124         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 125
 126     @staticmethod
 127     def calc_eta(start, now, total, current):
 128         if total is None:
 129             return '--:--'
 130         dif = now - start
 131         if current == 0 or dif < 0.001: # One millisecond
 132             return '--:--'
 133         rate = float(current) / dif
 134         eta = int((float(total) - float(current)) / rate)
 135         (eta_mins, eta_secs) = divmod(eta, 60)
 136         if eta_mins > 99:
 137             return '--:--'
 138         return '%02d:%02d' % (eta_mins, eta_secs)
 139
 140     @staticmethod
 141     def calc_speed(start, now, bytes):
 142         dif = now - start
 143         if bytes == 0 or dif < 0.001: # One millisecond
 144             return '%10s' % '---b/s'
 145         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 146
 147     @staticmethod
 148     def best_block_size(elapsed_time, bytes):
 149         new_min = max(bytes / 2.0, 1.0)
 150         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 151         if elapsed_time < 0.001:
 152             return int(new_max)
 153         rate = bytes / elapsed_time
 154         if rate > new_max:
 155             return int(new_max)
 156         if rate < new_min:
 157             return int(new_min)
 158         return int(rate)
 159
 160     @staticmethod
 161     def parse_bytes(bytestr):
 162         """Parse a string indicating a byte quantity into an integer."""
 163         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 164         if matchobj is None:
 165             return None
 166         number = float(matchobj.group(1))
 167         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 168         return int(round(number * multiplier))
 169
 170     def add_info_extractor(self, ie):
 171         """Add an InfoExtractor object to the end of the list."""
 172         self._ies.append(ie)
 173         ie.set_downloader(self)
 174
 175     def add_post_processor(self, pp):
 176         """Add a PostProcessor object to the end of the chain."""
 177         self._pps.append(pp)
 178         pp.set_downloader(self)
 179
 180     def to_screen(self, message, skip_eol=False):
 181         """Print message to stdout if not in quiet mode."""
 182         assert type(message) == type(u'')
 183         if not self.params.get('quiet', False):
 184             terminator = [u'\n', u''][skip_eol]
 185             output = message + terminator
 186             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 187                 output = output.encode(preferredencoding(), 'ignore')
 188             self._screen_file.write(output)
 189             self._screen_file.flush()
 190
 191     def to_stderr(self, message):
 192         """Print message to stderr."""
 193         assert type(message) == type(u'')
 194         output = message + u'\n'
 195         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 196             output = output.encode(preferredencoding())
 197         sys.stderr.write(output)
 198
 199     def to_cons_title(self, message):
 200         """Set console/terminal window title to message."""
 201         if not self.params.get('consoletitle', False):
 202             return
 203         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 204             # c_wchar_p() might not be necessary if `message` is
 205             # already of type unicode()
 206             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 207         elif 'TERM' in os.environ:
 208             sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 209
 210     def fixed_template(self):
 211         """Checks if the output template is fixed."""
 212         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 213
 214     def trouble(self, message=None, tb=None):
 215         """Determine action to take when a download problem appears.
 216
 217         Depending on if the downloader has been configured to ignore
 218         download errors or not, this method may throw an exception or
 219         not when errors are found, after printing the message.
 220
 221         tb, if given, is additional traceback information.
 222         """
 223         if message is not None:
 224             self.to_stderr(message)
 225         if self.params.get('verbose'):
 226             if tb is None:
 227                 tb_data = traceback.format_list(traceback.extract_stack())
 228                 tb = u''.join(tb_data)
 229             self.to_stderr(tb)
 230         if not self.params.get('ignoreerrors', False):
 231             raise DownloadError(message)
 232         self._download_retcode = 1
 233
 234     def slow_down(self, start_time, byte_counter):
 235         """Sleep if the download speed is over the rate limit."""
 236         rate_limit = self.params.get('ratelimit', None)
 237         if rate_limit is None or byte_counter == 0:
 238             return
 239         now = time.time()
 240         elapsed = now - start_time
 241         if elapsed <= 0.0:
 242             return
 243         speed = float(byte_counter) / elapsed
 244         if speed > rate_limit:
 245             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 246
 247     def temp_name(self, filename):
 248         """Returns a temporary filename for the given filename."""
 249         if self.params.get('nopart', False) or filename == u'-' or \
 250                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 251             return filename
 252         return filename + u'.part'
 253
 254     def undo_temp_name(self, filename):
 255         if filename.endswith(u'.part'):
 256             return filename[:-len(u'.part')]
 257         return filename
 258
 259     def try_rename(self, old_filename, new_filename):
 260         try:
 261             if old_filename == new_filename:
 262                 return
 263             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 264         except (IOError, OSError) as err:
 265             self.trouble(u'ERROR: unable to rename file')
 266
 267     def try_utime(self, filename, last_modified_hdr):
 268         """Try to set the last-modified time of the given file."""
 269         if last_modified_hdr is None:
 270             return
 271         if not os.path.isfile(encodeFilename(filename)):
 272             return
 273         timestr = last_modified_hdr
 274         if timestr is None:
 275             return
 276         filetime = timeconvert(timestr)
 277         if filetime is None:
 278             return filetime
 279         try:
 280             os.utime(filename, (time.time(), filetime))
 281         except:
 282             pass
 283         return filetime
 284
 285     def report_writedescription(self, descfn):
 286         """ Report that the description file is being written """
 287         self.to_screen(u'[info] Writing video description to: ' + descfn)
 288
 289     def report_writesubtitles(self, srtfn):
 290         """ Report that the subtitles file is being written """
 291         self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
 292
 293     def report_writeinfojson(self, infofn):
 294         """ Report that the metadata file has been written """
 295         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 296
 297     def report_destination(self, filename):
 298         """Report destination filename."""
 299         self.to_screen(u'[download] Destination: ' + filename)
 300
 301     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 302         """Report download progress."""
 303         if self.params.get('noprogress', False):
 304             return
 305         self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 306                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 307         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 308                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 309
 310     def report_resuming_byte(self, resume_len):
 311         """Report attempt to resume at given byte."""
 312         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 313
 314     def report_retry(self, count, retries):
 315         """Report retry in case of HTTP error 5xx"""
 316         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 317
 318     def report_file_already_downloaded(self, file_name):
 319         """Report file has already been fully downloaded."""
 320         try:
 321             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 322         except (UnicodeEncodeError) as err:
 323             self.to_screen(u'[download] The file has already been downloaded')
 324
 325     def report_unable_to_resume(self):
 326         """Report it was impossible to resume download."""
 327         self.to_screen(u'[download] Unable to resume')
 328
 329     def report_finish(self):
 330         """Report download finished."""
 331         if self.params.get('noprogress', False):
 332             self.to_screen(u'[download] Download completed')
 333         else:
 334             self.to_screen(u'')
 335
 336     def increment_downloads(self):
 337         """Increment the ordinal that assigns a number to each file."""
 338         self._num_downloads += 1
 339
 340     def prepare_filename(self, info_dict):
 341         """Generate the output filename."""
 342         try:
 343             template_dict = dict(info_dict)
 344
 345             template_dict['epoch'] = int(time.time())
 346             template_dict['autonumber'] = u'%05d' % self._num_downloads
 347
 348             sanitize = lambda k,v: sanitize_filename(
 349                 u'NA' if v is None else compat_str(v),
 350                 restricted=self.params.get('restrictfilenames'),
 351                 is_id=(k==u'id'))
 352             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 353
 354             filename = self.params['outtmpl'] % template_dict
 355             return filename
 356         except (ValueError, KeyError) as err:
 357             self.trouble(u'ERROR: invalid system charset or erroneous output template')
 358             return None
 359
 360     def _match_entry(self, info_dict):
 361         """ Returns None iff the file should be downloaded """
 362
 363         title = info_dict['title']
 364         matchtitle = self.params.get('matchtitle', False)
 365         if matchtitle:
 366             matchtitle = matchtitle.decode('utf8')
 367             if not re.search(matchtitle, title, re.IGNORECASE):
 368                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 369         rejecttitle = self.params.get('rejecttitle', False)
 370         if rejecttitle:
 371             rejecttitle = rejecttitle.decode('utf8')
 372             if re.search(rejecttitle, title, re.IGNORECASE):
 373                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 374         return None
 375
 376     def process_info(self, info_dict):
 377         """Process a single dictionary returned by an InfoExtractor."""
 378
 379         # Keep for backwards compatibility
 380         info_dict['stitle'] = info_dict['title']
 381
 382         if not 'format' in info_dict:
 383             info_dict['format'] = info_dict['ext']
 384
 385         reason = self._match_entry(info_dict)
 386         if reason is not None:
 387             self.to_screen(u'[download] ' + reason)
 388             return
 389
 390         max_downloads = self.params.get('max_downloads')
 391         if max_downloads is not None:
 392             if self._num_downloads > int(max_downloads):
 393                 raise MaxDownloadsReached()
 394
 395         filename = self.prepare_filename(info_dict)
 396
 397         # Forced printings
 398         if self.params.get('forcetitle', False):
 399             compat_print(info_dict['title'])
 400         if self.params.get('forceurl', False):
 401             compat_print(info_dict['url'])
 402         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 403             compat_print(info_dict['thumbnail'])
 404         if self.params.get('forcedescription', False) and 'description' in info_dict:
 405             compat_print(info_dict['description'])
 406         if self.params.get('forcefilename', False) and filename is not None:
 407             compat_print(filename)
 408         if self.params.get('forceformat', False):
 409             compat_print(info_dict['format'])
 410
 411         # Do nothing else if in simulate mode
 412         if self.params.get('simulate', False):
 413             return
 414
 415         if filename is None:
 416             return
 417
 418         try:
 419             dn = os.path.dirname(encodeFilename(filename))
 420             if dn != '' and not os.path.exists(dn): # dn is already encoded
 421                 os.makedirs(dn)
 422         except (OSError, IOError) as err:
 423             self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
 424             return
 425
 426         if self.params.get('writedescription', False):
 427             try:
 428                 descfn = filename + u'.description'
 429                 self.report_writedescription(descfn)
 430                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 431                     descfile.write(info_dict['description'])
 432             except (OSError, IOError):
 433                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
 434                 return
 435
 436         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 437             # subtitles download errors are already managed as troubles in relevant IE
 438             # that way it will silently go on when used with unsupporting IE
 439             try:
 440                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
 441                 self.report_writesubtitles(srtfn)
 442                 with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
 443                     srtfile.write(info_dict['subtitles'])
 444             except (OSError, IOError):
 445                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 446                 return
 447
 448         if self.params.get('writeinfojson', False):
 449             infofn = filename + u'.info.json'
 450             self.report_writeinfojson(infofn)
 451             try:
 452                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
 453                 write_json_file(json_info_dict, encodeFilename(infofn))
 454             except (OSError, IOError):
 455                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 456                 return
 457
 458         if not self.params.get('skip_download', False):
 459             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 460                 success = True
 461             else:
 462                 try:
 463                     success = self._do_download(filename, info_dict)
 464                 except (OSError, IOError) as err:
 465                     raise UnavailableVideoError()
 466                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 467                     self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 468                     return
 469                 except (ContentTooShortError, ) as err:
 470                     self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 471                     return
 472
 473             if success:
 474                 try:
 475                     self.post_process(filename, info_dict)
 476                 except (PostProcessingError) as err:
 477                     self.trouble(u'ERROR: postprocessing: %s' % str(err))
 478                     return
 479
 480     def download(self, url_list):
 481         """Download a given list of URLs."""
 482         if len(url_list) > 1 and self.fixed_template():
 483             raise SameFileError(self.params['outtmpl'])
 484
 485         for url in url_list:
 486             suitable_found = False
 487             for ie in self._ies:
 488                 # Go to next InfoExtractor if not suitable
 489                 if not ie.suitable(url):
 490                     continue
 491
 492                 # Warn if the _WORKING attribute is False
 493                 if not ie.working():
 494                     self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
 495                                    u'and will probably not work. If you want to go on, use the -i option.')
 496
 497                 # Suitable InfoExtractor found
 498                 suitable_found = True
 499
 500                 # Extract information from URL and process it
 501                 try:
 502                     videos = ie.extract(url)
 503                 except ExtractorError as de: # An error we somewhat expected
 504                     self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
 505                     break
 506                 except Exception as e:
 507                     if self.params.get('ignoreerrors', False):
 508                         self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
 509                         break
 510                     else:
 511                         raise
 512
 513                 if len(videos or []) > 1 and self.fixed_template():
 514                     raise SameFileError(self.params['outtmpl'])
 515
 516                 for video in videos or []:
 517                     video['extractor'] = ie.IE_NAME
 518                     try:
 519                         self.increment_downloads()
 520                         self.process_info(video)
 521                     except UnavailableVideoError:
 522                         self.trouble(u'\nERROR: unable to download video')
 523
 524                 # Suitable InfoExtractor had been found; go to next URL
 525                 break
 526
 527             if not suitable_found:
 528                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 529
 530         return self._download_retcode
 531
 532     def post_process(self, filename, ie_info):
 533         """Run all the postprocessors on the given file."""
 534         info = dict(ie_info)
 535         info['filepath'] = filename
 536         keep_video = None
 537         for pp in self._pps:
 538             try:
 539                 keep_video_wish,new_info = pp.run(info)
 540                 if keep_video_wish is not None:
 541                     if keep_video_wish:
 542                         keep_video = keep_video_wish
 543                     elif keep_video is None:
 544                         # No clear decision yet, let IE decide
 545                         keep_video = keep_video_wish
 546             except PostProcessingError as e:
 547                 self.to_stderr(u'ERROR: ' + e.msg)
 548         if keep_video is False and not self.params.get('keepvideo', False):
 549             try:
 550                 self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
 551                 os.remove(encodeFilename(filename))
 552             except (IOError, OSError):
 553                 self.to_stderr(u'WARNING: Unable to remove downloaded video file')
 554
 555     def _download_with_rtmpdump(self, filename, url, player_url, page_url):
 556         self.report_destination(filename)
 557         tmpfilename = self.temp_name(filename)
 558
 559         # Check for rtmpdump first
 560         try:
 561             subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 562         except (OSError, IOError):
 563             self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 564             return False
 565
 566         # Download using rtmpdump. rtmpdump returns exit code 2 when
 567         # the connection was interrumpted and resuming appears to be
 568         # possible. This is part of rtmpdump's normal usage, AFAIK.
 569         basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
 570         if player_url is not None:
 571             basic_args += ['-W', player_url]
 572         if page_url is not None:
 573             basic_args += ['--pageUrl', page_url]
 574         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 575         if self.params.get('verbose', False):
 576             try:
 577                 import pipes
 578                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 579             except ImportError:
 580                 shell_quote = repr
 581             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 582         retval = subprocess.call(args)
 583         while retval == 2 or retval == 1:
 584             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 585             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 586             time.sleep(5.0) # This seems to be needed
 587             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 588             cursize = os.path.getsize(encodeFilename(tmpfilename))
 589             if prevsize == cursize and retval == 1:
 590                 break
 591              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 592             if prevsize == cursize and retval == 2 and cursize > 1024:
 593                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 594                 retval = 0
 595                 break
 596         if retval == 0:
 597             self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
 598             self.try_rename(tmpfilename, filename)
 599             return True
 600         else:
 601             self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 602             return False
 603
 604     def _do_download(self, filename, info_dict):
 605         url = info_dict['url']
 606
 607         # Check file already present
 608         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 609             self.report_file_already_downloaded(filename)
 610             return True
 611
 612         # Attempt to download using rtmpdump
 613         if url.startswith('rtmp'):
 614             return self._download_with_rtmpdump(filename, url,
 615                                                 info_dict.get('player_url', None),
 616                                                 info_dict.get('page_url', None))
 617
 618         tmpfilename = self.temp_name(filename)
 619         stream = None
 620
 621         # Do not include the Accept-Encoding header
 622         headers = {'Youtubedl-no-compression': 'True'}
 623         basic_request = compat_urllib_request.Request(url, None, headers)
 624         request = compat_urllib_request.Request(url, None, headers)
 625
 626         if self.params.get('test', False):
 627             request.add_header('Range','bytes=0-10240')
 628
 629         # Establish possible resume length
 630         if os.path.isfile(encodeFilename(tmpfilename)):
 631             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 632         else:
 633             resume_len = 0
 634
 635         open_mode = 'wb'
 636         if resume_len != 0:
 637             if self.params.get('continuedl', False):
 638                 self.report_resuming_byte(resume_len)
 639                 request.add_header('Range','bytes=%d-' % resume_len)
 640                 open_mode = 'ab'
 641             else:
 642                 resume_len = 0
 643
 644         count = 0
 645         retries = self.params.get('retries', 0)
 646         while count <= retries:
 647             # Establish connection
 648             try:
 649                 if count == 0 and 'urlhandle' in info_dict:
 650                     data = info_dict['urlhandle']
 651                 data = compat_urllib_request.urlopen(request)
 652                 break
 653             except (compat_urllib_error.HTTPError, ) as err:
 654                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 655                     # Unexpected HTTP error
 656                     raise
 657                 elif err.code == 416:
 658                     # Unable to resume (requested range not satisfiable)
 659                     try:
 660                         # Open the connection again without the range header
 661                         data = compat_urllib_request.urlopen(basic_request)
 662                         content_length = data.info()['Content-Length']
 663                     except (compat_urllib_error.HTTPError, ) as err:
 664                         if err.code < 500 or err.code >= 600:
 665                             raise
 666                     else:
 667                         # Examine the reported length
 668                         if (content_length is not None and
 669                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 670                             # The file had already been fully downloaded.
 671                             # Explanation to the above condition: in issue #175 it was revealed that
 672                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 673                             # changing the file size slightly and causing problems for some users. So
 674                             # I decided to implement a suggested change and consider the file
 675                             # completely downloaded if the file size differs less than 100 bytes from
 676                             # the one in the hard drive.
 677                             self.report_file_already_downloaded(filename)
 678                             self.try_rename(tmpfilename, filename)
 679                             return True
 680                         else:
 681                             # The length does not match, we start the download over
 682                             self.report_unable_to_resume()
 683                             open_mode = 'wb'
 684                             break
 685             # Retry
 686             count += 1
 687             if count <= retries:
 688                 self.report_retry(count, retries)
 689
 690         if count > retries:
 691             self.trouble(u'ERROR: giving up after %s retries' % retries)
 692             return False
 693
 694         data_len = data.info().get('Content-length', None)
 695         if data_len is not None:
 696             data_len = int(data_len) + resume_len
 697         data_len_str = self.format_bytes(data_len)
 698         byte_counter = 0 + resume_len
 699         block_size = self.params.get('buffersize', 1024)
 700         start = time.time()
 701         while True:
 702             # Download and write
 703             before = time.time()
 704             data_block = data.read(block_size)
 705             after = time.time()
 706             if len(data_block) == 0:
 707                 break
 708             byte_counter += len(data_block)
 709
 710             # Open file just in time
 711             if stream is None:
 712                 try:
 713                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 714                     assert stream is not None
 715                     filename = self.undo_temp_name(tmpfilename)
 716                     self.report_destination(filename)
 717                 except (OSError, IOError) as err:
 718                     self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 719                     return False
 720             try:
 721                 stream.write(data_block)
 722             except (IOError, OSError) as err:
 723                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 724                 return False
 725             if not self.params.get('noresizebuffer', False):
 726                 block_size = self.best_block_size(after - before, len(data_block))
 727
 728             # Progress message
 729             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 730             if data_len is None:
 731                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 732             else:
 733                 percent_str = self.calc_percent(byte_counter, data_len)
 734                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 735                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 736
 737             # Apply rate limit
 738             self.slow_down(start, byte_counter - resume_len)
 739
 740         if stream is None:
 741             self.trouble(u'\nERROR: Did not get any data blocks')
 742             return False
 743         stream.close()
 744         self.report_finish()
 745         if data_len is not None and byte_counter != data_len:
 746             raise ContentTooShortError(byte_counter, int(data_len))
 747         self.try_rename(tmpfilename, filename)
 748
 749         # Update file modification time
 750         if self.params.get('updatetime', True):
 751             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 752
 753         return True