git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import httplib
   5 import math
   6 import os
   7 import re
   8 import socket
   9 import subprocess
  10 import sys
  11 import time
  12 import urllib2
  13
  14 if os.name == 'nt':
  15         import ctypes
  16
  17 from utils import *
  18
  19
  20 class FileDownloader(object):
  21         """File Downloader class.
  22
        File downloader objects are the ones responsible for downloading the
        actual video file and writing it to disk if the user has requested
        it, among some other tasks. In most cases there should be one per
        program. Given a video URL, the downloader doesn't know how to
        extract all the needed information (that is the task of the
        InfoExtractors), so it has to pass the URL to one of them.
  29
        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader hands it to the first InfoExtractor it
        finds that reports being able to handle it. The InfoExtractor extracts
        all the information about the video or videos the URL refers to, and
        asks the FileDownloader to process the video information, possibly
        downloading the video.
  37
  38         File downloaders accept a lot of parameters. In order not to saturate
  39         the object constructor with arguments, it receives a dictionary of
  40         options instead. These options are available through the params
  41         attribute for the InfoExtractors to use. The FileDownloader also
  42         registers itself as the downloader in charge for the InfoExtractors
  43         that are added to it, so this is a "mutual registration".
  44
  45         Available options:
  46
  47         username:          Username for authentication purposes.
  48         password:          Password for authentication purposes.
  49         usenetrc:          Use netrc for authentication instead.
  50         quiet:             Do not print messages to stdout.
  51         forceurl:          Force printing final URL.
  52         forcetitle:        Force printing title.
  53         forcethumbnail:    Force printing thumbnail URL.
  54         forcedescription:  Force printing description.
  55         forcefilename:     Force printing final filename.
  56         simulate:          Do not download the video files.
  57         format:            Video format code.
  58         format_limit:      Highest quality format to try.
  59         outtmpl:           Template for output names.
  60         restrictfilenames: Do not allow "&" and spaces in file names
  61         ignoreerrors:      Do not stop on download errors.
  62         ratelimit:         Download speed limit, in bytes/sec.
  63         nooverwrites:      Prevent overwriting files.
  64         retries:           Number of times to retry for HTTP error 5xx
  65         continuedl:        Try to continue downloads if possible.
  66         noprogress:        Do not print the progress bar.
  67         playliststart:     Playlist item to start at.
  68         playlistend:       Playlist item to end at.
  69         matchtitle:        Download only matching titles.
  70         rejecttitle:       Reject downloads for matching titles.
  71         logtostderr:       Log messages to stderr instead of stdout.
  72         consoletitle:      Display progress in console window's titlebar.
  73         nopart:            Do not use temporary .part files.
  74         updatetime:        Use the Last-modified header to set output file timestamps.
  75         writedescription:  Write the video description to a .description file
        writeinfojson:     Write the video metadata to a .info.json file
  77         writesubtitles:    Write the video subtitles to a .srt file
  78         subtitleslang:     Language of the subtitles to download
  79         """
  80
        # Class-level defaults; __init__ rebinds every one of these on the instance.
        params = None
        # Registered InfoExtractors (filled by add_info_extractor).
        _ies = []
        # Registered PostProcessors (filled by add_post_processor).
        _pps = []
        # Set to 0 in __init__ and to 1 by trouble() when errors are ignored.
        _download_retcode = None
        # Counter bumped by increment_downloads().
        _num_downloads = None
        # Stream that to_screen() writes to (stdout or stderr).
        _screen_file = None
  87
  88         def __init__(self, params):
  89                 """Create a FileDownloader object with the given options."""
  90                 self._ies = []
  91                 self._pps = []
  92                 self._download_retcode = 0
  93                 self._num_downloads = 0
  94                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
  95                 self.params = params
  96
  97         @staticmethod
  98         def format_bytes(bytes):
  99                 if bytes is None:
 100                         return 'N/A'
 101                 if type(bytes) is str:
 102                         bytes = float(bytes)
 103                 if bytes == 0.0:
 104                         exponent = 0
 105                 else:
 106                         exponent = long(math.log(bytes, 1024.0))
 107                 suffix = 'bkMGTPEZY'[exponent]
 108                 converted = float(bytes) / float(1024 ** exponent)
 109                 return '%.2f%s' % (converted, suffix)
 110
 111         @staticmethod
 112         def calc_percent(byte_counter, data_len):
 113                 if data_len is None:
 114                         return '---.-%'
 115                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 116
 117         @staticmethod
 118         def calc_eta(start, now, total, current):
 119                 if total is None:
 120                         return '--:--'
 121                 dif = now - start
 122                 if current == 0 or dif < 0.001: # One millisecond
 123                         return '--:--'
 124                 rate = float(current) / dif
 125                 eta = long((float(total) - float(current)) / rate)
 126                 (eta_mins, eta_secs) = divmod(eta, 60)
 127                 if eta_mins > 99:
 128                         return '--:--'
 129                 return '%02d:%02d' % (eta_mins, eta_secs)
 130
 131         @staticmethod
 132         def calc_speed(start, now, bytes):
 133                 dif = now - start
 134                 if bytes == 0 or dif < 0.001: # One millisecond
 135                         return '%10s' % '---b/s'
 136                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 137
 138         @staticmethod
 139         def best_block_size(elapsed_time, bytes):
 140                 new_min = max(bytes / 2.0, 1.0)
 141                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 142                 if elapsed_time < 0.001:
 143                         return int(new_max)
 144                 rate = bytes / elapsed_time
 145                 if rate > new_max:
 146                         return int(new_max)
 147                 if rate < new_min:
 148                         return int(new_min)
 149                 return int(rate)
 150
 151         @staticmethod
 152         def parse_bytes(bytestr):
 153                 """Parse a string indicating a byte quantity into an integer."""
 154                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 155                 if matchobj is None:
 156                         return None
 157                 number = float(matchobj.group(1))
 158                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 159                 return int(round(number * multiplier))
 160
 161         def add_info_extractor(self, ie):
 162                 """Add an InfoExtractor object to the end of the list."""
 163                 self._ies.append(ie)
 164                 ie.set_downloader(self)
 165
 166         def add_post_processor(self, pp):
 167                 """Add a PostProcessor object to the end of the chain."""
 168                 self._pps.append(pp)
 169                 pp.set_downloader(self)
 170
 171         def to_screen(self, message, skip_eol=False):
 172                 """Print message to stdout if not in quiet mode."""
 173                 assert type(message) == type(u'')
 174                 if not self.params.get('quiet', False):
 175                         terminator = [u'\n', u''][skip_eol]
 176                         output = message + terminator
 177                         if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 178                                 output = output.encode(preferredencoding(), 'ignore')
 179                         self._screen_file.write(output)
 180                         self._screen_file.flush()
 181
 182         def to_stderr(self, message):
 183                 """Print message to stderr."""
 184                 assert type(message) == type(u'')
 185                 sys.stderr.write((message + u'\n').encode(preferredencoding()))
 186
 187         def to_cons_title(self, message):
 188                 """Set console/terminal window title to message."""
 189                 if not self.params.get('consoletitle', False):
 190                         return
 191                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 192                         # c_wchar_p() might not be necessary if `message` is
 193                         # already of type unicode()
 194                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 195                 elif 'TERM' in os.environ:
 196                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 197
 198         def fixed_template(self):
 199                 """Checks if the output template is fixed."""
 200                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 201
 202         def trouble(self, message=None):
 203                 """Determine action to take when a download problem appears.
 204
 205                 Depending on if the downloader has been configured to ignore
 206                 download errors or not, this method may throw an exception or
 207                 not when errors are found, after printing the message.
 208                 """
 209                 if message is not None:
 210                         self.to_stderr(message)
 211                 if not self.params.get('ignoreerrors', False):
 212                         raise DownloadError(message)
 213                 self._download_retcode = 1
 214
 215         def slow_down(self, start_time, byte_counter):
 216                 """Sleep if the download speed is over the rate limit."""
 217                 rate_limit = self.params.get('ratelimit', None)
 218                 if rate_limit is None or byte_counter == 0:
 219                         return
 220                 now = time.time()
 221                 elapsed = now - start_time
 222                 if elapsed <= 0.0:
 223                         return
 224                 speed = float(byte_counter) / elapsed
 225                 if speed > rate_limit:
 226                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 227
 228         def temp_name(self, filename):
 229                 """Returns a temporary filename for the given filename."""
 230                 if self.params.get('nopart', False) or filename == u'-' or \
 231                                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 232                         return filename
 233                 return filename + u'.part'
 234
 235         def undo_temp_name(self, filename):
 236                 if filename.endswith(u'.part'):
 237                         return filename[:-len(u'.part')]
 238                 return filename
 239
 240         def try_rename(self, old_filename, new_filename):
 241                 try:
 242                         if old_filename == new_filename:
 243                                 return
 244                         os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 245                 except (IOError, OSError), err:
 246                         self.trouble(u'ERROR: unable to rename file')
 247
 248         def try_utime(self, filename, last_modified_hdr):
 249                 """Try to set the last-modified time of the given file."""
 250                 if last_modified_hdr is None:
 251                         return
 252                 if not os.path.isfile(encodeFilename(filename)):
 253                         return
 254                 timestr = last_modified_hdr
 255                 if timestr is None:
 256                         return
 257                 filetime = timeconvert(timestr)
 258                 if filetime is None:
 259                         return filetime
 260                 try:
 261                         os.utime(filename, (time.time(), filetime))
 262                 except:
 263                         pass
 264                 return filetime
 265
 266         def report_writedescription(self, descfn):
 267                 """ Report that the description file is being written """
 268                 self.to_screen(u'[info] Writing video description to: ' + descfn)
 269
 270         def report_writesubtitles(self, srtfn):
 271                 """ Report that the subtitles file is being written """
 272                 self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
 273
 274         def report_writeinfojson(self, infofn):
 275                 """ Report that the metadata file has been written """
 276                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 277
 278         def report_destination(self, filename):
 279                 """Report destination filename."""
 280                 self.to_screen(u'[download] Destination: ' + filename)
 281
 282         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 283                 """Report download progress."""
 284                 if self.params.get('noprogress', False):
 285                         return
 286                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 287                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 288                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 289                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 290
 291         def report_resuming_byte(self, resume_len):
 292                 """Report attempt to resume at given byte."""
 293                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 294
 295         def report_retry(self, count, retries):
 296                 """Report retry in case of HTTP error 5xx"""
 297                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 298
 299         def report_file_already_downloaded(self, file_name):
 300                 """Report file has already been fully downloaded."""
 301                 try:
 302                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 303                 except (UnicodeEncodeError), err:
 304                         self.to_screen(u'[download] The file has already been downloaded')
 305
 306         def report_unable_to_resume(self):
 307                 """Report it was impossible to resume download."""
 308                 self.to_screen(u'[download] Unable to resume')
 309
 310         def report_finish(self):
 311                 """Report download finished."""
 312                 if self.params.get('noprogress', False):
 313                         self.to_screen(u'[download] Download completed')
 314                 else:
 315                         self.to_screen(u'')
 316
 317         def increment_downloads(self):
 318                 """Increment the ordinal that assigns a number to each file."""
 319                 self._num_downloads += 1
 320
 321         def prepare_filename(self, info_dict):
 322                 """Generate the output filename."""
 323                 try:
 324                         template_dict = dict(info_dict)
 325                         template_dict['epoch'] = unicode(long(time.time()))
 326                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 327                         template_dict['title'] = template_dict['stitle'] # Keep both for backwards compatibility
 328                         filename = self.params['outtmpl'] % template_dict
 329                         return filename
 330                 except (ValueError, KeyError), err:
 331                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
 332                         return None
 333
 334         def _match_entry(self, info_dict):
 335                 """ Returns None iff the file should be downloaded """
 336
 337                 title = info_dict['title']
 338                 matchtitle = self.params.get('matchtitle', False)
 339                 if matchtitle:
 340                         matchtitle = matchtitle.decode('utf8')
 341                         if not re.search(matchtitle, title, re.IGNORECASE):
 342                                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 343                 rejecttitle = self.params.get('rejecttitle', False)
 344                 if rejecttitle:
 345                         rejecttitle = rejecttitle.decode('utf8')
 346                         if re.search(rejecttitle, title, re.IGNORECASE):
 347                                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 348                 return None
 349
 350         def process_info(self, info_dict):
 351                 """Process a single dictionary returned by an InfoExtractor."""
 352
 353                 info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
 354
 355                 reason = self._match_entry(info_dict)
 356                 if reason is not None:
 357                         self.to_screen(u'[download] ' + reason)
 358                         return
 359
 360                 max_downloads = self.params.get('max_downloads')
 361                 if max_downloads is not None:
 362                         if self._num_downloads > int(max_downloads):
 363                                 raise MaxDownloadsReached()
 364
 365                 filename = self.prepare_filename(info_dict)
 366
 367                 # Forced printings
 368                 if self.params.get('forcetitle', False):
 369                         print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
 370                 if self.params.get('forceurl', False):
 371                         print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
 372                 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 373                         print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
 374                 if self.params.get('forcedescription', False) and 'description' in info_dict:
 375                         print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
 376                 if self.params.get('forcefilename', False) and filename is not None:
 377                         print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
 378                 if self.params.get('forceformat', False):
 379                         print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
 380
 381                 # Do nothing else if in simulate mode
 382                 if self.params.get('simulate', False):
 383                         return
 384
 385                 if filename is None:
 386                         return
 387
 388                 try:
 389                         dn = os.path.dirname(encodeFilename(filename))
 390                         if dn != '' and not os.path.exists(dn): # dn is already encoded
 391                                 os.makedirs(dn)
 392                 except (OSError, IOError), err:
 393                         self.trouble(u'ERROR: unable to create directory ' + unicode(err))
 394                         return
 395
 396                 if self.params.get('writedescription', False):
 397                         try:
 398                                 descfn = filename + u'.description'
 399                                 self.report_writedescription(descfn)
 400                                 descfile = open(encodeFilename(descfn), 'wb')
 401                                 try:
 402                                         descfile.write(info_dict['description'].encode('utf-8'))
 403                                 finally:
 404                                         descfile.close()
 405                         except (OSError, IOError):
 406                                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
 407                                 return
 408
 409                 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 410                         # subtitles download errors are already managed as troubles in relevant IE
 411                         # that way it will silently go on when used with unsupporting IE
 412                         try:
 413                                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
 414                                 self.report_writesubtitles(srtfn)
 415                                 srtfile = open(encodeFilename(srtfn), 'wb')
 416                                 try:
 417                                         srtfile.write(info_dict['subtitles'].encode('utf-8'))
 418                                 finally:
 419                                         srtfile.close()
 420                         except (OSError, IOError):
 421                                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 422                                 return
 423
 424                 if self.params.get('writeinfojson', False):
 425                         infofn = filename + u'.info.json'
 426                         self.report_writeinfojson(infofn)
 427                         try:
 428                                 json.dump
 429                         except (NameError,AttributeError):
 430                                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
 431                                 return
 432                         try:
 433                                 infof = open(encodeFilename(infofn), 'wb')
 434                                 try:
 435                                         json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
 436                                         json.dump(json_info_dict, infof)
 437                                 finally:
 438                                         infof.close()
 439                         except (OSError, IOError):
 440                                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 441                                 return
 442
 443                 if not self.params.get('skip_download', False):
 444                         if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 445                                 success = True
 446                         else:
 447                                 try:
 448                                         success = self._do_download(filename, info_dict)
 449                                 except (OSError, IOError), err:
 450                                         raise UnavailableVideoError
 451                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 452                                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 453                                         return
 454                                 except (ContentTooShortError, ), err:
 455                                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 456                                         return
 457
 458                         if success:
 459                                 try:
 460                                         self.post_process(filename, info_dict)
 461                                 except (PostProcessingError), err:
 462                                         self.trouble(u'ERROR: postprocessing: %s' % str(err))
 463                                         return
 464
 465         def download(self, url_list):
 466                 """Download a given list of URLs."""
 467                 if len(url_list) > 1 and self.fixed_template():
 468                         raise SameFileError(self.params['outtmpl'])
 469
 470                 for url in url_list:
 471                         suitable_found = False
 472                         for ie in self._ies:
 473                                 # Go to next InfoExtractor if not suitable
 474                                 if not ie.suitable(url):
 475                                         continue
 476
 477                                 # Suitable InfoExtractor found
 478                                 suitable_found = True
 479
 480                                 # Extract information from URL and process it
 481                                 videos = ie.extract(url)
 482                                 for video in videos or []:
 483                                         video['extractor'] = ie.IE_NAME
 484                                         try:
 485                                                 self.increment_downloads()
 486                                                 self.process_info(video)
 487                                         except UnavailableVideoError:
 488                                                 self.trouble(u'\nERROR: unable to download video')
 489
 490                                 # Suitable InfoExtractor had been found; go to next URL
 491                                 break
 492
 493                         if not suitable_found:
 494                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 495
 496                 return self._download_retcode
 497
 498         def post_process(self, filename, ie_info):
 499                 """Run the postprocessing chain on the given file."""
 500                 info = dict(ie_info)
 501                 info['filepath'] = filename
 502                 for pp in self._pps:
 503                         info = pp.run(info)
 504                         if info is None:
 505                                 break
 506
 507         def _download_with_rtmpdump(self, filename, url, player_url):
 508                 self.report_destination(filename)
 509                 tmpfilename = self.temp_name(filename)
 510
 511                 # Check for rtmpdump first
 512                 try:
 513                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 514                 except (OSError, IOError):
 515                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 516                         return False
 517
 518                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 519                 # the connection was interrumpted and resuming appears to be
 520                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 521                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 522                 args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 523                 if self.params.get('verbose', False):
 524                         try:
 525                                 import pipes
 526                                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 527                         except ImportError:
 528                                 shell_quote = repr
 529                         self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 530                 retval = subprocess.call(args)
 531                 while retval == 2 or retval == 1:
 532                         prevsize = os.path.getsize(encodeFilename(tmpfilename))
 533                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 534                         time.sleep(5.0) # This seems to be needed
 535                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 536                         cursize = os.path.getsize(encodeFilename(tmpfilename))
 537                         if prevsize == cursize and retval == 1:
 538                                 break
 539                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 540                         if prevsize == cursize and retval == 2 and cursize > 1024:
 541                                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 542                                 retval = 0
 543                                 break
 544                 if retval == 0:
 545                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
 546                         self.try_rename(tmpfilename, filename)
 547                         return True
 548                 else:
 549                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 550                         return False
 551
 552         def _do_download(self, filename, info_dict):
 553                 url = info_dict['url']
 554                 player_url = info_dict.get('player_url', None)
 555
 556                 # Check file already present
 557                 if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 558                         self.report_file_already_downloaded(filename)
 559                         return True
 560
 561                 # Attempt to download using rtmpdump
 562                 if url.startswith('rtmp'):
 563                         return self._download_with_rtmpdump(filename, url, player_url)
 564
 565                 tmpfilename = self.temp_name(filename)
 566                 stream = None
 567
 568                 # Do not include the Accept-Encoding header
 569                 headers = {'Youtubedl-no-compression': 'True'}
 570                 basic_request = urllib2.Request(url, None, headers)
 571                 request = urllib2.Request(url, None, headers)
 572
 573                 # Establish possible resume length
 574                 if os.path.isfile(encodeFilename(tmpfilename)):
 575                         resume_len = os.path.getsize(encodeFilename(tmpfilename))
 576                 else:
 577                         resume_len = 0
 578
 579                 open_mode = 'wb'
 580                 if resume_len != 0:
 581                         if self.params.get('continuedl', False):
 582                                 self.report_resuming_byte(resume_len)
 583                                 request.add_header('Range','bytes=%d-' % resume_len)
 584                                 open_mode = 'ab'
 585                         else:
 586                                 resume_len = 0
 587
 588                 count = 0
 589                 retries = self.params.get('retries', 0)
 590                 while count <= retries:
 591                         # Establish connection
 592                         try:
 593                                 if count == 0 and 'urlhandle' in info_dict:
 594                                         data = info_dict['urlhandle']
 595                                 data = urllib2.urlopen(request)
 596                                 break
 597                         except (urllib2.HTTPError, ), err:
 598                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 599                                         # Unexpected HTTP error
 600                                         raise
 601                                 elif err.code == 416:
 602                                         # Unable to resume (requested range not satisfiable)
 603                                         try:
 604                                                 # Open the connection again without the range header
 605                                                 data = urllib2.urlopen(basic_request)
 606                                                 content_length = data.info()['Content-Length']
 607                                         except (urllib2.HTTPError, ), err:
 608                                                 if err.code < 500 or err.code >= 600:
 609                                                         raise
 610                                         else:
 611                                                 # Examine the reported length
 612                                                 if (content_length is not None and
 613                                                                 (resume_len - 100 < long(content_length) < resume_len + 100)):
 614                                                         # The file had already been fully downloaded.
 615                                                         # Explanation to the above condition: in issue #175 it was revealed that
 616                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 617                                                         # changing the file size slightly and causing problems for some users. So
 618                                                         # I decided to implement a suggested change and consider the file
 619                                                         # completely downloaded if the file size differs less than 100 bytes from
 620                                                         # the one in the hard drive.
 621                                                         self.report_file_already_downloaded(filename)
 622                                                         self.try_rename(tmpfilename, filename)
 623                                                         return True
 624                                                 else:
 625                                                         # The length does not match, we start the download over
 626                                                         self.report_unable_to_resume()
 627                                                         open_mode = 'wb'
 628                                                         break
 629                         # Retry
 630                         count += 1
 631                         if count <= retries:
 632                                 self.report_retry(count, retries)
 633
 634                 if count > retries:
 635                         self.trouble(u'ERROR: giving up after %s retries' % retries)
 636                         return False
 637
 638                 data_len = data.info().get('Content-length', None)
 639                 if data_len is not None:
 640                         data_len = long(data_len) + resume_len
 641                 data_len_str = self.format_bytes(data_len)
 642                 byte_counter = 0 + resume_len
 643                 block_size = 1024
 644                 start = time.time()
 645                 while True:
 646                         # Download and write
 647                         before = time.time()
 648                         data_block = data.read(block_size)
 649                         after = time.time()
 650                         if len(data_block) == 0:
 651                                 break
 652                         byte_counter += len(data_block)
 653
 654                         # Open file just in time
 655                         if stream is None:
 656                                 try:
 657                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 658                                         assert stream is not None
 659                                         filename = self.undo_temp_name(tmpfilename)
 660                                         self.report_destination(filename)
 661                                 except (OSError, IOError), err:
 662                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 663                                         return False
 664                         try:
 665                                 stream.write(data_block)
 666                         except (IOError, OSError), err:
 667                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 668                                 return False
 669                         block_size = self.best_block_size(after - before, len(data_block))
 670
 671                         # Progress message
 672                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 673                         if data_len is None:
 674                                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 675                         else:
 676                                 percent_str = self.calc_percent(byte_counter, data_len)
 677                                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 678                                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 679
 680                         # Apply rate limit
 681                         self.slow_down(start, byte_counter - resume_len)
 682
 683                 if stream is None:
 684                         self.trouble(u'\nERROR: Did not get any data blocks')
 685                         return False
 686                 stream.close()
 687                 self.report_finish()
 688                 if data_len is not None and byte_counter != data_len:
 689                         raise ContentTooShortError(byte_counter, long(data_len))
 690                 self.try_rename(tmpfilename, filename)
 691
 692                 # Update file modification time
 693                 if self.params.get('updatetime', True):
 694                         info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 695
 696                 return True