_ Git - youtube-dl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import httplib
   5 import math
   6 import os
   7 import re
   8 import socket
   9 import subprocess
  10 import sys
  11 import time
  12 import urllib2
  13
  14 if os.name == 'nt':
  15         import ctypes
  16
  17 from utils import *
  18
  19
  20 class FileDownloader(object):
  21         """File Downloader class.
  22
  23         File downloader objects are the ones responsible for downloading the
  24         actual video file and writing it to disk if the user has requested
  25         it, among some other tasks. In most cases there should be one per
  26         program. As, given a video URL, the downloader doesn't know how to
  27         extract all the needed information, task that InfoExtractors do, it
  28         has to pass the URL to one of them.
  29
  30         For this, file downloader objects have a method that allows
  31         InfoExtractors to be registered in a given order. When it is passed
  32         a URL, the file downloader hands it to the first InfoExtractor it
  33         finds that reports being able to handle it. The InfoExtractor extracts
  34         all the information about the video or videos the URL refers to, and
  35         asks the FileDownloader to process the video information, possibly
  36         downloading the video.
  37
  38         File downloaders accept a lot of parameters. In order not to saturate
  39         the object constructor with arguments, it receives a dictionary of
  40         options instead. These options are available through the params
  41         attribute for the InfoExtractors to use. The FileDownloader also
  42         registers itself as the downloader in charge for the InfoExtractors
  43         that are added to it, so this is a "mutual registration".
  44
  45         Available options:
  46
  47         username:         Username for authentication purposes.
  48         password:         Password for authentication purposes.
  49         usenetrc:         Use netrc for authentication instead.
  50         quiet:            Do not print messages to stdout.
  51         forceurl:         Force printing final URL.
  52         forcetitle:       Force printing title.
  53         forcethumbnail:   Force printing thumbnail URL.
  54         forcedescription: Force printing description.
  55         forcefilename:    Force printing final filename.
  56         simulate:         Do not download the video files.
  57         format:           Video format code.
  58         format_limit:     Highest quality format to try.
  59         outtmpl:          Template for output names.
  60         ignoreerrors:     Do not stop on download errors.
  61         ratelimit:        Download speed limit, in bytes/sec.
  62         nooverwrites:     Prevent overwriting files.
  63         retries:          Number of times to retry for HTTP error 5xx
  64         continuedl:       Try to continue downloads if possible.
  65         noprogress:       Do not print the progress bar.
  66         playliststart:    Playlist item to start at.
  67         playlistend:      Playlist item to end at.
  68         matchtitle:       Download only matching titles.
  69         rejecttitle:      Reject downloads for matching titles.
  70         logtostderr:      Log messages to stderr instead of stdout.
  71         consoletitle:     Display progress in console window's titlebar.
  72         nopart:           Do not use temporary .part files.
  73         updatetime:       Use the Last-modified header to set output file timestamps.
  74         writedescription: Write the video description to a .description file
  75         writeinfojson:    Write the video metadata to a .info.json file
  76         writesubtitles:   Write the video subtitles to a .srt file
  77         subtitleslang:    Language of the subtitles to download
  78         """
  79
  80         params = None
  81         _ies = []
  82         _pps = []
  83         _download_retcode = None
  84         _num_downloads = None
  85         _screen_file = None
  86
  87         def __init__(self, params):
  88                 """Create a FileDownloader object with the given options."""
  89                 self._ies = []
  90                 self._pps = []
  91                 self._download_retcode = 0
  92                 self._num_downloads = 0
  93                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
  94                 self.params = params
  95
  96         @staticmethod
  97         def format_bytes(bytes):
  98                 if bytes is None:
  99                         return 'N/A'
 100                 if type(bytes) is str:
 101                         bytes = float(bytes)
 102                 if bytes == 0.0:
 103                         exponent = 0
 104                 else:
 105                         exponent = long(math.log(bytes, 1024.0))
 106                 suffix = 'bkMGTPEZY'[exponent]
 107                 converted = float(bytes) / float(1024 ** exponent)
 108                 return '%.2f%s' % (converted, suffix)
 109
 110         @staticmethod
 111         def calc_percent(byte_counter, data_len):
 112                 if data_len is None:
 113                         return '---.-%'
 114                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 115
 116         @staticmethod
 117         def calc_eta(start, now, total, current):
 118                 if total is None:
 119                         return '--:--'
 120                 dif = now - start
 121                 if current == 0 or dif < 0.001: # One millisecond
 122                         return '--:--'
 123                 rate = float(current) / dif
 124                 eta = long((float(total) - float(current)) / rate)
 125                 (eta_mins, eta_secs) = divmod(eta, 60)
 126                 if eta_mins > 99:
 127                         return '--:--'
 128                 return '%02d:%02d' % (eta_mins, eta_secs)
 129
 130         @staticmethod
 131         def calc_speed(start, now, bytes):
 132                 dif = now - start
 133                 if bytes == 0 or dif < 0.001: # One millisecond
 134                         return '%10s' % '---b/s'
 135                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 136
 137         @staticmethod
 138         def best_block_size(elapsed_time, bytes):
 139                 new_min = max(bytes / 2.0, 1.0)
 140                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 141                 if elapsed_time < 0.001:
 142                         return long(new_max)
 143                 rate = bytes / elapsed_time
 144                 if rate > new_max:
 145                         return long(new_max)
 146                 if rate < new_min:
 147                         return long(new_min)
 148                 return long(rate)
 149
 150         @staticmethod
 151         def parse_bytes(bytestr):
 152                 """Parse a string indicating a byte quantity into a long integer."""
 153                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 154                 if matchobj is None:
 155                         return None
 156                 number = float(matchobj.group(1))
 157                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 158                 return long(round(number * multiplier))
 159
 160         def add_info_extractor(self, ie):
 161                 """Add an InfoExtractor object to the end of the list."""
 162                 self._ies.append(ie)
 163                 ie.set_downloader(self)
 164
 165         def add_post_processor(self, pp):
 166                 """Add a PostProcessor object to the end of the chain."""
 167                 self._pps.append(pp)
 168                 pp.set_downloader(self)
 169
 170         def to_screen(self, message, skip_eol=False):
 171                 """Print message to stdout if not in quiet mode."""
 172                 assert type(message) == type(u'')
 173                 if not self.params.get('quiet', False):
 174                         terminator = [u'\n', u''][skip_eol]
 175                         output = message + terminator
 176                         if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 177                                 output = output.encode(preferredencoding(), 'ignore')
 178                         self._screen_file.write(output)
 179                         self._screen_file.flush()
 180
 181         def to_stderr(self, message):
 182                 """Print message to stderr."""
 183                 print >>sys.stderr, message.encode(preferredencoding())
 184
 185         def to_cons_title(self, message):
 186                 """Set console/terminal window title to message."""
 187                 if not self.params.get('consoletitle', False):
 188                         return
 189                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 190                         # c_wchar_p() might not be necessary if `message` is
 191                         # already of type unicode()
 192                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 193                 elif 'TERM' in os.environ:
 194                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 195
 196         def fixed_template(self):
 197                 """Checks if the output template is fixed."""
 198                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 199
 200         def trouble(self, message=None):
 201                 """Determine action to take when a download problem appears.
 202
 203                 Depending on if the downloader has been configured to ignore
 204                 download errors or not, this method may throw an exception or
 205                 not when errors are found, after printing the message.
 206                 """
 207                 if message is not None:
 208                         self.to_stderr(message)
 209                 if not self.params.get('ignoreerrors', False):
 210                         raise DownloadError(message)
 211                 self._download_retcode = 1
 212
 213         def slow_down(self, start_time, byte_counter):
 214                 """Sleep if the download speed is over the rate limit."""
 215                 rate_limit = self.params.get('ratelimit', None)
 216                 if rate_limit is None or byte_counter == 0:
 217                         return
 218                 now = time.time()
 219                 elapsed = now - start_time
 220                 if elapsed <= 0.0:
 221                         return
 222                 speed = float(byte_counter) / elapsed
 223                 if speed > rate_limit:
 224                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 225
 226         def temp_name(self, filename):
 227                 """Returns a temporary filename for the given filename."""
 228                 if self.params.get('nopart', False) or filename == u'-' or \
 229                                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 230                         return filename
 231                 return filename + u'.part'
 232
 233         def undo_temp_name(self, filename):
 234                 if filename.endswith(u'.part'):
 235                         return filename[:-len(u'.part')]
 236                 return filename
 237
 238         def try_rename(self, old_filename, new_filename):
 239                 try:
 240                         if old_filename == new_filename:
 241                                 return
 242                         os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 243                 except (IOError, OSError), err:
 244                         self.trouble(u'ERROR: unable to rename file')
 245
 246         def try_utime(self, filename, last_modified_hdr):
 247                 """Try to set the last-modified time of the given file."""
 248                 if last_modified_hdr is None:
 249                         return
 250                 if not os.path.isfile(encodeFilename(filename)):
 251                         return
 252                 timestr = last_modified_hdr
 253                 if timestr is None:
 254                         return
 255                 filetime = timeconvert(timestr)
 256                 if filetime is None:
 257                         return filetime
 258                 try:
 259                         os.utime(filename, (time.time(), filetime))
 260                 except:
 261                         pass
 262                 return filetime
 263
 264         def report_writedescription(self, descfn):
 265                 """ Report that the description file is being written """
 266                 self.to_screen(u'[info] Writing video description to: ' + descfn)
 267
 268         def report_writesubtitles(self, srtfn):
 269                 """ Report that the subtitles file is being written """
 270                 self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
 271
 272         def report_writeinfojson(self, infofn):
 273                 """ Report that the metadata file has been written """
 274                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 275
 276         def report_destination(self, filename):
 277                 """Report destination filename."""
 278                 self.to_screen(u'[download] Destination: ' + filename)
 279
 280         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 281                 """Report download progress."""
 282                 if self.params.get('noprogress', False):
 283                         return
 284                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 285                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 286                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 287                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 288
 289         def report_resuming_byte(self, resume_len):
 290                 """Report attempt to resume at given byte."""
 291                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 292
 293         def report_retry(self, count, retries):
 294                 """Report retry in case of HTTP error 5xx"""
 295                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 296
 297         def report_file_already_downloaded(self, file_name):
 298                 """Report file has already been fully downloaded."""
 299                 try:
 300                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 301                 except (UnicodeEncodeError), err:
 302                         self.to_screen(u'[download] The file has already been downloaded')
 303
 304         def report_unable_to_resume(self):
 305                 """Report it was impossible to resume download."""
 306                 self.to_screen(u'[download] Unable to resume')
 307
 308         def report_finish(self):
 309                 """Report download finished."""
 310                 if self.params.get('noprogress', False):
 311                         self.to_screen(u'[download] Download completed')
 312                 else:
 313                         self.to_screen(u'')
 314
 315         def increment_downloads(self):
 316                 """Increment the ordinal that assigns a number to each file."""
 317                 self._num_downloads += 1
 318
 319         def prepare_filename(self, info_dict):
 320                 """Generate the output filename."""
 321                 try:
 322                         template_dict = dict(info_dict)
 323                         template_dict['epoch'] = unicode(long(time.time()))
 324                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 325                         template_dict['title'] = template_dict['stitle'] # Keep both for backwards compatibility
 326                         filename = self.params['outtmpl'] % template_dict
 327                         return filename
 328                 except (ValueError, KeyError), err:
 329                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
 330                         return None
 331
 332         def _match_entry(self, info_dict):
 333                 """ Returns None iff the file should be downloaded """
 334
 335                 title = info_dict['title']
 336                 matchtitle = self.params.get('matchtitle', False)
 337                 if matchtitle:
 338                         matchtitle = matchtitle.decode('utf8')
 339                         if not re.search(matchtitle, title, re.IGNORECASE):
 340                                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 341                 rejecttitle = self.params.get('rejecttitle', False)
 342                 if rejecttitle:
 343                         rejecttitle = rejecttitle.decode('utf8')
 344                         if re.search(rejecttitle, title, re.IGNORECASE):
 345                                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 346                 return None
 347
 348         def process_info(self, info_dict):
 349                 """Process a single dictionary returned by an InfoExtractor."""
 350
 351                 info_dict['stitle'] = sanitize_filename(info_dict['title'])
 352
 353                 reason = self._match_entry(info_dict)
 354                 if reason is not None:
 355                         self.to_screen(u'[download] ' + reason)
 356                         return
 357
 358                 max_downloads = self.params.get('max_downloads')
 359                 if max_downloads is not None:
 360                         if self._num_downloads > int(max_downloads):
 361                                 raise MaxDownloadsReached()
 362
 363                 filename = self.prepare_filename(info_dict)
 364
 365                 # Forced printings
 366                 if self.params.get('forcetitle', False):
 367                         print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
 368                 if self.params.get('forceurl', False):
 369                         print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
 370                 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 371                         print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
 372                 if self.params.get('forcedescription', False) and 'description' in info_dict:
 373                         print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
 374                 if self.params.get('forcefilename', False) and filename is not None:
 375                         print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
 376                 if self.params.get('forceformat', False):
 377                         print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
 378
 379                 # Do nothing else if in simulate mode
 380                 if self.params.get('simulate', False):
 381                         return
 382
 383                 if filename is None:
 384                         return
 385
 386                 try:
 387                         dn = os.path.dirname(encodeFilename(filename))
 388                         if dn != '' and not os.path.exists(dn): # dn is already encoded
 389                                 os.makedirs(dn)
 390                 except (OSError, IOError), err:
 391                         self.trouble(u'ERROR: unable to create directory ' + unicode(err))
 392                         return
 393
 394                 if self.params.get('writedescription', False):
 395                         try:
 396                                 descfn = filename + u'.description'
 397                                 self.report_writedescription(descfn)
 398                                 descfile = open(encodeFilename(descfn), 'wb')
 399                                 try:
 400                                         descfile.write(info_dict['description'].encode('utf-8'))
 401                                 finally:
 402                                         descfile.close()
 403                         except (OSError, IOError):
 404                                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
 405                                 return
 406
 407                 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 408                         # subtitles download errors are already managed as troubles in relevant IE
 409                         # that way it will silently go on when used with unsupporting IE
 410                         try:
 411                                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
 412                                 self.report_writesubtitles(srtfn)
 413                                 srtfile = open(encodeFilename(srtfn), 'wb')
 414                                 try:
 415                                         srtfile.write(info_dict['subtitles'].encode('utf-8'))
 416                                 finally:
 417                                         srtfile.close()
 418                         except (OSError, IOError):
 419                                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 420                                 return
 421
 422                 if self.params.get('writeinfojson', False):
 423                         infofn = filename + u'.info.json'
 424                         self.report_writeinfojson(infofn)
 425                         try:
 426                                 json.dump
 427                         except (NameError,AttributeError):
 428                                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
 429                                 return
 430                         try:
 431                                 infof = open(encodeFilename(infofn), 'wb')
 432                                 try:
 433                                         json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
 434                                         json.dump(json_info_dict, infof)
 435                                 finally:
 436                                         infof.close()
 437                         except (OSError, IOError):
 438                                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 439                                 return
 440
 441                 if not self.params.get('skip_download', False):
 442                         if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 443                                 success = True
 444                         else:
 445                                 try:
 446                                         success = self._do_download(filename, info_dict)
 447                                 except (OSError, IOError), err:
 448                                         raise UnavailableVideoError
 449                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 450                                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 451                                         return
 452                                 except (ContentTooShortError, ), err:
 453                                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 454                                         return
 455
 456                         if success:
 457                                 try:
 458                                         self.post_process(filename, info_dict)
 459                                 except (PostProcessingError), err:
 460                                         self.trouble(u'ERROR: postprocessing: %s' % str(err))
 461                                         return
 462
 463         def download(self, url_list):
 464                 """Download a given list of URLs."""
 465                 if len(url_list) > 1 and self.fixed_template():
 466                         raise SameFileError(self.params['outtmpl'])
 467
 468                 for url in url_list:
 469                         suitable_found = False
 470                         for ie in self._ies:
 471                                 # Go to next InfoExtractor if not suitable
 472                                 if not ie.suitable(url):
 473                                         continue
 474
 475                                 # Suitable InfoExtractor found
 476                                 suitable_found = True
 477
 478                                 # Extract information from URL and process it
 479                                 videos = ie.extract(url)
 480                                 for video in videos or []:
 481                                         video['extractor'] = ie.IE_NAME
 482                                         try:
 483                                                 self.increment_downloads()
 484                                                 self.process_info(video)
 485                                         except UnavailableVideoError:
 486                                                 self.trouble(u'\nERROR: unable to download video')
 487
 488                                 # Suitable InfoExtractor had been found; go to next URL
 489                                 break
 490
 491                         if not suitable_found:
 492                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 493
 494                 return self._download_retcode
 495
 496         def post_process(self, filename, ie_info):
 497                 """Run the postprocessing chain on the given file."""
 498                 info = dict(ie_info)
 499                 info['filepath'] = filename
 500                 for pp in self._pps:
 501                         info = pp.run(info)
 502                         if info is None:
 503                                 break
 504
 505         def _download_with_rtmpdump(self, filename, url, player_url):
 506                 self.report_destination(filename)
 507                 tmpfilename = self.temp_name(filename)
 508
 509                 # Check for rtmpdump first
 510                 try:
 511                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 512                 except (OSError, IOError):
 513                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 514                         return False
 515
 516                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 517                 # the connection was interrumpted and resuming appears to be
 518                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 519                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 520                 args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 521                 if self.params.get('verbose', False):
 522                         try:
 523                                 import pipes
 524                                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 525                         except ImportError:
 526                                 shell_quote = repr
 527                         self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 528                 retval = subprocess.call(args)
 529                 while retval == 2 or retval == 1:
 530                         prevsize = os.path.getsize(encodeFilename(tmpfilename))
 531                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 532                         time.sleep(5.0) # This seems to be needed
 533                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 534                         cursize = os.path.getsize(encodeFilename(tmpfilename))
 535                         if prevsize == cursize and retval == 1:
 536                                 break
 537                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 538                         if prevsize == cursize and retval == 2 and cursize > 1024:
 539                                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 540                                 retval = 0
 541                                 break
 542                 if retval == 0:
 543                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
 544                         self.try_rename(tmpfilename, filename)
 545                         return True
 546                 else:
 547                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 548                         return False
 549
 550         def _do_download(self, filename, info_dict):
 551                 url = info_dict['url']
 552                 player_url = info_dict.get('player_url', None)
 553
 554                 # Check file already present
 555                 if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 556                         self.report_file_already_downloaded(filename)
 557                         return True
 558
 559                 # Attempt to download using rtmpdump
 560                 if url.startswith('rtmp'):
 561                         return self._download_with_rtmpdump(filename, url, player_url)
 562
 563                 tmpfilename = self.temp_name(filename)
 564                 stream = None
 565
 566                 # Do not include the Accept-Encoding header
 567                 headers = {'Youtubedl-no-compression': 'True'}
 568                 basic_request = urllib2.Request(url, None, headers)
 569                 request = urllib2.Request(url, None, headers)
 570
 571                 # Establish possible resume length
 572                 if os.path.isfile(encodeFilename(tmpfilename)):
 573                         resume_len = os.path.getsize(encodeFilename(tmpfilename))
 574                 else:
 575                         resume_len = 0
 576
 577                 open_mode = 'wb'
 578                 if resume_len != 0:
 579                         if self.params.get('continuedl', False):
 580                                 self.report_resuming_byte(resume_len)
 581                                 request.add_header('Range','bytes=%d-' % resume_len)
 582                                 open_mode = 'ab'
 583                         else:
 584                                 resume_len = 0
 585
 586                 count = 0
 587                 retries = self.params.get('retries', 0)
 588                 while count <= retries:
 589                         # Establish connection
 590                         try:
 591                                 if count == 0 and 'urlhandle' in info_dict:
 592                                         data = info_dict['urlhandle']
 593                                 data = urllib2.urlopen(request)
 594                                 break
 595                         except (urllib2.HTTPError, ), err:
 596                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 597                                         # Unexpected HTTP error
 598                                         raise
 599                                 elif err.code == 416:
 600                                         # Unable to resume (requested range not satisfiable)
 601                                         try:
 602                                                 # Open the connection again without the range header
 603                                                 data = urllib2.urlopen(basic_request)
 604                                                 content_length = data.info()['Content-Length']
 605                                         except (urllib2.HTTPError, ), err:
 606                                                 if err.code < 500 or err.code >= 600:
 607                                                         raise
 608                                         else:
 609                                                 # Examine the reported length
 610                                                 if (content_length is not None and
 611                                                                 (resume_len - 100 < long(content_length) < resume_len + 100)):
 612                                                         # The file had already been fully downloaded.
 613                                                         # Explanation to the above condition: in issue #175 it was revealed that
 614                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 615                                                         # changing the file size slightly and causing problems for some users. So
 616                                                         # I decided to implement a suggested change and consider the file
 617                                                         # completely downloaded if the file size differs less than 100 bytes from
 618                                                         # the one in the hard drive.
 619                                                         self.report_file_already_downloaded(filename)
 620                                                         self.try_rename(tmpfilename, filename)
 621                                                         return True
 622                                                 else:
 623                                                         # The length does not match, we start the download over
 624                                                         self.report_unable_to_resume()
 625                                                         open_mode = 'wb'
 626                                                         break
 627                         # Retry
 628                         count += 1
 629                         if count <= retries:
 630                                 self.report_retry(count, retries)
 631
 632                 if count > retries:
 633                         self.trouble(u'ERROR: giving up after %s retries' % retries)
 634                         return False
 635
 636                 data_len = data.info().get('Content-length', None)
 637                 if data_len is not None:
 638                         data_len = long(data_len) + resume_len
 639                 data_len_str = self.format_bytes(data_len)
 640                 byte_counter = 0 + resume_len
 641                 block_size = 1024
 642                 start = time.time()
 643                 while True:
 644                         # Download and write
 645                         before = time.time()
 646                         data_block = data.read(block_size)
 647                         after = time.time()
 648                         if len(data_block) == 0:
 649                                 break
 650                         byte_counter += len(data_block)
 651
 652                         # Open file just in time
 653                         if stream is None:
 654                                 try:
 655                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 656                                         assert stream is not None
 657                                         filename = self.undo_temp_name(tmpfilename)
 658                                         self.report_destination(filename)
 659                                 except (OSError, IOError), err:
 660                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 661                                         return False
 662                         try:
 663                                 stream.write(data_block)
 664                         except (IOError, OSError), err:
 665                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 666                                 return False
 667                         block_size = self.best_block_size(after - before, len(data_block))
 668
 669                         # Progress message
 670                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 671                         if data_len is None:
 672                                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 673                         else:
 674                                 percent_str = self.calc_percent(byte_counter, data_len)
 675                                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 676                                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 677
 678                         # Apply rate limit
 679                         self.slow_down(start, byte_counter - resume_len)
 680
 681                 if stream is None:
 682                         self.trouble(u'\nERROR: Did not get any data blocks')
 683                         return False
 684                 stream.close()
 685                 self.report_finish()
 686                 if data_len is not None and byte_counter != data_len:
 687                         raise ContentTooShortError(byte_counter, long(data_len))
 688                 self.try_rename(tmpfilename, filename)
 689
 690                 # Update file modification time
 691                 if self.params.get('updatetime', True):
 692                         info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 693
 694                 return True