_ Git - youtube-dl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import math
   7 import io
   8 import os
   9 import re
  10 import socket
  11 import subprocess
  12 import sys
  13 import time
  14 import traceback
  15
  16 if os.name == 'nt':
  17     import ctypes
  18
  19 from .utils import *
  20 from .InfoExtractors import get_info_extractor
  21
  22
  23 class FileDownloader(object):
  24     """File Downloader class.
  25
   26     File downloader objects are the ones responsible for downloading the
  27     actual video file and writing it to disk if the user has requested
  28     it, among some other tasks. In most cases there should be one per
  29     program. As, given a video URL, the downloader doesn't know how to
  30     extract all the needed information, task that InfoExtractors do, it
  31     has to pass the URL to one of them.
  32
  33     For this, file downloader objects have a method that allows
  34     InfoExtractors to be registered in a given order. When it is passed
   35     a URL, the file downloader hands it to the first InfoExtractor it
  36     finds that reports being able to handle it. The InfoExtractor extracts
  37     all the information about the video or videos the URL refers to, and
  38     asks the FileDownloader to process the video information, possibly
  39     downloading the video.
  40
  41     File downloaders accept a lot of parameters. In order not to saturate
  42     the object constructor with arguments, it receives a dictionary of
  43     options instead. These options are available through the params
  44     attribute for the InfoExtractors to use. The FileDownloader also
  45     registers itself as the downloader in charge for the InfoExtractors
  46     that are added to it, so this is a "mutual registration".
  47
  48     Available options:
  49
  50     username:          Username for authentication purposes.
  51     password:          Password for authentication purposes.
  52     usenetrc:          Use netrc for authentication instead.
  53     quiet:             Do not print messages to stdout.
  54     forceurl:          Force printing final URL.
  55     forcetitle:        Force printing title.
  56     forcethumbnail:    Force printing thumbnail URL.
  57     forcedescription:  Force printing description.
  58     forcefilename:     Force printing final filename.
  59     simulate:          Do not download the video files.
  60     format:            Video format code.
  61     format_limit:      Highest quality format to try.
  62     outtmpl:           Template for output names.
  63     restrictfilenames: Do not allow "&" and spaces in file names
  64     ignoreerrors:      Do not stop on download errors.
  65     ratelimit:         Download speed limit, in bytes/sec.
  66     nooverwrites:      Prevent overwriting files.
  67     retries:           Number of times to retry for HTTP error 5xx
  68     buffersize:        Size of download buffer in bytes.
  69     noresizebuffer:    Do not automatically resize the download buffer.
  70     continuedl:        Try to continue downloads if possible.
  71     noprogress:        Do not print the progress bar.
  72     playliststart:     Playlist item to start at.
  73     playlistend:       Playlist item to end at.
  74     matchtitle:        Download only matching titles.
  75     rejecttitle:       Reject downloads for matching titles.
  76     logtostderr:       Log messages to stderr instead of stdout.
  77     consoletitle:      Display progress in console window's titlebar.
  78     nopart:            Do not use temporary .part files.
  79     updatetime:        Use the Last-modified header to set output file timestamps.
  80     writedescription:  Write the video description to a .description file
   81     writeinfojson:     Write the video metadata to a .info.json file
  82     writesubtitles:    Write the video subtitles to a file
  83     onlysubtitles:     Downloads only the subtitles of the video
  84     allsubtitles:      Downloads all the subtitles of the video
  85     listsubtitles:     Lists all available subtitles for the video
  86     subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
  87     subtitleslang:     Language of the subtitles to download
  88     test:              Download only first bytes to test the downloader.
  89     keepvideo:         Keep the video file after post-processing
  90     min_filesize:      Skip files smaller than this size
  91     max_filesize:      Skip files larger than this size
  92     daterange:         A DateRange object, download only if the upload_date is in the range.
  93     """
  94
  95     params = None
  96     _ies = []
  97     _pps = []
  98     _download_retcode = None
  99     _num_downloads = None
 100     _screen_file = None
 101
 102     def __init__(self, params):
 103         """Create a FileDownloader object with the given options."""
 104         self._ies = []
 105         self._pps = []
 106         self._progress_hooks = []
 107         self._download_retcode = 0
 108         self._num_downloads = 0
 109         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 110         self.params = params
 111
 112         if '%(stitle)s' in self.params['outtmpl']:
 113             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 114
 115     @staticmethod
 116     def format_bytes(bytes):
 117         if bytes is None:
 118             return 'N/A'
 119         if type(bytes) is str:
 120             bytes = float(bytes)
 121         if bytes == 0.0:
 122             exponent = 0
 123         else:
 124             exponent = int(math.log(bytes, 1024.0))
 125         suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
 126         converted = float(bytes) / float(1024 ** exponent)
 127         return '%.2f%s' % (converted, suffix)
 128
 129     @staticmethod
 130     def calc_percent(byte_counter, data_len):
 131         if data_len is None:
 132             return '---.-%'
 133         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 134
 135     @staticmethod
 136     def calc_eta(start, now, total, current):
 137         if total is None:
 138             return '--:--'
 139         dif = now - start
 140         if current == 0 or dif < 0.001: # One millisecond
 141             return '--:--'
 142         rate = float(current) / dif
 143         eta = int((float(total) - float(current)) / rate)
 144         (eta_mins, eta_secs) = divmod(eta, 60)
 145         if eta_mins > 99:
 146             return '--:--'
 147         return '%02d:%02d' % (eta_mins, eta_secs)
 148
 149     @staticmethod
 150     def calc_speed(start, now, bytes):
 151         dif = now - start
 152         if bytes == 0 or dif < 0.001: # One millisecond
 153             return '%10s' % '---b/s'
 154         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 155
 156     @staticmethod
 157     def best_block_size(elapsed_time, bytes):
 158         new_min = max(bytes / 2.0, 1.0)
 159         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 160         if elapsed_time < 0.001:
 161             return int(new_max)
 162         rate = bytes / elapsed_time
 163         if rate > new_max:
 164             return int(new_max)
 165         if rate < new_min:
 166             return int(new_min)
 167         return int(rate)
 168
 169     @staticmethod
 170     def parse_bytes(bytestr):
 171         """Parse a string indicating a byte quantity into an integer."""
 172         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 173         if matchobj is None:
 174             return None
 175         number = float(matchobj.group(1))
 176         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 177         return int(round(number * multiplier))
 178
 179     def add_info_extractor(self, ie):
 180         """Add an InfoExtractor object to the end of the list."""
 181         self._ies.append(ie)
 182         ie.set_downloader(self)
 183
 184     def add_post_processor(self, pp):
 185         """Add a PostProcessor object to the end of the chain."""
 186         self._pps.append(pp)
 187         pp.set_downloader(self)
 188
 189     def to_screen(self, message, skip_eol=False):
 190         """Print message to stdout if not in quiet mode."""
 191         assert type(message) == type(u'')
 192         if not self.params.get('quiet', False):
 193             terminator = [u'\n', u''][skip_eol]
 194             output = message + terminator
 195             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 196                 output = output.encode(preferredencoding(), 'ignore')
 197             self._screen_file.write(output)
 198             self._screen_file.flush()
 199
 200     def to_stderr(self, message):
 201         """Print message to stderr."""
 202         assert type(message) == type(u'')
 203         output = message + u'\n'
 204         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 205             output = output.encode(preferredencoding())
 206         sys.stderr.write(output)
 207
 208     def to_cons_title(self, message):
 209         """Set console/terminal window title to message."""
 210         if not self.params.get('consoletitle', False):
 211             return
 212         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 213             # c_wchar_p() might not be necessary if `message` is
 214             # already of type unicode()
 215             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 216         elif 'TERM' in os.environ:
 217             self.to_screen('\033]0;%s\007' % message, skip_eol=True)
 218
 219     def fixed_template(self):
 220         """Checks if the output template is fixed."""
 221         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 222
 223     def trouble(self, message=None, tb=None):
 224         """Determine action to take when a download problem appears.
 225
 226         Depending on if the downloader has been configured to ignore
 227         download errors or not, this method may throw an exception or
 228         not when errors are found, after printing the message.
 229
 230         tb, if given, is additional traceback information.
 231         """
 232         if message is not None:
 233             self.to_stderr(message)
 234         if self.params.get('verbose'):
 235             if tb is None:
 236                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 237                     tb = u''
 238                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 239                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 240                     tb += compat_str(traceback.format_exc())
 241                 else:
 242                     tb_data = traceback.format_list(traceback.extract_stack())
 243                     tb = u''.join(tb_data)
 244             self.to_stderr(tb)
 245         if not self.params.get('ignoreerrors', False):
 246             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 247                 exc_info = sys.exc_info()[1].exc_info
 248             else:
 249                 exc_info = sys.exc_info()
 250             raise DownloadError(message, exc_info)
 251         self._download_retcode = 1
 252
 253     def report_warning(self, message):
 254         '''
 255         Print the message to stderr, it will be prefixed with 'WARNING:'
 256         If stderr is a tty file the 'WARNING:' will be colored
 257         '''
 258         if sys.stderr.isatty() and os.name != 'nt':
 259             _msg_header=u'\033[0;33mWARNING:\033[0m'
 260         else:
 261             _msg_header=u'WARNING:'
 262         warning_message=u'%s %s' % (_msg_header,message)
 263         self.to_stderr(warning_message)
 264
 265     def report_error(self, message, tb=None):
 266         '''
 267         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 268         in red if stderr is a tty file.
 269         '''
 270         if sys.stderr.isatty() and os.name != 'nt':
 271             _msg_header = u'\033[0;31mERROR:\033[0m'
 272         else:
 273             _msg_header = u'ERROR:'
 274         error_message = u'%s %s' % (_msg_header, message)
 275         self.trouble(error_message, tb)
 276
 277     def slow_down(self, start_time, byte_counter):
 278         """Sleep if the download speed is over the rate limit."""
 279         rate_limit = self.params.get('ratelimit', None)
 280         if rate_limit is None or byte_counter == 0:
 281             return
 282         now = time.time()
 283         elapsed = now - start_time
 284         if elapsed <= 0.0:
 285             return
 286         speed = float(byte_counter) / elapsed
 287         if speed > rate_limit:
 288             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 289
 290     def temp_name(self, filename):
 291         """Returns a temporary filename for the given filename."""
 292         if self.params.get('nopart', False) or filename == u'-' or \
 293                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 294             return filename
 295         return filename + u'.part'
 296
 297     def undo_temp_name(self, filename):
 298         if filename.endswith(u'.part'):
 299             return filename[:-len(u'.part')]
 300         return filename
 301
 302     def try_rename(self, old_filename, new_filename):
 303         try:
 304             if old_filename == new_filename:
 305                 return
 306             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 307         except (IOError, OSError) as err:
 308             self.report_error(u'unable to rename file')
 309
 310     def try_utime(self, filename, last_modified_hdr):
 311         """Try to set the last-modified time of the given file."""
 312         if last_modified_hdr is None:
 313             return
 314         if not os.path.isfile(encodeFilename(filename)):
 315             return
 316         timestr = last_modified_hdr
 317         if timestr is None:
 318             return
 319         filetime = timeconvert(timestr)
 320         if filetime is None:
 321             return filetime
 322         try:
 323             os.utime(filename, (time.time(), filetime))
 324         except:
 325             pass
 326         return filetime
 327
 328     def report_writedescription(self, descfn):
 329         """ Report that the description file is being written """
 330         self.to_screen(u'[info] Writing video description to: ' + descfn)
 331
 332     def report_writesubtitles(self, sub_filename):
 333         """ Report that the subtitles file is being written """
 334         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 335
 336     def report_writeinfojson(self, infofn):
 337         """ Report that the metadata file has been written """
 338         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 339
 340     def report_destination(self, filename):
 341         """Report destination filename."""
 342         self.to_screen(u'[download] Destination: ' + filename)
 343
 344     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 345         """Report download progress."""
 346         if self.params.get('noprogress', False):
 347             return
 348         if self.params.get('progress_with_newline', False):
 349             self.to_screen(u'[download] %s of %s at %s ETA %s' %
 350                 (percent_str, data_len_str, speed_str, eta_str))
 351         else:
 352             self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 353                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 354         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 355                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 356
 357     def report_resuming_byte(self, resume_len):
 358         """Report attempt to resume at given byte."""
 359         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 360
 361     def report_retry(self, count, retries):
 362         """Report retry in case of HTTP error 5xx"""
 363         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 364
 365     def report_file_already_downloaded(self, file_name):
 366         """Report file has already been fully downloaded."""
 367         try:
 368             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 369         except (UnicodeEncodeError) as err:
 370             self.to_screen(u'[download] The file has already been downloaded')
 371
 372     def report_unable_to_resume(self):
 373         """Report it was impossible to resume download."""
 374         self.to_screen(u'[download] Unable to resume')
 375
 376     def report_finish(self):
 377         """Report download finished."""
 378         if self.params.get('noprogress', False):
 379             self.to_screen(u'[download] Download completed')
 380         else:
 381             self.to_screen(u'')
 382
 383     def increment_downloads(self):
 384         """Increment the ordinal that assigns a number to each file."""
 385         self._num_downloads += 1
 386
 387     def prepare_filename(self, info_dict):
 388         """Generate the output filename."""
 389         try:
 390             template_dict = dict(info_dict)
 391
 392             template_dict['epoch'] = int(time.time())
 393             autonumber_size = self.params.get('autonumber_size')
 394             if autonumber_size is None:
 395                 autonumber_size = 5
 396             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 397             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 398             if template_dict['playlist_index'] is not None:
 399                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 400
 401             sanitize = lambda k,v: sanitize_filename(
 402                 u'NA' if v is None else compat_str(v),
 403                 restricted=self.params.get('restrictfilenames'),
 404                 is_id=(k==u'id'))
 405             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 406
 407             filename = self.params['outtmpl'] % template_dict
 408             return filename
 409         except KeyError as err:
 410             self.report_error(u'Erroneous output template')
 411             return None
 412         except ValueError as err:
 413             self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
 414             return None
 415
 416     def _match_entry(self, info_dict):
 417         """ Returns None iff the file should be downloaded """
 418
 419         title = info_dict['title']
 420         matchtitle = self.params.get('matchtitle', False)
 421         if matchtitle:
 422             if not re.search(matchtitle, title, re.IGNORECASE):
 423                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 424         rejecttitle = self.params.get('rejecttitle', False)
 425         if rejecttitle:
 426             if re.search(rejecttitle, title, re.IGNORECASE):
 427                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 428         date = info_dict.get('upload_date', None)
 429         if date is not None:
 430             dateRange = self.params.get('daterange', DateRange())
 431             if date not in dateRange:
 432                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 433         return None
 434
 435     def extract_info(self, url, download = True, ie_name = None):
 436         '''
 437         Returns a list with a dictionary for each video we find.
 438         If 'download', also downloads the videos.
 439          '''
 440         suitable_found = False
 441
 442         #We copy the original list
 443         ies = list(self._ies)
 444
 445         if ie_name is not None:
 446             #We put in the first place the given info extractor
 447             first_ie = get_info_extractor(ie_name)()
 448             first_ie.set_downloader(self)
 449             ies.insert(0, first_ie)
 450
 451         for ie in ies:
 452             # Go to next InfoExtractor if not suitable
 453             if not ie.suitable(url):
 454                 continue
 455
 456             # Warn if the _WORKING attribute is False
 457             if not ie.working():
 458                 self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
 459                                u'and will probably not work. If you want to go on, use the -i option.')
 460
 461             # Suitable InfoExtractor found
 462             suitable_found = True
 463
 464             # Extract information from URL and process it
 465             try:
 466                 ie_results = ie.extract(url)
 467                 if ie_results is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 468                     break
 469                 results = []
 470                 for ie_result in ie_results:
 471                     if not 'extractor' in ie_result:
 472                         #The extractor has already been set somewhere else
 473                         ie_result['extractor'] = ie.IE_NAME
 474                     results.append(self.process_ie_result(ie_result, download))
 475                 return results
 476             except ExtractorError as de: # An error we somewhat expected
 477                 self.report_error(compat_str(de), de.format_traceback())
 478                 break
 479             except Exception as e:
 480                 if self.params.get('ignoreerrors', False):
 481                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 482                     break
 483                 else:
 484                     raise
 485         if not suitable_found:
 486                 self.report_error(u'no suitable InfoExtractor: %s' % url)
 487
 488     def process_ie_result(self, ie_result, download = True):
 489         """
 490         Take the result of the ie and return a list of videos.
 491         For url elements it will search the suitable ie and get the videos
 492         For playlist elements it will process each of the elements of the 'entries' key
 493
 494         It will also download the videos if 'download'.
 495         """
 496         result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system
 497         if result_type == 'video':
 498             if 'playlist' not in ie_result:
 499                 #It isn't part of a playlist
 500                 ie_result['playlist'] = None
 501                 ie_result['playlist_index'] = None
 502             if download:
 503                 #Do the download:
 504                 self.process_info(ie_result)
 505             return ie_result
 506         elif result_type == 'url':
 507             #We get the video pointed by the url
 508             result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0]
 509             return result
 510         elif result_type == 'playlist':
 511             #We process each entry in the playlist
 512             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 513             self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
 514
 515             playlist_results = []
 516
 517             n_all_entries = len(ie_result['entries'])
 518             playliststart = self.params.get('playliststart', 1) - 1
 519             playlistend = self.params.get('playlistend', -1)
 520
 521             if playlistend == -1:
 522                 entries = ie_result['entries'][playliststart:]
 523             else:
 524                 entries = ie_result['entries'][playliststart:playlistend]
 525
 526             n_entries = len(entries)
 527
 528             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 529                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 530
 531             for i,entry in enumerate(entries,1):
 532                 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
 533                 entry_result = self.process_ie_result(entry, False)
 534                 entry_result['playlist'] = playlist
 535                 entry_result['playlist_index'] = i + playliststart
 536                 #We must do the download here to correctly set the 'playlist' key
 537                 if download:
 538                     self.process_info(entry_result)
 539                 playlist_results.append(entry_result)
 540             result = ie_result.copy()
 541             result['entries'] = playlist_results
 542             return result
 543
 544     def process_info(self, info_dict):
 545         """Process a single dictionary returned by an InfoExtractor."""
 546
 547         #We increment the download the download count here to match the previous behaviour.
 548         self.increment_downloads()
 549
 550         info_dict['fulltitle'] = info_dict['title']
 551         if len(info_dict['title']) > 200:
 552             info_dict['title'] = info_dict['title'][:197] + u'...'
 553
 554         # Keep for backwards compatibility
 555         info_dict['stitle'] = info_dict['title']
 556
 557         if not 'format' in info_dict:
 558             info_dict['format'] = info_dict['ext']
 559
 560         reason = self._match_entry(info_dict)
 561         if reason is not None:
 562             self.to_screen(u'[download] ' + reason)
 563             return
 564
 565         max_downloads = self.params.get('max_downloads')
 566         if max_downloads is not None:
 567             if self._num_downloads > int(max_downloads):
 568                 raise MaxDownloadsReached()
 569
 570         filename = self.prepare_filename(info_dict)
 571
 572         # Forced printings
 573         if self.params.get('forcetitle', False):
 574             compat_print(info_dict['title'])
 575         if self.params.get('forceurl', False):
 576             compat_print(info_dict['url'])
 577         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 578             compat_print(info_dict['thumbnail'])
 579         if self.params.get('forcedescription', False) and 'description' in info_dict:
 580             compat_print(info_dict['description'])
 581         if self.params.get('forcefilename', False) and filename is not None:
 582             compat_print(filename)
 583         if self.params.get('forceformat', False):
 584             compat_print(info_dict['format'])
 585
 586         # Do nothing else if in simulate mode
 587         if self.params.get('simulate', False):
 588             return
 589
 590         if filename is None:
 591             return
 592
 593         try:
 594             dn = os.path.dirname(encodeFilename(filename))
 595             if dn != '' and not os.path.exists(dn): # dn is already encoded
 596                 os.makedirs(dn)
 597         except (OSError, IOError) as err:
 598             self.report_error(u'unable to create directory ' + compat_str(err))
 599             return
 600
 601         if self.params.get('writedescription', False):
 602             try:
 603                 descfn = filename + u'.description'
 604                 self.report_writedescription(descfn)
 605                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 606                     descfile.write(info_dict['description'])
 607             except (OSError, IOError):
 608                 self.report_error(u'Cannot write description file ' + descfn)
 609                 return
 610
 611         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 612             # subtitles download errors are already managed as troubles in relevant IE
 613             # that way it will silently go on when used with unsupporting IE
 614             subtitle = info_dict['subtitles'][0]
 615             (sub_error, sub_lang, sub) = subtitle
 616             sub_format = self.params.get('subtitlesformat')
 617             if sub_error:
 618                 self.report_warning("Some error while getting the subtitles")
 619             else:
 620                 try:
 621                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 622                     self.report_writesubtitles(sub_filename)
 623                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 624                         subfile.write(sub)
 625                 except (OSError, IOError):
 626                     self.report_error(u'Cannot write subtitles file ' + descfn)
 627                     return
 628             if self.params.get('onlysubtitles', False):
 629                 return
 630
 631         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 632             subtitles = info_dict['subtitles']
 633             sub_format = self.params.get('subtitlesformat')
 634             for subtitle in subtitles:
 635                 (sub_error, sub_lang, sub) = subtitle
 636                 if sub_error:
 637                     self.report_warning("Some error while getting the subtitles")
 638                 else:
 639                     try:
 640                         sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 641                         self.report_writesubtitles(sub_filename)
 642                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 643                                 subfile.write(sub)
 644                     except (OSError, IOError):
 645                         self.report_error(u'Cannot write subtitles file ' + descfn)
 646                         return
 647             if self.params.get('onlysubtitles', False):
 648                 return
 649
 650         if self.params.get('writeinfojson', False):
 651             infofn = filename + u'.info.json'
 652             self.report_writeinfojson(infofn)
 653             try:
 654                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
 655                 write_json_file(json_info_dict, encodeFilename(infofn))
 656             except (OSError, IOError):
 657                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 658                 return
 659
 660         if not self.params.get('skip_download', False):
 661             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 662                 success = True
 663             else:
 664                 try:
 665                     success = self._do_download(filename, info_dict)
 666                 except (OSError, IOError) as err:
 667                     raise UnavailableVideoError()
 668                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 669                     self.report_error(u'unable to download video data: %s' % str(err))
 670                     return
 671                 except (ContentTooShortError, ) as err:
 672                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 673                     return
 674
 675             if success:
 676                 try:
 677                     self.post_process(filename, info_dict)
 678                 except (PostProcessingError) as err:
 679                     self.report_error(u'postprocessing: %s' % str(err))
 680                     return
 681
 682     def download(self, url_list):
 683         """Download a given list of URLs."""
 684         if len(url_list) > 1 and self.fixed_template():
 685             raise SameFileError(self.params['outtmpl'])
 686
 687         for url in url_list:
 688             try:
 689                 #It also downloads the videos
 690                 videos = self.extract_info(url)
 691             except UnavailableVideoError:
 692                 self.report_error(u'unable to download video')
 693             except MaxDownloadsReached:
 694                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 695                 raise
 696
 697         return self._download_retcode
 698
 699     def post_process(self, filename, ie_info):
 700         """Run all the postprocessors on the given file."""
 701         info = dict(ie_info)
 702         info['filepath'] = filename
 703         keep_video = None
 704         for pp in self._pps:
 705             try:
 706                 keep_video_wish,new_info = pp.run(info)
 707                 if keep_video_wish is not None:
 708                     if keep_video_wish:
 709                         keep_video = keep_video_wish
 710                     elif keep_video is None:
 711                         # No clear decision yet, let IE decide
 712                         keep_video = keep_video_wish
 713             except PostProcessingError as e:
 714                 self.to_stderr(u'ERROR: ' + e.msg)
 715         if keep_video is False and not self.params.get('keepvideo', False):
 716             try:
 717                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 718                 os.remove(encodeFilename(filename))
 719             except (IOError, OSError):
 720                 self.report_warning(u'Unable to remove downloaded video file')
 721
 722     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
 723         self.report_destination(filename)
 724         tmpfilename = self.temp_name(filename)
 725
 726         # Check for rtmpdump first
 727         try:
 728             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 729         except (OSError, IOError):
 730             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
 731             return False
 732
 733         # Download using rtmpdump. rtmpdump returns exit code 2 when
 734         # the connection was interrumpted and resuming appears to be
 735         # possible. This is part of rtmpdump's normal usage, AFAIK.
 736         basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
 737         if player_url is not None:
 738             basic_args += ['-W', player_url]
 739         if page_url is not None:
 740             basic_args += ['--pageUrl', page_url]
 741         if play_path is not None:
 742             basic_args += ['-y', play_path]
 743         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 744         if self.params.get('verbose', False):
 745             try:
 746                 import pipes
 747                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 748             except ImportError:
 749                 shell_quote = repr
 750             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 751         retval = subprocess.call(args)
 752         while retval == 2 or retval == 1:
 753             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 754             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 755             time.sleep(5.0) # This seems to be needed
 756             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 757             cursize = os.path.getsize(encodeFilename(tmpfilename))
 758             if prevsize == cursize and retval == 1:
 759                 break
 760              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 761             if prevsize == cursize and retval == 2 and cursize > 1024:
 762                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 763                 retval = 0
 764                 break
 765         if retval == 0:
 766             fsize = os.path.getsize(encodeFilename(tmpfilename))
 767             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
 768             self.try_rename(tmpfilename, filename)
 769             self._hook_progress({
 770                 'downloaded_bytes': fsize,
 771                 'total_bytes': fsize,
 772                 'filename': filename,
 773                 'status': 'finished',
 774             })
 775             return True
 776         else:
 777             self.to_stderr(u"\n")
 778             self.report_error(u'rtmpdump exited with code %d' % retval)
 779             return False
 780
 781     def _do_download(self, filename, info_dict):
 782         url = info_dict['url']
 783
 784         # Check file already present
 785         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 786             self.report_file_already_downloaded(filename)
 787             self._hook_progress({
 788                 'filename': filename,
 789                 'status': 'finished',
 790             })
 791             return True
 792
 793         # Attempt to download using rtmpdump
 794         if url.startswith('rtmp'):
 795             return self._download_with_rtmpdump(filename, url,
 796                                                 info_dict.get('player_url', None),
 797                                                 info_dict.get('page_url', None),
 798                                                 info_dict.get('play_path', None))
 799
 800         tmpfilename = self.temp_name(filename)
 801         stream = None
 802
 803         # Do not include the Accept-Encoding header
 804         headers = {'Youtubedl-no-compression': 'True'}
 805         if 'user_agent' in info_dict:
 806             headers['Youtubedl-user-agent'] = info_dict['user_agent']
 807         basic_request = compat_urllib_request.Request(url, None, headers)
 808         request = compat_urllib_request.Request(url, None, headers)
 809
 810         if self.params.get('test', False):
 811             request.add_header('Range','bytes=0-10240')
 812
 813         # Establish possible resume length
 814         if os.path.isfile(encodeFilename(tmpfilename)):
 815             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 816         else:
 817             resume_len = 0
 818
 819         open_mode = 'wb'
 820         if resume_len != 0:
 821             if self.params.get('continuedl', False):
 822                 self.report_resuming_byte(resume_len)
 823                 request.add_header('Range','bytes=%d-' % resume_len)
 824                 open_mode = 'ab'
 825             else:
 826                 resume_len = 0
 827
 828         count = 0
 829         retries = self.params.get('retries', 0)
 830         while count <= retries:
 831             # Establish connection
 832             try:
 833                 if count == 0 and 'urlhandle' in info_dict:
 834                     data = info_dict['urlhandle']
 835                 data = compat_urllib_request.urlopen(request)
 836                 break
 837             except (compat_urllib_error.HTTPError, ) as err:
 838                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 839                     # Unexpected HTTP error
 840                     raise
 841                 elif err.code == 416:
 842                     # Unable to resume (requested range not satisfiable)
 843                     try:
 844                         # Open the connection again without the range header
 845                         data = compat_urllib_request.urlopen(basic_request)
 846                         content_length = data.info()['Content-Length']
 847                     except (compat_urllib_error.HTTPError, ) as err:
 848                         if err.code < 500 or err.code >= 600:
 849                             raise
 850                     else:
 851                         # Examine the reported length
 852                         if (content_length is not None and
 853                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 854                             # The file had already been fully downloaded.
 855                             # Explanation to the above condition: in issue #175 it was revealed that
 856                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 857                             # changing the file size slightly and causing problems for some users. So
 858                             # I decided to implement a suggested change and consider the file
 859                             # completely downloaded if the file size differs less than 100 bytes from
 860                             # the one in the hard drive.
 861                             self.report_file_already_downloaded(filename)
 862                             self.try_rename(tmpfilename, filename)
 863                             self._hook_progress({
 864                                 'filename': filename,
 865                                 'status': 'finished',
 866                             })
 867                             return True
 868                         else:
 869                             # The length does not match, we start the download over
 870                             self.report_unable_to_resume()
 871                             open_mode = 'wb'
 872                             break
 873             # Retry
 874             count += 1
 875             if count <= retries:
 876                 self.report_retry(count, retries)
 877
 878         if count > retries:
 879             self.report_error(u'giving up after %s retries' % retries)
 880             return False
 881
 882         data_len = data.info().get('Content-length', None)
 883         if data_len is not None:
 884             data_len = int(data_len) + resume_len
 885             min_data_len = self.params.get("min_filesize", None)
 886             max_data_len =  self.params.get("max_filesize", None)
 887             if min_data_len is not None and data_len < min_data_len:
 888                 self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
 889                 return False
 890             if max_data_len is not None and data_len > max_data_len:
 891                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
 892                 return False
 893
 894         data_len_str = self.format_bytes(data_len)
 895         byte_counter = 0 + resume_len
 896         block_size = self.params.get('buffersize', 1024)
 897         start = time.time()
 898         while True:
 899             # Download and write
 900             before = time.time()
 901             data_block = data.read(block_size)
 902             after = time.time()
 903             if len(data_block) == 0:
 904                 break
 905             byte_counter += len(data_block)
 906
 907             # Open file just in time
 908             if stream is None:
 909                 try:
 910                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 911                     assert stream is not None
 912                     filename = self.undo_temp_name(tmpfilename)
 913                     self.report_destination(filename)
 914                 except (OSError, IOError) as err:
 915                     self.report_error(u'unable to open for writing: %s' % str(err))
 916                     return False
 917             try:
 918                 stream.write(data_block)
 919             except (IOError, OSError) as err:
 920                 self.to_stderr(u"\n")
 921                 self.report_error(u'unable to write data: %s' % str(err))
 922                 return False
 923             if not self.params.get('noresizebuffer', False):
 924                 block_size = self.best_block_size(after - before, len(data_block))
 925
 926             # Progress message
 927             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 928             if data_len is None:
 929                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 930             else:
 931                 percent_str = self.calc_percent(byte_counter, data_len)
 932                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 933                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 934
 935             self._hook_progress({
 936                 'downloaded_bytes': byte_counter,
 937                 'total_bytes': data_len,
 938                 'tmpfilename': tmpfilename,
 939                 'filename': filename,
 940                 'status': 'downloading',
 941             })
 942
 943             # Apply rate limit
 944             self.slow_down(start, byte_counter - resume_len)
 945
 946         if stream is None:
 947             self.to_stderr(u"\n")
 948             self.report_error(u'Did not get any data blocks')
 949             return False
 950         stream.close()
 951         self.report_finish()
 952         if data_len is not None and byte_counter != data_len:
 953             raise ContentTooShortError(byte_counter, int(data_len))
 954         self.try_rename(tmpfilename, filename)
 955
 956         # Update file modification time
 957         if self.params.get('updatetime', True):
 958             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 959
 960         self._hook_progress({
 961             'downloaded_bytes': byte_counter,
 962             'total_bytes': byte_counter,
 963             'filename': filename,
 964             'status': 'finished',
 965         })
 966
 967         return True
 968
 969     def _hook_progress(self, status):
 970         for ph in self._progress_hooks:
 971             ph(status)
 972
 973     def add_progress_hook(self, ph):
 974         """ ph gets called on download progress, with a dictionary with the entries
 975         * filename: The final filename
 976         * status: One of "downloading" and "finished"
 977
 978         It can also have some of the following entries:
 979
 980         * downloaded_bytes: Bytes on disks
 981         * total_bytes: Total bytes, None if unknown
 982         * tmpfilename: The filename we're currently writing to
 983
 984         Hooks are guaranteed to be called at least once (with status "finished")
 985         if the download is successful.
 986         """
 987         self._progress_hooks.append(ph)