_ Git - youtube-dl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import math
   7 import io
   8 import os
   9 import re
  10 import shutil
  11 import socket
  12 import subprocess
  13 import sys
  14 import time
  15 import traceback
  16
  17 if os.name == 'nt':
  18     import ctypes
  19
  20 from .utils import *
  21 from .InfoExtractors import get_info_extractor
  22
  23
  24 class FileDownloader(object):
  25     """File Downloader class.
  26
  27     File downloader objects are the ones responsible of downloading the
  28     actual video file and writing it to disk if the user has requested
  29     it, among some other tasks. In most cases there should be one per
  30     program. As, given a video URL, the downloader doesn't know how to
  31     extract all the needed information, task that InfoExtractors do, it
  32     has to pass the URL to one of them.
  33
  34     For this, file downloader objects have a method that allows
  35     InfoExtractors to be registered in a given order. When it is passed
  36     a URL, the file downloader handles it to the first InfoExtractor it
  37     finds that reports being able to handle it. The InfoExtractor extracts
  38     all the information about the video or videos the URL refers to, and
  39     asks the FileDownloader to process the video information, possibly
  40     downloading the video.
  41
  42     File downloaders accept a lot of parameters. In order not to saturate
  43     the object constructor with arguments, it receives a dictionary of
  44     options instead. These options are available through the params
  45     attribute for the InfoExtractors to use. The FileDownloader also
  46     registers itself as the downloader in charge for the InfoExtractors
  47     that are added to it, so this is a "mutual registration".
  48
  49     Available options:
  50
  51     username:          Username for authentication purposes.
  52     password:          Password for authentication purposes.
  53     usenetrc:          Use netrc for authentication instead.
  54     quiet:             Do not print messages to stdout.
  55     forceurl:          Force printing final URL.
  56     forcetitle:        Force printing title.
  57     forcethumbnail:    Force printing thumbnail URL.
  58     forcedescription:  Force printing description.
  59     forcefilename:     Force printing final filename.
  60     simulate:          Do not download the video files.
  61     format:            Video format code.
  62     format_limit:      Highest quality format to try.
  63     outtmpl:           Template for output names.
  64     restrictfilenames: Do not allow "&" and spaces in file names
  65     ignoreerrors:      Do not stop on download errors.
  66     ratelimit:         Download speed limit, in bytes/sec.
  67     nooverwrites:      Prevent overwriting files.
  68     retries:           Number of times to retry for HTTP error 5xx
  69     buffersize:        Size of download buffer in bytes.
  70     noresizebuffer:    Do not automatically resize the download buffer.
  71     continuedl:        Try to continue downloads if possible.
  72     noprogress:        Do not print the progress bar.
  73     playliststart:     Playlist item to start at.
  74     playlistend:       Playlist item to end at.
  75     matchtitle:        Download only matching titles.
  76     rejecttitle:       Reject downloads for matching titles.
  77     logtostderr:       Log messages to stderr instead of stdout.
  78     consoletitle:      Display progress in console window's titlebar.
  79     nopart:            Do not use temporary .part files.
  80     updatetime:        Use the Last-modified header to set output file timestamps.
  81     writedescription:  Write the video description to a .description file
  82     writeinfojson:     Write the video description to a .info.json file
  83     writethumbnail:    Write the thumbnail image to a file
  84     writesubtitles:    Write the video subtitles to a file
  85     onlysubtitles:     Downloads only the subtitles of the video
  86     allsubtitles:      Downloads all the subtitles of the video
  87     listsubtitles:     Lists all available subtitles for the video
  88     subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
  89     subtitleslang:     Language of the subtitles to download
  90     test:              Download only first bytes to test the downloader.
  91     keepvideo:         Keep the video file after post-processing
  92     min_filesize:      Skip files smaller than this size
  93     max_filesize:      Skip files larger than this size
  94     daterange:         A DateRange object, download only if the upload_date is in the range.
  95     """
  96
  97     params = None
  98     _ies = []
  99     _pps = []
 100     _download_retcode = None
 101     _num_downloads = None
 102     _screen_file = None
 103
 104     def __init__(self, params):
 105         """Create a FileDownloader object with the given options."""
 106         self._ies = []
 107         self._pps = []
 108         self._progress_hooks = []
 109         self._download_retcode = 0
 110         self._num_downloads = 0
 111         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 112         self.params = params
 113
 114         if '%(stitle)s' in self.params['outtmpl']:
 115             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 116
 117     @staticmethod
 118     def format_bytes(bytes):
 119         if bytes is None:
 120             return 'N/A'
 121         if type(bytes) is str:
 122             bytes = float(bytes)
 123         if bytes == 0.0:
 124             exponent = 0
 125         else:
 126             exponent = int(math.log(bytes, 1024.0))
 127         suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
 128         converted = float(bytes) / float(1024 ** exponent)
 129         return '%.2f%s' % (converted, suffix)
 130
 131     @staticmethod
 132     def calc_percent(byte_counter, data_len):
 133         if data_len is None:
 134             return '---.-%'
 135         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 136
 137     @staticmethod
 138     def calc_eta(start, now, total, current):
 139         if total is None:
 140             return '--:--'
 141         dif = now - start
 142         if current == 0 or dif < 0.001: # One millisecond
 143             return '--:--'
 144         rate = float(current) / dif
 145         eta = int((float(total) - float(current)) / rate)
 146         (eta_mins, eta_secs) = divmod(eta, 60)
 147         if eta_mins > 99:
 148             return '--:--'
 149         return '%02d:%02d' % (eta_mins, eta_secs)
 150
 151     @staticmethod
 152     def calc_speed(start, now, bytes):
 153         dif = now - start
 154         if bytes == 0 or dif < 0.001: # One millisecond
 155             return '%10s' % '---b/s'
 156         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 157
 158     @staticmethod
 159     def best_block_size(elapsed_time, bytes):
 160         new_min = max(bytes / 2.0, 1.0)
 161         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 162         if elapsed_time < 0.001:
 163             return int(new_max)
 164         rate = bytes / elapsed_time
 165         if rate > new_max:
 166             return int(new_max)
 167         if rate < new_min:
 168             return int(new_min)
 169         return int(rate)
 170
 171     @staticmethod
 172     def parse_bytes(bytestr):
 173         """Parse a string indicating a byte quantity into an integer."""
 174         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 175         if matchobj is None:
 176             return None
 177         number = float(matchobj.group(1))
 178         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 179         return int(round(number * multiplier))
 180
 181     def add_info_extractor(self, ie):
 182         """Add an InfoExtractor object to the end of the list."""
 183         self._ies.append(ie)
 184         ie.set_downloader(self)
 185
 186     def add_post_processor(self, pp):
 187         """Add a PostProcessor object to the end of the chain."""
 188         self._pps.append(pp)
 189         pp.set_downloader(self)
 190
 191     def to_screen(self, message, skip_eol=False):
 192         """Print message to stdout if not in quiet mode."""
 193         assert type(message) == type(u'')
 194         if not self.params.get('quiet', False):
 195             terminator = [u'\n', u''][skip_eol]
 196             output = message + terminator
 197             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 198                 output = output.encode(preferredencoding(), 'ignore')
 199             self._screen_file.write(output)
 200             self._screen_file.flush()
 201
 202     def to_stderr(self, message):
 203         """Print message to stderr."""
 204         assert type(message) == type(u'')
 205         output = message + u'\n'
 206         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 207             output = output.encode(preferredencoding())
 208         sys.stderr.write(output)
 209
 210     def to_cons_title(self, message):
 211         """Set console/terminal window title to message."""
 212         if not self.params.get('consoletitle', False):
 213             return
 214         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 215             # c_wchar_p() might not be necessary if `message` is
 216             # already of type unicode()
 217             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 218         elif 'TERM' in os.environ:
 219             self.to_screen('\033]0;%s\007' % message, skip_eol=True)
 220
 221     def fixed_template(self):
 222         """Checks if the output template is fixed."""
 223         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 224
 225     def trouble(self, message=None, tb=None):
 226         """Determine action to take when a download problem appears.
 227
 228         Depending on if the downloader has been configured to ignore
 229         download errors or not, this method may throw an exception or
 230         not when errors are found, after printing the message.
 231
 232         tb, if given, is additional traceback information.
 233         """
 234         if message is not None:
 235             self.to_stderr(message)
 236         if self.params.get('verbose'):
 237             if tb is None:
 238                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 239                     tb = u''
 240                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 241                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 242                     tb += compat_str(traceback.format_exc())
 243                 else:
 244                     tb_data = traceback.format_list(traceback.extract_stack())
 245                     tb = u''.join(tb_data)
 246             self.to_stderr(tb)
 247         if not self.params.get('ignoreerrors', False):
 248             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 249                 exc_info = sys.exc_info()[1].exc_info
 250             else:
 251                 exc_info = sys.exc_info()
 252             raise DownloadError(message, exc_info)
 253         self._download_retcode = 1
 254
 255     def report_warning(self, message):
 256         '''
 257         Print the message to stderr, it will be prefixed with 'WARNING:'
 258         If stderr is a tty file the 'WARNING:' will be colored
 259         '''
 260         if sys.stderr.isatty() and os.name != 'nt':
 261             _msg_header=u'\033[0;33mWARNING:\033[0m'
 262         else:
 263             _msg_header=u'WARNING:'
 264         warning_message=u'%s %s' % (_msg_header,message)
 265         self.to_stderr(warning_message)
 266
 267     def report_error(self, message, tb=None):
 268         '''
 269         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 270         in red if stderr is a tty file.
 271         '''
 272         if sys.stderr.isatty() and os.name != 'nt':
 273             _msg_header = u'\033[0;31mERROR:\033[0m'
 274         else:
 275             _msg_header = u'ERROR:'
 276         error_message = u'%s %s' % (_msg_header, message)
 277         self.trouble(error_message, tb)
 278
 279     def slow_down(self, start_time, byte_counter):
 280         """Sleep if the download speed is over the rate limit."""
 281         rate_limit = self.params.get('ratelimit', None)
 282         if rate_limit is None or byte_counter == 0:
 283             return
 284         now = time.time()
 285         elapsed = now - start_time
 286         if elapsed <= 0.0:
 287             return
 288         speed = float(byte_counter) / elapsed
 289         if speed > rate_limit:
 290             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 291
 292     def temp_name(self, filename):
 293         """Returns a temporary filename for the given filename."""
 294         if self.params.get('nopart', False) or filename == u'-' or \
 295                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 296             return filename
 297         return filename + u'.part'
 298
 299     def undo_temp_name(self, filename):
 300         if filename.endswith(u'.part'):
 301             return filename[:-len(u'.part')]
 302         return filename
 303
 304     def try_rename(self, old_filename, new_filename):
 305         try:
 306             if old_filename == new_filename:
 307                 return
 308             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 309         except (IOError, OSError) as err:
 310             self.report_error(u'unable to rename file')
 311
 312     def try_utime(self, filename, last_modified_hdr):
 313         """Try to set the last-modified time of the given file."""
 314         if last_modified_hdr is None:
 315             return
 316         if not os.path.isfile(encodeFilename(filename)):
 317             return
 318         timestr = last_modified_hdr
 319         if timestr is None:
 320             return
 321         filetime = timeconvert(timestr)
 322         if filetime is None:
 323             return filetime
 324         try:
 325             os.utime(filename, (time.time(), filetime))
 326         except:
 327             pass
 328         return filetime
 329
 330     def report_writedescription(self, descfn):
 331         """ Report that the description file is being written """
 332         self.to_screen(u'[info] Writing video description to: ' + descfn)
 333
 334     def report_writesubtitles(self, sub_filename):
 335         """ Report that the subtitles file is being written """
 336         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 337
 338     def report_writeinfojson(self, infofn):
 339         """ Report that the metadata file has been written """
 340         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 341
 342     def report_destination(self, filename):
 343         """Report destination filename."""
 344         self.to_screen(u'[download] Destination: ' + filename)
 345
 346     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 347         """Report download progress."""
 348         if self.params.get('noprogress', False):
 349             return
 350         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
 351         if self.params.get('progress_with_newline', False):
 352             self.to_screen(u'[download] %s of %s at %s ETA %s' %
 353                 (percent_str, data_len_str, speed_str, eta_str))
 354         else:
 355             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
 356                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 357         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 358                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 359
 360     def report_resuming_byte(self, resume_len):
 361         """Report attempt to resume at given byte."""
 362         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 363
 364     def report_retry(self, count, retries):
 365         """Report retry in case of HTTP error 5xx"""
 366         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 367
 368     def report_file_already_downloaded(self, file_name):
 369         """Report file has already been fully downloaded."""
 370         try:
 371             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 372         except (UnicodeEncodeError) as err:
 373             self.to_screen(u'[download] The file has already been downloaded')
 374
 375     def report_unable_to_resume(self):
 376         """Report it was impossible to resume download."""
 377         self.to_screen(u'[download] Unable to resume')
 378
 379     def report_finish(self):
 380         """Report download finished."""
 381         if self.params.get('noprogress', False):
 382             self.to_screen(u'[download] Download completed')
 383         else:
 384             self.to_screen(u'')
 385
 386     def increment_downloads(self):
 387         """Increment the ordinal that assigns a number to each file."""
 388         self._num_downloads += 1
 389
 390     def prepare_filename(self, info_dict):
 391         """Generate the output filename."""
 392         try:
 393             template_dict = dict(info_dict)
 394
 395             template_dict['epoch'] = int(time.time())
 396             autonumber_size = self.params.get('autonumber_size')
 397             if autonumber_size is None:
 398                 autonumber_size = 5
 399             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 400             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 401             if template_dict['playlist_index'] is not None:
 402                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 403
 404             sanitize = lambda k,v: sanitize_filename(
 405                 u'NA' if v is None else compat_str(v),
 406                 restricted=self.params.get('restrictfilenames'),
 407                 is_id=(k==u'id'))
 408             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 409
 410             filename = self.params['outtmpl'] % template_dict
 411             return filename
 412         except KeyError as err:
 413             self.report_error(u'Erroneous output template')
 414             return None
 415         except ValueError as err:
 416             self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
 417             return None
 418
 419     def _match_entry(self, info_dict):
 420         """ Returns None iff the file should be downloaded """
 421
 422         title = info_dict['title']
 423         matchtitle = self.params.get('matchtitle', False)
 424         if matchtitle:
 425             if not re.search(matchtitle, title, re.IGNORECASE):
 426                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 427         rejecttitle = self.params.get('rejecttitle', False)
 428         if rejecttitle:
 429             if re.search(rejecttitle, title, re.IGNORECASE):
 430                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 431         date = info_dict.get('upload_date', None)
 432         if date is not None:
 433             dateRange = self.params.get('daterange', DateRange())
 434             if date not in dateRange:
 435                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 436         return None
 437
 438     def extract_info(self, url, download=True, ie_key=None):
 439         '''
 440         Returns a list with a dictionary for each video we find.
 441         If 'download', also downloads the videos.
 442          '''
 443
 444         if ie_key:
 445             ie = get_info_extractor(ie_key)()
 446             ie.set_downloader(self)
 447             ies = [ie]
 448         else:
 449             ies = self._ies
 450
 451         for ie in ies:
 452             if not ie.suitable(url):
 453                 continue
 454
 455             if not ie.working():
 456                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 457                                     u'and will probably not work.')
 458
 459             try:
 460                 ie_result = ie.extract(url)
 461                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 462                     break
 463                 if isinstance(ie_result, list):
 464                     # Backwards compatibility: old IE result format
 465                     ie_result = {
 466                         '_type': 'compat_list',
 467                         'entries': ie_result,
 468                     }
 469                 if 'extractor' not in ie_result:
 470                     ie_result['extractor'] = ie.IE_NAME
 471                 return self.process_ie_result(ie_result, download=download)
 472             except ExtractorError as de: # An error we somewhat expected
 473                 self.report_error(compat_str(de), de.format_traceback())
 474                 break
 475             except Exception as e:
 476                 if self.params.get('ignoreerrors', False):
 477                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 478                     break
 479                 else:
 480                     raise
 481         else:
 482             self.report_error(u'no suitable InfoExtractor: %s' % url)
 483
 484     def process_ie_result(self, ie_result, download=True):
 485         """
 486         Take the result of the ie(may be modified) and resolve all unresolved
 487         references (URLs, playlist items).
 488
 489         It will also download the videos if 'download'.
 490         Returns the resolved ie_result.
 491         """
 492
 493         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 494         if result_type == 'video':
 495             if 'playlist' not in ie_result:
 496                 # It isn't part of a playlist
 497                 ie_result['playlist'] = None
 498                 ie_result['playlist_index'] = None
 499             if download:
 500                 self.process_info(ie_result)
 501             return ie_result
 502         elif result_type == 'url':
 503             return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key'))
 504         elif result_type == 'playlist':
 505             # We process each entry in the playlist
 506             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 507             self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
 508
 509             playlist_results = []
 510
 511             n_all_entries = len(ie_result['entries'])
 512             playliststart = self.params.get('playliststart', 1) - 1
 513             playlistend = self.params.get('playlistend', -1)
 514
 515             if playlistend == -1:
 516                 entries = ie_result['entries'][playliststart:]
 517             else:
 518                 entries = ie_result['entries'][playliststart:playlistend]
 519
 520             n_entries = len(entries)
 521
 522             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 523                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 524
 525             for i,entry in enumerate(entries,1):
 526                 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
 527                 entry['playlist'] = playlist
 528                 entry['playlist_index'] = i + playliststart
 529                 entry_result = self.process_ie_result(entry, download=download)
 530                 playlist_results.append(entry_result)
 531             ie_result['entries'] = playlist_results
 532             return ie_result
 533         elif result_type == 'compat_list':
 534             def _fixup(r):
 535                 r.setdefault('extractor', ie_result['extractor'])
 536                 return r
 537             ie_result['entries'] = [
 538                 self.process_ie_result(_fixup(r), download=download)
 539                 for r in ie_result['entries']
 540             ]
 541             return ie_result
 542         else:
 543             raise Exception('Invalid result type: %s' % result_type)
 544
 545     def process_info(self, info_dict):
 546         """Process a single resolved IE result."""
 547
 548         assert info_dict.get('_type', 'video') == 'video'
 549         #We increment the download the download count here to match the previous behaviour.
 550         self.increment_downloads()
 551
 552         info_dict['fulltitle'] = info_dict['title']
 553         if len(info_dict['title']) > 200:
 554             info_dict['title'] = info_dict['title'][:197] + u'...'
 555
 556         # Keep for backwards compatibility
 557         info_dict['stitle'] = info_dict['title']
 558
 559         if not 'format' in info_dict:
 560             info_dict['format'] = info_dict['ext']
 561
 562         reason = self._match_entry(info_dict)
 563         if reason is not None:
 564             self.to_screen(u'[download] ' + reason)
 565             return
 566
 567         max_downloads = self.params.get('max_downloads')
 568         if max_downloads is not None:
 569             if self._num_downloads > int(max_downloads):
 570                 raise MaxDownloadsReached()
 571
 572         filename = self.prepare_filename(info_dict)
 573
 574         # Forced printings
 575         if self.params.get('forcetitle', False):
 576             compat_print(info_dict['title'])
 577         if self.params.get('forceurl', False):
 578             compat_print(info_dict['url'])
 579         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 580             compat_print(info_dict['thumbnail'])
 581         if self.params.get('forcedescription', False) and 'description' in info_dict:
 582             compat_print(info_dict['description'])
 583         if self.params.get('forcefilename', False) and filename is not None:
 584             compat_print(filename)
 585         if self.params.get('forceformat', False):
 586             compat_print(info_dict['format'])
 587
 588         # Do nothing else if in simulate mode
 589         if self.params.get('simulate', False):
 590             return
 591
 592         if filename is None:
 593             return
 594
 595         try:
 596             dn = os.path.dirname(encodeFilename(filename))
 597             if dn != '' and not os.path.exists(dn): # dn is already encoded
 598                 os.makedirs(dn)
 599         except (OSError, IOError) as err:
 600             self.report_error(u'unable to create directory ' + compat_str(err))
 601             return
 602
 603         if self.params.get('writedescription', False):
 604             try:
 605                 descfn = filename + u'.description'
 606                 self.report_writedescription(descfn)
 607                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 608                     descfile.write(info_dict['description'])
 609             except (OSError, IOError):
 610                 self.report_error(u'Cannot write description file ' + descfn)
 611                 return
 612
 613         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 614             # subtitles download errors are already managed as troubles in relevant IE
 615             # that way it will silently go on when used with unsupporting IE
 616             subtitle = info_dict['subtitles'][0]
 617             (sub_error, sub_lang, sub) = subtitle
 618             sub_format = self.params.get('subtitlesformat')
 619             if sub_error:
 620                 self.report_warning("Some error while getting the subtitles")
 621             else:
 622                 try:
 623                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 624                     self.report_writesubtitles(sub_filename)
 625                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 626                         subfile.write(sub)
 627                 except (OSError, IOError):
 628                     self.report_error(u'Cannot write subtitles file ' + descfn)
 629                     return
 630             if self.params.get('onlysubtitles', False):
 631                 return
 632
 633         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 634             subtitles = info_dict['subtitles']
 635             sub_format = self.params.get('subtitlesformat')
 636             for subtitle in subtitles:
 637                 (sub_error, sub_lang, sub) = subtitle
 638                 if sub_error:
 639                     self.report_warning("Some error while getting the subtitles")
 640                 else:
 641                     try:
 642                         sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 643                         self.report_writesubtitles(sub_filename)
 644                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 645                                 subfile.write(sub)
 646                     except (OSError, IOError):
 647                         self.report_error(u'Cannot write subtitles file ' + descfn)
 648                         return
 649             if self.params.get('onlysubtitles', False):
 650                 return
 651
 652         if self.params.get('writeinfojson', False):
 653             infofn = filename + u'.info.json'
 654             self.report_writeinfojson(infofn)
 655             try:
 656                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
 657                 write_json_file(json_info_dict, encodeFilename(infofn))
 658             except (OSError, IOError):
 659                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 660                 return
 661
 662         if self.params.get('writethumbnail', False):
 663             if 'thumbnail' in info_dict:
 664                 thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
 665                 if not thumb_format:
 666                     thumb_format = 'jpg'
 667                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 668                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 669                                (info_dict['extractor'], info_dict['id']))
 670                 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 671                 with open(thumb_filename, 'wb') as thumbf:
 672                     shutil.copyfileobj(uf, thumbf)
 673                 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 674                                (info_dict['extractor'], info_dict['id'], thumb_filename))
 675
 676         if not self.params.get('skip_download', False):
 677             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 678                 success = True
 679             else:
 680                 try:
 681                     success = self._do_download(filename, info_dict)
 682                 except (OSError, IOError) as err:
 683                     raise UnavailableVideoError()
 684                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 685                     self.report_error(u'unable to download video data: %s' % str(err))
 686                     return
 687                 except (ContentTooShortError, ) as err:
 688                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 689                     return
 690
 691             if success:
 692                 try:
 693                     self.post_process(filename, info_dict)
 694                 except (PostProcessingError) as err:
 695                     self.report_error(u'postprocessing: %s' % str(err))
 696                     return
 697
 698     def download(self, url_list):
 699         """Download a given list of URLs."""
 700         if len(url_list) > 1 and self.fixed_template():
 701             raise SameFileError(self.params['outtmpl'])
 702
 703         for url in url_list:
 704             try:
 705                 #It also downloads the videos
 706                 videos = self.extract_info(url)
 707             except UnavailableVideoError:
 708                 self.report_error(u'unable to download video')
 709             except MaxDownloadsReached:
 710                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 711                 raise
 712
 713         return self._download_retcode
 714
 715     def post_process(self, filename, ie_info):
 716         """Run all the postprocessors on the given file."""
 717         info = dict(ie_info)
 718         info['filepath'] = filename
 719         keep_video = None
 720         for pp in self._pps:
 721             try:
 722                 keep_video_wish,new_info = pp.run(info)
 723                 if keep_video_wish is not None:
 724                     if keep_video_wish:
 725                         keep_video = keep_video_wish
 726                     elif keep_video is None:
 727                         # No clear decision yet, let IE decide
 728                         keep_video = keep_video_wish
 729             except PostProcessingError as e:
 730                 self.to_stderr(u'ERROR: ' + e.msg)
 731         if keep_video is False and not self.params.get('keepvideo', False):
 732             try:
 733                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 734                 os.remove(encodeFilename(filename))
 735             except (IOError, OSError):
 736                 self.report_warning(u'Unable to remove downloaded video file')
 737
 738     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
 739         self.report_destination(filename)
 740         tmpfilename = self.temp_name(filename)
 741
 742         # Check for rtmpdump first
 743         try:
 744             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 745         except (OSError, IOError):
 746             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
 747             return False
 748
 749         # Download using rtmpdump. rtmpdump returns exit code 2 when
 750         # the connection was interrumpted and resuming appears to be
 751         # possible. This is part of rtmpdump's normal usage, AFAIK.
 752         basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
 753         if player_url is not None:
 754             basic_args += ['-W', player_url]
 755         if page_url is not None:
 756             basic_args += ['--pageUrl', page_url]
 757         if play_path is not None:
 758             basic_args += ['-y', play_path]
 759         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 760         if self.params.get('verbose', False):
 761             try:
 762                 import pipes
 763                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 764             except ImportError:
 765                 shell_quote = repr
 766             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 767         retval = subprocess.call(args)
 768         while retval == 2 or retval == 1:
 769             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 770             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 771             time.sleep(5.0) # This seems to be needed
 772             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 773             cursize = os.path.getsize(encodeFilename(tmpfilename))
 774             if prevsize == cursize and retval == 1:
 775                 break
 776              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 777             if prevsize == cursize and retval == 2 and cursize > 1024:
 778                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 779                 retval = 0
 780                 break
 781         if retval == 0:
 782             fsize = os.path.getsize(encodeFilename(tmpfilename))
 783             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
 784             self.try_rename(tmpfilename, filename)
 785             self._hook_progress({
 786                 'downloaded_bytes': fsize,
 787                 'total_bytes': fsize,
 788                 'filename': filename,
 789                 'status': 'finished',
 790             })
 791             return True
 792         else:
 793             self.to_stderr(u"\n")
 794             self.report_error(u'rtmpdump exited with code %d' % retval)
 795             return False
 796
 797     def _do_download(self, filename, info_dict):
 798         url = info_dict['url']
 799
 800         # Check file already present
 801         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 802             self.report_file_already_downloaded(filename)
 803             self._hook_progress({
 804                 'filename': filename,
 805                 'status': 'finished',
 806             })
 807             return True
 808
 809         # Attempt to download using rtmpdump
 810         if url.startswith('rtmp'):
 811             return self._download_with_rtmpdump(filename, url,
 812                                                 info_dict.get('player_url', None),
 813                                                 info_dict.get('page_url', None),
 814                                                 info_dict.get('play_path', None))
 815
 816         tmpfilename = self.temp_name(filename)
 817         stream = None
 818
 819         # Do not include the Accept-Encoding header
 820         headers = {'Youtubedl-no-compression': 'True'}
 821         if 'user_agent' in info_dict:
 822             headers['Youtubedl-user-agent'] = info_dict['user_agent']
 823         basic_request = compat_urllib_request.Request(url, None, headers)
 824         request = compat_urllib_request.Request(url, None, headers)
 825
 826         if self.params.get('test', False):
 827             request.add_header('Range','bytes=0-10240')
 828
 829         # Establish possible resume length
 830         if os.path.isfile(encodeFilename(tmpfilename)):
 831             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 832         else:
 833             resume_len = 0
 834
 835         open_mode = 'wb'
 836         if resume_len != 0:
 837             if self.params.get('continuedl', False):
 838                 self.report_resuming_byte(resume_len)
 839                 request.add_header('Range','bytes=%d-' % resume_len)
 840                 open_mode = 'ab'
 841             else:
 842                 resume_len = 0
 843
 844         count = 0
 845         retries = self.params.get('retries', 0)
 846         while count <= retries:
 847             # Establish connection
 848             try:
 849                 if count == 0 and 'urlhandle' in info_dict:
 850                     data = info_dict['urlhandle']
 851                 data = compat_urllib_request.urlopen(request)
 852                 break
 853             except (compat_urllib_error.HTTPError, ) as err:
 854                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 855                     # Unexpected HTTP error
 856                     raise
 857                 elif err.code == 416:
 858                     # Unable to resume (requested range not satisfiable)
 859                     try:
 860                         # Open the connection again without the range header
 861                         data = compat_urllib_request.urlopen(basic_request)
 862                         content_length = data.info()['Content-Length']
 863                     except (compat_urllib_error.HTTPError, ) as err:
 864                         if err.code < 500 or err.code >= 600:
 865                             raise
 866                     else:
 867                         # Examine the reported length
 868                         if (content_length is not None and
 869                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 870                             # The file had already been fully downloaded.
 871                             # Explanation to the above condition: in issue #175 it was revealed that
 872                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 873                             # changing the file size slightly and causing problems for some users. So
 874                             # I decided to implement a suggested change and consider the file
 875                             # completely downloaded if the file size differs less than 100 bytes from
 876                             # the one in the hard drive.
 877                             self.report_file_already_downloaded(filename)
 878                             self.try_rename(tmpfilename, filename)
 879                             self._hook_progress({
 880                                 'filename': filename,
 881                                 'status': 'finished',
 882                             })
 883                             return True
 884                         else:
 885                             # The length does not match, we start the download over
 886                             self.report_unable_to_resume()
 887                             open_mode = 'wb'
 888                             break
 889             # Retry
 890             count += 1
 891             if count <= retries:
 892                 self.report_retry(count, retries)
 893
 894         if count > retries:
 895             self.report_error(u'giving up after %s retries' % retries)
 896             return False
 897
 898         data_len = data.info().get('Content-length', None)
 899         if data_len is not None:
 900             data_len = int(data_len) + resume_len
 901             min_data_len = self.params.get("min_filesize", None)
 902             max_data_len =  self.params.get("max_filesize", None)
 903             if min_data_len is not None and data_len < min_data_len:
 904                 self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
 905                 return False
 906             if max_data_len is not None and data_len > max_data_len:
 907                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
 908                 return False
 909
 910         data_len_str = self.format_bytes(data_len)
 911         byte_counter = 0 + resume_len
 912         block_size = self.params.get('buffersize', 1024)
 913         start = time.time()
 914         while True:
 915             # Download and write
 916             before = time.time()
 917             data_block = data.read(block_size)
 918             after = time.time()
 919             if len(data_block) == 0:
 920                 break
 921             byte_counter += len(data_block)
 922
 923             # Open file just in time
 924             if stream is None:
 925                 try:
 926                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 927                     assert stream is not None
 928                     filename = self.undo_temp_name(tmpfilename)
 929                     self.report_destination(filename)
 930                 except (OSError, IOError) as err:
 931                     self.report_error(u'unable to open for writing: %s' % str(err))
 932                     return False
 933             try:
 934                 stream.write(data_block)
 935             except (IOError, OSError) as err:
 936                 self.to_stderr(u"\n")
 937                 self.report_error(u'unable to write data: %s' % str(err))
 938                 return False
 939             if not self.params.get('noresizebuffer', False):
 940                 block_size = self.best_block_size(after - before, len(data_block))
 941
 942             # Progress message
 943             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 944             if data_len is None:
 945                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 946             else:
 947                 percent_str = self.calc_percent(byte_counter, data_len)
 948                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 949                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 950
 951             self._hook_progress({
 952                 'downloaded_bytes': byte_counter,
 953                 'total_bytes': data_len,
 954                 'tmpfilename': tmpfilename,
 955                 'filename': filename,
 956                 'status': 'downloading',
 957             })
 958
 959             # Apply rate limit
 960             self.slow_down(start, byte_counter - resume_len)
 961
 962         if stream is None:
 963             self.to_stderr(u"\n")
 964             self.report_error(u'Did not get any data blocks')
 965             return False
 966         stream.close()
 967         self.report_finish()
 968         if data_len is not None and byte_counter != data_len:
 969             raise ContentTooShortError(byte_counter, int(data_len))
 970         self.try_rename(tmpfilename, filename)
 971
 972         # Update file modification time
 973         if self.params.get('updatetime', True):
 974             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 975
 976         self._hook_progress({
 977             'downloaded_bytes': byte_counter,
 978             'total_bytes': byte_counter,
 979             'filename': filename,
 980             'status': 'finished',
 981         })
 982
 983         return True
 984
 985     def _hook_progress(self, status):
 986         for ph in self._progress_hooks:
 987             ph(status)
 988
 989     def add_progress_hook(self, ph):
 990         """ ph gets called on download progress, with a dictionary with the entries
 991         * filename: The final filename
 992         * status: One of "downloading" and "finished"
 993
 994         It can also have some of the following entries:
 995
 996         * downloaded_bytes: Bytes on disks
 997         * total_bytes: Total bytes, None if unknown
 998         * tmpfilename: The filename we're currently writing to
 999
1000         Hooks are guaranteed to be called at least once (with status "finished")
1001         if the download is successful.
1002         """
1003         self._progress_hooks.append(ph)