_ Git - youtube-dl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import math
   7 import io
   8 import os
   9 import re
  10 import shutil
  11 import socket
  12 import subprocess
  13 import sys
  14 import time
  15 import traceback
  16
  17 if os.name == 'nt':
  18     import ctypes
  19
  20 from .utils import *
  21 from .InfoExtractors import get_info_extractor
  22
  23
  24 class FileDownloader(object):
  25     """File Downloader class.
  26
  27     File downloader objects are the ones responsible for downloading the
  28     actual video file and writing it to disk if the user has requested
  29     it, among some other tasks. In most cases there should be one per
  30     program. As, given a video URL, the downloader doesn't know how to
  31     extract all the needed information, task that InfoExtractors do, it
  32     has to pass the URL to one of them.
  33
  34     For this, file downloader objects have a method that allows
  35     InfoExtractors to be registered in a given order. When it is passed
  36     a URL, the file downloader hands it to the first InfoExtractor it
  37     finds that reports being able to handle it. The InfoExtractor extracts
  38     all the information about the video or videos the URL refers to, and
  39     asks the FileDownloader to process the video information, possibly
  40     downloading the video.
  41
  42     File downloaders accept a lot of parameters. In order not to saturate
  43     the object constructor with arguments, it receives a dictionary of
  44     options instead. These options are available through the params
  45     attribute for the InfoExtractors to use. The FileDownloader also
  46     registers itself as the downloader in charge for the InfoExtractors
  47     that are added to it, so this is a "mutual registration".
  48
  49     Available options:
  50
  51     username:          Username for authentication purposes.
  52     password:          Password for authentication purposes.
  53     usenetrc:          Use netrc for authentication instead.
  54     quiet:             Do not print messages to stdout.
  55     forceurl:          Force printing final URL.
  56     forcetitle:        Force printing title.
  57     forcethumbnail:    Force printing thumbnail URL.
  58     forcedescription:  Force printing description.
  59     forcefilename:     Force printing final filename.
  60     simulate:          Do not download the video files.
  61     format:            Video format code.
  62     format_limit:      Highest quality format to try.
  63     outtmpl:           Template for output names.
  64     restrictfilenames: Do not allow "&" and spaces in file names
  65     ignoreerrors:      Do not stop on download errors.
  66     ratelimit:         Download speed limit, in bytes/sec.
  67     nooverwrites:      Prevent overwriting files.
  68     retries:           Number of times to retry for HTTP error 5xx
  69     buffersize:        Size of download buffer in bytes.
  70     noresizebuffer:    Do not automatically resize the download buffer.
  71     continuedl:        Try to continue downloads if possible.
  72     noprogress:        Do not print the progress bar.
  73     playliststart:     Playlist item to start at.
  74     playlistend:       Playlist item to end at.
  75     matchtitle:        Download only matching titles.
  76     rejecttitle:       Reject downloads for matching titles.
  77     logtostderr:       Log messages to stderr instead of stdout.
  78     consoletitle:      Display progress in console window's titlebar.
  79     nopart:            Do not use temporary .part files.
  80     updatetime:        Use the Last-modified header to set output file timestamps.
  81     writedescription:  Write the video description to a .description file
  82     writeinfojson:     Write the video metadata to a .info.json file
  83     writethumbnail:    Write the thumbnail image to a file
  84     writesubtitles:    Write the video subtitles to a file
  85     onlysubtitles:     Downloads only the subtitles of the video
  86     allsubtitles:      Downloads all the subtitles of the video
  87     listsubtitles:     Lists all available subtitles for the video
  88     subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
  89     subtitleslang:     Language of the subtitles to download
  90     test:              Download only first bytes to test the downloader.
  91     keepvideo:         Keep the video file after post-processing
  92     min_filesize:      Skip files smaller than this size
  93     max_filesize:      Skip files larger than this size
  94     daterange:         A DateRange object, download only if the upload_date is in the range.
  95     """
  96
  97     params = None
  98     _ies = []
  99     _pps = []
 100     _download_retcode = None
 101     _num_downloads = None
 102     _screen_file = None
 103
 104     def __init__(self, params):
 105         """Create a FileDownloader object with the given options."""
 106         self._ies = []
 107         self._pps = []
 108         self._progress_hooks = []
 109         self._download_retcode = 0
 110         self._num_downloads = 0
 111         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 112         self.params = params
 113
 114         if '%(stitle)s' in self.params['outtmpl']:
 115             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 116
 117     @staticmethod
 118     def format_bytes(bytes):
 119         if bytes is None:
 120             return 'N/A'
 121         if type(bytes) is str:
 122             bytes = float(bytes)
 123         if bytes == 0.0:
 124             exponent = 0
 125         else:
 126             exponent = int(math.log(bytes, 1024.0))
 127         suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
 128         converted = float(bytes) / float(1024 ** exponent)
 129         return '%.2f%s' % (converted, suffix)
 130
 131     @staticmethod
 132     def calc_percent(byte_counter, data_len):
 133         if data_len is None:
 134             return '---.-%'
 135         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 136
 137     @staticmethod
 138     def calc_eta(start, now, total, current):
 139         if total is None:
 140             return '--:--'
 141         dif = now - start
 142         if current == 0 or dif < 0.001: # One millisecond
 143             return '--:--'
 144         rate = float(current) / dif
 145         eta = int((float(total) - float(current)) / rate)
 146         (eta_mins, eta_secs) = divmod(eta, 60)
 147         if eta_mins > 99:
 148             return '--:--'
 149         return '%02d:%02d' % (eta_mins, eta_secs)
 150
 151     @staticmethod
 152     def calc_speed(start, now, bytes):
 153         dif = now - start
 154         if bytes == 0 or dif < 0.001: # One millisecond
 155             return '%10s' % '---b/s'
 156         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 157
 158     @staticmethod
 159     def best_block_size(elapsed_time, bytes):
 160         new_min = max(bytes / 2.0, 1.0)
 161         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 162         if elapsed_time < 0.001:
 163             return int(new_max)
 164         rate = bytes / elapsed_time
 165         if rate > new_max:
 166             return int(new_max)
 167         if rate < new_min:
 168             return int(new_min)
 169         return int(rate)
 170
 171     @staticmethod
 172     def parse_bytes(bytestr):
 173         """Parse a string indicating a byte quantity into an integer."""
 174         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 175         if matchobj is None:
 176             return None
 177         number = float(matchobj.group(1))
 178         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 179         return int(round(number * multiplier))
 180
 181     def add_info_extractor(self, ie):
 182         """Add an InfoExtractor object to the end of the list."""
 183         self._ies.append(ie)
 184         ie.set_downloader(self)
 185
 186     def add_post_processor(self, pp):
 187         """Add a PostProcessor object to the end of the chain."""
 188         self._pps.append(pp)
 189         pp.set_downloader(self)
 190
 191     def to_screen(self, message, skip_eol=False):
 192         """Print message to stdout if not in quiet mode."""
 193         assert type(message) == type(u'')
 194         if not self.params.get('quiet', False):
 195             terminator = [u'\n', u''][skip_eol]
 196             output = message + terminator
 197             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 198                 output = output.encode(preferredencoding(), 'ignore')
 199             self._screen_file.write(output)
 200             self._screen_file.flush()
 201
 202     def to_stderr(self, message):
 203         """Print message to stderr."""
 204         assert type(message) == type(u'')
 205         output = message + u'\n'
 206         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 207             output = output.encode(preferredencoding())
 208         sys.stderr.write(output)
 209
 210     def to_cons_title(self, message):
 211         """Set console/terminal window title to message."""
 212         if not self.params.get('consoletitle', False):
 213             return
 214         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 215             # c_wchar_p() might not be necessary if `message` is
 216             # already of type unicode()
 217             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 218         elif 'TERM' in os.environ:
 219             self.to_screen('\033]0;%s\007' % message, skip_eol=True)
 220
 221     def fixed_template(self):
 222         """Checks if the output template is fixed."""
 223         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 224
 225     def trouble(self, message=None, tb=None):
 226         """Determine action to take when a download problem appears.
 227
 228         Depending on if the downloader has been configured to ignore
 229         download errors or not, this method may throw an exception or
 230         not when errors are found, after printing the message.
 231
 232         tb, if given, is additional traceback information.
 233         """
 234         if message is not None:
 235             self.to_stderr(message)
 236         if self.params.get('verbose'):
 237             if tb is None:
 238                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 239                     tb = u''
 240                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 241                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 242                     tb += compat_str(traceback.format_exc())
 243                 else:
 244                     tb_data = traceback.format_list(traceback.extract_stack())
 245                     tb = u''.join(tb_data)
 246             self.to_stderr(tb)
 247         if not self.params.get('ignoreerrors', False):
 248             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 249                 exc_info = sys.exc_info()[1].exc_info
 250             else:
 251                 exc_info = sys.exc_info()
 252             raise DownloadError(message, exc_info)
 253         self._download_retcode = 1
 254
 255     def report_warning(self, message):
 256         '''
 257         Print the message to stderr, it will be prefixed with 'WARNING:'
 258         If stderr is a tty file the 'WARNING:' will be colored
 259         '''
 260         if sys.stderr.isatty() and os.name != 'nt':
 261             _msg_header=u'\033[0;33mWARNING:\033[0m'
 262         else:
 263             _msg_header=u'WARNING:'
 264         warning_message=u'%s %s' % (_msg_header,message)
 265         self.to_stderr(warning_message)
 266
 267     def report_error(self, message, tb=None):
 268         '''
 269         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 270         in red if stderr is a tty file.
 271         '''
 272         if sys.stderr.isatty() and os.name != 'nt':
 273             _msg_header = u'\033[0;31mERROR:\033[0m'
 274         else:
 275             _msg_header = u'ERROR:'
 276         error_message = u'%s %s' % (_msg_header, message)
 277         self.trouble(error_message, tb)
 278
 279     def slow_down(self, start_time, byte_counter):
 280         """Sleep if the download speed is over the rate limit."""
 281         rate_limit = self.params.get('ratelimit', None)
 282         if rate_limit is None or byte_counter == 0:
 283             return
 284         now = time.time()
 285         elapsed = now - start_time
 286         if elapsed <= 0.0:
 287             return
 288         speed = float(byte_counter) / elapsed
 289         if speed > rate_limit:
 290             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 291
 292     def temp_name(self, filename):
 293         """Returns a temporary filename for the given filename."""
 294         if self.params.get('nopart', False) or filename == u'-' or \
 295                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 296             return filename
 297         return filename + u'.part'
 298
 299     def undo_temp_name(self, filename):
 300         if filename.endswith(u'.part'):
 301             return filename[:-len(u'.part')]
 302         return filename
 303
 304     def try_rename(self, old_filename, new_filename):
 305         try:
 306             if old_filename == new_filename:
 307                 return
 308             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 309         except (IOError, OSError) as err:
 310             self.report_error(u'unable to rename file')
 311
 312     def try_utime(self, filename, last_modified_hdr):
 313         """Try to set the last-modified time of the given file."""
 314         if last_modified_hdr is None:
 315             return
 316         if not os.path.isfile(encodeFilename(filename)):
 317             return
 318         timestr = last_modified_hdr
 319         if timestr is None:
 320             return
 321         filetime = timeconvert(timestr)
 322         if filetime is None:
 323             return filetime
 324         try:
 325             os.utime(filename, (time.time(), filetime))
 326         except:
 327             pass
 328         return filetime
 329
 330     def report_writedescription(self, descfn):
 331         """ Report that the description file is being written """
 332         self.to_screen(u'[info] Writing video description to: ' + descfn)
 333
 334     def report_writesubtitles(self, sub_filename):
 335         """ Report that the subtitles file is being written """
 336         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 337
 338     def report_writeinfojson(self, infofn):
 339         """ Report that the metadata file has been written """
 340         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 341
 342     def report_destination(self, filename):
 343         """Report destination filename."""
 344         self.to_screen(u'[download] Destination: ' + filename)
 345
 346     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 347         """Report download progress."""
 348         if self.params.get('noprogress', False):
 349             return
 350         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
 351         if self.params.get('progress_with_newline', False):
 352             self.to_screen(u'[download] %s of %s at %s ETA %s' %
 353                 (percent_str, data_len_str, speed_str, eta_str))
 354         else:
 355             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
 356                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 357         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 358                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 359
 360     def report_resuming_byte(self, resume_len):
 361         """Report attempt to resume at given byte."""
 362         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 363
 364     def report_retry(self, count, retries):
 365         """Report retry in case of HTTP error 5xx"""
 366         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 367
 368     def report_file_already_downloaded(self, file_name):
 369         """Report file has already been fully downloaded."""
 370         try:
 371             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 372         except (UnicodeEncodeError) as err:
 373             self.to_screen(u'[download] The file has already been downloaded')
 374
 375     def report_unable_to_resume(self):
 376         """Report it was impossible to resume download."""
 377         self.to_screen(u'[download] Unable to resume')
 378
 379     def report_finish(self):
 380         """Report download finished."""
 381         if self.params.get('noprogress', False):
 382             self.to_screen(u'[download] Download completed')
 383         else:
 384             self.to_screen(u'')
 385
 386     def increment_downloads(self):
 387         """Increment the ordinal that assigns a number to each file."""
 388         self._num_downloads += 1
 389
 390     def prepare_filename(self, info_dict):
 391         """Generate the output filename."""
 392         try:
 393             template_dict = dict(info_dict)
 394
 395             template_dict['epoch'] = int(time.time())
 396             autonumber_size = self.params.get('autonumber_size')
 397             if autonumber_size is None:
 398                 autonumber_size = 5
 399             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 400             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 401             if template_dict['playlist_index'] is not None:
 402                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 403
 404             sanitize = lambda k,v: sanitize_filename(
 405                 u'NA' if v is None else compat_str(v),
 406                 restricted=self.params.get('restrictfilenames'),
 407                 is_id=(k==u'id'))
 408             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 409
 410             filename = self.params['outtmpl'] % template_dict
 411             return filename
 412         except KeyError as err:
 413             self.report_error(u'Erroneous output template')
 414             return None
 415         except ValueError as err:
 416             self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
 417             return None
 418
 419     def _match_entry(self, info_dict):
 420         """ Returns None iff the file should be downloaded """
 421
 422         title = info_dict['title']
 423         matchtitle = self.params.get('matchtitle', False)
 424         if matchtitle:
 425             if not re.search(matchtitle, title, re.IGNORECASE):
 426                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 427         rejecttitle = self.params.get('rejecttitle', False)
 428         if rejecttitle:
 429             if re.search(rejecttitle, title, re.IGNORECASE):
 430                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 431         date = info_dict.get('upload_date', None)
 432         if date is not None:
 433             dateRange = self.params.get('daterange', DateRange())
 434             if date not in dateRange:
 435                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 436         return None
 437
 438     def extract_info(self, url, download = True, ie_name = None):
 439         '''
 440         Returns a list with a dictionary for each video we find.
 441         If 'download', also downloads the videos.
 442          '''
 443         suitable_found = False
 444
 445         #We copy the original list
 446         ies = list(self._ies)
 447
 448         if ie_name is not None:
 449             #We put in the first place the given info extractor
 450             first_ie = get_info_extractor(ie_name)()
 451             first_ie.set_downloader(self)
 452             ies.insert(0, first_ie)
 453
 454         for ie in ies:
 455             # Go to next InfoExtractor if not suitable
 456             if not ie.suitable(url):
 457                 continue
 458
 459             # Warn if the _WORKING attribute is False
 460             if not ie.working():
 461                 self.report_warning(u'the program functionality for this site has been marked as broken, '
 462                                u'and will probably not work. If you want to go on, use the -i option.')
 463
 464             # Suitable InfoExtractor found
 465             suitable_found = True
 466
 467             # Extract information from URL and process it
 468             try:
 469                 ie_results = ie.extract(url)
 470                 if ie_results is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 471                     break
 472                 results = []
 473                 for ie_result in ie_results:
 474                     if not 'extractor' in ie_result:
 475                         #The extractor has already been set somewhere else
 476                         ie_result['extractor'] = ie.IE_NAME
 477                     results.append(self.process_ie_result(ie_result, download))
 478                 return results
 479             except ExtractorError as de: # An error we somewhat expected
 480                 self.report_error(compat_str(de), de.format_traceback())
 481                 break
 482             except Exception as e:
 483                 if self.params.get('ignoreerrors', False):
 484                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 485                     break
 486                 else:
 487                     raise
 488         if not suitable_found:
 489                 self.report_error(u'no suitable InfoExtractor: %s' % url)
 490
 491     def process_ie_result(self, ie_result, download = True):
 492         """
 493         Take the result of the ie and return a list of videos.
 494         For url elements it will search the suitable ie and get the videos
 495         For playlist elements it will process each of the elements of the 'entries' key
 496
 497         It will also download the videos if 'download'.
 498         """
 499         result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system
 500         if result_type == 'video':
 501             if 'playlist' not in ie_result:
 502                 #It isn't part of a playlist
 503                 ie_result['playlist'] = None
 504                 ie_result['playlist_index'] = None
 505             if download:
 506                 #Do the download:
 507                 self.process_info(ie_result)
 508             return ie_result
 509         elif result_type == 'url':
 510             #We get the video pointed by the url
 511             result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0]
 512             return result
 513         elif result_type == 'playlist':
 514             #We process each entry in the playlist
 515             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 516             self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
 517
 518             playlist_results = []
 519
 520             n_all_entries = len(ie_result['entries'])
 521             playliststart = self.params.get('playliststart', 1) - 1
 522             playlistend = self.params.get('playlistend', -1)
 523
 524             if playlistend == -1:
 525                 entries = ie_result['entries'][playliststart:]
 526             else:
 527                 entries = ie_result['entries'][playliststart:playlistend]
 528
 529             n_entries = len(entries)
 530
 531             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 532                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 533
 534             for i,entry in enumerate(entries,1):
 535                 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
 536                 entry_result = self.process_ie_result(entry, False)
 537                 entry_result['playlist'] = playlist
 538                 entry_result['playlist_index'] = i + playliststart
 539                 #We must do the download here to correctly set the 'playlist' key
 540                 if download:
 541                     self.process_info(entry_result)
 542                 playlist_results.append(entry_result)
 543             result = ie_result.copy()
 544             result['entries'] = playlist_results
 545             return result
 546
 547     def process_info(self, info_dict):
 548         """Process a single dictionary returned by an InfoExtractor."""
 549
 550         #We increment the download the download count here to match the previous behaviour.
 551         self.increment_downloads()
 552
 553         info_dict['fulltitle'] = info_dict['title']
 554         if len(info_dict['title']) > 200:
 555             info_dict['title'] = info_dict['title'][:197] + u'...'
 556
 557         # Keep for backwards compatibility
 558         info_dict['stitle'] = info_dict['title']
 559
 560         if not 'format' in info_dict:
 561             info_dict['format'] = info_dict['ext']
 562
 563         reason = self._match_entry(info_dict)
 564         if reason is not None:
 565             self.to_screen(u'[download] ' + reason)
 566             return
 567
 568         max_downloads = self.params.get('max_downloads')
 569         if max_downloads is not None:
 570             if self._num_downloads > int(max_downloads):
 571                 raise MaxDownloadsReached()
 572
 573         filename = self.prepare_filename(info_dict)
 574
 575         # Forced printings
 576         if self.params.get('forcetitle', False):
 577             compat_print(info_dict['title'])
 578         if self.params.get('forceurl', False):
 579             compat_print(info_dict['url'])
 580         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 581             compat_print(info_dict['thumbnail'])
 582         if self.params.get('forcedescription', False) and 'description' in info_dict:
 583             compat_print(info_dict['description'])
 584         if self.params.get('forcefilename', False) and filename is not None:
 585             compat_print(filename)
 586         if self.params.get('forceformat', False):
 587             compat_print(info_dict['format'])
 588
 589         # Do nothing else if in simulate mode
 590         if self.params.get('simulate', False):
 591             return
 592
 593         if filename is None:
 594             return
 595
 596         try:
 597             dn = os.path.dirname(encodeFilename(filename))
 598             if dn != '' and not os.path.exists(dn): # dn is already encoded
 599                 os.makedirs(dn)
 600         except (OSError, IOError) as err:
 601             self.report_error(u'unable to create directory ' + compat_str(err))
 602             return
 603
 604         if self.params.get('writedescription', False):
 605             try:
 606                 descfn = filename + u'.description'
 607                 self.report_writedescription(descfn)
 608                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 609                     descfile.write(info_dict['description'])
 610             except (OSError, IOError):
 611                 self.report_error(u'Cannot write description file ' + descfn)
 612                 return
 613
 614         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 615             # subtitles download errors are already managed as troubles in relevant IE
 616             # that way it will silently go on when used with unsupporting IE
 617             subtitle = info_dict['subtitles'][0]
 618             (sub_error, sub_lang, sub) = subtitle
 619             sub_format = self.params.get('subtitlesformat')
 620             if sub_error:
 621                 self.report_warning("Some error while getting the subtitles")
 622             else:
 623                 try:
 624                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 625                     self.report_writesubtitles(sub_filename)
 626                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 627                         subfile.write(sub)
 628                 except (OSError, IOError):
 629                     self.report_error(u'Cannot write subtitles file ' + descfn)
 630                     return
 631             if self.params.get('onlysubtitles', False):
 632                 return
 633
 634         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 635             subtitles = info_dict['subtitles']
 636             sub_format = self.params.get('subtitlesformat')
 637             for subtitle in subtitles:
 638                 (sub_error, sub_lang, sub) = subtitle
 639                 if sub_error:
 640                     self.report_warning("Some error while getting the subtitles")
 641                 else:
 642                     try:
 643                         sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 644                         self.report_writesubtitles(sub_filename)
 645                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 646                                 subfile.write(sub)
 647                     except (OSError, IOError):
 648                         self.report_error(u'Cannot write subtitles file ' + descfn)
 649                         return
 650             if self.params.get('onlysubtitles', False):
 651                 return
 652
 653         if self.params.get('writeinfojson', False):
 654             infofn = filename + u'.info.json'
 655             self.report_writeinfojson(infofn)
 656             try:
 657                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
 658                 write_json_file(json_info_dict, encodeFilename(infofn))
 659             except (OSError, IOError):
 660                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 661                 return
 662
 663         if self.params.get('writethumbnail', False):
 664             if 'thumbnail' in info_dict:
 665                 thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
 666                 if not thumb_format:
 667                     thumb_format = 'jpg'
 668                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 669                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 670                                (info_dict['extractor'], info_dict['id']))
 671                 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 672                 with open(thumb_filename, 'wb') as thumbf:
 673                     shutil.copyfileobj(uf, thumbf)
 674                 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 675                                (info_dict['extractor'], info_dict['id'], thumb_filename))
 676
 677         if not self.params.get('skip_download', False):
 678             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 679                 success = True
 680             else:
 681                 try:
 682                     success = self._do_download(filename, info_dict)
 683                 except (OSError, IOError) as err:
 684                     raise UnavailableVideoError()
 685                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 686                     self.report_error(u'unable to download video data: %s' % str(err))
 687                     return
 688                 except (ContentTooShortError, ) as err:
 689                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 690                     return
 691
 692             if success:
 693                 try:
 694                     self.post_process(filename, info_dict)
 695                 except (PostProcessingError) as err:
 696                     self.report_error(u'postprocessing: %s' % str(err))
 697                     return
 698
 699     def download(self, url_list):
 700         """Download a given list of URLs."""
 701         if len(url_list) > 1 and self.fixed_template():
 702             raise SameFileError(self.params['outtmpl'])
 703
 704         for url in url_list:
 705             try:
 706                 #It also downloads the videos
 707                 videos = self.extract_info(url)
 708             except UnavailableVideoError:
 709                 self.report_error(u'unable to download video')
 710             except MaxDownloadsReached:
 711                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 712                 raise
 713
 714         return self._download_retcode
 715
 716     def post_process(self, filename, ie_info):
 717         """Run all the postprocessors on the given file."""
 718         info = dict(ie_info)
 719         info['filepath'] = filename
 720         keep_video = None
 721         for pp in self._pps:
 722             try:
 723                 keep_video_wish,new_info = pp.run(info)
 724                 if keep_video_wish is not None:
 725                     if keep_video_wish:
 726                         keep_video = keep_video_wish
 727                     elif keep_video is None:
 728                         # No clear decision yet, let IE decide
 729                         keep_video = keep_video_wish
 730             except PostProcessingError as e:
 731                 self.to_stderr(u'ERROR: ' + e.msg)
 732         if keep_video is False and not self.params.get('keepvideo', False):
 733             try:
 734                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 735                 os.remove(encodeFilename(filename))
 736             except (IOError, OSError):
 737                 self.report_warning(u'Unable to remove downloaded video file')
 738
 739     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
 740         self.report_destination(filename)
 741         tmpfilename = self.temp_name(filename)
 742
 743         # Check for rtmpdump first
 744         try:
 745             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 746         except (OSError, IOError):
 747             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
 748             return False
 749
 750         # Download using rtmpdump. rtmpdump returns exit code 2 when
 751         # the connection was interrumpted and resuming appears to be
 752         # possible. This is part of rtmpdump's normal usage, AFAIK.
 753         basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
 754         if player_url is not None:
 755             basic_args += ['-W', player_url]
 756         if page_url is not None:
 757             basic_args += ['--pageUrl', page_url]
 758         if play_path is not None:
 759             basic_args += ['-y', play_path]
 760         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 761         if self.params.get('verbose', False):
 762             try:
 763                 import pipes
 764                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 765             except ImportError:
 766                 shell_quote = repr
 767             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 768         retval = subprocess.call(args)
 769         while retval == 2 or retval == 1:
 770             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 771             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 772             time.sleep(5.0) # This seems to be needed
 773             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 774             cursize = os.path.getsize(encodeFilename(tmpfilename))
 775             if prevsize == cursize and retval == 1:
 776                 break
 777              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 778             if prevsize == cursize and retval == 2 and cursize > 1024:
 779                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 780                 retval = 0
 781                 break
 782         if retval == 0:
 783             fsize = os.path.getsize(encodeFilename(tmpfilename))
 784             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
 785             self.try_rename(tmpfilename, filename)
 786             self._hook_progress({
 787                 'downloaded_bytes': fsize,
 788                 'total_bytes': fsize,
 789                 'filename': filename,
 790                 'status': 'finished',
 791             })
 792             return True
 793         else:
 794             self.to_stderr(u"\n")
 795             self.report_error(u'rtmpdump exited with code %d' % retval)
 796             return False
 797
 798     def _do_download(self, filename, info_dict):
 799         url = info_dict['url']
 800
 801         # Check file already present
 802         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 803             self.report_file_already_downloaded(filename)
 804             self._hook_progress({
 805                 'filename': filename,
 806                 'status': 'finished',
 807             })
 808             return True
 809
 810         # Attempt to download using rtmpdump
 811         if url.startswith('rtmp'):
 812             return self._download_with_rtmpdump(filename, url,
 813                                                 info_dict.get('player_url', None),
 814                                                 info_dict.get('page_url', None),
 815                                                 info_dict.get('play_path', None))
 816
 817         tmpfilename = self.temp_name(filename)
 818         stream = None
 819
 820         # Do not include the Accept-Encoding header
 821         headers = {'Youtubedl-no-compression': 'True'}
 822         if 'user_agent' in info_dict:
 823             headers['Youtubedl-user-agent'] = info_dict['user_agent']
 824         basic_request = compat_urllib_request.Request(url, None, headers)
 825         request = compat_urllib_request.Request(url, None, headers)
 826
 827         if self.params.get('test', False):
 828             request.add_header('Range','bytes=0-10240')
 829
 830         # Establish possible resume length
 831         if os.path.isfile(encodeFilename(tmpfilename)):
 832             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 833         else:
 834             resume_len = 0
 835
 836         open_mode = 'wb'
 837         if resume_len != 0:
 838             if self.params.get('continuedl', False):
 839                 self.report_resuming_byte(resume_len)
 840                 request.add_header('Range','bytes=%d-' % resume_len)
 841                 open_mode = 'ab'
 842             else:
 843                 resume_len = 0
 844
 845         count = 0
 846         retries = self.params.get('retries', 0)
 847         while count <= retries:
 848             # Establish connection
 849             try:
 850                 if count == 0 and 'urlhandle' in info_dict:
 851                     data = info_dict['urlhandle']
 852                 data = compat_urllib_request.urlopen(request)
 853                 break
 854             except (compat_urllib_error.HTTPError, ) as err:
 855                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 856                     # Unexpected HTTP error
 857                     raise
 858                 elif err.code == 416:
 859                     # Unable to resume (requested range not satisfiable)
 860                     try:
 861                         # Open the connection again without the range header
 862                         data = compat_urllib_request.urlopen(basic_request)
 863                         content_length = data.info()['Content-Length']
 864                     except (compat_urllib_error.HTTPError, ) as err:
 865                         if err.code < 500 or err.code >= 600:
 866                             raise
 867                     else:
 868                         # Examine the reported length
 869                         if (content_length is not None and
 870                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 871                             # The file had already been fully downloaded.
 872                             # Explanation to the above condition: in issue #175 it was revealed that
 873                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 874                             # changing the file size slightly and causing problems for some users. So
 875                             # I decided to implement a suggested change and consider the file
 876                             # completely downloaded if the file size differs less than 100 bytes from
 877                             # the one in the hard drive.
 878                             self.report_file_already_downloaded(filename)
 879                             self.try_rename(tmpfilename, filename)
 880                             self._hook_progress({
 881                                 'filename': filename,
 882                                 'status': 'finished',
 883                             })
 884                             return True
 885                         else:
 886                             # The length does not match, we start the download over
 887                             self.report_unable_to_resume()
 888                             open_mode = 'wb'
 889                             break
 890             # Retry
 891             count += 1
 892             if count <= retries:
 893                 self.report_retry(count, retries)
 894
 895         if count > retries:
 896             self.report_error(u'giving up after %s retries' % retries)
 897             return False
 898
 899         data_len = data.info().get('Content-length', None)
 900         if data_len is not None:
 901             data_len = int(data_len) + resume_len
 902             min_data_len = self.params.get("min_filesize", None)
 903             max_data_len =  self.params.get("max_filesize", None)
 904             if min_data_len is not None and data_len < min_data_len:
 905                 self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
 906                 return False
 907             if max_data_len is not None and data_len > max_data_len:
 908                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
 909                 return False
 910
 911         data_len_str = self.format_bytes(data_len)
 912         byte_counter = 0 + resume_len
 913         block_size = self.params.get('buffersize', 1024)
 914         start = time.time()
 915         while True:
 916             # Download and write
 917             before = time.time()
 918             data_block = data.read(block_size)
 919             after = time.time()
 920             if len(data_block) == 0:
 921                 break
 922             byte_counter += len(data_block)
 923
 924             # Open file just in time
 925             if stream is None:
 926                 try:
 927                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 928                     assert stream is not None
 929                     filename = self.undo_temp_name(tmpfilename)
 930                     self.report_destination(filename)
 931                 except (OSError, IOError) as err:
 932                     self.report_error(u'unable to open for writing: %s' % str(err))
 933                     return False
 934             try:
 935                 stream.write(data_block)
 936             except (IOError, OSError) as err:
 937                 self.to_stderr(u"\n")
 938                 self.report_error(u'unable to write data: %s' % str(err))
 939                 return False
 940             if not self.params.get('noresizebuffer', False):
 941                 block_size = self.best_block_size(after - before, len(data_block))
 942
 943             # Progress message
 944             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 945             if data_len is None:
 946                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 947             else:
 948                 percent_str = self.calc_percent(byte_counter, data_len)
 949                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 950                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 951
 952             self._hook_progress({
 953                 'downloaded_bytes': byte_counter,
 954                 'total_bytes': data_len,
 955                 'tmpfilename': tmpfilename,
 956                 'filename': filename,
 957                 'status': 'downloading',
 958             })
 959
 960             # Apply rate limit
 961             self.slow_down(start, byte_counter - resume_len)
 962
 963         if stream is None:
 964             self.to_stderr(u"\n")
 965             self.report_error(u'Did not get any data blocks')
 966             return False
 967         stream.close()
 968         self.report_finish()
 969         if data_len is not None and byte_counter != data_len:
 970             raise ContentTooShortError(byte_counter, int(data_len))
 971         self.try_rename(tmpfilename, filename)
 972
 973         # Update file modification time
 974         if self.params.get('updatetime', True):
 975             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 976
 977         self._hook_progress({
 978             'downloaded_bytes': byte_counter,
 979             'total_bytes': byte_counter,
 980             'filename': filename,
 981             'status': 'finished',
 982         })
 983
 984         return True
 985
 986     def _hook_progress(self, status):
 987         for ph in self._progress_hooks:
 988             ph(status)
 989
 990     def add_progress_hook(self, ph):
 991         """ ph gets called on download progress, with a dictionary with the entries
 992         * filename: The final filename
 993         * status: One of "downloading" and "finished"
 994
 995         It can also have some of the following entries:
 996
 997         * downloaded_bytes: Bytes on disks
 998         * total_bytes: Total bytes, None if unknown
 999         * tmpfilename: The filename we're currently writing to
1000
1001         Hooks are guaranteed to be called at least once (with status "finished")
1002         if the download is successful.
1003         """
1004         self._progress_hooks.append(ph)