_ Git - youtube-dl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import math
   7 import io
   8 import os
   9 import re
  10 import socket
  11 import subprocess
  12 import sys
  13 import time
  14 import traceback
  15
  16 if os.name == 'nt':
  17     import ctypes
  18
  19 from .utils import *
  20 from .InfoExtractors import get_info_extractor
  21
  22
  23 class FileDownloader(object):
  24     """File Downloader class.
  25
    File downloader objects are the ones responsible for downloading the
  27     actual video file and writing it to disk if the user has requested
  28     it, among some other tasks. In most cases there should be one per
  29     program. As, given a video URL, the downloader doesn't know how to
  30     extract all the needed information, task that InfoExtractors do, it
  31     has to pass the URL to one of them.
  32
  33     For this, file downloader objects have a method that allows
  34     InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
  36     finds that reports being able to handle it. The InfoExtractor extracts
  37     all the information about the video or videos the URL refers to, and
  38     asks the FileDownloader to process the video information, possibly
  39     downloading the video.
  40
  41     File downloaders accept a lot of parameters. In order not to saturate
  42     the object constructor with arguments, it receives a dictionary of
  43     options instead. These options are available through the params
  44     attribute for the InfoExtractors to use. The FileDownloader also
  45     registers itself as the downloader in charge for the InfoExtractors
  46     that are added to it, so this is a "mutual registration".
  47
  48     Available options:
  49
  50     username:          Username for authentication purposes.
  51     password:          Password for authentication purposes.
  52     usenetrc:          Use netrc for authentication instead.
  53     quiet:             Do not print messages to stdout.
  54     forceurl:          Force printing final URL.
  55     forcetitle:        Force printing title.
  56     forcethumbnail:    Force printing thumbnail URL.
  57     forcedescription:  Force printing description.
  58     forcefilename:     Force printing final filename.
  59     simulate:          Do not download the video files.
  60     format:            Video format code.
  61     format_limit:      Highest quality format to try.
  62     outtmpl:           Template for output names.
  63     restrictfilenames: Do not allow "&" and spaces in file names
  64     ignoreerrors:      Do not stop on download errors.
  65     ratelimit:         Download speed limit, in bytes/sec.
  66     nooverwrites:      Prevent overwriting files.
  67     retries:           Number of times to retry for HTTP error 5xx
  68     buffersize:        Size of download buffer in bytes.
  69     noresizebuffer:    Do not automatically resize the download buffer.
  70     continuedl:        Try to continue downloads if possible.
  71     noprogress:        Do not print the progress bar.
  72     playliststart:     Playlist item to start at.
  73     playlistend:       Playlist item to end at.
  74     matchtitle:        Download only matching titles.
  75     rejecttitle:       Reject downloads for matching titles.
  76     logtostderr:       Log messages to stderr instead of stdout.
  77     consoletitle:      Display progress in console window's titlebar.
  78     nopart:            Do not use temporary .part files.
  79     updatetime:        Use the Last-modified header to set output file timestamps.
  80     writedescription:  Write the video description to a .description file
  81     writeinfojson:     Write the video description to a .info.json file
  82     writesubtitles:    Write the video subtitles to a file
  83     onlysubtitles:     Downloads only the subtitles of the video
  84     allsubtitles:      Downloads all the subtitles of the video
  85     listsubtitles:     Lists all available subtitles for the video
  86     subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
  87     subtitleslang:     Language of the subtitles to download
  88     test:              Download only first bytes to test the downloader.
  89     keepvideo:         Keep the video file after post-processing
  90     min_filesize:      Skip files smaller than this size
  91     max_filesize:      Skip files larger than this size
  92     """
  93
  94     params = None
  95     _ies = []
  96     _pps = []
  97     _download_retcode = None
  98     _num_downloads = None
  99     _screen_file = None
 100
 101     def __init__(self, params):
 102         """Create a FileDownloader object with the given options."""
 103         self._ies = []
 104         self._pps = []
 105         self._progress_hooks = []
 106         self._download_retcode = 0
 107         self._num_downloads = 0
 108         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 109         self.params = params
 110
 111         if '%(stitle)s' in self.params['outtmpl']:
 112             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 113
 114     @staticmethod
 115     def format_bytes(bytes):
 116         if bytes is None:
 117             return 'N/A'
 118         if type(bytes) is str:
 119             bytes = float(bytes)
 120         if bytes == 0.0:
 121             exponent = 0
 122         else:
 123             exponent = int(math.log(bytes, 1024.0))
 124         suffix = 'bkMGTPEZY'[exponent]
 125         converted = float(bytes) / float(1024 ** exponent)
 126         return '%.2f%s' % (converted, suffix)
 127
 128     @staticmethod
 129     def calc_percent(byte_counter, data_len):
 130         if data_len is None:
 131             return '---.-%'
 132         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 133
 134     @staticmethod
 135     def calc_eta(start, now, total, current):
 136         if total is None:
 137             return '--:--'
 138         dif = now - start
 139         if current == 0 or dif < 0.001: # One millisecond
 140             return '--:--'
 141         rate = float(current) / dif
 142         eta = int((float(total) - float(current)) / rate)
 143         (eta_mins, eta_secs) = divmod(eta, 60)
 144         if eta_mins > 99:
 145             return '--:--'
 146         return '%02d:%02d' % (eta_mins, eta_secs)
 147
 148     @staticmethod
 149     def calc_speed(start, now, bytes):
 150         dif = now - start
 151         if bytes == 0 or dif < 0.001: # One millisecond
 152             return '%10s' % '---b/s'
 153         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 154
 155     @staticmethod
 156     def best_block_size(elapsed_time, bytes):
 157         new_min = max(bytes / 2.0, 1.0)
 158         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 159         if elapsed_time < 0.001:
 160             return int(new_max)
 161         rate = bytes / elapsed_time
 162         if rate > new_max:
 163             return int(new_max)
 164         if rate < new_min:
 165             return int(new_min)
 166         return int(rate)
 167
 168     @staticmethod
 169     def parse_bytes(bytestr):
 170         """Parse a string indicating a byte quantity into an integer."""
 171         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 172         if matchobj is None:
 173             return None
 174         number = float(matchobj.group(1))
 175         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 176         return int(round(number * multiplier))
 177
 178     def add_info_extractor(self, ie):
 179         """Add an InfoExtractor object to the end of the list."""
 180         self._ies.append(ie)
 181         ie.set_downloader(self)
 182
 183     def add_post_processor(self, pp):
 184         """Add a PostProcessor object to the end of the chain."""
 185         self._pps.append(pp)
 186         pp.set_downloader(self)
 187
 188     def to_screen(self, message, skip_eol=False):
 189         """Print message to stdout if not in quiet mode."""
 190         assert type(message) == type(u'')
 191         if not self.params.get('quiet', False):
 192             terminator = [u'\n', u''][skip_eol]
 193             output = message + terminator
 194             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 195                 output = output.encode(preferredencoding(), 'ignore')
 196             self._screen_file.write(output)
 197             self._screen_file.flush()
 198
 199     def to_stderr(self, message):
 200         """Print message to stderr."""
 201         assert type(message) == type(u'')
 202         output = message + u'\n'
 203         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 204             output = output.encode(preferredencoding())
 205         sys.stderr.write(output)
 206
 207     def to_cons_title(self, message):
 208         """Set console/terminal window title to message."""
 209         if not self.params.get('consoletitle', False):
 210             return
 211         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 212             # c_wchar_p() might not be necessary if `message` is
 213             # already of type unicode()
 214             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 215         elif 'TERM' in os.environ:
 216             self.to_screen('\033]0;%s\007' % message, skip_eol=True)
 217
 218     def fixed_template(self):
 219         """Checks if the output template is fixed."""
 220         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 221
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        message, if given, is printed to stderr first.
        tb, if given, is additional traceback information; it is only
        printed when the 'verbose' option is set.

        Raises DownloadError unless the 'ignoreerrors' option is set, in
        which case the download return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                # No traceback supplied: build one from the active exception,
                # or from the current call stack when there is none.
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = u''
                    # Some exceptions carry the exc_info of the error that
                    # caused them; print that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the exc_info stored on the active exception, if any,
            # over the exc_info of the active exception itself.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: only record the failure.
        self._download_retcode = 1
 251
 252     def report_warning(self, message):
 253         '''
 254         Print the message to stderr, it will be prefixed with 'WARNING:'
 255         If stderr is a tty file the 'WARNING:' will be colored
 256         '''
 257         if sys.stderr.isatty():
 258             _msg_header=u'\033[0;33mWARNING:\033[0m'
 259         else:
 260             _msg_header=u'WARNING:'
 261         warning_message=u'%s %s' % (_msg_header,message)
 262         self.to_stderr(warning_message)
 263
 264     def report_error(self, message, tb=None):
 265         '''
 266         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 267         in red if stderr is a tty file.
 268         '''
 269         if sys.stderr.isatty():
 270             _msg_header = u'\033[0;31mERROR:\033[0m'
 271         else:
 272             _msg_header = u'ERROR:'
 273         error_message = u'%s %s' % (_msg_header, message)
 274         self.trouble(error_message, tb)
 275
 276     def slow_down(self, start_time, byte_counter):
 277         """Sleep if the download speed is over the rate limit."""
 278         rate_limit = self.params.get('ratelimit', None)
 279         if rate_limit is None or byte_counter == 0:
 280             return
 281         now = time.time()
 282         elapsed = now - start_time
 283         if elapsed <= 0.0:
 284             return
 285         speed = float(byte_counter) / elapsed
 286         if speed > rate_limit:
 287             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 288
 289     def temp_name(self, filename):
 290         """Returns a temporary filename for the given filename."""
 291         if self.params.get('nopart', False) or filename == u'-' or \
 292                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 293             return filename
 294         return filename + u'.part'
 295
 296     def undo_temp_name(self, filename):
 297         if filename.endswith(u'.part'):
 298             return filename[:-len(u'.part')]
 299         return filename
 300
 301     def try_rename(self, old_filename, new_filename):
 302         try:
 303             if old_filename == new_filename:
 304                 return
 305             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 306         except (IOError, OSError) as err:
 307             self.report_error(u'unable to rename file')
 308
 309     def try_utime(self, filename, last_modified_hdr):
 310         """Try to set the last-modified time of the given file."""
 311         if last_modified_hdr is None:
 312             return
 313         if not os.path.isfile(encodeFilename(filename)):
 314             return
 315         timestr = last_modified_hdr
 316         if timestr is None:
 317             return
 318         filetime = timeconvert(timestr)
 319         if filetime is None:
 320             return filetime
 321         try:
 322             os.utime(filename, (time.time(), filetime))
 323         except:
 324             pass
 325         return filetime
 326
 327     def report_writedescription(self, descfn):
 328         """ Report that the description file is being written """
 329         self.to_screen(u'[info] Writing video description to: ' + descfn)
 330
 331     def report_writesubtitles(self, sub_filename):
 332         """ Report that the subtitles file is being written """
 333         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 334
 335     def report_writeinfojson(self, infofn):
 336         """ Report that the metadata file has been written """
 337         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 338
 339     def report_destination(self, filename):
 340         """Report destination filename."""
 341         self.to_screen(u'[download] Destination: ' + filename)
 342
 343     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 344         """Report download progress."""
 345         if self.params.get('noprogress', False):
 346             return
 347         if self.params.get('progress_with_newline', False):
 348             self.to_screen(u'[download] %s of %s at %s ETA %s' %
 349                 (percent_str, data_len_str, speed_str, eta_str))
 350         else:
 351             self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 352                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 353         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 354                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 355
 356     def report_resuming_byte(self, resume_len):
 357         """Report attempt to resume at given byte."""
 358         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 359
 360     def report_retry(self, count, retries):
 361         """Report retry in case of HTTP error 5xx"""
 362         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 363
 364     def report_file_already_downloaded(self, file_name):
 365         """Report file has already been fully downloaded."""
 366         try:
 367             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 368         except (UnicodeEncodeError) as err:
 369             self.to_screen(u'[download] The file has already been downloaded')
 370
 371     def report_unable_to_resume(self):
 372         """Report it was impossible to resume download."""
 373         self.to_screen(u'[download] Unable to resume')
 374
 375     def report_finish(self):
 376         """Report download finished."""
 377         if self.params.get('noprogress', False):
 378             self.to_screen(u'[download] Download completed')
 379         else:
 380             self.to_screen(u'')
 381
 382     def increment_downloads(self):
 383         """Increment the ordinal that assigns a number to each file."""
 384         self._num_downloads += 1
 385
 386     def prepare_filename(self, info_dict):
 387         """Generate the output filename."""
 388         try:
 389             template_dict = dict(info_dict)
 390
 391             template_dict['epoch'] = int(time.time())
 392             autonumber_size = self.params.get('autonumber_size')
 393             if autonumber_size is None:
 394                 autonumber_size = 5
 395             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 396             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 397             if template_dict['playlist_index'] is not None:
 398                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 399
 400             sanitize = lambda k,v: sanitize_filename(
 401                 u'NA' if v is None else compat_str(v),
 402                 restricted=self.params.get('restrictfilenames'),
 403                 is_id=(k==u'id'))
 404             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 405
 406             filename = self.params['outtmpl'] % template_dict
 407             return filename
 408         except KeyError as err:
 409             self.trouble(u'ERROR: Erroneous output template')
 410             return None
 411         except ValueError as err:
 412             self.trouble(u'ERROR: Insufficient system charset ' + repr(preferredencoding()))
 413             return None
 414
 415     def _match_entry(self, info_dict):
 416         """ Returns None iff the file should be downloaded """
 417
 418         title = info_dict['title']
 419         matchtitle = self.params.get('matchtitle', False)
 420         if matchtitle:
 421             if not re.search(matchtitle, title, re.IGNORECASE):
 422                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 423         rejecttitle = self.params.get('rejecttitle', False)
 424         if rejecttitle:
 425             if re.search(rejecttitle, title, re.IGNORECASE):
 426                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 427         return None
 428
    def extract_info(self, url, download = True, ie_name = None):
        '''
        Run the first suitable InfoExtractor on url.

        Returns a list with one processed result (see process_ie_result)
        for each result the extractor yields. If 'download', also
        downloads the videos.

        If ie_name is given, the extractor returned by
        get_info_extractor(ie_name) is tried before the registered ones.

        Returns None when no extractor is suitable, when the extractor
        returns None, or when an extraction error was handed to trouble()
        without trouble() raising.
        '''
        suitable_found = False

        #We copy the original list
        ies = list(self._ies)

        if ie_name is not None:
            #We put in the first place the given info extractor
            first_ie = get_info_extractor(ie_name)()
            first_ie.set_downloader(self)
            ies.insert(0, first_ie)

        for ie in ies:
            # Go to next InfoExtractor if not suitable
            if not ie.suitable(url):
                continue

            # Warn if the _WORKING attribute is False
            if not ie.working():
                self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
                               u'and will probably not work. If you want to go on, use the -i option.')

            # Suitable InfoExtractor found
            suitable_found = True

            # Extract information from URL and process it
            # Only the first suitable extractor is used: every path below
            # leaves the loop (return, break or raise).
            try:
                ie_results = ie.extract(url)
                if ie_results is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                results = []
                for ie_result in ie_results:
                    if not 'extractor' in ie_result:
                        # Only set the extractor name when it has not
                        # already been set somewhere else
                        ie_result['extractor'] = ie.IE_NAME
                    results.append(self.process_ie_result(ie_result, download))
                return results
            except ExtractorError as de: # An error we somewhat expected
                self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
                break
            except Exception as e:
                # Unexpected errors are only reported when errors are being
                # ignored; otherwise they propagate unchanged.
                if self.params.get('ignoreerrors', False):
                    self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        if not suitable_found:
                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 481
 482     def process_ie_result(self, ie_result, download = True):
 483         """
 484         Take the result of the ie and return a list of videos.
 485         For url elements it will search the suitable ie and get the videos
 486         For playlist elements it will process each of the elements of the 'entries' key
 487
 488         It will also download the videos if 'download'.
 489         """
 490         result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system
 491         if result_type == 'video':
 492             if 'playlist' not in ie_result:
 493                 #It isn't part of a playlist
 494                 ie_result['playlist'] = None
 495                 ie_result['playlist_index'] = None
 496             if download:
 497                 #Do the download:
 498                 self.process_info(ie_result)
 499             return ie_result
 500         elif result_type == 'url':
 501             #We get the video pointed by the url
 502             result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0]
 503             return result
 504         elif result_type == 'playlist':
 505             #We process each entry in the playlist
 506             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 507             self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
 508
 509             playlist_results = []
 510
 511             n_all_entries = len(ie_result['entries'])
 512             playliststart = self.params.get('playliststart', 1) - 1
 513             playlistend = self.params.get('playlistend', -1)
 514
 515             if playlistend == -1:
 516                 entries = ie_result['entries'][playliststart:]
 517             else:
 518                 entries = ie_result['entries'][playliststart:playlistend]
 519
 520             n_entries = len(entries)
 521
 522             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 523                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 524
 525             for i,entry in enumerate(entries,1):
 526                 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
 527                 entry_result = self.process_ie_result(entry, False)
 528                 entry_result['playlist'] = playlist
 529                 entry_result['playlist_index'] = i + playliststart
 530                 #We must do the download here to correctly set the 'playlist' key
 531                 if download:
 532                     self.process_info(entry_result)
 533                 playlist_results.append(entry_result)
 534             result = ie_result.copy()
 535             result['entries'] = playlist_results
 536             return result
 537
 538     def process_info(self, info_dict):
 539         """Process a single dictionary returned by an InfoExtractor."""
 540
 541         #We increment the download the download count here to match the previous behaviour.
 542         self.increment_downloads()
 543
 544         info_dict['fulltitle'] = info_dict['title']
 545         if len(info_dict['title']) > 200:
 546             info_dict['title'] = info_dict['title'][:197] + u'...'
 547
 548         # Keep for backwards compatibility
 549         info_dict['stitle'] = info_dict['title']
 550
 551         if not 'format' in info_dict:
 552             info_dict['format'] = info_dict['ext']
 553
 554         reason = self._match_entry(info_dict)
 555         if reason is not None:
 556             self.to_screen(u'[download] ' + reason)
 557             return
 558
 559         max_downloads = self.params.get('max_downloads')
 560         if max_downloads is not None:
 561             if self._num_downloads > int(max_downloads):
 562                 raise MaxDownloadsReached()
 563
 564         filename = self.prepare_filename(info_dict)
 565
 566         # Forced printings
 567         if self.params.get('forcetitle', False):
 568             compat_print(info_dict['title'])
 569         if self.params.get('forceurl', False):
 570             compat_print(info_dict['url'])
 571         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 572             compat_print(info_dict['thumbnail'])
 573         if self.params.get('forcedescription', False) and 'description' in info_dict:
 574             compat_print(info_dict['description'])
 575         if self.params.get('forcefilename', False) and filename is not None:
 576             compat_print(filename)
 577         if self.params.get('forceformat', False):
 578             compat_print(info_dict['format'])
 579
 580         # Do nothing else if in simulate mode
 581         if self.params.get('simulate', False):
 582             return
 583
 584         if filename is None:
 585             return
 586
 587         try:
 588             dn = os.path.dirname(encodeFilename(filename))
 589             if dn != '' and not os.path.exists(dn): # dn is already encoded
 590                 os.makedirs(dn)
 591         except (OSError, IOError) as err:
 592             self.report_error(u'unable to create directory ' + compat_str(err))
 593             return
 594
 595         if self.params.get('writedescription', False):
 596             try:
 597                 descfn = filename + u'.description'
 598                 self.report_writedescription(descfn)
 599                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 600                     descfile.write(info_dict['description'])
 601             except (OSError, IOError):
 602                 self.report_error(u'Cannot write description file ' + descfn)
 603                 return
 604
 605         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 606             # subtitles download errors are already managed as troubles in relevant IE
 607             # that way it will silently go on when used with unsupporting IE
 608             subtitle = info_dict['subtitles'][0]
 609             (sub_error, sub_lang, sub) = subtitle
 610             sub_format = self.params.get('subtitlesformat')
 611             if sub_error:
 612                 self.report_warning("Some error while getting the subtitles")
 613             else:
 614                 try:
 615                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 616                     self.report_writesubtitles(sub_filename)
 617                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 618                         subfile.write(sub)
 619                 except (OSError, IOError):
 620                     self.report_error(u'Cannot write subtitles file ' + descfn)
 621                     return
 622             if self.params.get('onlysubtitles', False):
 623                 return
 624
 625         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 626             subtitles = info_dict['subtitles']
 627             sub_format = self.params.get('subtitlesformat')
 628             for subtitle in subtitles:
 629                 (sub_error, sub_lang, sub) = subtitle
 630                 if sub_error:
 631                     self.report_warning("Some error while getting the subtitles")
 632                 else:
 633                     try:
 634                         sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 635                         self.report_writesubtitles(sub_filename)
 636                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 637                                 subfile.write(sub)
 638                     except (OSError, IOError):
 639                         self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 640                         return
 641             if self.params.get('onlysubtitles', False):
 642                 return
 643
 644         if self.params.get('writeinfojson', False):
 645             infofn = filename + u'.info.json'
 646             self.report_writeinfojson(infofn)
 647             try:
 648                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
 649                 write_json_file(json_info_dict, encodeFilename(infofn))
 650             except (OSError, IOError):
 651                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 652                 return
 653
 654         if not self.params.get('skip_download', False):
 655             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 656                 success = True
 657             else:
 658                 try:
 659                     success = self._do_download(filename, info_dict)
 660                 except (OSError, IOError) as err:
 661                     raise UnavailableVideoError()
 662                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 663                     self.report_error(u'unable to download video data: %s' % str(err))
 664                     return
 665                 except (ContentTooShortError, ) as err:
 666                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 667                     return
 668
 669             if success:
 670                 try:
 671                     self.post_process(filename, info_dict)
 672                 except (PostProcessingError) as err:
 673                     self.report_error(u'postprocessing: %s' % str(err))
 674                     return
 675
 676     def download(self, url_list):
 677         """Download a given list of URLs."""
 678         if len(url_list) > 1 and self.fixed_template():
 679             raise SameFileError(self.params['outtmpl'])
 680
 681         for url in url_list:
 682             try:
 683                 #It also downloads the videos
 684                 videos = self.extract_info(url)
 685             except UnavailableVideoError:
 686                 self.trouble(u'\nERROR: unable to download video')
 687             except MaxDownloadsReached:
 688                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 689                 raise
 690
 691         return self._download_retcode
 692
 693     def post_process(self, filename, ie_info):
 694         """Run all the postprocessors on the given file."""
 695         info = dict(ie_info)
 696         info['filepath'] = filename
 697         keep_video = None
 698         for pp in self._pps:
 699             try:
 700                 keep_video_wish,new_info = pp.run(info)
 701                 if keep_video_wish is not None:
 702                     if keep_video_wish:
 703                         keep_video = keep_video_wish
 704                     elif keep_video is None:
 705                         # No clear decision yet, let IE decide
 706                         keep_video = keep_video_wish
 707             except PostProcessingError as e:
 708                 self.to_stderr(u'ERROR: ' + e.msg)
 709         if keep_video is False and not self.params.get('keepvideo', False):
 710             try:
 711                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 712                 os.remove(encodeFilename(filename))
 713             except (IOError, OSError):
 714                 self.report_warning(u'Unable to remove downloaded video file')
 715
 716     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
 717         self.report_destination(filename)
 718         tmpfilename = self.temp_name(filename)
 719
 720         # Check for rtmpdump first
 721         try:
 722             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 723         except (OSError, IOError):
 724             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
 725             return False
 726
 727         # Download using rtmpdump. rtmpdump returns exit code 2 when
 728         # the connection was interrumpted and resuming appears to be
 729         # possible. This is part of rtmpdump's normal usage, AFAIK.
 730         basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
 731         if player_url is not None:
 732             basic_args += ['-W', player_url]
 733         if page_url is not None:
 734             basic_args += ['--pageUrl', page_url]
 735         if play_path is not None:
 736             basic_args += ['-y', play_path]
 737         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 738         if self.params.get('verbose', False):
 739             try:
 740                 import pipes
 741                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 742             except ImportError:
 743                 shell_quote = repr
 744             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 745         retval = subprocess.call(args)
 746         while retval == 2 or retval == 1:
 747             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 748             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 749             time.sleep(5.0) # This seems to be needed
 750             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 751             cursize = os.path.getsize(encodeFilename(tmpfilename))
 752             if prevsize == cursize and retval == 1:
 753                 break
 754              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 755             if prevsize == cursize and retval == 2 and cursize > 1024:
 756                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 757                 retval = 0
 758                 break
 759         if retval == 0:
 760             fsize = os.path.getsize(encodeFilename(tmpfilename))
 761             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
 762             self.try_rename(tmpfilename, filename)
 763             self._hook_progress({
 764                 'downloaded_bytes': fsize,
 765                 'total_bytes': fsize,
 766                 'filename': filename,
 767                 'status': 'finished',
 768             })
 769             return True
 770         else:
 771             self.to_stderr(u"\n")
 772             self.report_error(u'rtmpdump exited with code %d' % retval)
 773             return False
 774
 775     def _do_download(self, filename, info_dict):
 776         url = info_dict['url']
 777
 778         # Check file already present
 779         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 780             self.report_file_already_downloaded(filename)
 781             self._hook_progress({
 782                 'filename': filename,
 783                 'status': 'finished',
 784             })
 785             return True
 786
 787         # Attempt to download using rtmpdump
 788         if url.startswith('rtmp'):
 789             return self._download_with_rtmpdump(filename, url,
 790                                                 info_dict.get('player_url', None),
 791                                                 info_dict.get('page_url', None),
 792                                                 info_dict.get('play_path', None))
 793
 794         tmpfilename = self.temp_name(filename)
 795         stream = None
 796
 797         # Do not include the Accept-Encoding header
 798         headers = {'Youtubedl-no-compression': 'True'}
 799         if 'user_agent' in info_dict:
 800             headers['Youtubedl-user-agent'] = info_dict['user_agent']
 801         basic_request = compat_urllib_request.Request(url, None, headers)
 802         request = compat_urllib_request.Request(url, None, headers)
 803
 804         if self.params.get('test', False):
 805             request.add_header('Range','bytes=0-10240')
 806
 807         # Establish possible resume length
 808         if os.path.isfile(encodeFilename(tmpfilename)):
 809             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 810         else:
 811             resume_len = 0
 812
 813         open_mode = 'wb'
 814         if resume_len != 0:
 815             if self.params.get('continuedl', False):
 816                 self.report_resuming_byte(resume_len)
 817                 request.add_header('Range','bytes=%d-' % resume_len)
 818                 open_mode = 'ab'
 819             else:
 820                 resume_len = 0
 821
 822         count = 0
 823         retries = self.params.get('retries', 0)
 824         while count <= retries:
 825             # Establish connection
 826             try:
 827                 if count == 0 and 'urlhandle' in info_dict:
 828                     data = info_dict['urlhandle']
 829                 data = compat_urllib_request.urlopen(request)
 830                 break
 831             except (compat_urllib_error.HTTPError, ) as err:
 832                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 833                     # Unexpected HTTP error
 834                     raise
 835                 elif err.code == 416:
 836                     # Unable to resume (requested range not satisfiable)
 837                     try:
 838                         # Open the connection again without the range header
 839                         data = compat_urllib_request.urlopen(basic_request)
 840                         content_length = data.info()['Content-Length']
 841                     except (compat_urllib_error.HTTPError, ) as err:
 842                         if err.code < 500 or err.code >= 600:
 843                             raise
 844                     else:
 845                         # Examine the reported length
 846                         if (content_length is not None and
 847                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 848                             # The file had already been fully downloaded.
 849                             # Explanation to the above condition: in issue #175 it was revealed that
 850                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 851                             # changing the file size slightly and causing problems for some users. So
 852                             # I decided to implement a suggested change and consider the file
 853                             # completely downloaded if the file size differs less than 100 bytes from
 854                             # the one in the hard drive.
 855                             self.report_file_already_downloaded(filename)
 856                             self.try_rename(tmpfilename, filename)
 857                             self._hook_progress({
 858                                 'filename': filename,
 859                                 'status': 'finished',
 860                             })
 861                             return True
 862                         else:
 863                             # The length does not match, we start the download over
 864                             self.report_unable_to_resume()
 865                             open_mode = 'wb'
 866                             break
 867             # Retry
 868             count += 1
 869             if count <= retries:
 870                 self.report_retry(count, retries)
 871
 872         if count > retries:
 873             self.report_error(u'giving up after %s retries' % retries)
 874             return False
 875
 876         data_len = data.info().get('Content-length', None)
 877         if data_len is not None:
 878             data_len = int(data_len) + resume_len
 879             min_data_len = self.params.get("min_filesize", None)
 880             max_data_len =  self.params.get("max_filesize", None)
 881             if min_data_len is not None and data_len < min_data_len:
 882                 self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
 883                 return False
 884             if max_data_len is not None and data_len > max_data_len:
 885                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
 886                 return False
 887
 888         data_len_str = self.format_bytes(data_len)
 889         byte_counter = 0 + resume_len
 890         block_size = self.params.get('buffersize', 1024)
 891         start = time.time()
 892         while True:
 893             # Download and write
 894             before = time.time()
 895             data_block = data.read(block_size)
 896             after = time.time()
 897             if len(data_block) == 0:
 898                 break
 899             byte_counter += len(data_block)
 900
 901             # Open file just in time
 902             if stream is None:
 903                 try:
 904                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 905                     assert stream is not None
 906                     filename = self.undo_temp_name(tmpfilename)
 907                     self.report_destination(filename)
 908                 except (OSError, IOError) as err:
 909                     self.report_error(u'unable to open for writing: %s' % str(err))
 910                     return False
 911             try:
 912                 stream.write(data_block)
 913             except (IOError, OSError) as err:
 914                 self.to_stderr(u"\n")
 915                 self.report_error(u'unable to write data: %s' % str(err))
 916                 return False
 917             if not self.params.get('noresizebuffer', False):
 918                 block_size = self.best_block_size(after - before, len(data_block))
 919
 920             # Progress message
 921             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 922             if data_len is None:
 923                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 924             else:
 925                 percent_str = self.calc_percent(byte_counter, data_len)
 926                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 927                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 928
 929             self._hook_progress({
 930                 'downloaded_bytes': byte_counter,
 931                 'total_bytes': data_len,
 932                 'tmpfilename': tmpfilename,
 933                 'filename': filename,
 934                 'status': 'downloading',
 935             })
 936
 937             # Apply rate limit
 938             self.slow_down(start, byte_counter - resume_len)
 939
 940         if stream is None:
 941             self.to_stderr(u"\n")
 942             self.report_error(u'Did not get any data blocks')
 943             return False
 944         stream.close()
 945         self.report_finish()
 946         if data_len is not None and byte_counter != data_len:
 947             raise ContentTooShortError(byte_counter, int(data_len))
 948         self.try_rename(tmpfilename, filename)
 949
 950         # Update file modification time
 951         if self.params.get('updatetime', True):
 952             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 953
 954         self._hook_progress({
 955             'downloaded_bytes': byte_counter,
 956             'total_bytes': byte_counter,
 957             'filename': filename,
 958             'status': 'finished',
 959         })
 960
 961         return True
 962
 963     def _hook_progress(self, status):
 964         for ph in self._progress_hooks:
 965             ph(status)
 966
 967     def add_progress_hook(self, ph):
 968         """ ph gets called on download progress, with a dictionary with the entries
 969         * filename: The final filename
 970         * status: One of "downloading" and "finished"
 971
 972         It can also have some of the following entries:
 973
 974         * downloaded_bytes: Bytes on disks
 975         * total_bytes: Total bytes, None if unknown
 976         * tmpfilename: The filename we're currently writing to
 977
 978         Hooks are guaranteed to be called at least once (with status "finished")
 979         if the download is successful.
 980         """
 981         self._progress_hooks.append(ph)