_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import errno
   8 import io
   9 import json
  10 import os
  11 import platform
  12 import re
  13 import shutil
  14 import subprocess
  15 import socket
  16 import sys
  17 import time
  18 import traceback
  19
  20 if os.name == 'nt':
  21     import ctypes
  22
  23 from .utils import (
  24     compat_cookiejar,
  25     compat_http_client,
  26     compat_str,
  27     compat_urllib_error,
  28     compat_urllib_request,
  29     ContentTooShortError,
  30     date_from_str,
  31     DateRange,
  32     determine_ext,
  33     DownloadError,
  34     encodeFilename,
  35     ExtractorError,
  36     format_bytes,
  37     formatSeconds,
  38     get_term_width,
  39     locked_file,
  40     make_HTTPS_handler,
  41     MaxDownloadsReached,
  42     PagedList,
  43     PostProcessingError,
  44     platform_name,
  45     preferredencoding,
  46     SameFileError,
  47     sanitize_filename,
  48     subtitles_filename,
  49     takewhile_inclusive,
  50     UnavailableVideoError,
  51     url_basename,
  52     write_json_file,
  53     write_string,
  54     YoutubeDLHandler,
  55     prepend_extension,
  56 )
  57 from .extractor import get_info_extractor, gen_extractors
  58 from .downloader import get_suitable_downloader
  59 from .postprocessor import FFmpegMergerPP
  60 from .version import __version__
  61
  62
  63 class YoutubeDL(object):
  64     """YoutubeDL class.
  65
  66     YoutubeDL objects are the ones responsible of downloading the
  67     actual video file and writing it to disk if the user has requested
  68     it, among some other tasks. In most cases there should be one per
  69     program. As, given a video URL, the downloader doesn't know how to
  70     extract all the needed information, task that InfoExtractors do, it
  71     has to pass the URL to one of them.
  72
  73     For this, YoutubeDL objects have a method that allows
  74     InfoExtractors to be registered in a given order. When it is passed
  75     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  76     finds that reports being able to handle it. The InfoExtractor extracts
  77     all the information about the video or videos the URL refers to, and
  78     YoutubeDL process the extracted information, possibly using a File
  79     Downloader to download the video.
  80
  81     YoutubeDL objects accept a lot of parameters. In order not to saturate
  82     the object constructor with arguments, it receives a dictionary of
  83     options instead. These options are available through the params
  84     attribute for the InfoExtractors to use. The YoutubeDL also
  85     registers itself as the downloader in charge for the InfoExtractors
  86     that are added to it, so this is a "mutual registration".
  87
  88     Available options:
  89
  90     username:          Username for authentication purposes.
  91     password:          Password for authentication purposes.
  92     videopassword:     Password for accessing a video.
  93     usenetrc:          Use netrc for authentication instead.
  94     verbose:           Print additional info to stdout.
  95     quiet:             Do not print messages to stdout.
  96     forceurl:          Force printing final URL.
  97     forcetitle:        Force printing title.
  98     forceid:           Force printing ID.
  99     forcethumbnail:    Force printing thumbnail URL.
 100     forcedescription:  Force printing description.
 101     forcefilename:     Force printing final filename.
 102     forceduration:     Force printing duration.
 103     forcejson:         Force printing info_dict as JSON.
 104     simulate:          Do not download the video files.
 105     format:            Video format code.
 106     format_limit:      Highest quality format to try.
 107     outtmpl:           Template for output names.
 108     restrictfilenames: Do not allow "&" and spaces in file names
 109     ignoreerrors:      Do not stop on download errors.
 110     nooverwrites:      Prevent overwriting files.
 111     playliststart:     Playlist item to start at.
 112     playlistend:       Playlist item to end at.
 113     matchtitle:        Download only matching titles.
 114     rejecttitle:       Reject downloads for matching titles.
 115     logger:            Log messages to a logging.Logger instance.
 116     logtostderr:       Log messages to stderr instead of stdout.
 117     writedescription:  Write the video description to a .description file
 118     writeinfojson:     Write the video description to a .info.json file
 119     writeannotations:  Write the video annotations to a .annotations.xml file
 120     writethumbnail:    Write the thumbnail image to a file
 121     writesubtitles:    Write the video subtitles to a file
 122     writeautomaticsub: Write the automatic subtitles to a file
 123     allsubtitles:      Downloads all the subtitles of the video
 124                        (requires writesubtitles or writeautomaticsub)
 125     listsubtitles:     Lists all available subtitles for the video
 126     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 127     subtitleslangs:    List of languages of the subtitles to download
 128     keepvideo:         Keep the video file after post-processing
 129     daterange:         A DateRange object, download only if the upload_date is in the range.
 130     skip_download:     Skip the actual download of the video file
 131     cachedir:          Location of the cache files in the filesystem.
 132                        None to disable filesystem cache.
 133     noplaylist:        Download single video instead of a playlist if in doubt.
 134     age_limit:         An integer representing the user's age in years.
 135                        Unsuitable videos for the given age are skipped.
 136     min_views:         An integer representing the minimum view count the video
 137                        must have in order to not be skipped.
 138                        Videos without view count information are always
 139                        downloaded. None for no limit.
 140     max_views:         An integer representing the maximum view count.
 141                        Videos that are more popular than that are not
 142                        downloaded.
 143                        Videos without view count information are always
 144                        downloaded. None for no limit.
 145     download_archive:  File name of a file where all downloads are recorded.
 146                        Videos already present in the file are not downloaded
 147                        again.
 148     cookiefile:        File name where cookies should be read from and dumped to.
 149     nocheckcertificate:Do not verify SSL certificates
 150     proxy:             URL of the proxy server to use
 151     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 152     bidi_workaround:   Work around buggy terminals without bidirectional text
 153                        support, using fribidi
 154     debug_printtraffic:Print out sent and received HTTP traffic
 155     include_ads:       Download ads as well
 156     default_search:    Prepend this string if an input url is not valid.
 157                        'auto' for elaborate guessing
 158
 159     The following parameters are not used by YoutubeDL itself, they are used by
 160     the FileDownloader:
 161     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 162     noresizebuffer, retries, continuedl, noprogress, consoletitle
 163
 164     The following options are used by the post processors:
 165     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 166                        otherwise prefer avconv.
 167     """
 168
 169     params = None
 170     _ies = []
 171     _pps = []
 172     _download_retcode = None
 173     _num_downloads = None
 174     _screen_file = None
 175
 176     def __init__(self, params=None):
 177         """Create a FileDownloader object with the given options."""
 178         if params is None:
 179             params = {}
 180         self._ies = []
 181         self._ies_instances = {}
 182         self._pps = []
 183         self._progress_hooks = []
 184         self._download_retcode = 0
 185         self._num_downloads = 0
 186         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 187         self._err_file = sys.stderr
 188         self.params = params
 189
 190         if params.get('bidi_workaround', False):
 191             try:
 192                 import pty
 193                 master, slave = pty.openpty()
 194                 width = get_term_width()
 195                 if width is None:
 196                     width_args = []
 197                 else:
 198                     width_args = ['-w', str(width)]
 199                 sp_kwargs = dict(
 200                     stdin=subprocess.PIPE,
 201                     stdout=slave,
 202                     stderr=self._err_file)
 203                 try:
 204                     self._output_process = subprocess.Popen(
 205                         ['bidiv'] + width_args, **sp_kwargs
 206                     )
 207                 except OSError:
 208                     self._output_process = subprocess.Popen(
 209                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 210                 self._output_channel = os.fdopen(master, 'rb')
 211             except OSError as ose:
 212                 if ose.errno == 2:
 213                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 214                 else:
 215                     raise
 216
 217         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 218                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 219                 and not params['restrictfilenames']):
 220             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 221             self.report_warning(
 222                 'Assuming --restrict-filenames since file system encoding '
 223                 'cannot encode all charactes. '
 224                 'Set the LC_ALL environment variable to fix this.')
 225             self.params['restrictfilenames'] = True
 226
 227         if '%(stitle)s' in self.params.get('outtmpl', ''):
 228             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 229
 230         self._setup_opener()
 231
 232     def add_info_extractor(self, ie):
 233         """Add an InfoExtractor object to the end of the list."""
 234         self._ies.append(ie)
 235         self._ies_instances[ie.ie_key()] = ie
 236         ie.set_downloader(self)
 237
 238     def get_info_extractor(self, ie_key):
 239         """
 240         Get an instance of an IE with name ie_key, it will try to get one from
 241         the _ies list, if there's no instance it will create a new one and add
 242         it to the extractor list.
 243         """
 244         ie = self._ies_instances.get(ie_key)
 245         if ie is None:
 246             ie = get_info_extractor(ie_key)()
 247             self.add_info_extractor(ie)
 248         return ie
 249
 250     def add_default_info_extractors(self):
 251         """
 252         Add the InfoExtractors returned by gen_extractors to the end of the list
 253         """
 254         for ie in gen_extractors():
 255             self.add_info_extractor(ie)
 256
 257     def add_post_processor(self, pp):
 258         """Add a PostProcessor object to the end of the chain."""
 259         self._pps.append(pp)
 260         pp.set_downloader(self)
 261
 262     def add_progress_hook(self, ph):
 263         """Add the progress hook (currently only for the file downloader)"""
 264         self._progress_hooks.append(ph)
 265
 266     def _bidi_workaround(self, message):
 267         if not hasattr(self, '_output_channel'):
 268             return message
 269
 270         assert hasattr(self, '_output_process')
 271         assert type(message) == type('')
 272         line_count = message.count('\n') + 1
 273         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 274         self._output_process.stdin.flush()
 275         res = ''.join(self._output_channel.readline().decode('utf-8')
 276                        for _ in range(line_count))
 277         return res[:-len('\n')]
 278
 279     def to_screen(self, message, skip_eol=False):
 280         """Print message to stdout if not in quiet mode."""
 281         return self.to_stdout(message, skip_eol, check_quiet=True)
 282
 283     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 284         """Print message to stdout if not in quiet mode."""
 285         if self.params.get('logger'):
 286             self.params['logger'].debug(message)
 287         elif not check_quiet or not self.params.get('quiet', False):
 288             message = self._bidi_workaround(message)
 289             terminator = ['\n', ''][skip_eol]
 290             output = message + terminator
 291
 292             write_string(output, self._screen_file)
 293
 294     def to_stderr(self, message):
 295         """Print message to stderr."""
 296         assert type(message) == type('')
 297         if self.params.get('logger'):
 298             self.params['logger'].error(message)
 299         else:
 300             message = self._bidi_workaround(message)
 301             output = message + '\n'
 302             write_string(output, self._err_file)
 303
 304     def to_console_title(self, message):
 305         if not self.params.get('consoletitle', False):
 306             return
 307         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 308             # c_wchar_p() might not be necessary if `message` is
 309             # already of type unicode()
 310             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 311         elif 'TERM' in os.environ:
 312             write_string('\033]0;%s\007' % message, self._screen_file)
 313
 314     def save_console_title(self):
 315         if not self.params.get('consoletitle', False):
 316             return
 317         if 'TERM' in os.environ:
 318             # Save the title on stack
 319             write_string('\033[22;0t', self._screen_file)
 320
 321     def restore_console_title(self):
 322         if not self.params.get('consoletitle', False):
 323             return
 324         if 'TERM' in os.environ:
 325             # Restore the title from stack
 326             write_string('\033[23;0t', self._screen_file)
 327
 328     def __enter__(self):
 329         self.save_console_title()
 330         return self
 331
 332     def __exit__(self, *args):
 333         self.restore_console_title()
 334
 335         if self.params.get('cookiefile') is not None:
 336             self.cookiejar.save()
 337
 338     def trouble(self, message=None, tb=None):
 339         """Determine action to take when a download problem appears.
 340
 341         Depending on if the downloader has been configured to ignore
 342         download errors or not, this method may throw an exception or
 343         not when errors are found, after printing the message.
 344
 345         tb, if given, is additional traceback information.
 346         """
 347         if message is not None:
 348             self.to_stderr(message)
 349         if self.params.get('verbose'):
 350             if tb is None:
 351                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 352                     tb = ''
 353                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 354                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 355                     tb += compat_str(traceback.format_exc())
 356                 else:
 357                     tb_data = traceback.format_list(traceback.extract_stack())
 358                     tb = ''.join(tb_data)
 359             self.to_stderr(tb)
 360         if not self.params.get('ignoreerrors', False):
 361             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 362                 exc_info = sys.exc_info()[1].exc_info
 363             else:
 364                 exc_info = sys.exc_info()
 365             raise DownloadError(message, exc_info)
 366         self._download_retcode = 1
 367
 368     def report_warning(self, message):
 369         '''
 370         Print the message to stderr, it will be prefixed with 'WARNING:'
 371         If stderr is a tty file the 'WARNING:' will be colored
 372         '''
 373         if self._err_file.isatty() and os.name != 'nt':
 374             _msg_header = '\033[0;33mWARNING:\033[0m'
 375         else:
 376             _msg_header = 'WARNING:'
 377         warning_message = '%s %s' % (_msg_header, message)
 378         self.to_stderr(warning_message)
 379
 380     def report_error(self, message, tb=None):
 381         '''
 382         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 383         in red if stderr is a tty file.
 384         '''
 385         if self._err_file.isatty() and os.name != 'nt':
 386             _msg_header = '\033[0;31mERROR:\033[0m'
 387         else:
 388             _msg_header = 'ERROR:'
 389         error_message = '%s %s' % (_msg_header, message)
 390         self.trouble(error_message, tb)
 391
 392     def report_file_already_downloaded(self, file_name):
 393         """Report file has already been fully downloaded."""
 394         try:
 395             self.to_screen('[download] %s has already been downloaded' % file_name)
 396         except UnicodeEncodeError:
 397             self.to_screen('[download] The file has already been downloaded')
 398
 399     def increment_downloads(self):
 400         """Increment the ordinal that assigns a number to each file."""
 401         self._num_downloads += 1
 402
 403     def prepare_filename(self, info_dict):
 404         """Generate the output filename."""
 405         try:
 406             template_dict = dict(info_dict)
 407
 408             template_dict['epoch'] = int(time.time())
 409             autonumber_size = self.params.get('autonumber_size')
 410             if autonumber_size is None:
 411                 autonumber_size = 5
 412             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 413             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 414             if template_dict.get('playlist_index') is not None:
 415                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
 416
 417             sanitize = lambda k, v: sanitize_filename(
 418                 compat_str(v),
 419                 restricted=self.params.get('restrictfilenames'),
 420                 is_id=(k == 'id'))
 421             template_dict = dict((k, sanitize(k, v))
 422                                  for k, v in template_dict.items()
 423                                  if v is not None)
 424             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 425
 426             tmpl = os.path.expanduser(self.params['outtmpl'])
 427             filename = tmpl % template_dict
 428             return filename
 429         except ValueError as err:
 430             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 431             return None
 432
 433     def _match_entry(self, info_dict):
 434         """ Returns None iff the file should be downloaded """
 435
 436         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 437         if 'title' in info_dict:
 438             # This can happen when we're just evaluating the playlist
 439             title = info_dict['title']
 440             matchtitle = self.params.get('matchtitle', False)
 441             if matchtitle:
 442                 if not re.search(matchtitle, title, re.IGNORECASE):
 443                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 444             rejecttitle = self.params.get('rejecttitle', False)
 445             if rejecttitle:
 446                 if re.search(rejecttitle, title, re.IGNORECASE):
 447                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 448         date = info_dict.get('upload_date', None)
 449         if date is not None:
 450             dateRange = self.params.get('daterange', DateRange())
 451             if date not in dateRange:
 452                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 453         view_count = info_dict.get('view_count', None)
 454         if view_count is not None:
 455             min_views = self.params.get('min_views')
 456             if min_views is not None and view_count < min_views:
 457                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 458             max_views = self.params.get('max_views')
 459             if max_views is not None and view_count > max_views:
 460                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 461         age_limit = self.params.get('age_limit')
 462         if age_limit is not None:
 463             if age_limit < info_dict.get('age_limit', 0):
 464                 return 'Skipping "' + title + '" because it is age restricted'
 465         if self.in_download_archive(info_dict):
 466             return '%s has already been recorded in archive' % video_title
 467         return None
 468
 469     @staticmethod
 470     def add_extra_info(info_dict, extra_info):
 471         '''Set the keys from extra_info in info dict if they are missing'''
 472         for key, value in extra_info.items():
 473             info_dict.setdefault(key, value)
 474
 475     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 476                      process=True):
 477         '''
 478         Returns a list with a dictionary for each video we find.
 479         If 'download', also downloads the videos.
 480         extra_info is a dict containing the extra values to add to each result
 481          '''
 482
 483         if ie_key:
 484             ies = [self.get_info_extractor(ie_key)]
 485         else:
 486             ies = self._ies
 487
 488         for ie in ies:
 489             if not ie.suitable(url):
 490                 continue
 491
 492             if not ie.working():
 493                 self.report_warning('The program functionality for this site has been marked as broken, '
 494                                     'and will probably not work.')
 495
 496             try:
 497                 ie_result = ie.extract(url)
 498                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 499                     break
 500                 if isinstance(ie_result, list):
 501                     # Backwards compatibility: old IE result format
 502                     ie_result = {
 503                         '_type': 'compat_list',
 504                         'entries': ie_result,
 505                     }
 506                 self.add_extra_info(ie_result,
 507                     {
 508                         'extractor': ie.IE_NAME,
 509                         'webpage_url': url,
 510                         'webpage_url_basename': url_basename(url),
 511                         'extractor_key': ie.ie_key(),
 512                     })
 513                 if process:
 514                     return self.process_ie_result(ie_result, download, extra_info)
 515                 else:
 516                     return ie_result
 517             except ExtractorError as de: # An error we somewhat expected
 518                 self.report_error(compat_str(de), de.format_traceback())
 519                 break
 520             except MaxDownloadsReached:
 521                 raise
 522             except Exception as e:
 523                 if self.params.get('ignoreerrors', False):
 524                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 525                     break
 526                 else:
 527                     raise
 528         else:
 529             self.report_error('no suitable InfoExtractor: %s' % url)
 530
 531     def process_ie_result(self, ie_result, download=True, extra_info={}):
 532         """
 533         Take the result of the ie(may be modified) and resolve all unresolved
 534         references (URLs, playlist items).
 535
 536         It will also download the videos if 'download'.
 537         Returns the resolved ie_result.
 538         """
 539
 540         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 541         if result_type == 'video':
 542             self.add_extra_info(ie_result, extra_info)
 543             return self.process_video_result(ie_result, download=download)
 544         elif result_type == 'url':
 545             # We have to add extra_info to the results because it may be
 546             # contained in a playlist
 547             return self.extract_info(ie_result['url'],
 548                                      download,
 549                                      ie_key=ie_result.get('ie_key'),
 550                                      extra_info=extra_info)
 551         elif result_type == 'url_transparent':
 552             # Use the information from the embedding page
 553             info = self.extract_info(
 554                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 555                 extra_info=extra_info, download=False, process=False)
 556
 557             def make_result(embedded_info):
 558                 new_result = ie_result.copy()
 559                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 560                           'entries', 'ie_key', 'duration',
 561                           'subtitles', 'annotations', 'format',
 562                           'thumbnail', 'thumbnails'):
 563                     if f in new_result:
 564                         del new_result[f]
 565                     if f in embedded_info:
 566                         new_result[f] = embedded_info[f]
 567                 return new_result
 568             new_result = make_result(info)
 569
 570             assert new_result.get('_type') != 'url_transparent'
 571             if new_result.get('_type') == 'compat_list':
 572                 new_result['entries'] = [
 573                     make_result(e) for e in new_result['entries']]
 574
 575             return self.process_ie_result(
 576                 new_result, download=download, extra_info=extra_info)
 577         elif result_type == 'playlist':
 578             # We process each entry in the playlist
 579             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 580             self.to_screen('[download] Downloading playlist: %s' % playlist)
 581
 582             playlist_results = []
 583
 584             playliststart = self.params.get('playliststart', 1) - 1
 585             playlistend = self.params.get('playlistend', None)
 586             # For backwards compatibility, interpret -1 as whole list
 587             if playlistend == -1:
 588                 playlistend = None
 589
 590             if isinstance(ie_result['entries'], list):
 591                 n_all_entries = len(ie_result['entries'])
 592                 entries = ie_result['entries'][playliststart:playlistend]
 593                 n_entries = len(entries)
 594                 self.to_screen(
 595                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 596                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 597             else:
 598                 assert isinstance(ie_result['entries'], PagedList)
 599                 entries = ie_result['entries'].getslice(
 600                     playliststart, playlistend)
 601                 n_entries = len(entries)
 602                 self.to_screen(
 603                     "[%s] playlist %s: Downloading %d videos" %
 604                     (ie_result['extractor'], playlist, n_entries))
 605
 606             for i, entry in enumerate(entries, 1):
 607                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 608                 extra = {
 609                     'playlist': playlist,
 610                     'playlist_index': i + playliststart,
 611                     'extractor': ie_result['extractor'],
 612                     'webpage_url': ie_result['webpage_url'],
 613                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 614                     'extractor_key': ie_result['extractor_key'],
 615                 }
 616
 617                 reason = self._match_entry(entry)
 618                 if reason is not None:
 619                     self.to_screen('[download] ' + reason)
 620                     continue
 621
 622                 entry_result = self.process_ie_result(entry,
 623                                                       download=download,
 624                                                       extra_info=extra)
 625                 playlist_results.append(entry_result)
 626             ie_result['entries'] = playlist_results
 627             return ie_result
 628         elif result_type == 'compat_list':
 629             def _fixup(r):
 630                 self.add_extra_info(r,
 631                     {
 632                         'extractor': ie_result['extractor'],
 633                         'webpage_url': ie_result['webpage_url'],
 634                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 635                         'extractor_key': ie_result['extractor_key'],
 636                     })
 637                 return r
 638             ie_result['entries'] = [
 639                 self.process_ie_result(_fixup(r), download, extra_info)
 640                 for r in ie_result['entries']
 641             ]
 642             return ie_result
 643         else:
 644             raise Exception('Invalid result type: %s' % result_type)
 645
 646     def select_format(self, format_spec, available_formats):
 647         if format_spec == 'best' or format_spec is None:
 648             return available_formats[-1]
 649         elif format_spec == 'worst':
 650             return available_formats[0]
 651         elif format_spec == 'bestaudio':
 652             audio_formats = [
 653                 f for f in available_formats
 654                 if f.get('vcodec') == 'none']
 655             if audio_formats:
 656                 return audio_formats[-1]
 657         elif format_spec == 'worstaudio':
 658             audio_formats = [
 659                 f for f in available_formats
 660                 if f.get('vcodec') == 'none']
 661             if audio_formats:
 662                 return audio_formats[0]
 663         else:
 664             extensions = ['mp4', 'flv', 'webm', '3gp']
 665             if format_spec in extensions:
 666                 filter_f = lambda f: f['ext'] == format_spec
 667             else:
 668                 filter_f = lambda f: f['format_id'] == format_spec
 669             matches = list(filter(filter_f, available_formats))
 670             if matches:
 671                 return matches[-1]
 672         return None
 673
 674     def process_video_result(self, info_dict, download=True):
 675         assert info_dict.get('_type', 'video') == 'video'
 676
 677         if 'playlist' not in info_dict:
 678             # It isn't part of a playlist
 679             info_dict['playlist'] = None
 680             info_dict['playlist_index'] = None
 681
 682         # This extractors handle format selection themselves
 683         if info_dict['extractor'] in ['Youku']:
 684             if download:
 685                 self.process_info(info_dict)
 686             return info_dict
 687
 688         # We now pick which formats have to be downloaded
 689         if info_dict.get('formats') is None:
 690             # There's only one format available
 691             formats = [info_dict]
 692         else:
 693             formats = info_dict['formats']
 694
 695         # We check that all the formats have the format and format_id fields
 696         for (i, format) in enumerate(formats):
 697             if format.get('format_id') is None:
 698                 format['format_id'] = compat_str(i)
 699             if format.get('format') is None:
 700                 format['format'] = '{id} - {res}{note}'.format(
 701                     id=format['format_id'],
 702                     res=self.format_resolution(format),
 703                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 704                 )
 705             # Automatically determine file extension if missing
 706             if 'ext' not in format:
 707                 format['ext'] = determine_ext(format['url'])
 708
 709         format_limit = self.params.get('format_limit', None)
 710         if format_limit:
 711             formats = list(takewhile_inclusive(
 712                 lambda f: f['format_id'] != format_limit, formats
 713             ))
 714
 715         # TODO Central sorting goes here
 716
 717         if formats[0] is not info_dict:
 718             # only set the 'formats' fields if the original info_dict list them
 719             # otherwise we end up with a circular reference, the first (and unique)
 720             # element in the 'formats' field in info_dict is info_dict itself,
 721             # wich can't be exported to json
 722             info_dict['formats'] = formats
 723         if self.params.get('listformats', None):
 724             self.list_formats(info_dict)
 725             return
 726
 727         req_format = self.params.get('format')
 728         if req_format is None:
 729             req_format = 'best'
 730         formats_to_download = []
 731         # The -1 is for supporting YoutubeIE
 732         if req_format in ('-1', 'all'):
 733             formats_to_download = formats
 734         else:
 735             # We can accept formats requested in the format: 34/5/best, we pick
 736             # the first that is available, starting from left
 737             req_formats = req_format.split('/')
 738             for rf in req_formats:
 739                 if re.match(r'.+?\+.+?', rf) is not None:
 740                     # Two formats have been requested like '137+139'
 741                     format_1, format_2 = rf.split('+')
 742                     formats_info = (self.select_format(format_1, formats),
 743                         self.select_format(format_2, formats))
 744                     if all(formats_info):
 745                         selected_format = {
 746                             'requested_formats': formats_info,
 747                             'format': rf,
 748                             'ext': formats_info[0]['ext'],
 749                         }
 750                     else:
 751                         selected_format = None
 752                 else:
 753                     selected_format = self.select_format(rf, formats)
 754                 if selected_format is not None:
 755                     formats_to_download = [selected_format]
 756                     break
 757         if not formats_to_download:
 758             raise ExtractorError('requested format not available',
 759                                  expected=True)
 760
 761         if download:
 762             if len(formats_to_download) > 1:
 763                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 764             for format in formats_to_download:
 765                 new_info = dict(info_dict)
 766                 new_info.update(format)
 767                 self.process_info(new_info)
 768         # We update the info dict with the best quality format (backwards compatibility)
 769         info_dict.update(formats_to_download[-1])
 770         return info_dict
 771
 772     def process_info(self, info_dict):
 773         """Process a single resolved IE result."""
 774
 775         assert info_dict.get('_type', 'video') == 'video'
 776         #We increment the download the download count here to match the previous behaviour.
 777         self.increment_downloads()
 778
 779         info_dict['fulltitle'] = info_dict['title']
 780         if len(info_dict['title']) > 200:
 781             info_dict['title'] = info_dict['title'][:197] + '...'
 782
 783         # Keep for backwards compatibility
 784         info_dict['stitle'] = info_dict['title']
 785
 786         if not 'format' in info_dict:
 787             info_dict['format'] = info_dict['ext']
 788
 789         reason = self._match_entry(info_dict)
 790         if reason is not None:
 791             self.to_screen('[download] ' + reason)
 792             return
 793
 794         max_downloads = self.params.get('max_downloads')
 795         if max_downloads is not None:
 796             if self._num_downloads > int(max_downloads):
 797                 raise MaxDownloadsReached()
 798
 799         filename = self.prepare_filename(info_dict)
 800
 801         # Forced printings
 802         if self.params.get('forcetitle', False):
 803             self.to_stdout(info_dict['fulltitle'])
 804         if self.params.get('forceid', False):
 805             self.to_stdout(info_dict['id'])
 806         if self.params.get('forceurl', False):
 807             # For RTMP URLs, also include the playpath
 808             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 809         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 810             self.to_stdout(info_dict['thumbnail'])
 811         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 812             self.to_stdout(info_dict['description'])
 813         if self.params.get('forcefilename', False) and filename is not None:
 814             self.to_stdout(filename)
 815         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 816             self.to_stdout(formatSeconds(info_dict['duration']))
 817         if self.params.get('forceformat', False):
 818             self.to_stdout(info_dict['format'])
 819         if self.params.get('forcejson', False):
 820             info_dict['_filename'] = filename
 821             self.to_stdout(json.dumps(info_dict))
 822
 823         # Do nothing else if in simulate mode
 824         if self.params.get('simulate', False):
 825             return
 826
 827         if filename is None:
 828             return
 829
 830         try:
 831             dn = os.path.dirname(encodeFilename(filename))
 832             if dn != '' and not os.path.exists(dn):
 833                 os.makedirs(dn)
 834         except (OSError, IOError) as err:
 835             self.report_error('unable to create directory ' + compat_str(err))
 836             return
 837
 838         if self.params.get('writedescription', False):
 839             descfn = filename + '.description'
 840             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 841                 self.to_screen('[info] Video description is already present')
 842             else:
 843                 try:
 844                     self.to_screen('[info] Writing video description to: ' + descfn)
 845                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 846                         descfile.write(info_dict['description'])
 847                 except (KeyError, TypeError):
 848                     self.report_warning('There\'s no description to write.')
 849                 except (OSError, IOError):
 850                     self.report_error('Cannot write description file ' + descfn)
 851                     return
 852
 853         if self.params.get('writeannotations', False):
 854             annofn = filename + '.annotations.xml'
 855             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 856                 self.to_screen('[info] Video annotations are already present')
 857             else:
 858                 try:
 859                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 860                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 861                         annofile.write(info_dict['annotations'])
 862                 except (KeyError, TypeError):
 863                     self.report_warning('There are no annotations to write.')
 864                 except (OSError, IOError):
 865                     self.report_error('Cannot write annotations file: ' + annofn)
 866                     return
 867
 868         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 869                                        self.params.get('writeautomaticsub')])
 870
 871         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 872             # subtitles download errors are already managed as troubles in relevant IE
 873             # that way it will silently go on when used with unsupporting IE
 874             subtitles = info_dict['subtitles']
 875             sub_format = self.params.get('subtitlesformat', 'srt')
 876             for sub_lang in subtitles.keys():
 877                 sub = subtitles[sub_lang]
 878                 if sub is None:
 879                     continue
 880                 try:
 881                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 882                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 883                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 884                     else:
 885                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 886                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 887                                 subfile.write(sub)
 888                 except (OSError, IOError):
 889                     self.report_error('Cannot write subtitles file ' + descfn)
 890                     return
 891
 892         if self.params.get('writeinfojson', False):
 893             infofn = os.path.splitext(filename)[0] + '.info.json'
 894             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 895                 self.to_screen('[info] Video description metadata is already present')
 896             else:
 897                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 898                 try:
 899                     write_json_file(info_dict, encodeFilename(infofn))
 900                 except (OSError, IOError):
 901                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 902                     return
 903
 904         if self.params.get('writethumbnail', False):
 905             if info_dict.get('thumbnail') is not None:
 906                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
 907                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
 908                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 909                     self.to_screen('[%s] %s: Thumbnail is already present' %
 910                                    (info_dict['extractor'], info_dict['id']))
 911                 else:
 912                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
 913                                    (info_dict['extractor'], info_dict['id']))
 914                     try:
 915                         uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 916                         with open(thumb_filename, 'wb') as thumbf:
 917                             shutil.copyfileobj(uf, thumbf)
 918                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
 919                             (info_dict['extractor'], info_dict['id'], thumb_filename))
 920                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 921                         self.report_warning('Unable to download thumbnail "%s": %s' %
 922                             (info_dict['thumbnail'], compat_str(err)))
 923
 924         if not self.params.get('skip_download', False):
 925             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 926                 success = True
 927             else:
 928                 try:
 929                     def dl(name, info):
 930                         fd = get_suitable_downloader(info)(self, self.params)
 931                         for ph in self._progress_hooks:
 932                             fd.add_progress_hook(ph)
 933                         return fd.download(name, info)
 934                     if info_dict.get('requested_formats') is not None:
 935                         downloaded = []
 936                         success = True
 937                         merger = FFmpegMergerPP(self)
 938                         if not merger._get_executable():
 939                             postprocessors = []
 940                             self.report_warning('You have requested multiple '
 941                                 'formats but ffmpeg or avconv are not installed.'
 942                                 ' The formats won\'t be merged')
 943                         else:
 944                             postprocessors = [merger]
 945                         for f in info_dict['requested_formats']:
 946                             new_info = dict(info_dict)
 947                             new_info.update(f)
 948                             fname = self.prepare_filename(new_info)
 949                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
 950                             downloaded.append(fname)
 951                             partial_success = dl(fname, new_info)
 952                             success = success and partial_success
 953                         info_dict['__postprocessors'] = postprocessors
 954                         info_dict['__files_to_merge'] = downloaded
 955                     else:
 956                         # Just a single file
 957                         success = dl(filename, info_dict)
 958                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 959                     self.report_error('unable to download video data: %s' % str(err))
 960                     return
 961                 except (OSError, IOError) as err:
 962                     raise UnavailableVideoError(err)
 963                 except (ContentTooShortError, ) as err:
 964                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 965                     return
 966
 967             if success:
 968                 try:
 969                     self.post_process(filename, info_dict)
 970                 except (PostProcessingError) as err:
 971                     self.report_error('postprocessing: %s' % str(err))
 972                     return
 973
 974         self.record_download_archive(info_dict)
 975
 976     def download(self, url_list):
 977         """Download a given list of URLs."""
 978         if (len(url_list) > 1 and
 979                 '%' not in self.params['outtmpl']
 980                 and self.params.get('max_downloads') != 1):
 981             raise SameFileError(self.params['outtmpl'])
 982
 983         for url in url_list:
 984             try:
 985                 #It also downloads the videos
 986                 self.extract_info(url)
 987             except UnavailableVideoError:
 988                 self.report_error('unable to download video')
 989             except MaxDownloadsReached:
 990                 self.to_screen('[info] Maximum number of downloaded files reached.')
 991                 raise
 992
 993         return self._download_retcode
 994
 995     def download_with_info_file(self, info_filename):
 996         with io.open(info_filename, 'r', encoding='utf-8') as f:
 997             info = json.load(f)
 998         try:
 999             self.process_ie_result(info, download=True)
1000         except DownloadError:
1001             webpage_url = info.get('webpage_url')
1002             if webpage_url is not None:
1003                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1004                 return self.download([webpage_url])
1005             else:
1006                 raise
1007         return self._download_retcode
1008
1009     def post_process(self, filename, ie_info):
1010         """Run all the postprocessors on the given file."""
1011         info = dict(ie_info)
1012         info['filepath'] = filename
1013         keep_video = None
1014         pps_chain = []
1015         if ie_info.get('__postprocessors') is not None:
1016             pps_chain.extend(ie_info['__postprocessors'])
1017         pps_chain.extend(self._pps)
1018         for pp in pps_chain:
1019             try:
1020                 keep_video_wish, new_info = pp.run(info)
1021                 if keep_video_wish is not None:
1022                     if keep_video_wish:
1023                         keep_video = keep_video_wish
1024                     elif keep_video is None:
1025                         # No clear decision yet, let IE decide
1026                         keep_video = keep_video_wish
1027             except PostProcessingError as e:
1028                 self.report_error(e.msg)
1029         if keep_video is False and not self.params.get('keepvideo', False):
1030             try:
1031                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1032                 os.remove(encodeFilename(filename))
1033             except (IOError, OSError):
1034                 self.report_warning('Unable to remove downloaded video file')
1035
1036     def _make_archive_id(self, info_dict):
1037         # Future-proof against any change in case
1038         # and backwards compatibility with prior versions
1039         extractor = info_dict.get('extractor_key')
1040         if extractor is None:
1041             if 'id' in info_dict:
1042                 extractor = info_dict.get('ie_key')  # key in a playlist
1043         if extractor is None:
1044             return None  # Incomplete video information
1045         return extractor.lower() + ' ' + info_dict['id']
1046
1047     def in_download_archive(self, info_dict):
1048         fn = self.params.get('download_archive')
1049         if fn is None:
1050             return False
1051
1052         vid_id = self._make_archive_id(info_dict)
1053         if vid_id is None:
1054             return False  # Incomplete video information
1055
1056         try:
1057             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1058                 for line in archive_file:
1059                     if line.strip() == vid_id:
1060                         return True
1061         except IOError as ioe:
1062             if ioe.errno != errno.ENOENT:
1063                 raise
1064         return False
1065
1066     def record_download_archive(self, info_dict):
1067         fn = self.params.get('download_archive')
1068         if fn is None:
1069             return
1070         vid_id = self._make_archive_id(info_dict)
1071         assert vid_id
1072         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1073             archive_file.write(vid_id + '\n')
1074
1075     @staticmethod
1076     def format_resolution(format, default='unknown'):
1077         if format.get('vcodec') == 'none':
1078             return 'audio only'
1079         if format.get('resolution') is not None:
1080             return format['resolution']
1081         if format.get('height') is not None:
1082             if format.get('width') is not None:
1083                 res = '%sx%s' % (format['width'], format['height'])
1084             else:
1085                 res = '%sp' % format['height']
1086         elif format.get('width') is not None:
1087             res = '?x%d' % format['width']
1088         else:
1089             res = default
1090         return res
1091
1092     def list_formats(self, info_dict):
1093         def format_note(fdict):
1094             res = ''
1095             if fdict.get('ext') in ['f4f', 'f4m']:
1096                 res += '(unsupported) '
1097             if fdict.get('format_note') is not None:
1098                 res += fdict['format_note'] + ' '
1099             if fdict.get('tbr') is not None:
1100                 res += '%4dk ' % fdict['tbr']
1101             if (fdict.get('vcodec') is not None and
1102                     fdict.get('vcodec') != 'none'):
1103                 res += '%-5s' % fdict['vcodec']
1104                 if fdict.get('vbr') is not None:
1105                     res += '@'
1106             elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1107                 res += 'video@'
1108             if fdict.get('vbr') is not None:
1109                 res += '%4dk' % fdict['vbr']
1110             if fdict.get('acodec') is not None:
1111                 if res:
1112                     res += ', '
1113                 res += '%-5s' % fdict['acodec']
1114             elif fdict.get('abr') is not None:
1115                 if res:
1116                     res += ', '
1117                 res += 'audio'
1118             if fdict.get('abr') is not None:
1119                 res += '@%3dk' % fdict['abr']
1120             if fdict.get('asr') is not None:
1121                 res += ' (%5dHz)' % fdict['asr']
1122             if fdict.get('filesize') is not None:
1123                 if res:
1124                     res += ', '
1125                 res += format_bytes(fdict['filesize'])
1126             return res
1127
1128         def line(format, idlen=20):
1129             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1130                 format['format_id'],
1131                 format['ext'],
1132                 self.format_resolution(format),
1133                 format_note(format),
1134             ))
1135
1136         formats = info_dict.get('formats', [info_dict])
1137         idlen = max(len('format code'),
1138                     max(len(f['format_id']) for f in formats))
1139         formats_s = [line(f, idlen) for f in formats]
1140         if len(formats) > 1:
1141             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1142             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1143
1144         header_line = line({
1145             'format_id': 'format code', 'ext': 'extension',
1146             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1147         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1148                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1149
1150     def urlopen(self, req):
1151         """ Start an HTTP download """
1152         return self._opener.open(req)
1153
1154     def print_debug_header(self):
1155         if not self.params.get('verbose'):
1156             return
1157         write_string('[debug] youtube-dl version ' + __version__ + '\n')
1158         try:
1159             sp = subprocess.Popen(
1160                 ['git', 'rev-parse', '--short', 'HEAD'],
1161                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1162                 cwd=os.path.dirname(os.path.abspath(__file__)))
1163             out, err = sp.communicate()
1164             out = out.decode().strip()
1165             if re.match('[0-9a-f]+', out):
1166                 write_string('[debug] Git HEAD: ' + out + '\n')
1167         except:
1168             try:
1169                 sys.exc_clear()
1170             except:
1171                 pass
1172         write_string('[debug] Python version %s - %s' %
1173                      (platform.python_version(), platform_name()) + '\n')
1174
1175         proxy_map = {}
1176         for handler in self._opener.handlers:
1177             if hasattr(handler, 'proxies'):
1178                 proxy_map.update(handler.proxies)
1179         write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1180
1181     def _setup_opener(self):
1182         timeout_val = self.params.get('socket_timeout')
1183         timeout = 600 if timeout_val is None else float(timeout_val)
1184
1185         opts_cookiefile = self.params.get('cookiefile')
1186         opts_proxy = self.params.get('proxy')
1187
1188         if opts_cookiefile is None:
1189             self.cookiejar = compat_cookiejar.CookieJar()
1190         else:
1191             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1192                 opts_cookiefile)
1193             if os.access(opts_cookiefile, os.R_OK):
1194                 self.cookiejar.load()
1195
1196         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1197             self.cookiejar)
1198         if opts_proxy is not None:
1199             if opts_proxy == '':
1200                 proxies = {}
1201             else:
1202                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1203         else:
1204             proxies = compat_urllib_request.getproxies()
1205             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1206             if 'http' in proxies and 'https' not in proxies:
1207                 proxies['https'] = proxies['http']
1208         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1209
1210         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1211         https_handler = make_HTTPS_handler(
1212             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1213         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1214         opener = compat_urllib_request.build_opener(
1215             https_handler, proxy_handler, cookie_processor, ydlh)
1216         # Delete the default user-agent header, which would otherwise apply in
1217         # cases where our custom HTTP handler doesn't come into play
1218         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1219         opener.addheaders = []
1220         self._opener = opener
1221
1222         # TODO remove this global modification
1223         compat_urllib_request.install_opener(opener)
1224         socket.setdefaulttimeout(timeout)