git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import collections
   7 import errno
   8 import io
   9 import json
  10 import os
  11 import platform
  12 import re
  13 import shutil
  14 import subprocess
  15 import socket
  16 import sys
  17 import time
  18 import traceback
  19
  20 if os.name == 'nt':
  21     import ctypes
  22
  23 from .utils import (
  24     compat_cookiejar,
  25     compat_http_client,
  26     compat_str,
  27     compat_urllib_error,
  28     compat_urllib_request,
  29     ContentTooShortError,
  30     date_from_str,
  31     DateRange,
  32     determine_ext,
  33     DownloadError,
  34     encodeFilename,
  35     ExtractorError,
  36     format_bytes,
  37     get_term_width,
  38     locked_file,
  39     make_HTTPS_handler,
  40     MaxDownloadsReached,
  41     PostProcessingError,
  42     platform_name,
  43     preferredencoding,
  44     SameFileError,
  45     sanitize_filename,
  46     subtitles_filename,
  47     takewhile_inclusive,
  48     UnavailableVideoError,
  49     write_json_file,
  50     write_string,
  51     YoutubeDLHandler,
  52 )
  53 from .extractor import get_info_extractor, gen_extractors
  54 from .FileDownloader import FileDownloader
  55 from .version import __version__
  56
  57
  58 class YoutubeDL(object):
  59     """YoutubeDL class.
  60
  61     YoutubeDL objects are the ones responsible of downloading the
  62     actual video file and writing it to disk if the user has requested
  63     it, among some other tasks. In most cases there should be one per
  64     program. As, given a video URL, the downloader doesn't know how to
  65     extract all the needed information, task that InfoExtractors do, it
  66     has to pass the URL to one of them.
  67
  68     For this, YoutubeDL objects have a method that allows
  69     InfoExtractors to be registered in a given order. When it is passed
  70     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  71     finds that reports being able to handle it. The InfoExtractor extracts
  72     all the information about the video or videos the URL refers to, and
  73     YoutubeDL process the extracted information, possibly using a File
  74     Downloader to download the video.
  75
  76     YoutubeDL objects accept a lot of parameters. In order not to saturate
  77     the object constructor with arguments, it receives a dictionary of
  78     options instead. These options are available through the params
  79     attribute for the InfoExtractors to use. The YoutubeDL also
  80     registers itself as the downloader in charge for the InfoExtractors
  81     that are added to it, so this is a "mutual registration".
  82
  83     Available options:
  84
  85     username:          Username for authentication purposes.
  86     password:          Password for authentication purposes.
  87     videopassword:     Password for acces a video.
  88     usenetrc:          Use netrc for authentication instead.
  89     verbose:           Print additional info to stdout.
  90     quiet:             Do not print messages to stdout.
  91     forceurl:          Force printing final URL.
  92     forcetitle:        Force printing title.
  93     forceid:           Force printing ID.
  94     forcethumbnail:    Force printing thumbnail URL.
  95     forcedescription:  Force printing description.
  96     forcefilename:     Force printing final filename.
  97     forcejson:         Force printing info_dict as JSON.
  98     simulate:          Do not download the video files.
  99     format:            Video format code.
 100     format_limit:      Highest quality format to try.
 101     outtmpl:           Template for output names.
 102     restrictfilenames: Do not allow "&" and spaces in file names
 103     ignoreerrors:      Do not stop on download errors.
 104     nooverwrites:      Prevent overwriting files.
 105     playliststart:     Playlist item to start at.
 106     playlistend:       Playlist item to end at.
 107     matchtitle:        Download only matching titles.
 108     rejecttitle:       Reject downloads for matching titles.
 109     logger:            Log messages to a logging.Logger instance.
 110     logtostderr:       Log messages to stderr instead of stdout.
 111     writedescription:  Write the video description to a .description file
 112     writeinfojson:     Write the video description to a .info.json file
 113     writeannotations:  Write the video annotations to a .annotations.xml file
 114     writethumbnail:    Write the thumbnail image to a file
 115     writesubtitles:    Write the video subtitles to a file
 116     writeautomaticsub: Write the automatic subtitles to a file
 117     allsubtitles:      Downloads all the subtitles of the video
 118                        (requires writesubtitles or writeautomaticsub)
 119     listsubtitles:     Lists all available subtitles for the video
 120     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 121     subtitleslangs:    List of languages of the subtitles to download
 122     keepvideo:         Keep the video file after post-processing
 123     daterange:         A DateRange object, download only if the upload_date is in the range.
 124     skip_download:     Skip the actual download of the video file
 125     cachedir:          Location of the cache files in the filesystem.
 126                        None to disable filesystem cache.
 127     noplaylist:        Download single video instead of a playlist if in doubt.
 128     age_limit:         An integer representing the user's age in years.
 129                        Unsuitable videos for the given age are skipped.
 130     min_views:         An integer representing the minimum view count the video
 131                        must have in order to not be skipped.
 132                        Videos without view count information are always
 133                        downloaded. None for no limit.
 134     max_views:         An integer representing the maximum view count.
 135                        Videos that are more popular than that are not
 136                        downloaded.
 137                        Videos without view count information are always
 138                        downloaded. None for no limit.
 139     download_archive:  File name of a file where all downloads are recorded.
 140                        Videos already present in the file are not downloaded
 141                        again.
 142     cookiefile:        File name where cookies should be read from and dumped to.
 143     nocheckcertificate:Do not verify SSL certificates
 144     proxy:             URL of the proxy server to use
 145     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 146     bidi_workaround:   Work around buggy terminals without bidirectional text
 147                        support, using fridibi
 148
 149     The following parameters are not used by YoutubeDL itself, they are used by
 150     the FileDownloader:
 151     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 152     noresizebuffer, retries, continuedl, noprogress, consoletitle
 153     """
 154
 155     params = None
 156     _ies = []
 157     _pps = []
 158     _download_retcode = None
 159     _num_downloads = None
 160     _screen_file = None
 161
 162     def __init__(self, params=None):
 163         """Create a FileDownloader object with the given options."""
 164         self._ies = []
 165         self._ies_instances = {}
 166         self._pps = []
 167         self._progress_hooks = []
 168         self._download_retcode = 0
 169         self._num_downloads = 0
 170         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 171         self._err_file = sys.stderr
 172         self.params = {} if params is None else params
 173
 174         if params.get('bidi_workaround', False):
 175             try:
 176                 import pty
 177                 master, slave = pty.openpty()
 178                 width = get_term_width()
 179                 if width is None:
 180                     width_args = []
 181                 else:
 182                     width_args = ['-w', str(width)]
 183                 self._fribidi = subprocess.Popen(
 184                     ['fribidi', '-c', 'UTF-8'] + width_args,
 185                     stdin=subprocess.PIPE,
 186                     stdout=slave,
 187                     stderr=self._err_file)
 188                 self._fribidi_channel = os.fdopen(master, 'rb')
 189             except OSError as ose:
 190                 if ose.errno == 2:
 191                     self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 192                 else:
 193                     raise
 194
 195         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 196                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 197                 and not params['restrictfilenames']):
 198             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 199             self.report_warning(
 200                 u'Assuming --restrict-filenames since file system encoding '
 201                 u'cannot encode all charactes. '
 202                 u'Set the LC_ALL environment variable to fix this.')
 203             self.params['restrictfilenames'] = True
 204
 205         self.fd = FileDownloader(self, self.params)
 206
 207         if '%(stitle)s' in self.params.get('outtmpl', ''):
 208             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 209
 210         self._setup_opener()
 211
 212     def add_info_extractor(self, ie):
 213         """Add an InfoExtractor object to the end of the list."""
 214         self._ies.append(ie)
 215         self._ies_instances[ie.ie_key()] = ie
 216         ie.set_downloader(self)
 217
 218     def get_info_extractor(self, ie_key):
 219         """
 220         Get an instance of an IE with name ie_key, it will try to get one from
 221         the _ies list, if there's no instance it will create a new one and add
 222         it to the extractor list.
 223         """
 224         ie = self._ies_instances.get(ie_key)
 225         if ie is None:
 226             ie = get_info_extractor(ie_key)()
 227             self.add_info_extractor(ie)
 228         return ie
 229
 230     def add_default_info_extractors(self):
 231         """
 232         Add the InfoExtractors returned by gen_extractors to the end of the list
 233         """
 234         for ie in gen_extractors():
 235             self.add_info_extractor(ie)
 236
 237     def add_post_processor(self, pp):
 238         """Add a PostProcessor object to the end of the chain."""
 239         self._pps.append(pp)
 240         pp.set_downloader(self)
 241
 242     def _bidi_workaround(self, message):
 243         if not hasattr(self, '_fribidi_channel'):
 244             return message
 245
 246         assert type(message) == type(u'')
 247         line_count = message.count(u'\n') + 1
 248         self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
 249         self._fribidi.stdin.flush()
 250         res = u''.join(self._fribidi_channel.readline().decode('utf-8')
 251                        for _ in range(line_count))
 252         return res[:-len(u'\n')]
 253
 254     def to_screen(self, message, skip_eol=False):
 255         """Print message to stdout if not in quiet mode."""
 256         return self.to_stdout(message, skip_eol, check_quiet=True)
 257
 258     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 259         """Print message to stdout if not in quiet mode."""
 260         if self.params.get('logger'):
 261             self.params['logger'].debug(message)
 262         elif not check_quiet or not self.params.get('quiet', False):
 263             message = self._bidi_workaround(message)
 264             terminator = [u'\n', u''][skip_eol]
 265             output = message + terminator
 266
 267             write_string(output, self._screen_file)
 268
 269     def to_stderr(self, message):
 270         """Print message to stderr."""
 271         assert type(message) == type(u'')
 272         if self.params.get('logger'):
 273             self.params['logger'].error(message)
 274         else:
 275             message = self._bidi_workaround(message)
 276             output = message + u'\n'
 277             write_string(output, self._err_file)
 278
 279     def to_console_title(self, message):
 280         if not self.params.get('consoletitle', False):
 281             return
 282         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 283             # c_wchar_p() might not be necessary if `message` is
 284             # already of type unicode()
 285             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 286         elif 'TERM' in os.environ:
 287             write_string(u'\033]0;%s\007' % message, self._screen_file)
 288
 289     def save_console_title(self):
 290         if not self.params.get('consoletitle', False):
 291             return
 292         if 'TERM' in os.environ:
 293             # Save the title on stack
 294             write_string(u'\033[22;0t', self._screen_file)
 295
 296     def restore_console_title(self):
 297         if not self.params.get('consoletitle', False):
 298             return
 299         if 'TERM' in os.environ:
 300             # Restore the title from stack
 301             write_string(u'\033[23;0t', self._screen_file)
 302
 303     def __enter__(self):
 304         self.save_console_title()
 305         return self
 306
 307     def __exit__(self, *args):
 308         self.restore_console_title()
 309
 310         if self.params.get('cookiefile') is not None:
 311             self.cookiejar.save()
 312
 313     def trouble(self, message=None, tb=None):
 314         """Determine action to take when a download problem appears.
 315
 316         Depending on if the downloader has been configured to ignore
 317         download errors or not, this method may throw an exception or
 318         not when errors are found, after printing the message.
 319
 320         tb, if given, is additional traceback information.
 321         """
 322         if message is not None:
 323             self.to_stderr(message)
 324         if self.params.get('verbose'):
 325             if tb is None:
 326                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 327                     tb = u''
 328                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 329                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 330                     tb += compat_str(traceback.format_exc())
 331                 else:
 332                     tb_data = traceback.format_list(traceback.extract_stack())
 333                     tb = u''.join(tb_data)
 334             self.to_stderr(tb)
 335         if not self.params.get('ignoreerrors', False):
 336             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 337                 exc_info = sys.exc_info()[1].exc_info
 338             else:
 339                 exc_info = sys.exc_info()
 340             raise DownloadError(message, exc_info)
 341         self._download_retcode = 1
 342
 343     def report_warning(self, message):
 344         '''
 345         Print the message to stderr, it will be prefixed with 'WARNING:'
 346         If stderr is a tty file the 'WARNING:' will be colored
 347         '''
 348         if self._err_file.isatty() and os.name != 'nt':
 349             _msg_header = u'\033[0;33mWARNING:\033[0m'
 350         else:
 351             _msg_header = u'WARNING:'
 352         warning_message = u'%s %s' % (_msg_header, message)
 353         self.to_stderr(warning_message)
 354
 355     def report_error(self, message, tb=None):
 356         '''
 357         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 358         in red if stderr is a tty file.
 359         '''
 360         if self._err_file.isatty() and os.name != 'nt':
 361             _msg_header = u'\033[0;31mERROR:\033[0m'
 362         else:
 363             _msg_header = u'ERROR:'
 364         error_message = u'%s %s' % (_msg_header, message)
 365         self.trouble(error_message, tb)
 366
 367     def report_writedescription(self, descfn):
 368         """ Report that the description file is being written """
 369         self.to_screen(u'[info] Writing video description to: ' + descfn)
 370
 371     def report_writesubtitles(self, sub_filename):
 372         """ Report that the subtitles file is being written """
 373         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 374
 375     def report_writeinfojson(self, infofn):
 376         """ Report that the metadata file has been written """
 377         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 378
 379     def report_writeannotations(self, annofn):
 380         """ Report that the annotations file has been written. """
 381         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 382
 383     def report_file_already_downloaded(self, file_name):
 384         """Report file has already been fully downloaded."""
 385         try:
 386             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 387         except UnicodeEncodeError:
 388             self.to_screen(u'[download] The file has already been downloaded')
 389
 390     def increment_downloads(self):
 391         """Increment the ordinal that assigns a number to each file."""
 392         self._num_downloads += 1
 393
 394     def prepare_filename(self, info_dict):
 395         """Generate the output filename."""
 396         try:
 397             template_dict = dict(info_dict)
 398
 399             template_dict['epoch'] = int(time.time())
 400             autonumber_size = self.params.get('autonumber_size')
 401             if autonumber_size is None:
 402                 autonumber_size = 5
 403             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 404             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 405             if template_dict.get('playlist_index') is not None:
 406                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 407
 408             sanitize = lambda k, v: sanitize_filename(
 409                 compat_str(v),
 410                 restricted=self.params.get('restrictfilenames'),
 411                 is_id=(k == u'id'))
 412             template_dict = dict((k, sanitize(k, v))
 413                                  for k, v in template_dict.items()
 414                                  if v is not None)
 415             template_dict = collections.defaultdict(lambda: u'NA', template_dict)
 416
 417             tmpl = os.path.expanduser(self.params['outtmpl'])
 418             filename = tmpl % template_dict
 419             return filename
 420         except ValueError as err:
 421             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 422             return None
 423
 424     def _match_entry(self, info_dict):
 425         """ Returns None iff the file should be downloaded """
 426
 427         video_title = info_dict.get('title', info_dict.get('id', u'video'))
 428         if 'title' in info_dict:
 429             # This can happen when we're just evaluating the playlist
 430             title = info_dict['title']
 431             matchtitle = self.params.get('matchtitle', False)
 432             if matchtitle:
 433                 if not re.search(matchtitle, title, re.IGNORECASE):
 434                     return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
 435             rejecttitle = self.params.get('rejecttitle', False)
 436             if rejecttitle:
 437                 if re.search(rejecttitle, title, re.IGNORECASE):
 438                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 439         date = info_dict.get('upload_date', None)
 440         if date is not None:
 441             dateRange = self.params.get('daterange', DateRange())
 442             if date not in dateRange:
 443                 return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 444         view_count = info_dict.get('view_count', None)
 445         if view_count is not None:
 446             min_views = self.params.get('min_views')
 447             if min_views is not None and view_count < min_views:
 448                 return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 449             max_views = self.params.get('max_views')
 450             if max_views is not None and view_count > max_views:
 451                 return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 452         age_limit = self.params.get('age_limit')
 453         if age_limit is not None:
 454             if age_limit < info_dict.get('age_limit', 0):
 455                 return u'Skipping "' + title + '" because it is age restricted'
 456         if self.in_download_archive(info_dict):
 457             return u'%s has already been recorded in archive' % video_title
 458         return None
 459
 460     @staticmethod
 461     def add_extra_info(info_dict, extra_info):
 462         '''Set the keys from extra_info in info dict if they are missing'''
 463         for key, value in extra_info.items():
 464             info_dict.setdefault(key, value)
 465
 466     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 467                      process=True):
 468         '''
 469         Returns a list with a dictionary for each video we find.
 470         If 'download', also downloads the videos.
 471         extra_info is a dict containing the extra values to add to each result
 472          '''
 473
 474         if ie_key:
 475             ies = [self.get_info_extractor(ie_key)]
 476         else:
 477             ies = self._ies
 478
 479         for ie in ies:
 480             if not ie.suitable(url):
 481                 continue
 482
 483             if not ie.working():
 484                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 485                                     u'and will probably not work.')
 486
 487             try:
 488                 ie_result = ie.extract(url)
 489                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 490                     break
 491                 if isinstance(ie_result, list):
 492                     # Backwards compatibility: old IE result format
 493                     ie_result = {
 494                         '_type': 'compat_list',
 495                         'entries': ie_result,
 496                     }
 497                 self.add_extra_info(ie_result,
 498                     {
 499                         'extractor': ie.IE_NAME,
 500                         'webpage_url': url,
 501                         'extractor_key': ie.ie_key(),
 502                     })
 503                 if process:
 504                     return self.process_ie_result(ie_result, download, extra_info)
 505                 else:
 506                     return ie_result
 507             except ExtractorError as de: # An error we somewhat expected
 508                 self.report_error(compat_str(de), de.format_traceback())
 509                 break
 510             except Exception as e:
 511                 if self.params.get('ignoreerrors', False):
 512                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 513                     break
 514                 else:
 515                     raise
 516         else:
 517             self.report_error(u'no suitable InfoExtractor: %s' % url)
 518
 519     def process_ie_result(self, ie_result, download=True, extra_info={}):
 520         """
 521         Take the result of the ie(may be modified) and resolve all unresolved
 522         references (URLs, playlist items).
 523
 524         It will also download the videos if 'download'.
 525         Returns the resolved ie_result.
 526         """
 527
 528         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 529         if result_type == 'video':
 530             self.add_extra_info(ie_result, extra_info)
 531             return self.process_video_result(ie_result, download=download)
 532         elif result_type == 'url':
 533             # We have to add extra_info to the results because it may be
 534             # contained in a playlist
 535             return self.extract_info(ie_result['url'],
 536                                      download,
 537                                      ie_key=ie_result.get('ie_key'),
 538                                      extra_info=extra_info)
 539         elif result_type == 'url_transparent':
 540             # Use the information from the embedding page
 541             info = self.extract_info(
 542                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 543                 extra_info=extra_info, download=False, process=False)
 544
 545             def make_result(embedded_info):
 546                 new_result = ie_result.copy()
 547                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 548                           'entries', 'urlhandle', 'ie_key', 'duration',
 549                           'subtitles', 'annotations', 'format',
 550                           'thumbnail', 'thumbnails'):
 551                     if f in new_result:
 552                         del new_result[f]
 553                     if f in embedded_info:
 554                         new_result[f] = embedded_info[f]
 555                 return new_result
 556             new_result = make_result(info)
 557
 558             assert new_result.get('_type') != 'url_transparent'
 559             if new_result.get('_type') == 'compat_list':
 560                 new_result['entries'] = [
 561                     make_result(e) for e in new_result['entries']]
 562
 563             return self.process_ie_result(
 564                 new_result, download=download, extra_info=extra_info)
 565         elif result_type == 'playlist':
 566             # We process each entry in the playlist
 567             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 568             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 569
 570             playlist_results = []
 571
 572             n_all_entries = len(ie_result['entries'])
 573             playliststart = self.params.get('playliststart', 1) - 1
 574             playlistend = self.params.get('playlistend', -1)
 575
 576             if playlistend == -1:
 577                 entries = ie_result['entries'][playliststart:]
 578             else:
 579                 entries = ie_result['entries'][playliststart:playlistend]
 580
 581             n_entries = len(entries)
 582
 583             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 584                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 585
 586             for i, entry in enumerate(entries, 1):
 587                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 588                 extra = {
 589                     'playlist': playlist,
 590                     'playlist_index': i + playliststart,
 591                     'extractor': ie_result['extractor'],
 592                     'webpage_url': ie_result['webpage_url'],
 593                     'extractor_key': ie_result['extractor_key'],
 594                 }
 595
 596                 reason = self._match_entry(entry)
 597                 if reason is not None:
 598                     self.to_screen(u'[download] ' + reason)
 599                     continue
 600
 601                 entry_result = self.process_ie_result(entry,
 602                                                       download=download,
 603                                                       extra_info=extra)
 604                 playlist_results.append(entry_result)
 605             ie_result['entries'] = playlist_results
 606             return ie_result
 607         elif result_type == 'compat_list':
 608             def _fixup(r):
 609                 self.add_extra_info(r,
 610                     {
 611                         'extractor': ie_result['extractor'],
 612                         'webpage_url': ie_result['webpage_url'],
 613                         'extractor_key': ie_result['extractor_key'],
 614                     })
 615                 return r
 616             ie_result['entries'] = [
 617                 self.process_ie_result(_fixup(r), download, extra_info)
 618                 for r in ie_result['entries']
 619             ]
 620             return ie_result
 621         else:
 622             raise Exception('Invalid result type: %s' % result_type)
 623
 624     def select_format(self, format_spec, available_formats):
 625         if format_spec == 'best' or format_spec is None:
 626             return available_formats[-1]
 627         elif format_spec == 'worst':
 628             return available_formats[0]
 629         else:
 630             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 631             if format_spec in extensions:
 632                 filter_f = lambda f: f['ext'] == format_spec
 633             else:
 634                 filter_f = lambda f: f['format_id'] == format_spec
 635             matches = list(filter(filter_f, available_formats))
 636             if matches:
 637                 return matches[-1]
 638         return None
 639
 640     def process_video_result(self, info_dict, download=True):
 641         assert info_dict.get('_type', 'video') == 'video'
 642
 643         if 'playlist' not in info_dict:
 644             # It isn't part of a playlist
 645             info_dict['playlist'] = None
 646             info_dict['playlist_index'] = None
 647
 648         # This extractors handle format selection themselves
 649         if info_dict['extractor'] in [u'youtube', u'Youku']:
 650             if download:
 651                 self.process_info(info_dict)
 652             return info_dict
 653
 654         # We now pick which formats have to be downloaded
 655         if info_dict.get('formats') is None:
 656             # There's only one format available
 657             formats = [info_dict]
 658         else:
 659             formats = info_dict['formats']
 660
 661         # We check that all the formats have the format and format_id fields
 662         for (i, format) in enumerate(formats):
 663             if format.get('format_id') is None:
 664                 format['format_id'] = compat_str(i)
 665             if format.get('format') is None:
 666                 format['format'] = u'{id} - {res}{note}'.format(
 667                     id=format['format_id'],
 668                     res=self.format_resolution(format),
 669                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 670                 )
 671             # Automatically determine file extension if missing
 672             if 'ext' not in format:
 673                 format['ext'] = determine_ext(format['url'])
 674
 675         if self.params.get('listformats', None):
 676             self.list_formats(info_dict)
 677             return
 678
 679         format_limit = self.params.get('format_limit', None)
 680         if format_limit:
 681             formats = list(takewhile_inclusive(
 682                 lambda f: f['format_id'] != format_limit, formats
 683             ))
 684         if self.params.get('prefer_free_formats'):
 685             def _free_formats_key(f):
 686                 try:
 687                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 688                 except ValueError:
 689                     ext_ord = -1
 690                 # We only compare the extension if they have the same height and width
 691                 return (f.get('height'), f.get('width'), ext_ord)
 692             formats = sorted(formats, key=_free_formats_key)
 693
 694         req_format = self.params.get('format', 'best')
 695         if req_format is None:
 696             req_format = 'best'
 697         formats_to_download = []
 698         # The -1 is for supporting YoutubeIE
 699         if req_format in ('-1', 'all'):
 700             formats_to_download = formats
 701         else:
 702             # We can accept formats requestd in the format: 34/5/best, we pick
 703             # the first that is available, starting from left
 704             req_formats = req_format.split('/')
 705             for rf in req_formats:
 706                 selected_format = self.select_format(rf, formats)
 707                 if selected_format is not None:
 708                     formats_to_download = [selected_format]
 709                     break
 710         if not formats_to_download:
 711             raise ExtractorError(u'requested format not available',
 712                                  expected=True)
 713
 714         if download:
 715             if len(formats_to_download) > 1:
 716                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 717             for format in formats_to_download:
 718                 new_info = dict(info_dict)
 719                 new_info.update(format)
 720                 self.process_info(new_info)
 721         # We update the info dict with the best quality format (backwards compatibility)
 722         info_dict.update(formats_to_download[-1])
 723         return info_dict
 724
 725     def process_info(self, info_dict):
 726         """Process a single resolved IE result."""
 727
 728         assert info_dict.get('_type', 'video') == 'video'
 729         #We increment the download the download count here to match the previous behaviour.
 730         self.increment_downloads()
 731
 732         info_dict['fulltitle'] = info_dict['title']
 733         if len(info_dict['title']) > 200:
 734             info_dict['title'] = info_dict['title'][:197] + u'...'
 735
 736         # Keep for backwards compatibility
 737         info_dict['stitle'] = info_dict['title']
 738
 739         if not 'format' in info_dict:
 740             info_dict['format'] = info_dict['ext']
 741
 742         reason = self._match_entry(info_dict)
 743         if reason is not None:
 744             self.to_screen(u'[download] ' + reason)
 745             return
 746
 747         max_downloads = self.params.get('max_downloads')
 748         if max_downloads is not None:
 749             if self._num_downloads > int(max_downloads):
 750                 raise MaxDownloadsReached()
 751
 752         filename = self.prepare_filename(info_dict)
 753
 754         # Forced printings
 755         if self.params.get('forcetitle', False):
 756             self.to_stdout(info_dict['fulltitle'])
 757         if self.params.get('forceid', False):
 758             self.to_stdout(info_dict['id'])
 759         if self.params.get('forceurl', False):
 760             # For RTMP URLs, also include the playpath
 761             self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
 762         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 763             self.to_stdout(info_dict['thumbnail'])
 764         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 765             self.to_stdout(info_dict['description'])
 766         if self.params.get('forcefilename', False) and filename is not None:
 767             self.to_stdout(filename)
 768         if self.params.get('forceformat', False):
 769             self.to_stdout(info_dict['format'])
 770         if self.params.get('forcejson', False):
 771             info_dict['_filename'] = filename
 772             self.to_stdout(json.dumps(info_dict))
 773
 774         # Do nothing else if in simulate mode
 775         if self.params.get('simulate', False):
 776             return
 777
 778         if filename is None:
 779             return
 780
 781         try:
 782             dn = os.path.dirname(encodeFilename(filename))
 783             if dn != '' and not os.path.exists(dn):
 784                 os.makedirs(dn)
 785         except (OSError, IOError) as err:
 786             self.report_error(u'unable to create directory ' + compat_str(err))
 787             return
 788
 789         if self.params.get('writedescription', False):
 790             try:
 791                 descfn = filename + u'.description'
 792                 self.report_writedescription(descfn)
 793                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 794                     descfile.write(info_dict['description'])
 795             except (KeyError, TypeError):
 796                 self.report_warning(u'There\'s no description to write.')
 797             except (OSError, IOError):
 798                 self.report_error(u'Cannot write description file ' + descfn)
 799                 return
 800
 801         if self.params.get('writeannotations', False):
 802             try:
 803                 annofn = filename + u'.annotations.xml'
 804                 self.report_writeannotations(annofn)
 805                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 806                     annofile.write(info_dict['annotations'])
 807             except (KeyError, TypeError):
 808                 self.report_warning(u'There are no annotations to write.')
 809             except (OSError, IOError):
 810                 self.report_error(u'Cannot write annotations file: ' + annofn)
 811                 return
 812
 813         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 814                                        self.params.get('writeautomaticsub')])
 815
 816         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 817             # subtitles download errors are already managed as troubles in relevant IE
 818             # that way it will silently go on when used with unsupporting IE
 819             subtitles = info_dict['subtitles']
 820             sub_format = self.params.get('subtitlesformat', 'srt')
 821             for sub_lang in subtitles.keys():
 822                 sub = subtitles[sub_lang]
 823                 if sub is None:
 824                     continue
 825                 try:
 826                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 827                     self.report_writesubtitles(sub_filename)
 828                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 829                             subfile.write(sub)
 830                 except (OSError, IOError):
 831                     self.report_error(u'Cannot write subtitles file ' + descfn)
 832                     return
 833
 834         if self.params.get('writeinfojson', False):
 835             infofn = os.path.splitext(filename)[0] + u'.info.json'
 836             self.report_writeinfojson(infofn)
 837             try:
 838                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 839                 write_json_file(json_info_dict, encodeFilename(infofn))
 840             except (OSError, IOError):
 841                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 842                 return
 843
 844         if self.params.get('writethumbnail', False):
 845             if info_dict.get('thumbnail') is not None:
 846                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 847                 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
 848                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 849                                (info_dict['extractor'], info_dict['id']))
 850                 try:
 851                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 852                     with open(thumb_filename, 'wb') as thumbf:
 853                         shutil.copyfileobj(uf, thumbf)
 854                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 855                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 856                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 857                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 858                         (info_dict['thumbnail'], compat_str(err)))
 859
 860         if not self.params.get('skip_download', False):
 861             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 862                 success = True
 863             else:
 864                 try:
 865                     success = self.fd._do_download(filename, info_dict)
 866                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 867                     self.report_error(u'unable to download video data: %s' % str(err))
 868                     return
 869                 except (OSError, IOError) as err:
 870                     raise UnavailableVideoError(err)
 871                 except (ContentTooShortError, ) as err:
 872                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 873                     return
 874
 875             if success:
 876                 try:
 877                     self.post_process(filename, info_dict)
 878                 except (PostProcessingError) as err:
 879                     self.report_error(u'postprocessing: %s' % str(err))
 880                     return
 881
 882         self.record_download_archive(info_dict)
 883
 884     def download(self, url_list):
 885         """Download a given list of URLs."""
 886         if (len(url_list) > 1 and
 887                 '%' not in self.params['outtmpl']
 888                 and self.params.get('max_downloads') != 1):
 889             raise SameFileError(self.params['outtmpl'])
 890
 891         for url in url_list:
 892             try:
 893                 #It also downloads the videos
 894                 self.extract_info(url)
 895             except UnavailableVideoError:
 896                 self.report_error(u'unable to download video')
 897             except MaxDownloadsReached:
 898                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 899                 raise
 900
 901         return self._download_retcode
 902
 903     def download_with_info_file(self, info_filename):
 904         with io.open(info_filename, 'r', encoding='utf-8') as f:
 905             info = json.load(f)
 906         try:
 907             self.process_ie_result(info, download=True)
 908         except DownloadError:
 909             webpage_url = info.get('webpage_url')
 910             if webpage_url is not None:
 911                 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
 912                 return self.download([webpage_url])
 913             else:
 914                 raise
 915         return self._download_retcode
 916
 917     def post_process(self, filename, ie_info):
 918         """Run all the postprocessors on the given file."""
 919         info = dict(ie_info)
 920         info['filepath'] = filename
 921         keep_video = None
 922         for pp in self._pps:
 923             try:
 924                 keep_video_wish, new_info = pp.run(info)
 925                 if keep_video_wish is not None:
 926                     if keep_video_wish:
 927                         keep_video = keep_video_wish
 928                     elif keep_video is None:
 929                         # No clear decision yet, let IE decide
 930                         keep_video = keep_video_wish
 931             except PostProcessingError as e:
 932                 self.report_error(e.msg)
 933         if keep_video is False and not self.params.get('keepvideo', False):
 934             try:
 935                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 936                 os.remove(encodeFilename(filename))
 937             except (IOError, OSError):
 938                 self.report_warning(u'Unable to remove downloaded video file')
 939
 940     def _make_archive_id(self, info_dict):
 941         # Future-proof against any change in case
 942         # and backwards compatibility with prior versions
 943         extractor = info_dict.get('extractor_key')
 944         if extractor is None:
 945             if 'id' in info_dict:
 946                 extractor = info_dict.get('ie_key')  # key in a playlist
 947         if extractor is None:
 948             return None  # Incomplete video information
 949         return extractor.lower() + u' ' + info_dict['id']
 950
 951     def in_download_archive(self, info_dict):
 952         fn = self.params.get('download_archive')
 953         if fn is None:
 954             return False
 955
 956         vid_id = self._make_archive_id(info_dict)
 957         if vid_id is None:
 958             return False  # Incomplete video information
 959
 960         try:
 961             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 962                 for line in archive_file:
 963                     if line.strip() == vid_id:
 964                         return True
 965         except IOError as ioe:
 966             if ioe.errno != errno.ENOENT:
 967                 raise
 968         return False
 969
 970     def record_download_archive(self, info_dict):
 971         fn = self.params.get('download_archive')
 972         if fn is None:
 973             return
 974         vid_id = self._make_archive_id(info_dict)
 975         assert vid_id
 976         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 977             archive_file.write(vid_id + u'\n')
 978
 979     @staticmethod
 980     def format_resolution(format, default='unknown'):
 981         if format.get('vcodec') == 'none':
 982             return 'audio only'
 983         if format.get('_resolution') is not None:
 984             return format['_resolution']
 985         if format.get('height') is not None:
 986             if format.get('width') is not None:
 987                 res = u'%sx%s' % (format['width'], format['height'])
 988             else:
 989                 res = u'%sp' % format['height']
 990         else:
 991             res = default
 992         return res
 993
 994     def list_formats(self, info_dict):
 995         def format_note(fdict):
 996             res = u''
 997             if fdict.get('format_note') is not None:
 998                 res += fdict['format_note'] + u' '
 999             if (fdict.get('vcodec') is not None and
1000                     fdict.get('vcodec') != 'none'):
1001                 res += u'%-5s' % fdict['vcodec']
1002             elif fdict.get('vbr') is not None:
1003                 res += u'video'
1004             if fdict.get('vbr') is not None:
1005                 res += u'@%4dk' % fdict['vbr']
1006             if fdict.get('acodec') is not None:
1007                 if res:
1008                     res += u', '
1009                 res += u'%-5s' % fdict['acodec']
1010             elif fdict.get('abr') is not None:
1011                 if res:
1012                     res += u', '
1013                 res += 'audio'
1014             if fdict.get('abr') is not None:
1015                 res += u'@%3dk' % fdict['abr']
1016             if fdict.get('filesize') is not None:
1017                 if res:
1018                     res += u', '
1019                 res += format_bytes(fdict['filesize'])
1020             return res
1021
1022         def line(format, idlen=20):
1023             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
1024                 format['format_id'],
1025                 format['ext'],
1026                 self.format_resolution(format),
1027                 format_note(format),
1028             ))
1029
1030         formats = info_dict.get('formats', [info_dict])
1031         idlen = max(len(u'format code'),
1032                     max(len(f['format_id']) for f in formats))
1033         formats_s = [line(f, idlen) for f in formats]
1034         if len(formats) > 1:
1035             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1036             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1037
1038         header_line = line({
1039             'format_id': u'format code', 'ext': u'extension',
1040             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1041         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1042                        (info_dict['id'], header_line, u"\n".join(formats_s)))
1043
1044     def urlopen(self, req):
1045         """ Start an HTTP download """
1046         return self._opener.open(req)
1047
1048     def print_debug_header(self):
1049         if not self.params.get('verbose'):
1050             return
1051         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1052         try:
1053             sp = subprocess.Popen(
1054                 ['git', 'rev-parse', '--short', 'HEAD'],
1055                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1056                 cwd=os.path.dirname(os.path.abspath(__file__)))
1057             out, err = sp.communicate()
1058             out = out.decode().strip()
1059             if re.match('[0-9a-f]+', out):
1060                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1061         except:
1062             try:
1063                 sys.exc_clear()
1064             except:
1065                 pass
1066         write_string(u'[debug] Python version %s - %s' %
1067                      (platform.python_version(), platform_name()) + u'\n')
1068
1069         proxy_map = {}
1070         for handler in self._opener.handlers:
1071             if hasattr(handler, 'proxies'):
1072                 proxy_map.update(handler.proxies)
1073         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1074
1075     def _setup_opener(self):
1076         timeout_val = self.params.get('socket_timeout')
1077         timeout = 600 if timeout_val is None else float(timeout_val)
1078
1079         opts_cookiefile = self.params.get('cookiefile')
1080         opts_proxy = self.params.get('proxy')
1081
1082         if opts_cookiefile is None:
1083             self.cookiejar = compat_cookiejar.CookieJar()
1084         else:
1085             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1086                 opts_cookiefile)
1087             if os.access(opts_cookiefile, os.R_OK):
1088                 self.cookiejar.load()
1089
1090         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1091             self.cookiejar)
1092         if opts_proxy is not None:
1093             if opts_proxy == '':
1094                 proxies = {}
1095             else:
1096                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1097         else:
1098             proxies = compat_urllib_request.getproxies()
1099             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1100             if 'http' in proxies and 'https' not in proxies:
1101                 proxies['https'] = proxies['http']
1102         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1103         https_handler = make_HTTPS_handler(
1104             self.params.get('nocheckcertificate', False))
1105         opener = compat_urllib_request.build_opener(
1106             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1107         # Delete the default user-agent header, which would otherwise apply in
1108         # cases where our custom HTTP handler doesn't come into play
1109         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1110         opener.addheaders = []
1111         self._opener = opener
1112
1113         # TODO remove this global modification
1114         compat_urllib_request.install_opener(opener)
1115         socket.setdefaulttimeout(timeout)