_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import collections
   7 import errno
   8 import io
   9 import json
  10 import os
  11 import platform
  12 import re
  13 import shutil
  14 import subprocess
  15 import socket
  16 import sys
  17 import time
  18 import traceback
  19
  20 if os.name == 'nt':
  21     import ctypes
  22
  23 from .utils import (
  24     compat_cookiejar,
  25     compat_http_client,
  26     compat_str,
  27     compat_urllib_error,
  28     compat_urllib_request,
  29     ContentTooShortError,
  30     date_from_str,
  31     DateRange,
  32     determine_ext,
  33     DownloadError,
  34     encodeFilename,
  35     ExtractorError,
  36     format_bytes,
  37     get_term_width,
  38     locked_file,
  39     make_HTTPS_handler,
  40     MaxDownloadsReached,
  41     PostProcessingError,
  42     platform_name,
  43     preferredencoding,
  44     SameFileError,
  45     sanitize_filename,
  46     subtitles_filename,
  47     takewhile_inclusive,
  48     UnavailableVideoError,
  49     write_json_file,
  50     write_string,
  51     YoutubeDLHandler,
  52 )
  53 from .extractor import get_info_extractor, gen_extractors
  54 from .downloader import get_suitable_downloader
  55 from .version import __version__
  56
  57
  58 class YoutubeDL(object):
  59     """YoutubeDL class.
  60
  61     YoutubeDL objects are the ones responsible of downloading the
  62     actual video file and writing it to disk if the user has requested
  63     it, among some other tasks. In most cases there should be one per
  64     program. As, given a video URL, the downloader doesn't know how to
  65     extract all the needed information, task that InfoExtractors do, it
  66     has to pass the URL to one of them.
  67
  68     For this, YoutubeDL objects have a method that allows
  69     InfoExtractors to be registered in a given order. When it is passed
  70     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  71     finds that reports being able to handle it. The InfoExtractor extracts
  72     all the information about the video or videos the URL refers to, and
  73     YoutubeDL process the extracted information, possibly using a File
  74     Downloader to download the video.
  75
  76     YoutubeDL objects accept a lot of parameters. In order not to saturate
  77     the object constructor with arguments, it receives a dictionary of
  78     options instead. These options are available through the params
  79     attribute for the InfoExtractors to use. The YoutubeDL also
  80     registers itself as the downloader in charge for the InfoExtractors
  81     that are added to it, so this is a "mutual registration".
  82
  83     Available options:
  84
  85     username:          Username for authentication purposes.
  86     password:          Password for authentication purposes.
  87     videopassword:     Password for accessing a video.
  88     usenetrc:          Use netrc for authentication instead.
  89     verbose:           Print additional info to stdout.
  90     quiet:             Do not print messages to stdout.
  91     forceurl:          Force printing final URL.
  92     forcetitle:        Force printing title.
  93     forceid:           Force printing ID.
  94     forcethumbnail:    Force printing thumbnail URL.
  95     forcedescription:  Force printing description.
  96     forcefilename:     Force printing final filename.
  97     forcejson:         Force printing info_dict as JSON.
  98     simulate:          Do not download the video files.
  99     format:            Video format code.
 100     format_limit:      Highest quality format to try.
 101     outtmpl:           Template for output names.
 102     restrictfilenames: Do not allow "&" and spaces in file names
 103     ignoreerrors:      Do not stop on download errors.
 104     nooverwrites:      Prevent overwriting files.
 105     playliststart:     Playlist item to start at.
 106     playlistend:       Playlist item to end at.
 107     matchtitle:        Download only matching titles.
 108     rejecttitle:       Reject downloads for matching titles.
 109     logger:            Log messages to a logging.Logger instance.
 110     logtostderr:       Log messages to stderr instead of stdout.
 111     writedescription:  Write the video description to a .description file
 112     writeinfojson:     Write the video description to a .info.json file
 113     writeannotations:  Write the video annotations to a .annotations.xml file
 114     writethumbnail:    Write the thumbnail image to a file
 115     writesubtitles:    Write the video subtitles to a file
 116     writeautomaticsub: Write the automatic subtitles to a file
 117     allsubtitles:      Downloads all the subtitles of the video
 118                        (requires writesubtitles or writeautomaticsub)
 119     listsubtitles:     Lists all available subtitles for the video
 120     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 121     subtitleslangs:    List of languages of the subtitles to download
 122     keepvideo:         Keep the video file after post-processing
 123     daterange:         A DateRange object, download only if the upload_date is in the range.
 124     skip_download:     Skip the actual download of the video file
 125     cachedir:          Location of the cache files in the filesystem.
 126                        None to disable filesystem cache.
 127     noplaylist:        Download single video instead of a playlist if in doubt.
 128     age_limit:         An integer representing the user's age in years.
 129                        Unsuitable videos for the given age are skipped.
 130     download_archive:   File name of a file where all downloads are recorded.
 131                        Videos already present in the file are not downloaded
 132                        again.
 133     cookiefile:        File name where cookies should be read from and dumped to.
 134     nocheckcertificate:Do not verify SSL certificates
 135     proxy:             URL of the proxy server to use
 136     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 137     bidi_workaround:   Work around buggy terminals without bidirectional text
 138                        support, using fribidi
 139
 140     The following parameters are not used by YoutubeDL itself, they are used by
 141     the FileDownloader:
 142     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 143     noresizebuffer, retries, continuedl, noprogress, consoletitle
 144     """
 145
 146     params = None
 147     _ies = []
 148     _pps = []
 149     _download_retcode = None
 150     _num_downloads = None
 151     _screen_file = None
 152
 153     def __init__(self, params=None):
 154         """Create a FileDownloader object with the given options."""
 155         self._ies = []
 156         self._ies_instances = {}
 157         self._pps = []
 158         self._fd_progress_hooks = []
 159         self._download_retcode = 0
 160         self._num_downloads = 0
 161         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 162         self._err_file = sys.stderr
 163         self.params = {} if params is None else params
 164
 165         if params.get('bidi_workaround', False):
 166             try:
 167                 import pty
 168                 master, slave = pty.openpty()
 169                 width = get_term_width()
 170                 if width is None:
 171                     width_args = []
 172                 else:
 173                     width_args = ['-w', str(width)]
 174                 self._fribidi = subprocess.Popen(
 175                     ['fribidi', '-c', 'UTF-8'] + width_args,
 176                     stdin=subprocess.PIPE,
 177                     stdout=slave,
 178                     stderr=self._err_file)
 179                 self._fribidi_channel = os.fdopen(master, 'rb')
 180             except OSError as ose:
 181                 if ose.errno == 2:
 182                     self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 183                 else:
 184                     raise
 185
 186         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 187                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 188                 and not params['restrictfilenames']):
 189             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 190             self.report_warning(
 191                 u'Assuming --restrict-filenames since file system encoding '
 192                 u'cannot encode all charactes. '
 193                 u'Set the LC_ALL environment variable to fix this.')
 194             self.params['restrictfilenames'] = True
 195
 196         if '%(stitle)s' in self.params.get('outtmpl', ''):
 197             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 198
 199         self._setup_opener()
 200
 201     def add_info_extractor(self, ie):
 202         """Add an InfoExtractor object to the end of the list."""
 203         self._ies.append(ie)
 204         self._ies_instances[ie.ie_key()] = ie
 205         ie.set_downloader(self)
 206
 207     def get_info_extractor(self, ie_key):
 208         """
 209         Get an instance of an IE with name ie_key, it will try to get one from
 210         the _ies list, if there's no instance it will create a new one and add
 211         it to the extractor list.
 212         """
 213         ie = self._ies_instances.get(ie_key)
 214         if ie is None:
 215             ie = get_info_extractor(ie_key)()
 216             self.add_info_extractor(ie)
 217         return ie
 218
 219     def add_default_info_extractors(self):
 220         """
 221         Add the InfoExtractors returned by gen_extractors to the end of the list
 222         """
 223         for ie in gen_extractors():
 224             self.add_info_extractor(ie)
 225
 226     def add_post_processor(self, pp):
 227         """Add a PostProcessor object to the end of the chain."""
 228         self._pps.append(pp)
 229         pp.set_downloader(self)
 230
 231     def add_downloader_progress_hook(self, ph):
 232         """Add the progress hook to the file downloader"""
 233         self._fd_progress_hooks.append(ph)
 234
 235     def _bidi_workaround(self, message):
 236         if not hasattr(self, '_fribidi_channel'):
 237             return message
 238
 239         assert type(message) == type(u'')
 240         line_count = message.count(u'\n') + 1
 241         self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
 242         self._fribidi.stdin.flush()
 243         res = u''.join(self._fribidi_channel.readline().decode('utf-8')
 244                        for _ in range(line_count))
 245         return res[:-len(u'\n')]
 246
 247     def to_screen(self, message, skip_eol=False):
 248         """Print message to stdout if not in quiet mode."""
 249         return self.to_stdout(message, skip_eol, check_quiet=True)
 250
 251     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 252         """Print message to stdout if not in quiet mode."""
 253         if self.params.get('logger'):
 254             self.params['logger'].debug(message)
 255         elif not check_quiet or not self.params.get('quiet', False):
 256             message = self._bidi_workaround(message)
 257             terminator = [u'\n', u''][skip_eol]
 258             output = message + terminator
 259
 260             write_string(output, self._screen_file)
 261
 262     def to_stderr(self, message):
 263         """Print message to stderr."""
 264         assert type(message) == type(u'')
 265         if self.params.get('logger'):
 266             self.params['logger'].error(message)
 267         else:
 268             message = self._bidi_workaround(message)
 269             output = message + u'\n'
 270             write_string(output, self._err_file)
 271
 272     def to_console_title(self, message):
 273         if not self.params.get('consoletitle', False):
 274             return
 275         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 276             # c_wchar_p() might not be necessary if `message` is
 277             # already of type unicode()
 278             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 279         elif 'TERM' in os.environ:
 280             write_string(u'\033]0;%s\007' % message, self._screen_file)
 281
 282     def save_console_title(self):
 283         if not self.params.get('consoletitle', False):
 284             return
 285         if 'TERM' in os.environ:
 286             # Save the title on stack
 287             write_string(u'\033[22;0t', self._screen_file)
 288
 289     def restore_console_title(self):
 290         if not self.params.get('consoletitle', False):
 291             return
 292         if 'TERM' in os.environ:
 293             # Restore the title from stack
 294             write_string(u'\033[23;0t', self._screen_file)
 295
 296     def __enter__(self):
 297         self.save_console_title()
 298         return self
 299
 300     def __exit__(self, *args):
 301         self.restore_console_title()
 302
 303         if self.params.get('cookiefile') is not None:
 304             self.cookiejar.save()
 305
 306     def trouble(self, message=None, tb=None):
 307         """Determine action to take when a download problem appears.
 308
 309         Depending on if the downloader has been configured to ignore
 310         download errors or not, this method may throw an exception or
 311         not when errors are found, after printing the message.
 312
 313         tb, if given, is additional traceback information.
 314         """
 315         if message is not None:
 316             self.to_stderr(message)
 317         if self.params.get('verbose'):
 318             if tb is None:
 319                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 320                     tb = u''
 321                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 322                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 323                     tb += compat_str(traceback.format_exc())
 324                 else:
 325                     tb_data = traceback.format_list(traceback.extract_stack())
 326                     tb = u''.join(tb_data)
 327             self.to_stderr(tb)
 328         if not self.params.get('ignoreerrors', False):
 329             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 330                 exc_info = sys.exc_info()[1].exc_info
 331             else:
 332                 exc_info = sys.exc_info()
 333             raise DownloadError(message, exc_info)
 334         self._download_retcode = 1
 335
 336     def report_warning(self, message):
 337         '''
 338         Print the message to stderr, it will be prefixed with 'WARNING:'
 339         If stderr is a tty file the 'WARNING:' will be colored
 340         '''
 341         if self._err_file.isatty() and os.name != 'nt':
 342             _msg_header = u'\033[0;33mWARNING:\033[0m'
 343         else:
 344             _msg_header = u'WARNING:'
 345         warning_message = u'%s %s' % (_msg_header, message)
 346         self.to_stderr(warning_message)
 347
 348     def report_error(self, message, tb=None):
 349         '''
 350         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 351         in red if stderr is a tty file.
 352         '''
 353         if self._err_file.isatty() and os.name != 'nt':
 354             _msg_header = u'\033[0;31mERROR:\033[0m'
 355         else:
 356             _msg_header = u'ERROR:'
 357         error_message = u'%s %s' % (_msg_header, message)
 358         self.trouble(error_message, tb)
 359
 360     def report_writedescription(self, descfn):
 361         """ Report that the description file is being written """
 362         self.to_screen(u'[info] Writing video description to: ' + descfn)
 363
 364     def report_writesubtitles(self, sub_filename):
 365         """ Report that the subtitles file is being written """
 366         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 367
 368     def report_writeinfojson(self, infofn):
 369         """ Report that the metadata file has been written """
 370         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 371
 372     def report_writeannotations(self, annofn):
 373         """ Report that the annotations file has been written. """
 374         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 375
 376     def report_file_already_downloaded(self, file_name):
 377         """Report file has already been fully downloaded."""
 378         try:
 379             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 380         except UnicodeEncodeError:
 381             self.to_screen(u'[download] The file has already been downloaded')
 382
 383     def increment_downloads(self):
 384         """Increment the ordinal that assigns a number to each file."""
 385         self._num_downloads += 1
 386
 387     def prepare_filename(self, info_dict):
 388         """Generate the output filename."""
 389         try:
 390             template_dict = dict(info_dict)
 391
 392             template_dict['epoch'] = int(time.time())
 393             autonumber_size = self.params.get('autonumber_size')
 394             if autonumber_size is None:
 395                 autonumber_size = 5
 396             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 397             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 398             if template_dict.get('playlist_index') is not None:
 399                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 400
 401             sanitize = lambda k, v: sanitize_filename(
 402                 compat_str(v),
 403                 restricted=self.params.get('restrictfilenames'),
 404                 is_id=(k == u'id'))
 405             template_dict = dict((k, sanitize(k, v))
 406                                  for k, v in template_dict.items()
 407                                  if v is not None)
 408             template_dict = collections.defaultdict(lambda: u'NA', template_dict)
 409
 410             tmpl = os.path.expanduser(self.params['outtmpl'])
 411             filename = tmpl % template_dict
 412             return filename
 413         except ValueError as err:
 414             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 415             return None
 416
 417     def _match_entry(self, info_dict):
 418         """ Returns None iff the file should be downloaded """
 419
 420         if 'title' in info_dict:
 421             # This can happen when we're just evaluating the playlist
 422             title = info_dict['title']
 423             matchtitle = self.params.get('matchtitle', False)
 424             if matchtitle:
 425                 if not re.search(matchtitle, title, re.IGNORECASE):
 426                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 427             rejecttitle = self.params.get('rejecttitle', False)
 428             if rejecttitle:
 429                 if re.search(rejecttitle, title, re.IGNORECASE):
 430                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 431         date = info_dict.get('upload_date', None)
 432         if date is not None:
 433             dateRange = self.params.get('daterange', DateRange())
 434             if date not in dateRange:
 435                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 436         age_limit = self.params.get('age_limit')
 437         if age_limit is not None:
 438             if age_limit < info_dict.get('age_limit', 0):
 439                 return u'Skipping "' + title + '" because it is age restricted'
 440         if self.in_download_archive(info_dict):
 441             return (u'%s has already been recorded in archive'
 442                     % info_dict.get('title', info_dict.get('id', u'video')))
 443         return None
 444
 445     @staticmethod
 446     def add_extra_info(info_dict, extra_info):
 447         '''Set the keys from extra_info in info dict if they are missing'''
 448         for key, value in extra_info.items():
 449             info_dict.setdefault(key, value)
 450
 451     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 452                      process=True):
 453         '''
 454         Returns a list with a dictionary for each video we find.
 455         If 'download', also downloads the videos.
 456         extra_info is a dict containing the extra values to add to each result
 457          '''
 458
 459         if ie_key:
 460             ies = [self.get_info_extractor(ie_key)]
 461         else:
 462             ies = self._ies
 463
 464         for ie in ies:
 465             if not ie.suitable(url):
 466                 continue
 467
 468             if not ie.working():
 469                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 470                                     u'and will probably not work.')
 471
 472             try:
 473                 ie_result = ie.extract(url)
 474                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 475                     break
 476                 if isinstance(ie_result, list):
 477                     # Backwards compatibility: old IE result format
 478                     ie_result = {
 479                         '_type': 'compat_list',
 480                         'entries': ie_result,
 481                     }
 482                 self.add_extra_info(ie_result,
 483                     {
 484                         'extractor': ie.IE_NAME,
 485                         'webpage_url': url,
 486                         'extractor_key': ie.ie_key(),
 487                     })
 488                 if process:
 489                     return self.process_ie_result(ie_result, download, extra_info)
 490                 else:
 491                     return ie_result
 492             except ExtractorError as de: # An error we somewhat expected
 493                 self.report_error(compat_str(de), de.format_traceback())
 494                 break
 495             except Exception as e:
 496                 if self.params.get('ignoreerrors', False):
 497                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 498                     break
 499                 else:
 500                     raise
 501         else:
 502             self.report_error(u'no suitable InfoExtractor: %s' % url)
 503
 504     def process_ie_result(self, ie_result, download=True, extra_info={}):
 505         """
 506         Take the result of the ie(may be modified) and resolve all unresolved
 507         references (URLs, playlist items).
 508
 509         It will also download the videos if 'download'.
 510         Returns the resolved ie_result.
 511         """
 512
 513         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 514         if result_type == 'video':
 515             self.add_extra_info(ie_result, extra_info)
 516             return self.process_video_result(ie_result, download=download)
 517         elif result_type == 'url':
 518             # We have to add extra_info to the results because it may be
 519             # contained in a playlist
 520             return self.extract_info(ie_result['url'],
 521                                      download,
 522                                      ie_key=ie_result.get('ie_key'),
 523                                      extra_info=extra_info)
 524         elif result_type == 'url_transparent':
 525             # Use the information from the embedding page
 526             info = self.extract_info(
 527                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 528                 extra_info=extra_info, download=False, process=False)
 529
 530             def make_result(embedded_info):
 531                 new_result = ie_result.copy()
 532                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 533                           'entries', 'urlhandle', 'ie_key', 'duration',
 534                           'subtitles', 'annotations', 'format',
 535                           'thumbnail', 'thumbnails'):
 536                     if f in new_result:
 537                         del new_result[f]
 538                     if f in embedded_info:
 539                         new_result[f] = embedded_info[f]
 540                 return new_result
 541             new_result = make_result(info)
 542
 543             assert new_result.get('_type') != 'url_transparent'
 544             if new_result.get('_type') == 'compat_list':
 545                 new_result['entries'] = [
 546                     make_result(e) for e in new_result['entries']]
 547
 548             return self.process_ie_result(
 549                 new_result, download=download, extra_info=extra_info)
 550         elif result_type == 'playlist':
 551             # We process each entry in the playlist
 552             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 553             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 554
 555             playlist_results = []
 556
 557             n_all_entries = len(ie_result['entries'])
 558             playliststart = self.params.get('playliststart', 1) - 1
 559             playlistend = self.params.get('playlistend', -1)
 560
 561             if playlistend == -1:
 562                 entries = ie_result['entries'][playliststart:]
 563             else:
 564                 entries = ie_result['entries'][playliststart:playlistend]
 565
 566             n_entries = len(entries)
 567
 568             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 569                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 570
 571             for i, entry in enumerate(entries, 1):
 572                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 573                 extra = {
 574                     'playlist': playlist,
 575                     'playlist_index': i + playliststart,
 576                     'extractor': ie_result['extractor'],
 577                     'webpage_url': ie_result['webpage_url'],
 578                     'extractor_key': ie_result['extractor_key'],
 579                 }
 580
 581                 reason = self._match_entry(entry)
 582                 if reason is not None:
 583                     self.to_screen(u'[download] ' + reason)
 584                     continue
 585
 586                 entry_result = self.process_ie_result(entry,
 587                                                       download=download,
 588                                                       extra_info=extra)
 589                 playlist_results.append(entry_result)
 590             ie_result['entries'] = playlist_results
 591             return ie_result
 592         elif result_type == 'compat_list':
 593             def _fixup(r):
 594                 self.add_extra_info(r,
 595                     {
 596                         'extractor': ie_result['extractor'],
 597                         'webpage_url': ie_result['webpage_url'],
 598                         'extractor_key': ie_result['extractor_key'],
 599                     })
 600                 return r
 601             ie_result['entries'] = [
 602                 self.process_ie_result(_fixup(r), download, extra_info)
 603                 for r in ie_result['entries']
 604             ]
 605             return ie_result
 606         else:
 607             raise Exception('Invalid result type: %s' % result_type)
 608
 609     def select_format(self, format_spec, available_formats):
 610         if format_spec == 'best' or format_spec is None:
 611             return available_formats[-1]
 612         elif format_spec == 'worst':
 613             return available_formats[0]
 614         else:
 615             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 616             if format_spec in extensions:
 617                 filter_f = lambda f: f['ext'] == format_spec
 618             else:
 619                 filter_f = lambda f: f['format_id'] == format_spec
 620             matches = list(filter(filter_f, available_formats))
 621             if matches:
 622                 return matches[-1]
 623         return None
 624
    def process_video_result(self, info_dict, download=True):
        """Select the format(s) to use for a single video result.

        info_dict must be a result of type 'video'.  Missing 'format_id',
        'format' and 'ext' fields of the available formats are filled in
        (the format dicts are modified in place), the formats are filtered
        and ordered according to the 'format_limit' and 'prefer_free_formats'
        options, and the format(s) requested by the 'format' option are
        picked.  If 'download', process_info is called for each picked format.

        Returns info_dict updated with the last picked format, or None when
        the 'listformats' option is set (the formats are only listed then).
        Raises ExtractorError if none of the requested formats is available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # These extractors handle format selection themselves
        if info_dict['extractor'] in [u'youtube', u'Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                # Fall back to the position in the list as the id
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return

        format_limit = self.params.get('format_limit', None)
        if format_limit:
            # Keep the formats up to and including the one named by the limit
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                try:
                    ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                except ValueError:
                    ext_ord = -1
                # We only compare the extension if they have the same height and width
                return (f.get('height'), f.get('width'), ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        req_format = self.params.get('format', 'best')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each download gets its own copy of the video info merged
                # with the fields of the format being downloaded
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
 709
 710     def process_info(self, info_dict):
 711         """Process a single resolved IE result."""
 712
 713         assert info_dict.get('_type', 'video') == 'video'
 714         #We increment the download the download count here to match the previous behaviour.
 715         self.increment_downloads()
 716
 717         info_dict['fulltitle'] = info_dict['title']
 718         if len(info_dict['title']) > 200:
 719             info_dict['title'] = info_dict['title'][:197] + u'...'
 720
 721         # Keep for backwards compatibility
 722         info_dict['stitle'] = info_dict['title']
 723
 724         if not 'format' in info_dict:
 725             info_dict['format'] = info_dict['ext']
 726
 727         reason = self._match_entry(info_dict)
 728         if reason is not None:
 729             self.to_screen(u'[download] ' + reason)
 730             return
 731
 732         max_downloads = self.params.get('max_downloads')
 733         if max_downloads is not None:
 734             if self._num_downloads > int(max_downloads):
 735                 raise MaxDownloadsReached()
 736
 737         filename = self.prepare_filename(info_dict)
 738
 739         # Forced printings
 740         if self.params.get('forcetitle', False):
 741             self.to_stdout(info_dict['fulltitle'])
 742         if self.params.get('forceid', False):
 743             self.to_stdout(info_dict['id'])
 744         if self.params.get('forceurl', False):
 745             # For RTMP URLs, also include the playpath
 746             self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
 747         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 748             self.to_stdout(info_dict['thumbnail'])
 749         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 750             self.to_stdout(info_dict['description'])
 751         if self.params.get('forcefilename', False) and filename is not None:
 752             self.to_stdout(filename)
 753         if self.params.get('forceformat', False):
 754             self.to_stdout(info_dict['format'])
 755         if self.params.get('forcejson', False):
 756             info_dict['_filename'] = filename
 757             self.to_stdout(json.dumps(info_dict))
 758
 759         # Do nothing else if in simulate mode
 760         if self.params.get('simulate', False):
 761             return
 762
 763         if filename is None:
 764             return
 765
 766         try:
 767             dn = os.path.dirname(encodeFilename(filename))
 768             if dn != '' and not os.path.exists(dn):
 769                 os.makedirs(dn)
 770         except (OSError, IOError) as err:
 771             self.report_error(u'unable to create directory ' + compat_str(err))
 772             return
 773
 774         if self.params.get('writedescription', False):
 775             try:
 776                 descfn = filename + u'.description'
 777                 self.report_writedescription(descfn)
 778                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 779                     descfile.write(info_dict['description'])
 780             except (KeyError, TypeError):
 781                 self.report_warning(u'There\'s no description to write.')
 782             except (OSError, IOError):
 783                 self.report_error(u'Cannot write description file ' + descfn)
 784                 return
 785
 786         if self.params.get('writeannotations', False):
 787             try:
 788                 annofn = filename + u'.annotations.xml'
 789                 self.report_writeannotations(annofn)
 790                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 791                     annofile.write(info_dict['annotations'])
 792             except (KeyError, TypeError):
 793                 self.report_warning(u'There are no annotations to write.')
 794             except (OSError, IOError):
 795                 self.report_error(u'Cannot write annotations file: ' + annofn)
 796                 return
 797
 798         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 799                                        self.params.get('writeautomaticsub')])
 800
 801         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 802             # subtitles download errors are already managed as troubles in relevant IE
 803             # that way it will silently go on when used with unsupporting IE
 804             subtitles = info_dict['subtitles']
 805             sub_format = self.params.get('subtitlesformat', 'srt')
 806             for sub_lang in subtitles.keys():
 807                 sub = subtitles[sub_lang]
 808                 if sub is None:
 809                     continue
 810                 try:
 811                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 812                     self.report_writesubtitles(sub_filename)
 813                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 814                             subfile.write(sub)
 815                 except (OSError, IOError):
 816                     self.report_error(u'Cannot write subtitles file ' + descfn)
 817                     return
 818
 819         if self.params.get('writeinfojson', False):
 820             infofn = os.path.splitext(filename)[0] + u'.info.json'
 821             self.report_writeinfojson(infofn)
 822             try:
 823                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 824                 write_json_file(json_info_dict, encodeFilename(infofn))
 825             except (OSError, IOError):
 826                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 827                 return
 828
 829         if self.params.get('writethumbnail', False):
 830             if info_dict.get('thumbnail') is not None:
 831                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 832                 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
 833                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 834                                (info_dict['extractor'], info_dict['id']))
 835                 try:
 836                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 837                     with open(thumb_filename, 'wb') as thumbf:
 838                         shutil.copyfileobj(uf, thumbf)
 839                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 840                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 841                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 842                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 843                         (info_dict['thumbnail'], compat_str(err)))
 844
 845         if not self.params.get('skip_download', False):
 846             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 847                 success = True
 848             else:
 849                 try:
 850                     fd = get_suitable_downloader(info_dict)(self, self.params)
 851                     for ph in self._fd_progress_hooks:
 852                         fd.add_progress_hook(ph)
 853                     success = fd.download(filename, info_dict)
 854                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 855                     self.report_error(u'unable to download video data: %s' % str(err))
 856                     return
 857                 except (OSError, IOError) as err:
 858                     raise UnavailableVideoError(err)
 859                 except (ContentTooShortError, ) as err:
 860                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 861                     return
 862
 863             if success:
 864                 try:
 865                     self.post_process(filename, info_dict)
 866                 except (PostProcessingError) as err:
 867                     self.report_error(u'postprocessing: %s' % str(err))
 868                     return
 869
 870         self.record_download_archive(info_dict)
 871
 872     def download(self, url_list):
 873         """Download a given list of URLs."""
 874         if (len(url_list) > 1 and
 875                 '%' not in self.params['outtmpl']
 876                 and self.params.get('max_downloads') != 1):
 877             raise SameFileError(self.params['outtmpl'])
 878
 879         for url in url_list:
 880             try:
 881                 #It also downloads the videos
 882                 self.extract_info(url)
 883             except UnavailableVideoError:
 884                 self.report_error(u'unable to download video')
 885             except MaxDownloadsReached:
 886                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 887                 raise
 888
 889         return self._download_retcode
 890
 891     def download_with_info_file(self, info_filename):
 892         with io.open(info_filename, 'r', encoding='utf-8') as f:
 893             info = json.load(f)
 894         try:
 895             self.process_ie_result(info, download=True)
 896         except DownloadError:
 897             webpage_url = info.get('webpage_url')
 898             if webpage_url is not None:
 899                 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
 900                 return self.download([webpage_url])
 901             else:
 902                 raise
 903         return self._download_retcode
 904
 905     def post_process(self, filename, ie_info):
 906         """Run all the postprocessors on the given file."""
 907         info = dict(ie_info)
 908         info['filepath'] = filename
 909         keep_video = None
 910         for pp in self._pps:
 911             try:
 912                 keep_video_wish, new_info = pp.run(info)
 913                 if keep_video_wish is not None:
 914                     if keep_video_wish:
 915                         keep_video = keep_video_wish
 916                     elif keep_video is None:
 917                         # No clear decision yet, let IE decide
 918                         keep_video = keep_video_wish
 919             except PostProcessingError as e:
 920                 self.report_error(e.msg)
 921         if keep_video is False and not self.params.get('keepvideo', False):
 922             try:
 923                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 924                 os.remove(encodeFilename(filename))
 925             except (IOError, OSError):
 926                 self.report_warning(u'Unable to remove downloaded video file')
 927
 928     def _make_archive_id(self, info_dict):
 929         # Future-proof against any change in case
 930         # and backwards compatibility with prior versions
 931         extractor = info_dict.get('extractor_key')
 932         if extractor is None:
 933             if 'id' in info_dict:
 934                 extractor = info_dict.get('ie_key')  # key in a playlist
 935         if extractor is None:
 936             return None  # Incomplete video information
 937         return extractor.lower() + u' ' + info_dict['id']
 938
 939     def in_download_archive(self, info_dict):
 940         fn = self.params.get('download_archive')
 941         if fn is None:
 942             return False
 943
 944         vid_id = self._make_archive_id(info_dict)
 945         if vid_id is None:
 946             return False  # Incomplete video information
 947
 948         try:
 949             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 950                 for line in archive_file:
 951                     if line.strip() == vid_id:
 952                         return True
 953         except IOError as ioe:
 954             if ioe.errno != errno.ENOENT:
 955                 raise
 956         return False
 957
 958     def record_download_archive(self, info_dict):
 959         fn = self.params.get('download_archive')
 960         if fn is None:
 961             return
 962         vid_id = self._make_archive_id(info_dict)
 963         assert vid_id
 964         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 965             archive_file.write(vid_id + u'\n')
 966
 967     @staticmethod
 968     def format_resolution(format, default='unknown'):
 969         if format.get('vcodec') == 'none':
 970             return 'audio only'
 971         if format.get('_resolution') is not None:
 972             return format['_resolution']
 973         if format.get('height') is not None:
 974             if format.get('width') is not None:
 975                 res = u'%sx%s' % (format['width'], format['height'])
 976             else:
 977                 res = u'%sp' % format['height']
 978         else:
 979             res = default
 980         return res
 981
 982     def list_formats(self, info_dict):
 983         def format_note(fdict):
 984             res = u''
 985             if fdict.get('format_note') is not None:
 986                 res += fdict['format_note'] + u' '
 987             if (fdict.get('vcodec') is not None and
 988                     fdict.get('vcodec') != 'none'):
 989                 res += u'%-5s' % fdict['vcodec']
 990             elif fdict.get('vbr') is not None:
 991                 res += u'video'
 992             if fdict.get('vbr') is not None:
 993                 res += u'@%4dk' % fdict['vbr']
 994             if fdict.get('acodec') is not None:
 995                 if res:
 996                     res += u', '
 997                 res += u'%-5s' % fdict['acodec']
 998             elif fdict.get('abr') is not None:
 999                 if res:
1000                     res += u', '
1001                 res += 'audio'
1002             if fdict.get('abr') is not None:
1003                 res += u'@%3dk' % fdict['abr']
1004             if fdict.get('filesize') is not None:
1005                 if res:
1006                     res += u', '
1007                 res += format_bytes(fdict['filesize'])
1008             return res
1009
1010         def line(format, idlen=20):
1011             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
1012                 format['format_id'],
1013                 format['ext'],
1014                 self.format_resolution(format),
1015                 format_note(format),
1016             ))
1017
1018         formats = info_dict.get('formats', [info_dict])
1019         idlen = max(len(u'format code'),
1020                     max(len(f['format_id']) for f in formats))
1021         formats_s = [line(f, idlen) for f in formats]
1022         if len(formats) > 1:
1023             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1024             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1025
1026         header_line = line({
1027             'format_id': u'format code', 'ext': u'extension',
1028             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1029         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1030                        (info_dict['id'], header_line, u"\n".join(formats_s)))
1031
1032     def urlopen(self, req):
1033         """ Start an HTTP download """
1034         return self._opener.open(req)
1035
1036     def print_debug_header(self):
1037         if not self.params.get('verbose'):
1038             return
1039         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1040         try:
1041             sp = subprocess.Popen(
1042                 ['git', 'rev-parse', '--short', 'HEAD'],
1043                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1044                 cwd=os.path.dirname(os.path.abspath(__file__)))
1045             out, err = sp.communicate()
1046             out = out.decode().strip()
1047             if re.match('[0-9a-f]+', out):
1048                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1049         except:
1050             try:
1051                 sys.exc_clear()
1052             except:
1053                 pass
1054         write_string(u'[debug] Python version %s - %s' %
1055                      (platform.python_version(), platform_name()) + u'\n')
1056
1057         proxy_map = {}
1058         for handler in self._opener.handlers:
1059             if hasattr(handler, 'proxies'):
1060                 proxy_map.update(handler.proxies)
1061         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1062
1063     def _setup_opener(self):
1064         timeout_val = self.params.get('socket_timeout')
1065         timeout = 600 if timeout_val is None else float(timeout_val)
1066
1067         opts_cookiefile = self.params.get('cookiefile')
1068         opts_proxy = self.params.get('proxy')
1069
1070         if opts_cookiefile is None:
1071             self.cookiejar = compat_cookiejar.CookieJar()
1072         else:
1073             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1074                 opts_cookiefile)
1075             if os.access(opts_cookiefile, os.R_OK):
1076                 self.cookiejar.load()
1077
1078         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1079             self.cookiejar)
1080         if opts_proxy is not None:
1081             if opts_proxy == '':
1082                 proxies = {}
1083             else:
1084                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1085         else:
1086             proxies = compat_urllib_request.getproxies()
1087             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1088             if 'http' in proxies and 'https' not in proxies:
1089                 proxies['https'] = proxies['http']
1090         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1091         https_handler = make_HTTPS_handler(
1092             self.params.get('nocheckcertificate', False))
1093         opener = compat_urllib_request.build_opener(
1094             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1095         # Delete the default user-agent header, which would otherwise apply in
1096         # cases where our custom HTTP handler doesn't come into play
1097         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1098         opener.addheaders = []
1099         self._opener = opener
1100
1101         # TODO remove this global modification
1102         compat_urllib_request.install_opener(opener)
1103         socket.setdefaulttimeout(timeout)