[YoutubeDL] don't set the 'requested_subtitles' without writesubtitles or writeautoma...
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
88 class YoutubeDL(object):
89     """YoutubeDL class.
90
    YoutubeDL objects are the ones responsible for downloading the
92     actual video file and writing it to disk if the user has requested
93     it, among some other tasks. In most cases there should be one per
94     program. As, given a video URL, the downloader doesn't know how to
95     extract all the needed information, task that InfoExtractors do, it
96     has to pass the URL to one of them.
97
98     For this, YoutubeDL objects have a method that allows
99     InfoExtractors to be registered in a given order. When it is passed
100     a URL, the YoutubeDL object handles it to the first InfoExtractor it
101     finds that reports being able to handle it. The InfoExtractor extracts
102     all the information about the video or videos the URL refers to, and
103     YoutubeDL process the extracted information, possibly using a File
104     Downloader to download the video.
105
106     YoutubeDL objects accept a lot of parameters. In order not to saturate
107     the object constructor with arguments, it receives a dictionary of
108     options instead. These options are available through the params
109     attribute for the InfoExtractors to use. The YoutubeDL also
110     registers itself as the downloader in charge for the InfoExtractors
111     that are added to it, so this is a "mutual registration".
112
113     Available options:
114
115     username:          Username for authentication purposes.
116     password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
118     usenetrc:          Use netrc for authentication instead.
119     verbose:           Print additional info to stdout.
120     quiet:             Do not print messages to stdout.
121     no_warnings:       Do not print out anything for warnings.
122     forceurl:          Force printing final URL.
123     forcetitle:        Force printing title.
124     forceid:           Force printing ID.
125     forcethumbnail:    Force printing thumbnail URL.
126     forcedescription:  Force printing description.
127     forcefilename:     Force printing final filename.
128     forceduration:     Force printing duration.
129     forcejson:         Force printing info_dict as JSON.
130     dump_single_json:  Force printing the info_dict of the whole playlist
131                        (or video) as a single JSON line.
132     simulate:          Do not download the video files.
133     format:            Video format code. See options.py for more information.
134     format_limit:      Highest quality format to try.
135     outtmpl:           Template for output names.
136     restrictfilenames: Do not allow "&" and spaces in file names
137     ignoreerrors:      Do not stop on download errors.
138     nooverwrites:      Prevent overwriting files.
139     playliststart:     Playlist item to start at.
140     playlistend:       Playlist item to end at.
141     playlist_items:    Specific indices of playlist to download.
142     playlistreverse:   Download playlist items in reverse order.
143     matchtitle:        Download only matching titles.
144     rejecttitle:       Reject downloads for matching titles.
145     logger:            Log messages to a logging.Logger instance.
146     logtostderr:       Log messages to stderr instead of stdout.
147     writedescription:  Write the video description to a .description file
148     writeinfojson:     Write the video description to a .info.json file
149     writeannotations:  Write the video annotations to a .annotations.xml file
150     writethumbnail:    Write the thumbnail image to a file
151     write_all_thumbnails:  Write all thumbnail formats to files
152     writesubtitles:    Write the video subtitles to a file
153     writeautomaticsub: Write the automatic subtitles to a file
154     allsubtitles:      Downloads all the subtitles of the video
155                        (requires writesubtitles or writeautomaticsub)
156     listsubtitles:     Lists all available subtitles for the video
157     subtitlesformat:   The format code for subtitles
158     subtitleslangs:    List of languages of the subtitles to download
159     keepvideo:         Keep the video file after post-processing
160     daterange:         A DateRange object, download only if the upload_date is in the range.
161     skip_download:     Skip the actual download of the video file
162     cachedir:          Location of the cache files in the filesystem.
163                        False to disable filesystem cache.
164     noplaylist:        Download single video instead of a playlist if in doubt.
165     age_limit:         An integer representing the user's age in years.
166                        Unsuitable videos for the given age are skipped.
167     min_views:         An integer representing the minimum view count the video
168                        must have in order to not be skipped.
169                        Videos without view count information are always
170                        downloaded. None for no limit.
171     max_views:         An integer representing the maximum view count.
172                        Videos that are more popular than that are not
173                        downloaded.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     download_archive:  File name of a file where all downloads are recorded.
177                        Videos already present in the file are not downloaded
178                        again.
179     cookiefile:        File name where cookies should be read from and dumped to.
180     nocheckcertificate:Do not verify SSL certificates
181     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
182                        At the moment, this is only supported by YouTube.
183     proxy:             URL of the proxy server to use
184     socket_timeout:    Time to wait for unresponsive hosts, in seconds
185     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
187     debug_printtraffic:Print out sent and received HTTP traffic
188     include_ads:       Download ads as well
189     default_search:    Prepend this string if an input url is not valid.
190                        'auto' for elaborate guessing
191     encoding:          Use this encoding instead of the system-specified.
192     extract_flat:      Do not resolve URLs, return the immediate result.
193                        Pass in 'in_playlist' to only show this behavior for
194                        playlist items.
195     postprocessors:    A list of dictionaries, each with an entry
196                        * key:  The name of the postprocessor. See
197                                youtube_dl/postprocessor/__init__.py for a list.
198                        as well as any further keyword arguments for the
199                        postprocessor.
200     progress_hooks:    A list of functions that get called on download
201                        progress, with a dictionary with the entries
202                        * status: One of "downloading" and "finished".
203                                  Check this first and ignore unknown values.
204
205                        If status is one of "downloading" or "finished", the
206                        following properties may also be present:
207                        * filename: The final filename (always present)
208                        * downloaded_bytes: Bytes on disk
209                        * total_bytes: Size of the whole file, None if unknown
210                        * tmpfilename: The filename we're currently writing to
211                        * eta: The estimated time in seconds, None if unknown
212                        * speed: The download speed in bytes/second, None if
213                                 unknown
214
215                        Progress hooks are guaranteed to be called at least once
216                        (with status "finished") if the download is successful.
217     merge_output_format: Extension to use when merging formats.
218     fixup:             Automatically correct known faults of the file.
219                        One of:
220                        - "never": do nothing
221                        - "warn": only emit a warning
222                        - "detect_or_warn": check whether we can do anything
223                                            about it, warn otherwise (default)
224     source_address:    (Experimental) Client-side IP address to bind to.
225     call_home:         Boolean, true iff we are allowed to contact the
226                        youtube-dl servers for debugging.
227     sleep_interval:    Number of seconds to sleep before each download.
228     external_downloader:  Executable of the external downloader to call.
229     listformats:       Print an overview of available video formats and exit.
230     list_thumbnails:   Print a table of all thumbnails and exit.
231     match_filter:      A function that gets called with the info_dict of
232                        every video.
233                        If it returns a message, the video is ignored.
234                        If it returns None, the video is downloaded.
235                        match_filter_func in utils.py is one example for this.
236     no_color:          Do not emit color codes in output.
237
238
239     The following parameters are not used by YoutubeDL itself, they are used by
240     the FileDownloader:
241     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
242     noresizebuffer, retries, continuedl, noprogress, consoletitle,
243     xattr_set_filesize.
244
245     The following options are used by the post processors:
246     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
247                        otherwise prefer avconv.
248     exec_cmd:          Arbitrary command to run after downloading
249     """
250
251     params = None
252     _ies = []
253     _pps = []
254     _download_retcode = None
255     _num_downloads = None
256     _screen_file = None
257
258     def __init__(self, params=None, auto_init=True):
259         """Create a FileDownloader object with the given options."""
260         if params is None:
261             params = {}
262         self._ies = []
263         self._ies_instances = {}
264         self._pps = []
265         self._progress_hooks = []
266         self._download_retcode = 0
267         self._num_downloads = 0
268         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
269         self._err_file = sys.stderr
270         self.params = params
271         self.cache = Cache(self)
272
273         if params.get('bidi_workaround', False):
274             try:
275                 import pty
276                 master, slave = pty.openpty()
277                 width = get_term_width()
278                 if width is None:
279                     width_args = []
280                 else:
281                     width_args = ['-w', str(width)]
282                 sp_kwargs = dict(
283                     stdin=subprocess.PIPE,
284                     stdout=slave,
285                     stderr=self._err_file)
286                 try:
287                     self._output_process = subprocess.Popen(
288                         ['bidiv'] + width_args, **sp_kwargs
289                     )
290                 except OSError:
291                     self._output_process = subprocess.Popen(
292                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
293                 self._output_channel = os.fdopen(master, 'rb')
294             except OSError as ose:
295                 if ose.errno == 2:
296                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
297                 else:
298                     raise
299
300         if (sys.version_info >= (3,) and sys.platform != 'win32' and
301                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
302                 and not params.get('restrictfilenames', False)):
303             # On Python 3, the Unicode filesystem API will throw errors (#1474)
304             self.report_warning(
305                 'Assuming --restrict-filenames since file system encoding '
306                 'cannot encode all characters. '
307                 'Set the LC_ALL environment variable to fix this.')
308             self.params['restrictfilenames'] = True
309
310         if '%(stitle)s' in self.params.get('outtmpl', ''):
311             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
312
313         self._setup_opener()
314
315         if auto_init:
316             self.print_debug_header()
317             self.add_default_info_extractors()
318
319         for pp_def_raw in self.params.get('postprocessors', []):
320             pp_class = get_postprocessor(pp_def_raw['key'])
321             pp_def = dict(pp_def_raw)
322             del pp_def['key']
323             pp = pp_class(self, **compat_kwargs(pp_def))
324             self.add_post_processor(pp)
325
326         for ph in self.params.get('progress_hooks', []):
327             self.add_progress_hook(ph)
328
329     def warn_if_short_id(self, argv):
330         # short YouTube ID starting with dash?
331         idxs = [
332             i for i, a in enumerate(argv)
333             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
334         if idxs:
335             correct_argv = (
336                 ['youtube-dl'] +
337                 [a for i, a in enumerate(argv) if i not in idxs] +
338                 ['--'] + [argv[i] for i in idxs]
339             )
340             self.report_warning(
341                 'Long argument string detected. '
342                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
343                 args_to_str(correct_argv))
344
345     def add_info_extractor(self, ie):
346         """Add an InfoExtractor object to the end of the list."""
347         self._ies.append(ie)
348         self._ies_instances[ie.ie_key()] = ie
349         ie.set_downloader(self)
350
351     def get_info_extractor(self, ie_key):
352         """
353         Get an instance of an IE with name ie_key, it will try to get one from
354         the _ies list, if there's no instance it will create a new one and add
355         it to the extractor list.
356         """
357         ie = self._ies_instances.get(ie_key)
358         if ie is None:
359             ie = get_info_extractor(ie_key)()
360             self.add_info_extractor(ie)
361         return ie
362
363     def add_default_info_extractors(self):
364         """
365         Add the InfoExtractors returned by gen_extractors to the end of the list
366         """
367         for ie in gen_extractors():
368             self.add_info_extractor(ie)
369
370     def add_post_processor(self, pp):
371         """Add a PostProcessor object to the end of the chain."""
372         self._pps.append(pp)
373         pp.set_downloader(self)
374
375     def add_progress_hook(self, ph):
376         """Add the progress hook (currently only for the file downloader)"""
377         self._progress_hooks.append(ph)
378
379     def _bidi_workaround(self, message):
380         if not hasattr(self, '_output_channel'):
381             return message
382
383         assert hasattr(self, '_output_process')
384         assert isinstance(message, compat_str)
385         line_count = message.count('\n') + 1
386         self._output_process.stdin.write((message + '\n').encode('utf-8'))
387         self._output_process.stdin.flush()
388         res = ''.join(self._output_channel.readline().decode('utf-8')
389                       for _ in range(line_count))
390         return res[:-len('\n')]
391
392     def to_screen(self, message, skip_eol=False):
393         """Print message to stdout if not in quiet mode."""
394         return self.to_stdout(message, skip_eol, check_quiet=True)
395
396     def _write_string(self, s, out=None):
397         write_string(s, out=out, encoding=self.params.get('encoding'))
398
399     def to_stdout(self, message, skip_eol=False, check_quiet=False):
400         """Print message to stdout if not in quiet mode."""
401         if self.params.get('logger'):
402             self.params['logger'].debug(message)
403         elif not check_quiet or not self.params.get('quiet', False):
404             message = self._bidi_workaround(message)
405             terminator = ['\n', ''][skip_eol]
406             output = message + terminator
407
408             self._write_string(output, self._screen_file)
409
410     def to_stderr(self, message):
411         """Print message to stderr."""
412         assert isinstance(message, compat_str)
413         if self.params.get('logger'):
414             self.params['logger'].error(message)
415         else:
416             message = self._bidi_workaround(message)
417             output = message + '\n'
418             self._write_string(output, self._err_file)
419
420     def to_console_title(self, message):
421         if not self.params.get('consoletitle', False):
422             return
423         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
424             # c_wchar_p() might not be necessary if `message` is
425             # already of type unicode()
426             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
427         elif 'TERM' in os.environ:
428             self._write_string('\033]0;%s\007' % message, self._screen_file)
429
430     def save_console_title(self):
431         if not self.params.get('consoletitle', False):
432             return
433         if 'TERM' in os.environ:
434             # Save the title on stack
435             self._write_string('\033[22;0t', self._screen_file)
436
437     def restore_console_title(self):
438         if not self.params.get('consoletitle', False):
439             return
440         if 'TERM' in os.environ:
441             # Restore the title from stack
442             self._write_string('\033[23;0t', self._screen_file)
443
444     def __enter__(self):
445         self.save_console_title()
446         return self
447
448     def __exit__(self, *args):
449         self.restore_console_title()
450
451         if self.params.get('cookiefile') is not None:
452             self.cookiejar.save()
453
454     def trouble(self, message=None, tb=None):
455         """Determine action to take when a download problem appears.
456
457         Depending on if the downloader has been configured to ignore
458         download errors or not, this method may throw an exception or
459         not when errors are found, after printing the message.
460
461         tb, if given, is additional traceback information.
462         """
463         if message is not None:
464             self.to_stderr(message)
465         if self.params.get('verbose'):
466             if tb is None:
467                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
468                     tb = ''
469                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
470                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
471                     tb += compat_str(traceback.format_exc())
472                 else:
473                     tb_data = traceback.format_list(traceback.extract_stack())
474                     tb = ''.join(tb_data)
475             self.to_stderr(tb)
476         if not self.params.get('ignoreerrors', False):
477             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
478                 exc_info = sys.exc_info()[1].exc_info
479             else:
480                 exc_info = sys.exc_info()
481             raise DownloadError(message, exc_info)
482         self._download_retcode = 1
483
484     def report_warning(self, message):
485         '''
486         Print the message to stderr, it will be prefixed with 'WARNING:'
487         If stderr is a tty file the 'WARNING:' will be colored
488         '''
489         if self.params.get('logger') is not None:
490             self.params['logger'].warning(message)
491         else:
492             if self.params.get('no_warnings'):
493                 return
494             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
495                 _msg_header = '\033[0;33mWARNING:\033[0m'
496             else:
497                 _msg_header = 'WARNING:'
498             warning_message = '%s %s' % (_msg_header, message)
499             self.to_stderr(warning_message)
500
501     def report_error(self, message, tb=None):
502         '''
503         Do the same as trouble, but prefixes the message with 'ERROR:', colored
504         in red if stderr is a tty file.
505         '''
506         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
507             _msg_header = '\033[0;31mERROR:\033[0m'
508         else:
509             _msg_header = 'ERROR:'
510         error_message = '%s %s' % (_msg_header, message)
511         self.trouble(error_message, tb)
512
513     def report_file_already_downloaded(self, file_name):
514         """Report file has already been fully downloaded."""
515         try:
516             self.to_screen('[download] %s has already been downloaded' % file_name)
517         except UnicodeEncodeError:
518             self.to_screen('[download] The file has already been downloaded')
519
520     def prepare_filename(self, info_dict):
521         """Generate the output filename."""
522         try:
523             template_dict = dict(info_dict)
524
525             template_dict['epoch'] = int(time.time())
526             autonumber_size = self.params.get('autonumber_size')
527             if autonumber_size is None:
528                 autonumber_size = 5
529             autonumber_templ = '%0' + str(autonumber_size) + 'd'
530             template_dict['autonumber'] = autonumber_templ % self._num_downloads
531             if template_dict.get('playlist_index') is not None:
532                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
533             if template_dict.get('resolution') is None:
534                 if template_dict.get('width') and template_dict.get('height'):
535                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
536                 elif template_dict.get('height'):
537                     template_dict['resolution'] = '%sp' % template_dict['height']
538                 elif template_dict.get('width'):
539                     template_dict['resolution'] = '?x%d' % template_dict['width']
540
541             sanitize = lambda k, v: sanitize_filename(
542                 compat_str(v),
543                 restricted=self.params.get('restrictfilenames'),
544                 is_id=(k == 'id'))
545             template_dict = dict((k, sanitize(k, v))
546                                  for k, v in template_dict.items()
547                                  if v is not None)
548             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
549
550             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
551             tmpl = compat_expanduser(outtmpl)
552             filename = tmpl % template_dict
553             # Temporary fix for #4787
554             # 'Treat' all problem characters by passing filename through preferredencoding
555             # to workaround encoding issues with subprocess on python2 @ Windows
556             if sys.version_info < (3, 0) and sys.platform == 'win32':
557                 filename = encodeFilename(filename, True).decode(preferredencoding())
558             return filename
559         except ValueError as err:
560             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
561             return None
562
563     def _match_entry(self, info_dict, incomplete):
564         """ Returns None iff the file should be downloaded """
565
566         video_title = info_dict.get('title', info_dict.get('id', 'video'))
567         if 'title' in info_dict:
568             # This can happen when we're just evaluating the playlist
569             title = info_dict['title']
570             matchtitle = self.params.get('matchtitle', False)
571             if matchtitle:
572                 if not re.search(matchtitle, title, re.IGNORECASE):
573                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
574             rejecttitle = self.params.get('rejecttitle', False)
575             if rejecttitle:
576                 if re.search(rejecttitle, title, re.IGNORECASE):
577                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
578         date = info_dict.get('upload_date', None)
579         if date is not None:
580             dateRange = self.params.get('daterange', DateRange())
581             if date not in dateRange:
582                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
583         view_count = info_dict.get('view_count', None)
584         if view_count is not None:
585             min_views = self.params.get('min_views')
586             if min_views is not None and view_count < min_views:
587                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
588             max_views = self.params.get('max_views')
589             if max_views is not None and view_count > max_views:
590                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
591         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
592             return 'Skipping "%s" because it is age restricted' % video_title
593         if self.in_download_archive(info_dict):
594             return '%s has already been recorded in archive' % video_title
595
596         if not incomplete:
597             match_filter = self.params.get('match_filter')
598             if match_filter is not None:
599                 ret = match_filter(info_dict)
600                 if ret is not None:
601                     return ret
602
603         return None
604
605     @staticmethod
606     def add_extra_info(info_dict, extra_info):
607         '''Set the keys from extra_info in info dict if they are missing'''
608         for key, value in extra_info.items():
609             info_dict.setdefault(key, value)
610
611     def extract_info(self, url, download=True, ie_key=None, extra_info={},
612                      process=True):
613         '''
614         Returns a list with a dictionary for each video we find.
615         If 'download', also downloads the videos.
616         extra_info is a dict containing the extra values to add to each result
617          '''
618
619         if ie_key:
620             ies = [self.get_info_extractor(ie_key)]
621         else:
622             ies = self._ies
623
624         for ie in ies:
625             if not ie.suitable(url):
626                 continue
627
628             if not ie.working():
629                 self.report_warning('The program functionality for this site has been marked as broken, '
630                                     'and will probably not work.')
631
632             try:
633                 ie_result = ie.extract(url)
634                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
635                     break
636                 if isinstance(ie_result, list):
637                     # Backwards compatibility: old IE result format
638                     ie_result = {
639                         '_type': 'compat_list',
640                         'entries': ie_result,
641                     }
642                 self.add_default_extra_info(ie_result, ie, url)
643                 if process:
644                     return self.process_ie_result(ie_result, download, extra_info)
645                 else:
646                     return ie_result
647             except ExtractorError as de:  # An error we somewhat expected
648                 self.report_error(compat_str(de), de.format_traceback())
649                 break
650             except MaxDownloadsReached:
651                 raise
652             except Exception as e:
653                 if self.params.get('ignoreerrors', False):
654                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
655                     break
656                 else:
657                     raise
658         else:
659             self.report_error('no suitable InfoExtractor for URL %s' % url)
660
661     def add_default_extra_info(self, ie_result, ie, url):
662         self.add_extra_info(ie_result, {
663             'extractor': ie.IE_NAME,
664             'webpage_url': url,
665             'webpage_url_basename': url_basename(url),
666             'extractor_key': ie.ie_key(),
667         })
668
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download' is true.
        Returns the resolved ie_result.

        ie_result  -- info dict returned by an InfoExtractor; its '_type' key
                      ('video', 'url', 'url_transparent', 'playlist',
                      'multi_video' or the legacy 'compat_list') selects how
                      it is processed.
        download   -- whether the resolved video(s) should be downloaded.
        extra_info -- extra fields merged into every resolved result (the
                      shared mutable default is harmless here: the dict is
                      only read, never mutated).
        """

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With extract_flat == 'in_playlist' (--flat-playlist) we stop
            # resolving only for entries that came out of a playlist; with
            # extract_flat is True we always stop, returning the unresolved
            # reference as-is.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the target's,
            # except '_type' and 'url', which must come from the target.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is converted to a 0-based slice index here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                # Expand "--playlist-items 1-3,7" into the 1-based indices
                # 1, 2, 3, 7 (lazily, as a generator).
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            # 'entries' may be a plain list, a PagedList or any other
            # iterable; each case is sliced differently to honour the
            # start/end/items options.
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [ie_entries[i - 1] for i in playlistitems]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Playlist context propagated into every entry's info dict.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    # NOTE(review): with --playlist-items this is offset by
                    # playliststart rather than the requested item numbers --
                    # confirm whether that is intended.
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by --match-title/--dateafter/etc.;
                # incomplete=True because the entry is not fully extracted yet.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Legacy format: a bare list of results; graft the playlist-level
            # metadata onto each entry before processing it.
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
830
831     def _apply_format_filter(self, format_spec, available_formats):
832         " Returns a tuple of the remaining format_spec and filtered formats "
833
834         OPERATORS = {
835             '<': operator.lt,
836             '<=': operator.le,
837             '>': operator.gt,
838             '>=': operator.ge,
839             '=': operator.eq,
840             '!=': operator.ne,
841         }
842         operator_rex = re.compile(r'''(?x)\s*\[
843             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
844             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
845             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
846             \]$
847             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
848         m = operator_rex.search(format_spec)
849         if m:
850             try:
851                 comparison_value = int(m.group('value'))
852             except ValueError:
853                 comparison_value = parse_filesize(m.group('value'))
854                 if comparison_value is None:
855                     comparison_value = parse_filesize(m.group('value') + 'B')
856                 if comparison_value is None:
857                     raise ValueError(
858                         'Invalid value %r in format specification %r' % (
859                             m.group('value'), format_spec))
860             op = OPERATORS[m.group('op')]
861
862         if not m:
863             STR_OPERATORS = {
864                 '=': operator.eq,
865                 '!=': operator.ne,
866             }
867             str_operator_rex = re.compile(r'''(?x)\s*\[
868                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
869                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
870                 \s*(?P<value>[a-zA-Z0-9_-]+)
871                 \s*\]$
872                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
873             m = str_operator_rex.search(format_spec)
874             if m:
875                 comparison_value = m.group('value')
876                 op = STR_OPERATORS[m.group('op')]
877
878         if not m:
879             raise ValueError('Invalid format specification %r' % format_spec)
880
881         def _filter(f):
882             actual_value = f.get(m.group('key'))
883             if actual_value is None:
884                 return m.group('none_inclusive')
885             return op(actual_value, comparison_value)
886         new_formats = [f for f in available_formats if _filter(f)]
887
888         new_format_spec = format_spec[:-len(m.group(0))]
889         if not new_format_spec:
890             new_format_spec = 'best'
891
892         return (new_format_spec, new_formats)
893
894     def select_format(self, format_spec, available_formats):
895         while format_spec.endswith(']'):
896             format_spec, available_formats = self._apply_format_filter(
897                 format_spec, available_formats)
898         if not available_formats:
899             return None
900
901         if format_spec == 'best' or format_spec is None:
902             return available_formats[-1]
903         elif format_spec == 'worst':
904             return available_formats[0]
905         elif format_spec == 'bestaudio':
906             audio_formats = [
907                 f for f in available_formats
908                 if f.get('vcodec') == 'none']
909             if audio_formats:
910                 return audio_formats[-1]
911         elif format_spec == 'worstaudio':
912             audio_formats = [
913                 f for f in available_formats
914                 if f.get('vcodec') == 'none']
915             if audio_formats:
916                 return audio_formats[0]
917         elif format_spec == 'bestvideo':
918             video_formats = [
919                 f for f in available_formats
920                 if f.get('acodec') == 'none']
921             if video_formats:
922                 return video_formats[-1]
923         elif format_spec == 'worstvideo':
924             video_formats = [
925                 f for f in available_formats
926                 if f.get('acodec') == 'none']
927             if video_formats:
928                 return video_formats[0]
929         else:
930             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
931             if format_spec in extensions:
932                 filter_f = lambda f: f['ext'] == format_spec
933             else:
934                 filter_f = lambda f: f['format_id'] == format_spec
935             matches = list(filter(filter_f, available_formats))
936             if matches:
937                 return matches[-1]
938         return None
939
940     def _calc_headers(self, info_dict):
941         res = std_headers.copy()
942
943         add_headers = info_dict.get('http_headers')
944         if add_headers:
945             res.update(add_headers)
946
947         cookies = self._calc_cookies(info_dict)
948         if cookies:
949             res['Cookie'] = cookies
950
951         return res
952
953     def _calc_cookies(self, info_dict):
954         class _PseudoRequest(object):
955             def __init__(self, url):
956                 self.url = url
957                 self.headers = {}
958                 self.unverifiable = False
959
960             def add_unredirected_header(self, k, v):
961                 self.headers[k] = v
962
963             def get_full_url(self):
964                 return self.url
965
966             def is_unverifiable(self):
967                 return self.unverifiable
968
969             def has_header(self, h):
970                 return h in self.headers
971
972             def get_header(self, h, default=None):
973                 return self.headers.get(h, default)
974
975         pr = _PseudoRequest(info_dict['url'])
976         self.cookiejar.add_cookie_header(pr)
977         return pr.headers.get('Cookie')
978
    def process_video_result(self, info_dict, download=True):
        """Normalize a single '_type' == 'video' result and select formats.

        Fills in defaults in-place (playlist fields, thumbnails, display_id,
        upload_date), resolves the requested subtitles and applies the
        user's --format selection, including 'video+audio' merge requests.
        If 'download' is true every selected format is handed to
        process_info().  Returns info_dict updated with the last selected
        format, or None for pure listing modes (--list-subs, --list-formats,
        --list-thumbnails).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a single 'thumbnail' into the 'thumbnails' list, sort it
        # (None sorts first, so the last element is the preferred one) and
        # give each entry an id and a resolution string.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # The last thumbnail sorts as the best one.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        # None unless --write-sub/--write-auto-sub was given (see
        # process_subtitles).
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: drop everything after (better than) the given
        # format_id, keeping the limit format itself.
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists
            # them; otherwise we would end up with a circular reference, the
            # first (and unique) element in the 'formats' field in info_dict
            # being info_dict itself, which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # Container of the merged file: the video's,
                            # unless --merge-output-format overrides it.
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesized entry describing the merge: video
                            # attributes come from the first format, audio
                            # attributes from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1158
1159     def process_subtitles(self, video_id, available_subs, available_autocaps):
1160         """Select the requested subtitles and their format"""
1161         if available_autocaps and self.params.get('writeautomaticsub'):
1162             available_subs = available_subs.copy()
1163             for lang, cap_info in available_autocaps.items():
1164                 if lang not in available_subs:
1165                     available_subs[lang] = cap_info
1166
1167         if (not self.params.get('writesubtitles') and not
1168                 self.params.get('writeautomaticsub') or not
1169                 available_subs):
1170             return None
1171
1172         if self.params.get('allsubtitles', False):
1173             requested_langs = available_subs.keys()
1174         else:
1175             if self.params.get('subtitleslangs', False):
1176                 requested_langs = self.params.get('subtitleslangs')
1177             elif 'en' in available_subs:
1178                 requested_langs = ['en']
1179             else:
1180                 requested_langs = [list(available_subs.keys())[0]]
1181
1182         formats_query = self.params.get('subtitlesformat', 'best')
1183         formats_preference = formats_query.split('/') if formats_query else []
1184         subs = {}
1185         for lang in requested_langs:
1186             formats = available_subs.get(lang)
1187             if formats is None:
1188                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1189                 continue
1190             if isinstance(formats, compat_str):
1191                 # TODO: convert all IE with subtitles support to the new format
1192                 # and remove this
1193                 subs[lang] = {
1194                     'ext': formats_preference[0],
1195                     'data': formats,
1196                 }
1197                 continue
1198             for ext in formats_preference:
1199                 if ext == 'best':
1200                     f = formats[-1]
1201                     break
1202                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1203                 if matches:
1204                     f = matches[-1]
1205                     break
1206             else:
1207                 f = formats[-1]
1208                 self.report_warning(
1209                     'No subtitle format found matching "%s" for language %s, '
1210                     'using %s' % (formats_query, lang, f['ext']))
1211             subs[lang] = f
1212         return subs
1213
1214     def process_info(self, info_dict):
1215         """Process a single resolved IE result."""
1216
1217         assert info_dict.get('_type', 'video') == 'video'
1218
1219         max_downloads = self.params.get('max_downloads')
1220         if max_downloads is not None:
1221             if self._num_downloads >= int(max_downloads):
1222                 raise MaxDownloadsReached()
1223
1224         info_dict['fulltitle'] = info_dict['title']
1225         if len(info_dict['title']) > 200:
1226             info_dict['title'] = info_dict['title'][:197] + '...'
1227
1228         # Keep for backwards compatibility
1229         info_dict['stitle'] = info_dict['title']
1230
1231         if 'format' not in info_dict:
1232             info_dict['format'] = info_dict['ext']
1233
1234         reason = self._match_entry(info_dict, incomplete=False)
1235         if reason is not None:
1236             self.to_screen('[download] ' + reason)
1237             return
1238
1239         self._num_downloads += 1
1240
1241         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1242
1243         # Forced printings
1244         if self.params.get('forcetitle', False):
1245             self.to_stdout(info_dict['fulltitle'])
1246         if self.params.get('forceid', False):
1247             self.to_stdout(info_dict['id'])
1248         if self.params.get('forceurl', False):
1249             if info_dict.get('requested_formats') is not None:
1250                 for f in info_dict['requested_formats']:
1251                     self.to_stdout(f['url'] + f.get('play_path', ''))
1252             else:
1253                 # For RTMP URLs, also include the playpath
1254                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1255         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1256             self.to_stdout(info_dict['thumbnail'])
1257         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1258             self.to_stdout(info_dict['description'])
1259         if self.params.get('forcefilename', False) and filename is not None:
1260             self.to_stdout(filename)
1261         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1262             self.to_stdout(formatSeconds(info_dict['duration']))
1263         if self.params.get('forceformat', False):
1264             self.to_stdout(info_dict['format'])
1265         if self.params.get('forcejson', False):
1266             self.to_stdout(json.dumps(info_dict))
1267
1268         # Do nothing else if in simulate mode
1269         if self.params.get('simulate', False):
1270             return
1271
1272         if filename is None:
1273             return
1274
1275         try:
1276             dn = os.path.dirname(encodeFilename(filename))
1277             if dn and not os.path.exists(dn):
1278                 os.makedirs(dn)
1279         except (OSError, IOError) as err:
1280             self.report_error('unable to create directory ' + compat_str(err))
1281             return
1282
1283         if self.params.get('writedescription', False):
1284             descfn = filename + '.description'
1285             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1286                 self.to_screen('[info] Video description is already present')
1287             elif info_dict.get('description') is None:
1288                 self.report_warning('There\'s no description to write.')
1289             else:
1290                 try:
1291                     self.to_screen('[info] Writing video description to: ' + descfn)
1292                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1293                         descfile.write(info_dict['description'])
1294                 except (OSError, IOError):
1295                     self.report_error('Cannot write description file ' + descfn)
1296                     return
1297
1298         if self.params.get('writeannotations', False):
1299             annofn = filename + '.annotations.xml'
1300             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1301                 self.to_screen('[info] Video annotations are already present')
1302             else:
1303                 try:
1304                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1305                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1306                         annofile.write(info_dict['annotations'])
1307                 except (KeyError, TypeError):
1308                     self.report_warning('There are no annotations to write.')
1309                 except (OSError, IOError):
1310                     self.report_error('Cannot write annotations file: ' + annofn)
1311                     return
1312
1313         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1314                                        self.params.get('writeautomaticsub')])
1315
1316         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1317             # subtitles download errors are already managed as troubles in relevant IE
1318             # that way it will silently go on when used with unsupporting IE
1319             subtitles = info_dict['requested_subtitles']
1320             for sub_lang, sub_info in subtitles.items():
1321                 sub_format = sub_info['ext']
1322                 if sub_info.get('data') is not None:
1323                     sub_data = sub_info['data']
1324                 else:
1325                     try:
1326                         uf = self.urlopen(sub_info['url'])
1327                         sub_data = uf.read().decode('utf-8')
1328                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1329                         self.report_warning('Unable to download subtitle for "%s": %s' %
1330                                             (sub_lang, compat_str(err)))
1331                         continue
1332                 try:
1333                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1334                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1335                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1336                     else:
1337                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1338                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1339                             subfile.write(sub_data)
1340                 except (OSError, IOError):
1341                     self.report_error('Cannot write subtitles file ' + sub_filename)
1342                     return
1343
1344         if self.params.get('writeinfojson', False):
1345             infofn = os.path.splitext(filename)[0] + '.info.json'
1346             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1347                 self.to_screen('[info] Video description metadata is already present')
1348             else:
1349                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1350                 try:
1351                     write_json_file(info_dict, infofn)
1352                 except (OSError, IOError):
1353                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1354                     return
1355
1356         self._write_thumbnails(info_dict, filename)
1357
1358         if not self.params.get('skip_download', False):
1359             try:
1360                 def dl(name, info):
1361                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1362                     for ph in self._progress_hooks:
1363                         fd.add_progress_hook(ph)
1364                     if self.params.get('verbose'):
1365                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1366                     return fd.download(name, info)
1367
1368                 if info_dict.get('requested_formats') is not None:
1369                     downloaded = []
1370                     success = True
1371                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1372                     if not merger._executable:
1373                         postprocessors = []
1374                         self.report_warning('You have requested multiple '
1375                                             'formats but ffmpeg or avconv are not installed.'
1376                                             ' The formats won\'t be merged')
1377                     else:
1378                         postprocessors = [merger]
1379                     for f in info_dict['requested_formats']:
1380                         new_info = dict(info_dict)
1381                         new_info.update(f)
1382                         fname = self.prepare_filename(new_info)
1383                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1384                         downloaded.append(fname)
1385                         partial_success = dl(fname, new_info)
1386                         success = success and partial_success
1387                     info_dict['__postprocessors'] = postprocessors
1388                     info_dict['__files_to_merge'] = downloaded
1389                 else:
1390                     # Just a single file
1391                     success = dl(filename, info_dict)
1392             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1393                 self.report_error('unable to download video data: %s' % str(err))
1394                 return
1395             except (OSError, IOError) as err:
1396                 raise UnavailableVideoError(err)
1397             except (ContentTooShortError, ) as err:
1398                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1399                 return
1400
1401             if success:
1402                 # Fixup content
1403                 fixup_policy = self.params.get('fixup')
1404                 if fixup_policy is None:
1405                     fixup_policy = 'detect_or_warn'
1406
1407                 stretched_ratio = info_dict.get('stretched_ratio')
1408                 if stretched_ratio is not None and stretched_ratio != 1:
1409                     if fixup_policy == 'warn':
1410                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1411                             info_dict['id'], stretched_ratio))
1412                     elif fixup_policy == 'detect_or_warn':
1413                         stretched_pp = FFmpegFixupStretchedPP(self)
1414                         if stretched_pp.available:
1415                             info_dict.setdefault('__postprocessors', [])
1416                             info_dict['__postprocessors'].append(stretched_pp)
1417                         else:
1418                             self.report_warning(
1419                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1420                                     info_dict['id'], stretched_ratio))
1421                     else:
1422                         assert fixup_policy in ('ignore', 'never')
1423
1424                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1425                     if fixup_policy == 'warn':
1426                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1427                             info_dict['id']))
1428                     elif fixup_policy == 'detect_or_warn':
1429                         fixup_pp = FFmpegFixupM4aPP(self)
1430                         if fixup_pp.available:
1431                             info_dict.setdefault('__postprocessors', [])
1432                             info_dict['__postprocessors'].append(fixup_pp)
1433                         else:
1434                             self.report_warning(
1435                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1436                                     info_dict['id']))
1437                     else:
1438                         assert fixup_policy in ('ignore', 'never')
1439
1440                 try:
1441                     self.post_process(filename, info_dict)
1442                 except (PostProcessingError) as err:
1443                     self.report_error('postprocessing: %s' % str(err))
1444                     return
1445                 self.record_download_archive(info_dict)
1446
1447     def download(self, url_list):
1448         """Download a given list of URLs."""
1449         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1450         if (len(url_list) > 1 and
1451                 '%' not in outtmpl
1452                 and self.params.get('max_downloads') != 1):
1453             raise SameFileError(outtmpl)
1454
1455         for url in url_list:
1456             try:
1457                 # It also downloads the videos
1458                 res = self.extract_info(url)
1459             except UnavailableVideoError:
1460                 self.report_error('unable to download video')
1461             except MaxDownloadsReached:
1462                 self.to_screen('[info] Maximum number of downloaded files reached.')
1463                 raise
1464             else:
1465                 if self.params.get('dump_single_json', False):
1466                     self.to_stdout(json.dumps(res))
1467
1468         return self._download_retcode
1469
1470     def download_with_info_file(self, info_filename):
1471         with io.open(info_filename, 'r', encoding='utf-8') as f:
1472             info = json.load(f)
1473         try:
1474             self.process_ie_result(info, download=True)
1475         except DownloadError:
1476             webpage_url = info.get('webpage_url')
1477             if webpage_url is not None:
1478                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1479                 return self.download([webpage_url])
1480             else:
1481                 raise
1482         return self._download_retcode
1483
1484     def post_process(self, filename, ie_info):
1485         """Run all the postprocessors on the given file."""
1486         info = dict(ie_info)
1487         info['filepath'] = filename
1488         pps_chain = []
1489         if ie_info.get('__postprocessors') is not None:
1490             pps_chain.extend(ie_info['__postprocessors'])
1491         pps_chain.extend(self._pps)
1492         for pp in pps_chain:
1493             keep_video = None
1494             old_filename = info['filepath']
1495             try:
1496                 keep_video_wish, info = pp.run(info)
1497                 if keep_video_wish is not None:
1498                     if keep_video_wish:
1499                         keep_video = keep_video_wish
1500                     elif keep_video is None:
1501                         # No clear decision yet, let IE decide
1502                         keep_video = keep_video_wish
1503             except PostProcessingError as e:
1504                 self.report_error(e.msg)
1505             if keep_video is False and not self.params.get('keepvideo', False):
1506                 try:
1507                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1508                     os.remove(encodeFilename(old_filename))
1509                 except (IOError, OSError):
1510                     self.report_warning('Unable to remove downloaded video file')
1511
1512     def _make_archive_id(self, info_dict):
1513         # Future-proof against any change in case
1514         # and backwards compatibility with prior versions
1515         extractor = info_dict.get('extractor_key')
1516         if extractor is None:
1517             if 'id' in info_dict:
1518                 extractor = info_dict.get('ie_key')  # key in a playlist
1519         if extractor is None:
1520             return None  # Incomplete video information
1521         return extractor.lower() + ' ' + info_dict['id']
1522
1523     def in_download_archive(self, info_dict):
1524         fn = self.params.get('download_archive')
1525         if fn is None:
1526             return False
1527
1528         vid_id = self._make_archive_id(info_dict)
1529         if vid_id is None:
1530             return False  # Incomplete video information
1531
1532         try:
1533             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1534                 for line in archive_file:
1535                     if line.strip() == vid_id:
1536                         return True
1537         except IOError as ioe:
1538             if ioe.errno != errno.ENOENT:
1539                 raise
1540         return False
1541
1542     def record_download_archive(self, info_dict):
1543         fn = self.params.get('download_archive')
1544         if fn is None:
1545             return
1546         vid_id = self._make_archive_id(info_dict)
1547         assert vid_id
1548         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1549             archive_file.write(vid_id + '\n')
1550
1551     @staticmethod
1552     def format_resolution(format, default='unknown'):
1553         if format.get('vcodec') == 'none':
1554             return 'audio only'
1555         if format.get('resolution') is not None:
1556             return format['resolution']
1557         if format.get('height') is not None:
1558             if format.get('width') is not None:
1559                 res = '%sx%s' % (format['width'], format['height'])
1560             else:
1561                 res = '%sp' % format['height']
1562         elif format.get('width') is not None:
1563             res = '?x%d' % format['width']
1564         else:
1565             res = default
1566         return res
1567
1568     def _format_note(self, fdict):
1569         res = ''
1570         if fdict.get('ext') in ['f4f', 'f4m']:
1571             res += '(unsupported) '
1572         if fdict.get('format_note') is not None:
1573             res += fdict['format_note'] + ' '
1574         if fdict.get('tbr') is not None:
1575             res += '%4dk ' % fdict['tbr']
1576         if fdict.get('container') is not None:
1577             if res:
1578                 res += ', '
1579             res += '%s container' % fdict['container']
1580         if (fdict.get('vcodec') is not None and
1581                 fdict.get('vcodec') != 'none'):
1582             if res:
1583                 res += ', '
1584             res += fdict['vcodec']
1585             if fdict.get('vbr') is not None:
1586                 res += '@'
1587         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1588             res += 'video@'
1589         if fdict.get('vbr') is not None:
1590             res += '%4dk' % fdict['vbr']
1591         if fdict.get('fps') is not None:
1592             res += ', %sfps' % fdict['fps']
1593         if fdict.get('acodec') is not None:
1594             if res:
1595                 res += ', '
1596             if fdict['acodec'] == 'none':
1597                 res += 'video only'
1598             else:
1599                 res += '%-5s' % fdict['acodec']
1600         elif fdict.get('abr') is not None:
1601             if res:
1602                 res += ', '
1603             res += 'audio'
1604         if fdict.get('abr') is not None:
1605             res += '@%3dk' % fdict['abr']
1606         if fdict.get('asr') is not None:
1607             res += ' (%5dHz)' % fdict['asr']
1608         if fdict.get('filesize') is not None:
1609             if res:
1610                 res += ', '
1611             res += format_bytes(fdict['filesize'])
1612         elif fdict.get('filesize_approx') is not None:
1613             if res:
1614                 res += ', '
1615             res += '~' + format_bytes(fdict['filesize_approx'])
1616         return res
1617
1618     def list_formats(self, info_dict):
1619         def line(format, idlen=20):
1620             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1621                 format['format_id'],
1622                 format['ext'],
1623                 self.format_resolution(format),
1624                 self._format_note(format),
1625             ))
1626
1627         formats = info_dict.get('formats', [info_dict])
1628         idlen = max(len('format code'),
1629                     max(len(f['format_id']) for f in formats))
1630         formats_s = [
1631             line(f, idlen) for f in formats
1632             if f.get('preference') is None or f['preference'] >= -1000]
1633         if len(formats) > 1:
1634             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1635
1636         header_line = line({
1637             'format_id': 'format code', 'ext': 'extension',
1638             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1639         self.to_screen(
1640             '[info] Available formats for %s:\n%s\n%s' %
1641             (info_dict['id'], header_line, '\n'.join(formats_s)))
1642
1643     def list_thumbnails(self, info_dict):
1644         thumbnails = info_dict.get('thumbnails')
1645         if not thumbnails:
1646             tn_url = info_dict.get('thumbnail')
1647             if tn_url:
1648                 thumbnails = [{'id': '0', 'url': tn_url}]
1649             else:
1650                 self.to_screen(
1651                     '[info] No thumbnails present for %s' % info_dict['id'])
1652                 return
1653
1654         self.to_screen(
1655             '[info] Thumbnails for %s:' % info_dict['id'])
1656         self.to_screen(render_table(
1657             ['ID', 'width', 'height', 'URL'],
1658             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1659
1660     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1661         if not subtitles:
1662             self.to_screen('%s has no %s' % (video_id, name))
1663             return
1664         self.to_screen(
1665             'Available %s for %s:' % (name, video_id))
1666         self.to_screen(render_table(
1667             ['Language', 'formats'],
1668             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1669                 for lang, formats in subtitles.items()]))
1670
1671     def urlopen(self, req):
1672         """ Start an HTTP download """
1673
1674         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1675         # always respected by websites, some tend to give out URLs with non percent-encoded
1676         # non-ASCII characters (see telemb.py, ard.py [#3412])
1677         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1678         # To work around aforementioned issue we will replace request's original URL with
1679         # percent-encoded one
1680         req_is_string = isinstance(req, compat_basestring)
1681         url = req if req_is_string else req.get_full_url()
1682         url_escaped = escape_url(url)
1683
1684         # Substitute URL if any change after escaping
1685         if url != url_escaped:
1686             if req_is_string:
1687                 req = url_escaped
1688             else:
1689                 req = compat_urllib_request.Request(
1690                     url_escaped, data=req.data, headers=req.headers,
1691                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1692
1693         return self._opener.open(req, timeout=self._socket_timeout)
1694
1695     def print_debug_header(self):
1696         if not self.params.get('verbose'):
1697             return
1698
1699         if type('') is not compat_str:
1700             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1701             self.report_warning(
1702                 'Your Python is broken! Update to a newer and supported version')
1703
1704         stdout_encoding = getattr(
1705             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1706         encoding_str = (
1707             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1708                 locale.getpreferredencoding(),
1709                 sys.getfilesystemencoding(),
1710                 stdout_encoding,
1711                 self.get_encoding()))
1712         write_string(encoding_str, encoding=None)
1713
1714         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1715         try:
1716             sp = subprocess.Popen(
1717                 ['git', 'rev-parse', '--short', 'HEAD'],
1718                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1719                 cwd=os.path.dirname(os.path.abspath(__file__)))
1720             out, err = sp.communicate()
1721             out = out.decode().strip()
1722             if re.match('[0-9a-f]+', out):
1723                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1724         except:
1725             try:
1726                 sys.exc_clear()
1727             except:
1728                 pass
1729         self._write_string('[debug] Python version %s - %s\n' % (
1730             platform.python_version(), platform_name()))
1731
1732         exe_versions = FFmpegPostProcessor.get_versions()
1733         exe_versions['rtmpdump'] = rtmpdump_version()
1734         exe_str = ', '.join(
1735             '%s %s' % (exe, v)
1736             for exe, v in sorted(exe_versions.items())
1737             if v
1738         )
1739         if not exe_str:
1740             exe_str = 'none'
1741         self._write_string('[debug] exe versions: %s\n' % exe_str)
1742
1743         proxy_map = {}
1744         for handler in self._opener.handlers:
1745             if hasattr(handler, 'proxies'):
1746                 proxy_map.update(handler.proxies)
1747         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1748
1749         if self.params.get('call_home', False):
1750             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1751             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1752             latest_version = self.urlopen(
1753                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1754             if version_tuple(latest_version) > version_tuple(__version__):
1755                 self.report_warning(
1756                     'You are using an outdated version (newest version: %s)! '
1757                     'See https://yt-dl.org/update if you need help updating.' %
1758                     latest_version)
1759
1760     def _setup_opener(self):
1761         timeout_val = self.params.get('socket_timeout')
1762         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1763
1764         opts_cookiefile = self.params.get('cookiefile')
1765         opts_proxy = self.params.get('proxy')
1766
1767         if opts_cookiefile is None:
1768             self.cookiejar = compat_cookiejar.CookieJar()
1769         else:
1770             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1771                 opts_cookiefile)
1772             if os.access(opts_cookiefile, os.R_OK):
1773                 self.cookiejar.load()
1774
1775         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1776             self.cookiejar)
1777         if opts_proxy is not None:
1778             if opts_proxy == '':
1779                 proxies = {}
1780             else:
1781                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1782         else:
1783             proxies = compat_urllib_request.getproxies()
1784             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1785             if 'http' in proxies and 'https' not in proxies:
1786                 proxies['https'] = proxies['http']
1787         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1788
1789         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1790         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1791         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1792         opener = compat_urllib_request.build_opener(
1793             https_handler, proxy_handler, cookie_processor, ydlh)
1794         # Delete the default user-agent header, which would otherwise apply in
1795         # cases where our custom HTTP handler doesn't come into play
1796         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1797         opener.addheaders = []
1798         self._opener = opener
1799
1800     def encode(self, s):
1801         if isinstance(s, bytes):
1802             return s  # Already encoded
1803
1804         try:
1805             return s.encode(self.get_encoding())
1806         except UnicodeEncodeError as err:
1807             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1808             raise
1809
1810     def get_encoding(self):
1811         encoding = self.params.get('encoding')
1812         if encoding is None:
1813             encoding = preferredencoding()
1814         return encoding
1815
    def _write_thumbnails(self, info_dict, filename):
        """Save thumbnail image(s) for the video alongside the media file.

        'writethumbnail' saves only the last thumbnail in the list;
        'write_all_thumbnails' saves every entry.  Download failures are
        reported as warnings, not errors.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Keep only the last entry -- presumably the preferred
                # quality (TODO confirm thumbnails are ordered worst-to-best).
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Only disambiguate filenames and messages when several
            # thumbnails are being written.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnails are non-essential: log and continue on network errors.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))