[YoutubeDL] store the subtitles to download in the 'requested_subtitles' field
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
88 class YoutubeDL(object):
89     """YoutubeDL class.
90
91     YoutubeDL objects are the ones responsible for downloading the
92     actual video file and writing it to disk if the user has requested
93     it, among some other tasks. In most cases there should be one per
94     program. As, given a video URL, the downloader doesn't know how to
95     extract all the needed information, task that InfoExtractors do, it
96     has to pass the URL to one of them.
97
98     For this, YoutubeDL objects have a method that allows
99     InfoExtractors to be registered in a given order. When it is passed
100     a URL, the YoutubeDL object handles it to the first InfoExtractor it
101     finds that reports being able to handle it. The InfoExtractor extracts
102     all the information about the video or videos the URL refers to, and
103     YoutubeDL process the extracted information, possibly using a File
104     Downloader to download the video.
105
106     YoutubeDL objects accept a lot of parameters. In order not to saturate
107     the object constructor with arguments, it receives a dictionary of
108     options instead. These options are available through the params
109     attribute for the InfoExtractors to use. The YoutubeDL also
110     registers itself as the downloader in charge for the InfoExtractors
111     that are added to it, so this is a "mutual registration".
112
113     Available options:
114
115     username:          Username for authentication purposes.
116     password:          Password for authentication purposes.
117     videopassword:     Password for accessing a video.
118     usenetrc:          Use netrc for authentication instead.
119     verbose:           Print additional info to stdout.
120     quiet:             Do not print messages to stdout.
121     no_warnings:       Do not print out anything for warnings.
122     forceurl:          Force printing final URL.
123     forcetitle:        Force printing title.
124     forceid:           Force printing ID.
125     forcethumbnail:    Force printing thumbnail URL.
126     forcedescription:  Force printing description.
127     forcefilename:     Force printing final filename.
128     forceduration:     Force printing duration.
129     forcejson:         Force printing info_dict as JSON.
130     dump_single_json:  Force printing the info_dict of the whole playlist
131                        (or video) as a single JSON line.
132     simulate:          Do not download the video files.
133     format:            Video format code. See options.py for more information.
134     format_limit:      Highest quality format to try.
135     outtmpl:           Template for output names.
136     restrictfilenames: Do not allow "&" and spaces in file names
137     ignoreerrors:      Do not stop on download errors.
138     nooverwrites:      Prevent overwriting files.
139     playliststart:     Playlist item to start at.
140     playlistend:       Playlist item to end at.
141     playlist_items:    Specific indices of playlist to download.
142     playlistreverse:   Download playlist items in reverse order.
143     matchtitle:        Download only matching titles.
144     rejecttitle:       Reject downloads for matching titles.
145     logger:            Log messages to a logging.Logger instance.
146     logtostderr:       Log messages to stderr instead of stdout.
147     writedescription:  Write the video description to a .description file
148     writeinfojson:     Write the video description to a .info.json file
149     writeannotations:  Write the video annotations to a .annotations.xml file
150     writethumbnail:    Write the thumbnail image to a file
151     write_all_thumbnails:  Write all thumbnail formats to files
152     writesubtitles:    Write the video subtitles to a file
153     writeautomaticsub: Write the automatic subtitles to a file
154     allsubtitles:      Downloads all the subtitles of the video
155                        (requires writesubtitles or writeautomaticsub)
156     listsubtitles:     Lists all available subtitles for the video
157     subtitlesformat:   The format code for subtitles
158     subtitleslangs:    List of languages of the subtitles to download
159     keepvideo:         Keep the video file after post-processing
160     daterange:         A DateRange object, download only if the upload_date is in the range.
161     skip_download:     Skip the actual download of the video file
162     cachedir:          Location of the cache files in the filesystem.
163                        False to disable filesystem cache.
164     noplaylist:        Download single video instead of a playlist if in doubt.
165     age_limit:         An integer representing the user's age in years.
166                        Unsuitable videos for the given age are skipped.
167     min_views:         An integer representing the minimum view count the video
168                        must have in order to not be skipped.
169                        Videos without view count information are always
170                        downloaded. None for no limit.
171     max_views:         An integer representing the maximum view count.
172                        Videos that are more popular than that are not
173                        downloaded.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     download_archive:  File name of a file where all downloads are recorded.
177                        Videos already present in the file are not downloaded
178                        again.
179     cookiefile:        File name where cookies should be read from and dumped to.
180     nocheckcertificate:Do not verify SSL certificates
181     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
182                        At the moment, this is only supported by YouTube.
183     proxy:             URL of the proxy server to use
184     socket_timeout:    Time to wait for unresponsive hosts, in seconds
185     bidi_workaround:   Work around buggy terminals without bidirectional text
186                        support, using fribidi
187     debug_printtraffic:Print out sent and received HTTP traffic
188     include_ads:       Download ads as well
189     default_search:    Prepend this string if an input url is not valid.
190                        'auto' for elaborate guessing
191     encoding:          Use this encoding instead of the system-specified.
192     extract_flat:      Do not resolve URLs, return the immediate result.
193                        Pass in 'in_playlist' to only show this behavior for
194                        playlist items.
195     postprocessors:    A list of dictionaries, each with an entry
196                        * key:  The name of the postprocessor. See
197                                youtube_dl/postprocessor/__init__.py for a list.
198                        as well as any further keyword arguments for the
199                        postprocessor.
200     progress_hooks:    A list of functions that get called on download
201                        progress, with a dictionary with the entries
202                        * status: One of "downloading" and "finished".
203                                  Check this first and ignore unknown values.
204
205                        If status is one of "downloading" or "finished", the
206                        following properties may also be present:
207                        * filename: The final filename (always present)
208                        * downloaded_bytes: Bytes on disk
209                        * total_bytes: Size of the whole file, None if unknown
210                        * tmpfilename: The filename we're currently writing to
211                        * eta: The estimated time in seconds, None if unknown
212                        * speed: The download speed in bytes/second, None if
213                                 unknown
214
215                        Progress hooks are guaranteed to be called at least once
216                        (with status "finished") if the download is successful.
217     merge_output_format: Extension to use when merging formats.
218     fixup:             Automatically correct known faults of the file.
219                        One of:
220                        - "never": do nothing
221                        - "warn": only emit a warning
222                        - "detect_or_warn": check whether we can do anything
223                                            about it, warn otherwise (default)
224     source_address:    (Experimental) Client-side IP address to bind to.
225     call_home:         Boolean, true iff we are allowed to contact the
226                        youtube-dl servers for debugging.
227     sleep_interval:    Number of seconds to sleep before each download.
228     external_downloader:  Executable of the external downloader to call.
229     listformats:       Print an overview of available video formats and exit.
230     list_thumbnails:   Print a table of all thumbnails and exit.
231     match_filter:      A function that gets called with the info_dict of
232                        every video.
233                        If it returns a message, the video is ignored.
234                        If it returns None, the video is downloaded.
235                        match_filter_func in utils.py is one example for this.
236     no_color:          Do not emit color codes in output.
237
238
239     The following parameters are not used by YoutubeDL itself, they are used by
240     the FileDownloader:
241     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
242     noresizebuffer, retries, continuedl, noprogress, consoletitle,
243     xattr_set_filesize.
244
245     The following options are used by the post processors:
246     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
247                        otherwise prefer avconv.
248     exec_cmd:          Arbitrary command to run after downloading
249     """
250
251     params = None
252     _ies = []
253     _pps = []
254     _download_retcode = None
255     _num_downloads = None
256     _screen_file = None
257
258     def __init__(self, params=None, auto_init=True):
259         """Create a FileDownloader object with the given options."""
260         if params is None:
261             params = {}
262         self._ies = []
263         self._ies_instances = {}
264         self._pps = []
265         self._progress_hooks = []
266         self._download_retcode = 0
267         self._num_downloads = 0
268         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
269         self._err_file = sys.stderr
270         self.params = params
271         self.cache = Cache(self)
272
273         if params.get('bidi_workaround', False):
274             try:
275                 import pty
276                 master, slave = pty.openpty()
277                 width = get_term_width()
278                 if width is None:
279                     width_args = []
280                 else:
281                     width_args = ['-w', str(width)]
282                 sp_kwargs = dict(
283                     stdin=subprocess.PIPE,
284                     stdout=slave,
285                     stderr=self._err_file)
286                 try:
287                     self._output_process = subprocess.Popen(
288                         ['bidiv'] + width_args, **sp_kwargs
289                     )
290                 except OSError:
291                     self._output_process = subprocess.Popen(
292                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
293                 self._output_channel = os.fdopen(master, 'rb')
294             except OSError as ose:
295                 if ose.errno == 2:
296                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
297                 else:
298                     raise
299
300         if (sys.version_info >= (3,) and sys.platform != 'win32' and
301                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
302                 and not params.get('restrictfilenames', False)):
303             # On Python 3, the Unicode filesystem API will throw errors (#1474)
304             self.report_warning(
305                 'Assuming --restrict-filenames since file system encoding '
306                 'cannot encode all characters. '
307                 'Set the LC_ALL environment variable to fix this.')
308             self.params['restrictfilenames'] = True
309
310         if '%(stitle)s' in self.params.get('outtmpl', ''):
311             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
312
313         self._setup_opener()
314
315         if auto_init:
316             self.print_debug_header()
317             self.add_default_info_extractors()
318
319         for pp_def_raw in self.params.get('postprocessors', []):
320             pp_class = get_postprocessor(pp_def_raw['key'])
321             pp_def = dict(pp_def_raw)
322             del pp_def['key']
323             pp = pp_class(self, **compat_kwargs(pp_def))
324             self.add_post_processor(pp)
325
326         for ph in self.params.get('progress_hooks', []):
327             self.add_progress_hook(ph)
328
329     def warn_if_short_id(self, argv):
330         # short YouTube ID starting with dash?
331         idxs = [
332             i for i, a in enumerate(argv)
333             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
334         if idxs:
335             correct_argv = (
336                 ['youtube-dl'] +
337                 [a for i, a in enumerate(argv) if i not in idxs] +
338                 ['--'] + [argv[i] for i in idxs]
339             )
340             self.report_warning(
341                 'Long argument string detected. '
342                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
343                 args_to_str(correct_argv))
344
345     def add_info_extractor(self, ie):
346         """Add an InfoExtractor object to the end of the list."""
347         self._ies.append(ie)
348         self._ies_instances[ie.ie_key()] = ie
349         ie.set_downloader(self)
350
351     def get_info_extractor(self, ie_key):
352         """
353         Get an instance of an IE with name ie_key, it will try to get one from
354         the _ies list, if there's no instance it will create a new one and add
355         it to the extractor list.
356         """
357         ie = self._ies_instances.get(ie_key)
358         if ie is None:
359             ie = get_info_extractor(ie_key)()
360             self.add_info_extractor(ie)
361         return ie
362
363     def add_default_info_extractors(self):
364         """
365         Add the InfoExtractors returned by gen_extractors to the end of the list
366         """
367         for ie in gen_extractors():
368             self.add_info_extractor(ie)
369
370     def add_post_processor(self, pp):
371         """Add a PostProcessor object to the end of the chain."""
372         self._pps.append(pp)
373         pp.set_downloader(self)
374
375     def add_progress_hook(self, ph):
376         """Add the progress hook (currently only for the file downloader)"""
377         self._progress_hooks.append(ph)
378
379     def _bidi_workaround(self, message):
380         if not hasattr(self, '_output_channel'):
381             return message
382
383         assert hasattr(self, '_output_process')
384         assert isinstance(message, compat_str)
385         line_count = message.count('\n') + 1
386         self._output_process.stdin.write((message + '\n').encode('utf-8'))
387         self._output_process.stdin.flush()
388         res = ''.join(self._output_channel.readline().decode('utf-8')
389                       for _ in range(line_count))
390         return res[:-len('\n')]
391
392     def to_screen(self, message, skip_eol=False):
393         """Print message to stdout if not in quiet mode."""
394         return self.to_stdout(message, skip_eol, check_quiet=True)
395
396     def _write_string(self, s, out=None):
397         write_string(s, out=out, encoding=self.params.get('encoding'))
398
399     def to_stdout(self, message, skip_eol=False, check_quiet=False):
400         """Print message to stdout if not in quiet mode."""
401         if self.params.get('logger'):
402             self.params['logger'].debug(message)
403         elif not check_quiet or not self.params.get('quiet', False):
404             message = self._bidi_workaround(message)
405             terminator = ['\n', ''][skip_eol]
406             output = message + terminator
407
408             self._write_string(output, self._screen_file)
409
410     def to_stderr(self, message):
411         """Print message to stderr."""
412         assert isinstance(message, compat_str)
413         if self.params.get('logger'):
414             self.params['logger'].error(message)
415         else:
416             message = self._bidi_workaround(message)
417             output = message + '\n'
418             self._write_string(output, self._err_file)
419
420     def to_console_title(self, message):
421         if not self.params.get('consoletitle', False):
422             return
423         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
424             # c_wchar_p() might not be necessary if `message` is
425             # already of type unicode()
426             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
427         elif 'TERM' in os.environ:
428             self._write_string('\033]0;%s\007' % message, self._screen_file)
429
430     def save_console_title(self):
431         if not self.params.get('consoletitle', False):
432             return
433         if 'TERM' in os.environ:
434             # Save the title on stack
435             self._write_string('\033[22;0t', self._screen_file)
436
437     def restore_console_title(self):
438         if not self.params.get('consoletitle', False):
439             return
440         if 'TERM' in os.environ:
441             # Restore the title from stack
442             self._write_string('\033[23;0t', self._screen_file)
443
444     def __enter__(self):
445         self.save_console_title()
446         return self
447
448     def __exit__(self, *args):
449         self.restore_console_title()
450
451         if self.params.get('cookiefile') is not None:
452             self.cookiejar.save()
453
454     def trouble(self, message=None, tb=None):
455         """Determine action to take when a download problem appears.
456
457         Depending on if the downloader has been configured to ignore
458         download errors or not, this method may throw an exception or
459         not when errors are found, after printing the message.
460
461         tb, if given, is additional traceback information.
462         """
463         if message is not None:
464             self.to_stderr(message)
465         if self.params.get('verbose'):
466             if tb is None:
467                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
468                     tb = ''
469                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
470                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
471                     tb += compat_str(traceback.format_exc())
472                 else:
473                     tb_data = traceback.format_list(traceback.extract_stack())
474                     tb = ''.join(tb_data)
475             self.to_stderr(tb)
476         if not self.params.get('ignoreerrors', False):
477             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
478                 exc_info = sys.exc_info()[1].exc_info
479             else:
480                 exc_info = sys.exc_info()
481             raise DownloadError(message, exc_info)
482         self._download_retcode = 1
483
484     def report_warning(self, message):
485         '''
486         Print the message to stderr, it will be prefixed with 'WARNING:'
487         If stderr is a tty file the 'WARNING:' will be colored
488         '''
489         if self.params.get('logger') is not None:
490             self.params['logger'].warning(message)
491         else:
492             if self.params.get('no_warnings'):
493                 return
494             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
495                 _msg_header = '\033[0;33mWARNING:\033[0m'
496             else:
497                 _msg_header = 'WARNING:'
498             warning_message = '%s %s' % (_msg_header, message)
499             self.to_stderr(warning_message)
500
501     def report_error(self, message, tb=None):
502         '''
503         Do the same as trouble, but prefixes the message with 'ERROR:', colored
504         in red if stderr is a tty file.
505         '''
506         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
507             _msg_header = '\033[0;31mERROR:\033[0m'
508         else:
509             _msg_header = 'ERROR:'
510         error_message = '%s %s' % (_msg_header, message)
511         self.trouble(error_message, tb)
512
513     def report_file_already_downloaded(self, file_name):
514         """Report file has already been fully downloaded."""
515         try:
516             self.to_screen('[download] %s has already been downloaded' % file_name)
517         except UnicodeEncodeError:
518             self.to_screen('[download] The file has already been downloaded')
519
520     def prepare_filename(self, info_dict):
521         """Generate the output filename."""
522         try:
523             template_dict = dict(info_dict)
524
525             template_dict['epoch'] = int(time.time())
526             autonumber_size = self.params.get('autonumber_size')
527             if autonumber_size is None:
528                 autonumber_size = 5
529             autonumber_templ = '%0' + str(autonumber_size) + 'd'
530             template_dict['autonumber'] = autonumber_templ % self._num_downloads
531             if template_dict.get('playlist_index') is not None:
532                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
533             if template_dict.get('resolution') is None:
534                 if template_dict.get('width') and template_dict.get('height'):
535                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
536                 elif template_dict.get('height'):
537                     template_dict['resolution'] = '%sp' % template_dict['height']
538                 elif template_dict.get('width'):
539                     template_dict['resolution'] = '?x%d' % template_dict['width']
540
541             sanitize = lambda k, v: sanitize_filename(
542                 compat_str(v),
543                 restricted=self.params.get('restrictfilenames'),
544                 is_id=(k == 'id'))
545             template_dict = dict((k, sanitize(k, v))
546                                  for k, v in template_dict.items()
547                                  if v is not None)
548             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
549
550             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
551             tmpl = compat_expanduser(outtmpl)
552             filename = tmpl % template_dict
553             # Temporary fix for #4787
554             # 'Treat' all problem characters by passing filename through preferredencoding
555             # to workaround encoding issues with subprocess on python2 @ Windows
556             if sys.version_info < (3, 0) and sys.platform == 'win32':
557                 filename = encodeFilename(filename, True).decode(preferredencoding())
558             return filename
559         except ValueError as err:
560             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
561             return None
562
563     def _match_entry(self, info_dict, incomplete):
564         """ Returns None iff the file should be downloaded """
565
566         video_title = info_dict.get('title', info_dict.get('id', 'video'))
567         if 'title' in info_dict:
568             # This can happen when we're just evaluating the playlist
569             title = info_dict['title']
570             matchtitle = self.params.get('matchtitle', False)
571             if matchtitle:
572                 if not re.search(matchtitle, title, re.IGNORECASE):
573                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
574             rejecttitle = self.params.get('rejecttitle', False)
575             if rejecttitle:
576                 if re.search(rejecttitle, title, re.IGNORECASE):
577                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
578         date = info_dict.get('upload_date', None)
579         if date is not None:
580             dateRange = self.params.get('daterange', DateRange())
581             if date not in dateRange:
582                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
583         view_count = info_dict.get('view_count', None)
584         if view_count is not None:
585             min_views = self.params.get('min_views')
586             if min_views is not None and view_count < min_views:
587                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
588             max_views = self.params.get('max_views')
589             if max_views is not None and view_count > max_views:
590                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
591         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
592             return 'Skipping "%s" because it is age restricted' % video_title
593         if self.in_download_archive(info_dict):
594             return '%s has already been recorded in archive' % video_title
595
596         if not incomplete:
597             match_filter = self.params.get('match_filter')
598             if match_filter is not None:
599                 ret = match_filter(info_dict)
600                 if ret is not None:
601                     return ret
602
603         return None
604
605     @staticmethod
606     def add_extra_info(info_dict, extra_info):
607         '''Set the keys from extra_info in info dict if they are missing'''
608         for key, value in extra_info.items():
609             info_dict.setdefault(key, value)
610
611     def extract_info(self, url, download=True, ie_key=None, extra_info={},
612                      process=True):
613         '''
614         Returns a list with a dictionary for each video we find.
615         If 'download', also downloads the videos.
616         extra_info is a dict containing the extra values to add to each result
617          '''
618
619         if ie_key:
620             ies = [self.get_info_extractor(ie_key)]
621         else:
622             ies = self._ies
623
624         for ie in ies:
625             if not ie.suitable(url):
626                 continue
627
628             if not ie.working():
629                 self.report_warning('The program functionality for this site has been marked as broken, '
630                                     'and will probably not work.')
631
632             try:
633                 ie_result = ie.extract(url)
634                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
635                     break
636                 if isinstance(ie_result, list):
637                     # Backwards compatibility: old IE result format
638                     ie_result = {
639                         '_type': 'compat_list',
640                         'entries': ie_result,
641                     }
642                 self.add_default_extra_info(ie_result, ie, url)
643                 if process:
644                     return self.process_ie_result(ie_result, download, extra_info)
645                 else:
646                     return ie_result
647             except ExtractorError as de:  # An error we somewhat expected
648                 self.report_error(compat_str(de), de.format_traceback())
649                 break
650             except MaxDownloadsReached:
651                 raise
652             except Exception as e:
653                 if self.params.get('ignoreerrors', False):
654                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
655                     break
656                 else:
657                     raise
658         else:
659             self.report_error('no suitable InfoExtractor for URL %s' % url)
660
661     def add_default_extra_info(self, ie_result, ie, url):
662         self.add_extra_info(ie_result, {
663             'extractor': ie.IE_NAME,
664             'webpage_url': url,
665             'webpage_url_basename': url_basename(url),
666             'extractor_key': ie.ie_key(),
667         })
668
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type'] ('video' by default): plain videos go
        to process_video_result; 'url'/'url_transparent' results are
        re-extracted; 'playlist'/'multi_video' results have each entry
        processed recursively; 'compat_list' is a deprecated legacy format.

        NOTE(review): extra_info uses a mutable default ({}); it appears to be
        only read here (passed through, never mutated in this method), so this
        looks safe in practice -- confirm add_extra_info does not mutate it
        before relying on that.
        """

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist ('in_playlist') only flatten entries that
            # came from a playlist; with extract_flat=True flatten everything.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the embedded
            # one, except for '_type' and 'url' which come from the new info.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            # --playlist-items, e.g. '1-3,7,10-13' -> generator of 1-based
            # indices. It is a one-shot generator; exactly one of the three
            # branches below consumes it.
            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                # NOTE(review): the parameter name 'format' shadows the
                # builtin; it is the raw --playlist-items string.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [ie_entries[i - 1] for i in playlistitems]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Lazily-paged playlist: fetch only the requested slices.
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Per-entry metadata merged into each child result.
                # NOTE(review): 'playlist_index' assumes a contiguous
                # playliststart-based slice; with --playlist-items the real
                # index may differ -- confirm before relying on it.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by --match-title/--dateafter/etc.
                # (incomplete=True: entry metadata may be partial here).
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Legacy path: inject the playlist-level metadata into each raw
            # entry before processing it.
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
830
831     def _apply_format_filter(self, format_spec, available_formats):
832         " Returns a tuple of the remaining format_spec and filtered formats "
833
834         OPERATORS = {
835             '<': operator.lt,
836             '<=': operator.le,
837             '>': operator.gt,
838             '>=': operator.ge,
839             '=': operator.eq,
840             '!=': operator.ne,
841         }
842         operator_rex = re.compile(r'''(?x)\s*\[
843             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
844             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
845             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
846             \]$
847             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
848         m = operator_rex.search(format_spec)
849         if m:
850             try:
851                 comparison_value = int(m.group('value'))
852             except ValueError:
853                 comparison_value = parse_filesize(m.group('value'))
854                 if comparison_value is None:
855                     comparison_value = parse_filesize(m.group('value') + 'B')
856                 if comparison_value is None:
857                     raise ValueError(
858                         'Invalid value %r in format specification %r' % (
859                             m.group('value'), format_spec))
860             op = OPERATORS[m.group('op')]
861
862         if not m:
863             STR_OPERATORS = {
864                 '=': operator.eq,
865                 '!=': operator.ne,
866             }
867             str_operator_rex = re.compile(r'''(?x)\s*\[
868                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
869                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
870                 \s*(?P<value>[a-zA-Z0-9_-]+)
871                 \s*\]$
872                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
873             m = str_operator_rex.search(format_spec)
874             if m:
875                 comparison_value = m.group('value')
876                 op = STR_OPERATORS[m.group('op')]
877
878         if not m:
879             raise ValueError('Invalid format specification %r' % format_spec)
880
881         def _filter(f):
882             actual_value = f.get(m.group('key'))
883             if actual_value is None:
884                 return m.group('none_inclusive')
885             return op(actual_value, comparison_value)
886         new_formats = [f for f in available_formats if _filter(f)]
887
888         new_format_spec = format_spec[:-len(m.group(0))]
889         if not new_format_spec:
890             new_format_spec = 'best'
891
892         return (new_format_spec, new_formats)
893
894     def select_format(self, format_spec, available_formats):
895         while format_spec.endswith(']'):
896             format_spec, available_formats = self._apply_format_filter(
897                 format_spec, available_formats)
898         if not available_formats:
899             return None
900
901         if format_spec == 'best' or format_spec is None:
902             return available_formats[-1]
903         elif format_spec == 'worst':
904             return available_formats[0]
905         elif format_spec == 'bestaudio':
906             audio_formats = [
907                 f for f in available_formats
908                 if f.get('vcodec') == 'none']
909             if audio_formats:
910                 return audio_formats[-1]
911         elif format_spec == 'worstaudio':
912             audio_formats = [
913                 f for f in available_formats
914                 if f.get('vcodec') == 'none']
915             if audio_formats:
916                 return audio_formats[0]
917         elif format_spec == 'bestvideo':
918             video_formats = [
919                 f for f in available_formats
920                 if f.get('acodec') == 'none']
921             if video_formats:
922                 return video_formats[-1]
923         elif format_spec == 'worstvideo':
924             video_formats = [
925                 f for f in available_formats
926                 if f.get('acodec') == 'none']
927             if video_formats:
928                 return video_formats[0]
929         else:
930             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
931             if format_spec in extensions:
932                 filter_f = lambda f: f['ext'] == format_spec
933             else:
934                 filter_f = lambda f: f['format_id'] == format_spec
935             matches = list(filter(filter_f, available_formats))
936             if matches:
937                 return matches[-1]
938         return None
939
940     def _calc_headers(self, info_dict):
941         res = std_headers.copy()
942
943         add_headers = info_dict.get('http_headers')
944         if add_headers:
945             res.update(add_headers)
946
947         cookies = self._calc_cookies(info_dict)
948         if cookies:
949             res['Cookie'] = cookies
950
951         return res
952
953     def _calc_cookies(self, info_dict):
954         class _PseudoRequest(object):
955             def __init__(self, url):
956                 self.url = url
957                 self.headers = {}
958                 self.unverifiable = False
959
960             def add_unredirected_header(self, k, v):
961                 self.headers[k] = v
962
963             def get_full_url(self):
964                 return self.url
965
966             def is_unverifiable(self):
967                 return self.unverifiable
968
969             def has_header(self, h):
970                 return h in self.headers
971
972             def get_header(self, h, default=None):
973                 return self.headers.get(h, default)
974
975         pr = _PseudoRequest(info_dict['url'])
976         self.cookiejar.add_cookie_header(pr)
977         return pr.headers.get('Cookie')
978
    def process_video_result(self, info_dict, download=True):
        """Normalize a single resolved video result and select its formats.

        Fills in derived fields (thumbnails, display_id, upload_date,
        requested_subtitles), validates/normalizes every entry in 'formats',
        then resolves the --format specification into the formats to
        download; if 'download' is true each selected format is handed to
        process_info().  Returns the (mutated) info_dict, updated with the
        best selected format for backwards compatibility.  May return None
        early for pure listing modes (--list-subs, -F, --list-thumbnails).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize the single 'thumbnail' field into a 'thumbnails' list,
        # sorted worst-first, each entry given a resolution and an id.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Best thumbnail is the last one after sorting.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD) from a raw unix timestamp.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        if self.params.get('listsubtitles', False):
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'))
            return
        # Store which subtitles (language -> format dict) were selected for
        # download in the 'requested_subtitles' field.
        info_dict['requested_subtitles'] = self.process_subtitles(info_dict['id'], info_dict.get('subtitles'))

        # This extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: drop everything above the given format (the
        # limiting format itself is kept, hence takewhile *inclusive*).
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # wich can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # A comma separates independent downloads; within each, '/' lists
            # fallbacks tried left to right until one is available.
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a merged-format dict: video fields
                            # from the first format, audio from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1154
1155     def process_subtitles(self, video_id, available_subs):
1156         """Select the requested subtitles and their format"""
1157         if not available_subs:
1158             return available_subs
1159
1160         if self.params.get('allsubtitles', False):
1161             requested_langs = available_subs.keys()
1162         else:
1163             if self.params.get('subtitleslangs', False):
1164                 requested_langs = self.params.get('subtitleslangs')
1165             elif 'en' in available_subs:
1166                 requested_langs = ['en']
1167             else:
1168                 requested_langs = [list(available_subs.keys())[0]]
1169
1170         formats_query = self.params.get('subtitlesformat', 'best')
1171         formats_preference = formats_query.split('/') if formats_query else []
1172         subs = {}
1173         for lang in requested_langs:
1174             formats = available_subs.get(lang)
1175             if formats is None:
1176                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1177                 continue
1178             if isinstance(formats, compat_str):
1179                 # TODO: convert all IE with subtitles support to the new format
1180                 # and remove this
1181                 subs[lang] = {
1182                     'ext': formats_preference[0],
1183                     'data': formats,
1184                 }
1185                 continue
1186             for ext in formats_preference:
1187                 if ext == 'best':
1188                     f = formats[-1]
1189                     break
1190                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1191                 if matches:
1192                     f = matches[-1]
1193                     break
1194             else:
1195                 f = formats[-1]
1196                 self.report_warning(
1197                     'No subtitle format found matching "%s" for language %s, '
1198                     'using %s' % (formats_query, lang, f['ext']))
1199             subs[lang] = f
1200         return subs
1201
1202     def process_info(self, info_dict):
1203         """Process a single resolved IE result."""
1204
1205         assert info_dict.get('_type', 'video') == 'video'
1206
1207         max_downloads = self.params.get('max_downloads')
1208         if max_downloads is not None:
1209             if self._num_downloads >= int(max_downloads):
1210                 raise MaxDownloadsReached()
1211
1212         info_dict['fulltitle'] = info_dict['title']
1213         if len(info_dict['title']) > 200:
1214             info_dict['title'] = info_dict['title'][:197] + '...'
1215
1216         # Keep for backwards compatibility
1217         info_dict['stitle'] = info_dict['title']
1218
1219         if 'format' not in info_dict:
1220             info_dict['format'] = info_dict['ext']
1221
1222         reason = self._match_entry(info_dict, incomplete=False)
1223         if reason is not None:
1224             self.to_screen('[download] ' + reason)
1225             return
1226
1227         self._num_downloads += 1
1228
1229         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1230
1231         # Forced printings
1232         if self.params.get('forcetitle', False):
1233             self.to_stdout(info_dict['fulltitle'])
1234         if self.params.get('forceid', False):
1235             self.to_stdout(info_dict['id'])
1236         if self.params.get('forceurl', False):
1237             if info_dict.get('requested_formats') is not None:
1238                 for f in info_dict['requested_formats']:
1239                     self.to_stdout(f['url'] + f.get('play_path', ''))
1240             else:
1241                 # For RTMP URLs, also include the playpath
1242                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1243         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1244             self.to_stdout(info_dict['thumbnail'])
1245         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1246             self.to_stdout(info_dict['description'])
1247         if self.params.get('forcefilename', False) and filename is not None:
1248             self.to_stdout(filename)
1249         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1250             self.to_stdout(formatSeconds(info_dict['duration']))
1251         if self.params.get('forceformat', False):
1252             self.to_stdout(info_dict['format'])
1253         if self.params.get('forcejson', False):
1254             self.to_stdout(json.dumps(info_dict))
1255
1256         # Do nothing else if in simulate mode
1257         if self.params.get('simulate', False):
1258             return
1259
1260         if filename is None:
1261             return
1262
1263         try:
1264             dn = os.path.dirname(encodeFilename(filename))
1265             if dn and not os.path.exists(dn):
1266                 os.makedirs(dn)
1267         except (OSError, IOError) as err:
1268             self.report_error('unable to create directory ' + compat_str(err))
1269             return
1270
1271         if self.params.get('writedescription', False):
1272             descfn = filename + '.description'
1273             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1274                 self.to_screen('[info] Video description is already present')
1275             elif info_dict.get('description') is None:
1276                 self.report_warning('There\'s no description to write.')
1277             else:
1278                 try:
1279                     self.to_screen('[info] Writing video description to: ' + descfn)
1280                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1281                         descfile.write(info_dict['description'])
1282                 except (OSError, IOError):
1283                     self.report_error('Cannot write description file ' + descfn)
1284                     return
1285
1286         if self.params.get('writeannotations', False):
1287             annofn = filename + '.annotations.xml'
1288             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1289                 self.to_screen('[info] Video annotations are already present')
1290             else:
1291                 try:
1292                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1293                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1294                         annofile.write(info_dict['annotations'])
1295                 except (KeyError, TypeError):
1296                     self.report_warning('There are no annotations to write.')
1297                 except (OSError, IOError):
1298                     self.report_error('Cannot write annotations file: ' + annofn)
1299                     return
1300
1301         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1302                                        self.params.get('writeautomaticsub')])
1303
1304         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1305             # subtitles download errors are already managed as troubles in relevant IE
1306             # that way it will silently go on when used with unsupporting IE
1307             subtitles = info_dict['requested_subtitles']
1308             for sub_lang, sub_info in subtitles.items():
1309                 sub_format = sub_info['ext']
1310                 if sub_info.get('data') is not None:
1311                     sub_data = sub_info['data']
1312                 else:
1313                     try:
1314                         uf = self.urlopen(sub_info['url'])
1315                         sub_data = uf.read().decode('utf-8')
1316                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1317                         self.report_warning('Unable to download subtitle for "%s": %s' %
1318                                             (sub_lang, compat_str(err)))
1319                         continue
1320                 try:
1321                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1322                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1323                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1324                     else:
1325                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1326                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1327                             subfile.write(sub_data)
1328                 except (OSError, IOError):
1329                     self.report_error('Cannot write subtitles file ' + sub_filename)
1330                     return
1331
1332         if self.params.get('writeinfojson', False):
1333             infofn = os.path.splitext(filename)[0] + '.info.json'
1334             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1335                 self.to_screen('[info] Video description metadata is already present')
1336             else:
1337                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1338                 try:
1339                     write_json_file(info_dict, infofn)
1340                 except (OSError, IOError):
1341                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1342                     return
1343
1344         self._write_thumbnails(info_dict, filename)
1345
1346         if not self.params.get('skip_download', False):
1347             try:
1348                 def dl(name, info):
1349                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1350                     for ph in self._progress_hooks:
1351                         fd.add_progress_hook(ph)
1352                     if self.params.get('verbose'):
1353                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1354                     return fd.download(name, info)
1355
1356                 if info_dict.get('requested_formats') is not None:
1357                     downloaded = []
1358                     success = True
1359                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1360                     if not merger._executable:
1361                         postprocessors = []
1362                         self.report_warning('You have requested multiple '
1363                                             'formats but ffmpeg or avconv are not installed.'
1364                                             ' The formats won\'t be merged')
1365                     else:
1366                         postprocessors = [merger]
1367                     for f in info_dict['requested_formats']:
1368                         new_info = dict(info_dict)
1369                         new_info.update(f)
1370                         fname = self.prepare_filename(new_info)
1371                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1372                         downloaded.append(fname)
1373                         partial_success = dl(fname, new_info)
1374                         success = success and partial_success
1375                     info_dict['__postprocessors'] = postprocessors
1376                     info_dict['__files_to_merge'] = downloaded
1377                 else:
1378                     # Just a single file
1379                     success = dl(filename, info_dict)
1380             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1381                 self.report_error('unable to download video data: %s' % str(err))
1382                 return
1383             except (OSError, IOError) as err:
1384                 raise UnavailableVideoError(err)
1385             except (ContentTooShortError, ) as err:
1386                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1387                 return
1388
1389             if success:
1390                 # Fixup content
1391                 fixup_policy = self.params.get('fixup')
1392                 if fixup_policy is None:
1393                     fixup_policy = 'detect_or_warn'
1394
1395                 stretched_ratio = info_dict.get('stretched_ratio')
1396                 if stretched_ratio is not None and stretched_ratio != 1:
1397                     if fixup_policy == 'warn':
1398                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1399                             info_dict['id'], stretched_ratio))
1400                     elif fixup_policy == 'detect_or_warn':
1401                         stretched_pp = FFmpegFixupStretchedPP(self)
1402                         if stretched_pp.available:
1403                             info_dict.setdefault('__postprocessors', [])
1404                             info_dict['__postprocessors'].append(stretched_pp)
1405                         else:
1406                             self.report_warning(
1407                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1408                                     info_dict['id'], stretched_ratio))
1409                     else:
1410                         assert fixup_policy in ('ignore', 'never')
1411
1412                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1413                     if fixup_policy == 'warn':
1414                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1415                             info_dict['id']))
1416                     elif fixup_policy == 'detect_or_warn':
1417                         fixup_pp = FFmpegFixupM4aPP(self)
1418                         if fixup_pp.available:
1419                             info_dict.setdefault('__postprocessors', [])
1420                             info_dict['__postprocessors'].append(fixup_pp)
1421                         else:
1422                             self.report_warning(
1423                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1424                                     info_dict['id']))
1425                     else:
1426                         assert fixup_policy in ('ignore', 'never')
1427
1428                 try:
1429                     self.post_process(filename, info_dict)
1430                 except (PostProcessingError) as err:
1431                     self.report_error('postprocessing: %s' % str(err))
1432                     return
1433                 self.record_download_archive(info_dict)
1434
1435     def download(self, url_list):
1436         """Download a given list of URLs."""
1437         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1438         if (len(url_list) > 1 and
1439                 '%' not in outtmpl
1440                 and self.params.get('max_downloads') != 1):
1441             raise SameFileError(outtmpl)
1442
1443         for url in url_list:
1444             try:
1445                 # It also downloads the videos
1446                 res = self.extract_info(url)
1447             except UnavailableVideoError:
1448                 self.report_error('unable to download video')
1449             except MaxDownloadsReached:
1450                 self.to_screen('[info] Maximum number of downloaded files reached.')
1451                 raise
1452             else:
1453                 if self.params.get('dump_single_json', False):
1454                     self.to_stdout(json.dumps(res))
1455
1456         return self._download_retcode
1457
1458     def download_with_info_file(self, info_filename):
1459         with io.open(info_filename, 'r', encoding='utf-8') as f:
1460             info = json.load(f)
1461         try:
1462             self.process_ie_result(info, download=True)
1463         except DownloadError:
1464             webpage_url = info.get('webpage_url')
1465             if webpage_url is not None:
1466                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1467                 return self.download([webpage_url])
1468             else:
1469                 raise
1470         return self._download_retcode
1471
1472     def post_process(self, filename, ie_info):
1473         """Run all the postprocessors on the given file."""
1474         info = dict(ie_info)
1475         info['filepath'] = filename
1476         pps_chain = []
1477         if ie_info.get('__postprocessors') is not None:
1478             pps_chain.extend(ie_info['__postprocessors'])
1479         pps_chain.extend(self._pps)
1480         for pp in pps_chain:
1481             keep_video = None
1482             old_filename = info['filepath']
1483             try:
1484                 keep_video_wish, info = pp.run(info)
1485                 if keep_video_wish is not None:
1486                     if keep_video_wish:
1487                         keep_video = keep_video_wish
1488                     elif keep_video is None:
1489                         # No clear decision yet, let IE decide
1490                         keep_video = keep_video_wish
1491             except PostProcessingError as e:
1492                 self.report_error(e.msg)
1493             if keep_video is False and not self.params.get('keepvideo', False):
1494                 try:
1495                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1496                     os.remove(encodeFilename(old_filename))
1497                 except (IOError, OSError):
1498                     self.report_warning('Unable to remove downloaded video file')
1499
1500     def _make_archive_id(self, info_dict):
1501         # Future-proof against any change in case
1502         # and backwards compatibility with prior versions
1503         extractor = info_dict.get('extractor_key')
1504         if extractor is None:
1505             if 'id' in info_dict:
1506                 extractor = info_dict.get('ie_key')  # key in a playlist
1507         if extractor is None:
1508             return None  # Incomplete video information
1509         return extractor.lower() + ' ' + info_dict['id']
1510
1511     def in_download_archive(self, info_dict):
1512         fn = self.params.get('download_archive')
1513         if fn is None:
1514             return False
1515
1516         vid_id = self._make_archive_id(info_dict)
1517         if vid_id is None:
1518             return False  # Incomplete video information
1519
1520         try:
1521             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1522                 for line in archive_file:
1523                     if line.strip() == vid_id:
1524                         return True
1525         except IOError as ioe:
1526             if ioe.errno != errno.ENOENT:
1527                 raise
1528         return False
1529
1530     def record_download_archive(self, info_dict):
1531         fn = self.params.get('download_archive')
1532         if fn is None:
1533             return
1534         vid_id = self._make_archive_id(info_dict)
1535         assert vid_id
1536         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1537             archive_file.write(vid_id + '\n')
1538
1539     @staticmethod
1540     def format_resolution(format, default='unknown'):
1541         if format.get('vcodec') == 'none':
1542             return 'audio only'
1543         if format.get('resolution') is not None:
1544             return format['resolution']
1545         if format.get('height') is not None:
1546             if format.get('width') is not None:
1547                 res = '%sx%s' % (format['width'], format['height'])
1548             else:
1549                 res = '%sp' % format['height']
1550         elif format.get('width') is not None:
1551             res = '?x%d' % format['width']
1552         else:
1553             res = default
1554         return res
1555
    def _format_note(self, fdict):
        """Assemble a short human-readable note describing a format dict.

        Pieces (bitrates, codecs, fps, filesize, ...) are accumulated in a
        fixed order with ', ' separators; the result may be an empty string
        when nothing about the format is known.
        """
        res = ''
        # f4f/f4m (Adobe HDS) formats are flagged as unsupported up front.
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec with the video bitrate appended below.
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video bitrate known but codec unknown: label the number explicitly.
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the size as an estimate rather than an exact value.
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1605
1606     def list_formats(self, info_dict):
1607         def line(format, idlen=20):
1608             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1609                 format['format_id'],
1610                 format['ext'],
1611                 self.format_resolution(format),
1612                 self._format_note(format),
1613             ))
1614
1615         formats = info_dict.get('formats', [info_dict])
1616         idlen = max(len('format code'),
1617                     max(len(f['format_id']) for f in formats))
1618         formats_s = [
1619             line(f, idlen) for f in formats
1620             if f.get('preference') is None or f['preference'] >= -1000]
1621         if len(formats) > 1:
1622             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1623
1624         header_line = line({
1625             'format_id': 'format code', 'ext': 'extension',
1626             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1627         self.to_screen(
1628             '[info] Available formats for %s:\n%s\n%s' %
1629             (info_dict['id'], header_line, '\n'.join(formats_s)))
1630
1631     def list_thumbnails(self, info_dict):
1632         thumbnails = info_dict.get('thumbnails')
1633         if not thumbnails:
1634             tn_url = info_dict.get('thumbnail')
1635             if tn_url:
1636                 thumbnails = [{'id': '0', 'url': tn_url}]
1637             else:
1638                 self.to_screen(
1639                     '[info] No thumbnails present for %s' % info_dict['id'])
1640                 return
1641
1642         self.to_screen(
1643             '[info] Thumbnails for %s:' % info_dict['id'])
1644         self.to_screen(render_table(
1645             ['ID', 'width', 'height', 'URL'],
1646             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1647
1648     def list_subtitles(self, video_id, subtitles):
1649         if not subtitles:
1650             self.to_screen('%s has no subtitles' % video_id)
1651             return
1652         header_line = 'Language    formats'
1653         sub_lines = [
1654             '%-12s%s' % (lang, ', '.join(f['ext'] for f in reversed(formats)))
1655             for lang, formats in subtitles.items()]
1656         self.to_screen(
1657             'Available subtitles for %s:\n%s\n%s' %
1658             (video_id, header_line, '\n'.join(sub_lines)))
1659
1660     def urlopen(self, req):
1661         """ Start an HTTP download """
1662
1663         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1664         # always respected by websites, some tend to give out URLs with non percent-encoded
1665         # non-ASCII characters (see telemb.py, ard.py [#3412])
1666         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1667         # To work around aforementioned issue we will replace request's original URL with
1668         # percent-encoded one
1669         req_is_string = isinstance(req, compat_basestring)
1670         url = req if req_is_string else req.get_full_url()
1671         url_escaped = escape_url(url)
1672
1673         # Substitute URL if any change after escaping
1674         if url != url_escaped:
1675             if req_is_string:
1676                 req = url_escaped
1677             else:
1678                 req = compat_urllib_request.Request(
1679                     url_escaped, data=req.data, headers=req.headers,
1680                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1681
1682         return self._opener.open(req, timeout=self._socket_timeout)
1683
1684     def print_debug_header(self):
1685         if not self.params.get('verbose'):
1686             return
1687
1688         if type('') is not compat_str:
1689             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1690             self.report_warning(
1691                 'Your Python is broken! Update to a newer and supported version')
1692
1693         stdout_encoding = getattr(
1694             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1695         encoding_str = (
1696             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1697                 locale.getpreferredencoding(),
1698                 sys.getfilesystemencoding(),
1699                 stdout_encoding,
1700                 self.get_encoding()))
1701         write_string(encoding_str, encoding=None)
1702
1703         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1704         try:
1705             sp = subprocess.Popen(
1706                 ['git', 'rev-parse', '--short', 'HEAD'],
1707                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1708                 cwd=os.path.dirname(os.path.abspath(__file__)))
1709             out, err = sp.communicate()
1710             out = out.decode().strip()
1711             if re.match('[0-9a-f]+', out):
1712                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1713         except:
1714             try:
1715                 sys.exc_clear()
1716             except:
1717                 pass
1718         self._write_string('[debug] Python version %s - %s\n' % (
1719             platform.python_version(), platform_name()))
1720
1721         exe_versions = FFmpegPostProcessor.get_versions()
1722         exe_versions['rtmpdump'] = rtmpdump_version()
1723         exe_str = ', '.join(
1724             '%s %s' % (exe, v)
1725             for exe, v in sorted(exe_versions.items())
1726             if v
1727         )
1728         if not exe_str:
1729             exe_str = 'none'
1730         self._write_string('[debug] exe versions: %s\n' % exe_str)
1731
1732         proxy_map = {}
1733         for handler in self._opener.handlers:
1734             if hasattr(handler, 'proxies'):
1735                 proxy_map.update(handler.proxies)
1736         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1737
1738         if self.params.get('call_home', False):
1739             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1740             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1741             latest_version = self.urlopen(
1742                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1743             if version_tuple(latest_version) > version_tuple(__version__):
1744                 self.report_warning(
1745                     'You are using an outdated version (newest version: %s)! '
1746                     'See https://yt-dl.org/update if you need help updating.' %
1747                     latest_version)
1748
1749     def _setup_opener(self):
1750         timeout_val = self.params.get('socket_timeout')
1751         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1752
1753         opts_cookiefile = self.params.get('cookiefile')
1754         opts_proxy = self.params.get('proxy')
1755
1756         if opts_cookiefile is None:
1757             self.cookiejar = compat_cookiejar.CookieJar()
1758         else:
1759             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1760                 opts_cookiefile)
1761             if os.access(opts_cookiefile, os.R_OK):
1762                 self.cookiejar.load()
1763
1764         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1765             self.cookiejar)
1766         if opts_proxy is not None:
1767             if opts_proxy == '':
1768                 proxies = {}
1769             else:
1770                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1771         else:
1772             proxies = compat_urllib_request.getproxies()
1773             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1774             if 'http' in proxies and 'https' not in proxies:
1775                 proxies['https'] = proxies['http']
1776         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1777
1778         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1779         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1780         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1781         opener = compat_urllib_request.build_opener(
1782             https_handler, proxy_handler, cookie_processor, ydlh)
1783         # Delete the default user-agent header, which would otherwise apply in
1784         # cases where our custom HTTP handler doesn't come into play
1785         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1786         opener.addheaders = []
1787         self._opener = opener
1788
1789     def encode(self, s):
1790         if isinstance(s, bytes):
1791             return s  # Already encoded
1792
1793         try:
1794             return s.encode(self.get_encoding())
1795         except UnicodeEncodeError as err:
1796             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1797             raise
1798
1799     def get_encoding(self):
1800         encoding = self.params.get('encoding')
1801         if encoding is None:
1802             encoding = preferredencoding()
1803         return encoding
1804
    def _write_thumbnails(self, info_dict, filename):
        """Download and save thumbnail image(s) next to the video file.

        'writethumbnail' saves only the last thumbnail in the list,
        'write_all_thumbnails' saves every one; otherwise this is a no-op.
        Download failures are reported as warnings, never raised.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # assumes the last entry is the preferred thumbnail — TODO
                # confirm the ordering guarantee from the extractors
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Only disambiguate file names / messages when several thumbnails
            # are being written.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A failed thumbnail download is not fatal for the video itself.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))