827c88e0d9ebb3821d2c4e6a5ec38a0c90c390ba
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     sanitize_path,
65     std_headers,
66     subtitles_filename,
67     UnavailableVideoError,
68     url_basename,
69     version_tuple,
70     write_json_file,
71     write_string,
72     YoutubeDLHandler,
73     prepend_extension,
74     args_to_str,
75     age_restricted,
76 )
77 from .cache import Cache
78 from .extractor import get_info_extractor, gen_extractors
79 from .downloader import get_suitable_downloader
80 from .downloader.rtmp import rtmpdump_version
81 from .postprocessor import (
82     FFmpegFixupM4aPP,
83     FFmpegFixupStretchedPP,
84     FFmpegMergerPP,
85     FFmpegPostProcessor,
86     get_postprocessor,
87 )
88 from .version import __version__
89
90
91 class YoutubeDL(object):
92     """YoutubeDL class.
93
94     YoutubeDL objects are the ones responsible of downloading the
95     actual video file and writing it to disk if the user has requested
96     it, among some other tasks. In most cases there should be one per
97     program. As, given a video URL, the downloader doesn't know how to
98     extract all the needed information, task that InfoExtractors do, it
99     has to pass the URL to one of them.
100
101     For this, YoutubeDL objects have a method that allows
102     InfoExtractors to be registered in a given order. When it is passed
103     a URL, the YoutubeDL object handles it to the first InfoExtractor it
104     finds that reports being able to handle it. The InfoExtractor extracts
105     all the information about the video or videos the URL refers to, and
106     YoutubeDL process the extracted information, possibly using a File
107     Downloader to download the video.
108
109     YoutubeDL objects accept a lot of parameters. In order not to saturate
110     the object constructor with arguments, it receives a dictionary of
111     options instead. These options are available through the params
112     attribute for the InfoExtractors to use. The YoutubeDL also
113     registers itself as the downloader in charge for the InfoExtractors
114     that are added to it, so this is a "mutual registration".
115
116     Available options:
117
118     username:          Username for authentication purposes.
119     password:          Password for authentication purposes.
120     videopassword:     Password for accessing a video.
121     usenetrc:          Use netrc for authentication instead.
122     verbose:           Print additional info to stdout.
123     quiet:             Do not print messages to stdout.
124     no_warnings:       Do not print out anything for warnings.
125     forceurl:          Force printing final URL.
126     forcetitle:        Force printing title.
127     forceid:           Force printing ID.
128     forcethumbnail:    Force printing thumbnail URL.
129     forcedescription:  Force printing description.
130     forcefilename:     Force printing final filename.
131     forceduration:     Force printing duration.
132     forcejson:         Force printing info_dict as JSON.
133     dump_single_json:  Force printing the info_dict of the whole playlist
134                        (or video) as a single JSON line.
135     simulate:          Do not download the video files.
136     format:            Video format code. See options.py for more information.
137     outtmpl:           Template for output names.
138     restrictfilenames: Do not allow "&" and spaces in file names
139     ignoreerrors:      Do not stop on download errors.
140     nooverwrites:      Prevent overwriting files.
141     playliststart:     Playlist item to start at.
142     playlistend:       Playlist item to end at.
143     playlist_items:    Specific indices of playlist to download.
144     playlistreverse:   Download playlist items in reverse order.
145     matchtitle:        Download only matching titles.
146     rejecttitle:       Reject downloads for matching titles.
147     logger:            Log messages to a logging.Logger instance.
148     logtostderr:       Log messages to stderr instead of stdout.
149     writedescription:  Write the video description to a .description file
150     writeinfojson:     Write the video description to a .info.json file
151     writeannotations:  Write the video annotations to a .annotations.xml file
152     writethumbnail:    Write the thumbnail image to a file
153     write_all_thumbnails:  Write all thumbnail formats to files
154     writesubtitles:    Write the video subtitles to a file
155     writeautomaticsub: Write the automatic subtitles to a file
156     allsubtitles:      Downloads all the subtitles of the video
157                        (requires writesubtitles or writeautomaticsub)
158     listsubtitles:     Lists all available subtitles for the video
159     subtitlesformat:   The format code for subtitles
160     subtitleslangs:    List of languages of the subtitles to download
161     keepvideo:         Keep the video file after post-processing
162     daterange:         A DateRange object, download only if the upload_date is in the range.
163     skip_download:     Skip the actual download of the video file
164     cachedir:          Location of the cache files in the filesystem.
165                        False to disable filesystem cache.
166     noplaylist:        Download single video instead of a playlist if in doubt.
167     age_limit:         An integer representing the user's age in years.
168                        Unsuitable videos for the given age are skipped.
169     min_views:         An integer representing the minimum view count the video
170                        must have in order to not be skipped.
171                        Videos without view count information are always
172                        downloaded. None for no limit.
173     max_views:         An integer representing the maximum view count.
174                        Videos that are more popular than that are not
175                        downloaded.
176                        Videos without view count information are always
177                        downloaded. None for no limit.
178     download_archive:  File name of a file where all downloads are recorded.
179                        Videos already present in the file are not downloaded
180                        again.
181     cookiefile:        File name where cookies should be read from and dumped to.
182     nocheckcertificate:Do not verify SSL certificates
183     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
184                        At the moment, this is only supported by YouTube.
185     proxy:             URL of the proxy server to use
186     cn_verification_proxy:  URL of the proxy to use for IP address verification
187                        on Chinese sites. (Experimental)
188     socket_timeout:    Time to wait for unresponsive hosts, in seconds
189     bidi_workaround:   Work around buggy terminals without bidirectional text
190                        support, using fribidi
191     debug_printtraffic:Print out sent and received HTTP traffic
192     include_ads:       Download ads as well
193     default_search:    Prepend this string if an input url is not valid.
194                        'auto' for elaborate guessing
195     encoding:          Use this encoding instead of the system-specified.
196     extract_flat:      Do not resolve URLs, return the immediate result.
197                        Pass in 'in_playlist' to only show this behavior for
198                        playlist items.
199     postprocessors:    A list of dictionaries, each with an entry
200                        * key:  The name of the postprocessor. See
201                                youtube_dl/postprocessor/__init__.py for a list.
202                        as well as any further keyword arguments for the
203                        postprocessor.
204     progress_hooks:    A list of functions that get called on download
205                        progress, with a dictionary with the entries
206                        * status: One of "downloading", "error", or "finished".
207                                  Check this first and ignore unknown values.
208
209                        If status is one of "downloading", or "finished", the
210                        following properties may also be present:
211                        * filename: The final filename (always present)
212                        * tmpfilename: The filename we're currently writing to
213                        * downloaded_bytes: Bytes on disk
214                        * total_bytes: Size of the whole file, None if unknown
215                        * total_bytes_estimate: Guess of the eventual file size,
216                                                None if unavailable.
217                        * elapsed: The number of seconds since download started.
218                        * eta: The estimated time in seconds, None if unknown
219                        * speed: The download speed in bytes/second, None if
220                                 unknown
221                        * fragment_index: The counter of the currently
222                                          downloaded video fragment.
223                        * fragment_count: The number of fragments (= individual
224                                          files that will be merged)
225
226                        Progress hooks are guaranteed to be called at least once
227                        (with status "finished") if the download is successful.
228     merge_output_format: Extension to use when merging formats.
229     fixup:             Automatically correct known faults of the file.
230                        One of:
231                        - "never": do nothing
232                        - "warn": only emit a warning
233                        - "detect_or_warn": check whether we can do anything
234                                            about it, warn otherwise (default)
235     source_address:    (Experimental) Client-side IP address to bind to.
236     call_home:         Boolean, true iff we are allowed to contact the
237                        youtube-dl servers for debugging.
238     sleep_interval:    Number of seconds to sleep before each download.
239     listformats:       Print an overview of available video formats and exit.
240     list_thumbnails:   Print a table of all thumbnails and exit.
241     match_filter:      A function that gets called with the info_dict of
242                        every video.
243                        If it returns a message, the video is ignored.
244                        If it returns None, the video is downloaded.
245                        match_filter_func in utils.py is one example for this.
246     no_color:          Do not emit color codes in output.
247
248     The following options determine which downloader is picked:
249     external_downloader: Executable of the external downloader to call.
250                        None or unset for standard (built-in) downloader.
251     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
252
253     The following parameters are not used by YoutubeDL itself, they are used by
254     the downloader (see youtube_dl/downloader/common.py):
255     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
256     noresizebuffer, retries, continuedl, noprogress, consoletitle,
257     xattr_set_filesize, external_downloader_args.
258
259     The following options are used by the post processors:
260     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
261                        otherwise prefer avconv.
262     exec_cmd:          Arbitrary command to run after downloading
263     """
264
    # Class-level placeholders. __init__ rebinds every one of these names on
    # the instance (params, _ies, _pps, _download_retcode, _num_downloads and
    # _screen_file), so the values below are only fallbacks.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
271
272     def __init__(self, params=None, auto_init=True):
273         """Create a FileDownloader object with the given options."""
274         if params is None:
275             params = {}
276         self._ies = []
277         self._ies_instances = {}
278         self._pps = []
279         self._progress_hooks = []
280         self._download_retcode = 0
281         self._num_downloads = 0
282         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
283         self._err_file = sys.stderr
284         self.params = params
285         self.cache = Cache(self)
286
287         if params.get('bidi_workaround', False):
288             try:
289                 import pty
290                 master, slave = pty.openpty()
291                 width = compat_get_terminal_size().columns
292                 if width is None:
293                     width_args = []
294                 else:
295                     width_args = ['-w', str(width)]
296                 sp_kwargs = dict(
297                     stdin=subprocess.PIPE,
298                     stdout=slave,
299                     stderr=self._err_file)
300                 try:
301                     self._output_process = subprocess.Popen(
302                         ['bidiv'] + width_args, **sp_kwargs
303                     )
304                 except OSError:
305                     self._output_process = subprocess.Popen(
306                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
307                 self._output_channel = os.fdopen(master, 'rb')
308             except OSError as ose:
309                 if ose.errno == 2:
310                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
311                 else:
312                     raise
313
314         if (sys.version_info >= (3,) and sys.platform != 'win32' and
315                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
316                 not params.get('restrictfilenames', False)):
317             # On Python 3, the Unicode filesystem API will throw errors (#1474)
318             self.report_warning(
319                 'Assuming --restrict-filenames since file system encoding '
320                 'cannot encode all characters. '
321                 'Set the LC_ALL environment variable to fix this.')
322             self.params['restrictfilenames'] = True
323
324         if isinstance(params.get('outtmpl'), bytes):
325             self.report_warning(
326                 'Parameter outtmpl is bytes, but should be a unicode string. '
327                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
328
329         self._setup_opener()
330
331         if auto_init:
332             self.print_debug_header()
333             self.add_default_info_extractors()
334
335         for pp_def_raw in self.params.get('postprocessors', []):
336             pp_class = get_postprocessor(pp_def_raw['key'])
337             pp_def = dict(pp_def_raw)
338             del pp_def['key']
339             pp = pp_class(self, **compat_kwargs(pp_def))
340             self.add_post_processor(pp)
341
342         for ph in self.params.get('progress_hooks', []):
343             self.add_progress_hook(ph)
344
345     def warn_if_short_id(self, argv):
346         # short YouTube ID starting with dash?
347         idxs = [
348             i for i, a in enumerate(argv)
349             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
350         if idxs:
351             correct_argv = (
352                 ['youtube-dl'] +
353                 [a for i, a in enumerate(argv) if i not in idxs] +
354                 ['--'] + [argv[i] for i in idxs]
355             )
356             self.report_warning(
357                 'Long argument string detected. '
358                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
359                 args_to_str(correct_argv))
360
361     def add_info_extractor(self, ie):
362         """Add an InfoExtractor object to the end of the list."""
363         self._ies.append(ie)
364         self._ies_instances[ie.ie_key()] = ie
365         ie.set_downloader(self)
366
367     def get_info_extractor(self, ie_key):
368         """
369         Get an instance of an IE with name ie_key, it will try to get one from
370         the _ies list, if there's no instance it will create a new one and add
371         it to the extractor list.
372         """
373         ie = self._ies_instances.get(ie_key)
374         if ie is None:
375             ie = get_info_extractor(ie_key)()
376             self.add_info_extractor(ie)
377         return ie
378
379     def add_default_info_extractors(self):
380         """
381         Add the InfoExtractors returned by gen_extractors to the end of the list
382         """
383         for ie in gen_extractors():
384             self.add_info_extractor(ie)
385
386     def add_post_processor(self, pp):
387         """Add a PostProcessor object to the end of the chain."""
388         self._pps.append(pp)
389         pp.set_downloader(self)
390
391     def add_progress_hook(self, ph):
392         """Add the progress hook (currently only for the file downloader)"""
393         self._progress_hooks.append(ph)
394
395     def _bidi_workaround(self, message):
396         if not hasattr(self, '_output_channel'):
397             return message
398
399         assert hasattr(self, '_output_process')
400         assert isinstance(message, compat_str)
401         line_count = message.count('\n') + 1
402         self._output_process.stdin.write((message + '\n').encode('utf-8'))
403         self._output_process.stdin.flush()
404         res = ''.join(self._output_channel.readline().decode('utf-8')
405                       for _ in range(line_count))
406         return res[:-len('\n')]
407
408     def to_screen(self, message, skip_eol=False):
409         """Print message to stdout if not in quiet mode."""
410         return self.to_stdout(message, skip_eol, check_quiet=True)
411
412     def _write_string(self, s, out=None):
413         write_string(s, out=out, encoding=self.params.get('encoding'))
414
415     def to_stdout(self, message, skip_eol=False, check_quiet=False):
416         """Print message to stdout if not in quiet mode."""
417         if self.params.get('logger'):
418             self.params['logger'].debug(message)
419         elif not check_quiet or not self.params.get('quiet', False):
420             message = self._bidi_workaround(message)
421             terminator = ['\n', ''][skip_eol]
422             output = message + terminator
423
424             self._write_string(output, self._screen_file)
425
426     def to_stderr(self, message):
427         """Print message to stderr."""
428         assert isinstance(message, compat_str)
429         if self.params.get('logger'):
430             self.params['logger'].error(message)
431         else:
432             message = self._bidi_workaround(message)
433             output = message + '\n'
434             self._write_string(output, self._err_file)
435
436     def to_console_title(self, message):
437         if not self.params.get('consoletitle', False):
438             return
439         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
440             # c_wchar_p() might not be necessary if `message` is
441             # already of type unicode()
442             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
443         elif 'TERM' in os.environ:
444             self._write_string('\033]0;%s\007' % message, self._screen_file)
445
446     def save_console_title(self):
447         if not self.params.get('consoletitle', False):
448             return
449         if 'TERM' in os.environ:
450             # Save the title on stack
451             self._write_string('\033[22;0t', self._screen_file)
452
453     def restore_console_title(self):
454         if not self.params.get('consoletitle', False):
455             return
456         if 'TERM' in os.environ:
457             # Restore the title from stack
458             self._write_string('\033[23;0t', self._screen_file)
459
460     def __enter__(self):
461         self.save_console_title()
462         return self
463
464     def __exit__(self, *args):
465         self.restore_console_title()
466
467         if self.params.get('cookiefile') is not None:
468             self.cookiejar.save()
469
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        message, if given, is printed to stderr first.
        tb, if given, is additional traceback information; it is only
        printed when the 'verbose' option is set.

        Raises DownloadError unless the 'ignoreerrors' option is set; in
        that case the download return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                # No traceback supplied: build one from the exception being
                # handled, or from the current call stack if there is none.
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # An exception may carry the exc_info of an earlier failure
                    # in its own 'exc_info' attribute; print that one first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Hand the wrapped exc_info to DownloadError when the exception
            # being handled carries one, otherwise the current exc_info.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are ignored: only record the failure in the return code
        self._download_retcode = 1
499
500     def report_warning(self, message):
501         '''
502         Print the message to stderr, it will be prefixed with 'WARNING:'
503         If stderr is a tty file the 'WARNING:' will be colored
504         '''
505         if self.params.get('logger') is not None:
506             self.params['logger'].warning(message)
507         else:
508             if self.params.get('no_warnings'):
509                 return
510             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
511                 _msg_header = '\033[0;33mWARNING:\033[0m'
512             else:
513                 _msg_header = 'WARNING:'
514             warning_message = '%s %s' % (_msg_header, message)
515             self.to_stderr(warning_message)
516
517     def report_error(self, message, tb=None):
518         '''
519         Do the same as trouble, but prefixes the message with 'ERROR:', colored
520         in red if stderr is a tty file.
521         '''
522         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
523             _msg_header = '\033[0;31mERROR:\033[0m'
524         else:
525             _msg_header = 'ERROR:'
526         error_message = '%s %s' % (_msg_header, message)
527         self.trouble(error_message, tb)
528
529     def report_file_already_downloaded(self, file_name):
530         """Report file has already been fully downloaded."""
531         try:
532             self.to_screen('[download] %s has already been downloaded' % file_name)
533         except UnicodeEncodeError:
534             self.to_screen('[download] The file has already been downloaded')
535
536     def prepare_filename(self, info_dict):
537         """Generate the output filename."""
538         try:
539             template_dict = dict(info_dict)
540
541             template_dict['epoch'] = int(time.time())
542             autonumber_size = self.params.get('autonumber_size')
543             if autonumber_size is None:
544                 autonumber_size = 5
545             autonumber_templ = '%0' + str(autonumber_size) + 'd'
546             template_dict['autonumber'] = autonumber_templ % self._num_downloads
547             if template_dict.get('playlist_index') is not None:
548                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
549             if template_dict.get('resolution') is None:
550                 if template_dict.get('width') and template_dict.get('height'):
551                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
552                 elif template_dict.get('height'):
553                     template_dict['resolution'] = '%sp' % template_dict['height']
554                 elif template_dict.get('width'):
555                     template_dict['resolution'] = '?x%d' % template_dict['width']
556
557             sanitize = lambda k, v: sanitize_filename(
558                 compat_str(v),
559                 restricted=self.params.get('restrictfilenames'),
560                 is_id=(k == 'id'))
561             template_dict = dict((k, sanitize(k, v))
562                                  for k, v in template_dict.items()
563                                  if v is not None)
564             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
565
566             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
567             tmpl = compat_expanduser(outtmpl)
568             filename = tmpl % template_dict
569             # Temporary fix for #4787
570             # 'Treat' all problem characters by passing filename through preferredencoding
571             # to workaround encoding issues with subprocess on python2 @ Windows
572             if sys.version_info < (3, 0) and sys.platform == 'win32':
573                 filename = encodeFilename(filename, True).decode(preferredencoding())
574             return filename
575         except ValueError as err:
576             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
577             return None
578
579     def _match_entry(self, info_dict, incomplete):
580         """ Returns None iff the file should be downloaded """
581
582         video_title = info_dict.get('title', info_dict.get('id', 'video'))
583         if 'title' in info_dict:
584             # This can happen when we're just evaluating the playlist
585             title = info_dict['title']
586             matchtitle = self.params.get('matchtitle', False)
587             if matchtitle:
588                 if not re.search(matchtitle, title, re.IGNORECASE):
589                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
590             rejecttitle = self.params.get('rejecttitle', False)
591             if rejecttitle:
592                 if re.search(rejecttitle, title, re.IGNORECASE):
593                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
594         date = info_dict.get('upload_date', None)
595         if date is not None:
596             dateRange = self.params.get('daterange', DateRange())
597             if date not in dateRange:
598                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
599         view_count = info_dict.get('view_count', None)
600         if view_count is not None:
601             min_views = self.params.get('min_views')
602             if min_views is not None and view_count < min_views:
603                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
604             max_views = self.params.get('max_views')
605             if max_views is not None and view_count > max_views:
606                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
607         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
608             return 'Skipping "%s" because it is age restricted' % video_title
609         if self.in_download_archive(info_dict):
610             return '%s has already been recorded in archive' % video_title
611
612         if not incomplete:
613             match_filter = self.params.get('match_filter')
614             if match_filter is not None:
615                 ret = match_filter(info_dict)
616                 if ret is not None:
617                     return ret
618
619         return None
620
621     @staticmethod
622     def add_extra_info(info_dict, extra_info):
623         '''Set the keys from extra_info in info dict if they are missing'''
624         for key, value in extra_info.items():
625             info_dict.setdefault(key, value)
626
627     def extract_info(self, url, download=True, ie_key=None, extra_info={},
628                      process=True):
629         '''
630         Returns a list with a dictionary for each video we find.
631         If 'download', also downloads the videos.
632         extra_info is a dict containing the extra values to add to each result
633         '''
634
635         if ie_key:
636             ies = [self.get_info_extractor(ie_key)]
637         else:
638             ies = self._ies
639
640         for ie in ies:
641             if not ie.suitable(url):
642                 continue
643
644             if not ie.working():
645                 self.report_warning('The program functionality for this site has been marked as broken, '
646                                     'and will probably not work.')
647
648             try:
649                 ie_result = ie.extract(url)
650                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
651                     break
652                 if isinstance(ie_result, list):
653                     # Backwards compatibility: old IE result format
654                     ie_result = {
655                         '_type': 'compat_list',
656                         'entries': ie_result,
657                     }
658                 self.add_default_extra_info(ie_result, ie, url)
659                 if process:
660                     return self.process_ie_result(ie_result, download, extra_info)
661                 else:
662                     return ie_result
663             except ExtractorError as de:  # An error we somewhat expected
664                 self.report_error(compat_str(de), de.format_traceback())
665                 break
666             except MaxDownloadsReached:
667                 raise
668             except Exception as e:
669                 if self.params.get('ignoreerrors', False):
670                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
671                     break
672                 else:
673                     raise
674         else:
675             self.report_error('no suitable InfoExtractor for URL %s' % url)
676
677     def add_default_extra_info(self, ie_result, ie, url):
678         self.add_extra_info(ie_result, {
679             'extractor': ie.IE_NAME,
680             'webpage_url': url,
681             'webpage_url_basename': url_basename(url),
682             'extractor_key': ie.ie_key(),
683         })
684
685     def process_ie_result(self, ie_result, download=True, extra_info={}):
686         """
687         Take the result of the ie(may be modified) and resolve all unresolved
688         references (URLs, playlist items).
689
690         It will also download the videos if 'download'.
691         Returns the resolved ie_result.
692         """
693
694         result_type = ie_result.get('_type', 'video')
695
696         if result_type in ('url', 'url_transparent'):
697             extract_flat = self.params.get('extract_flat', False)
698             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
699                     extract_flat is True):
700                 if self.params.get('forcejson', False):
701                     self.to_stdout(json.dumps(ie_result))
702                 return ie_result
703
704         if result_type == 'video':
705             self.add_extra_info(ie_result, extra_info)
706             return self.process_video_result(ie_result, download=download)
707         elif result_type == 'url':
708             # We have to add extra_info to the results because it may be
709             # contained in a playlist
710             return self.extract_info(ie_result['url'],
711                                      download,
712                                      ie_key=ie_result.get('ie_key'),
713                                      extra_info=extra_info)
714         elif result_type == 'url_transparent':
715             # Use the information from the embedding page
716             info = self.extract_info(
717                 ie_result['url'], ie_key=ie_result.get('ie_key'),
718                 extra_info=extra_info, download=False, process=False)
719
720             force_properties = dict(
721                 (k, v) for k, v in ie_result.items() if v is not None)
722             for f in ('_type', 'url'):
723                 if f in force_properties:
724                     del force_properties[f]
725             new_result = info.copy()
726             new_result.update(force_properties)
727
728             assert new_result.get('_type') != 'url_transparent'
729
730             return self.process_ie_result(
731                 new_result, download=download, extra_info=extra_info)
732         elif result_type == 'playlist' or result_type == 'multi_video':
733             # We process each entry in the playlist
734             playlist = ie_result.get('title', None) or ie_result.get('id', None)
735             self.to_screen('[download] Downloading playlist: %s' % playlist)
736
737             playlist_results = []
738
739             playliststart = self.params.get('playliststart', 1) - 1
740             playlistend = self.params.get('playlistend', None)
741             # For backwards compatibility, interpret -1 as whole list
742             if playlistend == -1:
743                 playlistend = None
744
745             playlistitems_str = self.params.get('playlist_items', None)
746             playlistitems = None
747             if playlistitems_str is not None:
748                 def iter_playlistitems(format):
749                     for string_segment in format.split(','):
750                         if '-' in string_segment:
751                             start, end = string_segment.split('-')
752                             for item in range(int(start), int(end) + 1):
753                                 yield int(item)
754                         else:
755                             yield int(string_segment)
756                 playlistitems = iter_playlistitems(playlistitems_str)
757
758             ie_entries = ie_result['entries']
759             if isinstance(ie_entries, list):
760                 n_all_entries = len(ie_entries)
761                 if playlistitems:
762                     entries = [ie_entries[i - 1] for i in playlistitems]
763                 else:
764                     entries = ie_entries[playliststart:playlistend]
765                 n_entries = len(entries)
766                 self.to_screen(
767                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
768                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
769             elif isinstance(ie_entries, PagedList):
770                 if playlistitems:
771                     entries = []
772                     for item in playlistitems:
773                         entries.extend(ie_entries.getslice(
774                             item - 1, item
775                         ))
776                 else:
777                     entries = ie_entries.getslice(
778                         playliststart, playlistend)
779                 n_entries = len(entries)
780                 self.to_screen(
781                     "[%s] playlist %s: Downloading %d videos" %
782                     (ie_result['extractor'], playlist, n_entries))
783             else:  # iterable
784                 if playlistitems:
785                     entry_list = list(ie_entries)
786                     entries = [entry_list[i - 1] for i in playlistitems]
787                 else:
788                     entries = list(itertools.islice(
789                         ie_entries, playliststart, playlistend))
790                 n_entries = len(entries)
791                 self.to_screen(
792                     "[%s] playlist %s: Downloading %d videos" %
793                     (ie_result['extractor'], playlist, n_entries))
794
795             if self.params.get('playlistreverse', False):
796                 entries = entries[::-1]
797
798             for i, entry in enumerate(entries, 1):
799                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
800                 extra = {
801                     'n_entries': n_entries,
802                     'playlist': playlist,
803                     'playlist_id': ie_result.get('id'),
804                     'playlist_title': ie_result.get('title'),
805                     'playlist_index': i + playliststart,
806                     'extractor': ie_result['extractor'],
807                     'webpage_url': ie_result['webpage_url'],
808                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
809                     'extractor_key': ie_result['extractor_key'],
810                 }
811
812                 reason = self._match_entry(entry, incomplete=True)
813                 if reason is not None:
814                     self.to_screen('[download] ' + reason)
815                     continue
816
817                 entry_result = self.process_ie_result(entry,
818                                                       download=download,
819                                                       extra_info=extra)
820                 playlist_results.append(entry_result)
821             ie_result['entries'] = playlist_results
822             return ie_result
823         elif result_type == 'compat_list':
824             self.report_warning(
825                 'Extractor %s returned a compat_list result. '
826                 'It needs to be updated.' % ie_result.get('extractor'))
827
828             def _fixup(r):
829                 self.add_extra_info(
830                     r,
831                     {
832                         'extractor': ie_result['extractor'],
833                         'webpage_url': ie_result['webpage_url'],
834                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
835                         'extractor_key': ie_result['extractor_key'],
836                     }
837                 )
838                 return r
839             ie_result['entries'] = [
840                 self.process_ie_result(_fixup(r), download, extra_info)
841                 for r in ie_result['entries']
842             ]
843             return ie_result
844         else:
845             raise Exception('Invalid result type: %s' % result_type)
846
847     def _apply_format_filter(self, format_spec, available_formats):
848         " Returns a tuple of the remaining format_spec and filtered formats "
849
850         OPERATORS = {
851             '<': operator.lt,
852             '<=': operator.le,
853             '>': operator.gt,
854             '>=': operator.ge,
855             '=': operator.eq,
856             '!=': operator.ne,
857         }
858         operator_rex = re.compile(r'''(?x)\s*\[
859             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
860             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
861             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
862             \]$
863             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
864         m = operator_rex.search(format_spec)
865         if m:
866             try:
867                 comparison_value = int(m.group('value'))
868             except ValueError:
869                 comparison_value = parse_filesize(m.group('value'))
870                 if comparison_value is None:
871                     comparison_value = parse_filesize(m.group('value') + 'B')
872                 if comparison_value is None:
873                     raise ValueError(
874                         'Invalid value %r in format specification %r' % (
875                             m.group('value'), format_spec))
876             op = OPERATORS[m.group('op')]
877
878         if not m:
879             STR_OPERATORS = {
880                 '=': operator.eq,
881                 '!=': operator.ne,
882             }
883             str_operator_rex = re.compile(r'''(?x)\s*\[
884                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
885                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
886                 \s*(?P<value>[a-zA-Z0-9_-]+)
887                 \s*\]$
888                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
889             m = str_operator_rex.search(format_spec)
890             if m:
891                 comparison_value = m.group('value')
892                 op = STR_OPERATORS[m.group('op')]
893
894         if not m:
895             raise ValueError('Invalid format specification %r' % format_spec)
896
897         def _filter(f):
898             actual_value = f.get(m.group('key'))
899             if actual_value is None:
900                 return m.group('none_inclusive')
901             return op(actual_value, comparison_value)
902         new_formats = [f for f in available_formats if _filter(f)]
903
904         new_format_spec = format_spec[:-len(m.group(0))]
905         if not new_format_spec:
906             new_format_spec = 'best'
907
908         return (new_format_spec, new_formats)
909
910     def select_format(self, format_spec, available_formats):
911         while format_spec.endswith(']'):
912             format_spec, available_formats = self._apply_format_filter(
913                 format_spec, available_formats)
914         if not available_formats:
915             return None
916
917         if format_spec == 'best' or format_spec is None:
918             return available_formats[-1]
919         elif format_spec == 'worst':
920             audiovideo_formats = [
921                 f for f in available_formats
922                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
923             if audiovideo_formats:
924                 return audiovideo_formats[0]
925             return available_formats[0]
926         elif format_spec == 'bestaudio':
927             audio_formats = [
928                 f for f in available_formats
929                 if f.get('vcodec') == 'none']
930             if audio_formats:
931                 return audio_formats[-1]
932         elif format_spec == 'worstaudio':
933             audio_formats = [
934                 f for f in available_formats
935                 if f.get('vcodec') == 'none']
936             if audio_formats:
937                 return audio_formats[0]
938         elif format_spec == 'bestvideo':
939             video_formats = [
940                 f for f in available_formats
941                 if f.get('acodec') == 'none']
942             if video_formats:
943                 return video_formats[-1]
944         elif format_spec == 'worstvideo':
945             video_formats = [
946                 f for f in available_formats
947                 if f.get('acodec') == 'none']
948             if video_formats:
949                 return video_formats[0]
950         else:
951             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
952             if format_spec in extensions:
953                 filter_f = lambda f: f['ext'] == format_spec
954             else:
955                 filter_f = lambda f: f['format_id'] == format_spec
956             matches = list(filter(filter_f, available_formats))
957             if matches:
958                 return matches[-1]
959         return None
960
961     def _calc_headers(self, info_dict):
962         res = std_headers.copy()
963
964         add_headers = info_dict.get('http_headers')
965         if add_headers:
966             res.update(add_headers)
967
968         cookies = self._calc_cookies(info_dict)
969         if cookies:
970             res['Cookie'] = cookies
971
972         return res
973
974     def _calc_cookies(self, info_dict):
975         pr = compat_urllib_request.Request(info_dict['url'])
976         self.cookiejar.add_cookie_header(pr)
977         return pr.get_header('Cookie')
978
979     def process_video_result(self, info_dict, download=True):
980         assert info_dict.get('_type', 'video') == 'video'
981
982         if 'id' not in info_dict:
983             raise ExtractorError('Missing "id" field in extractor result')
984         if 'title' not in info_dict:
985             raise ExtractorError('Missing "title" field in extractor result')
986
987         if 'playlist' not in info_dict:
988             # It isn't part of a playlist
989             info_dict['playlist'] = None
990             info_dict['playlist_index'] = None
991
992         thumbnails = info_dict.get('thumbnails')
993         if thumbnails is None:
994             thumbnail = info_dict.get('thumbnail')
995             if thumbnail:
996                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
997         if thumbnails:
998             thumbnails.sort(key=lambda t: (
999                 t.get('preference'), t.get('width'), t.get('height'),
1000                 t.get('id'), t.get('url')))
1001             for i, t in enumerate(thumbnails):
1002                 if 'width' in t and 'height' in t:
1003                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1004                 if t.get('id') is None:
1005                     t['id'] = '%d' % i
1006
1007         if thumbnails and 'thumbnail' not in info_dict:
1008             info_dict['thumbnail'] = thumbnails[-1]['url']
1009
1010         if 'display_id' not in info_dict and 'id' in info_dict:
1011             info_dict['display_id'] = info_dict['id']
1012
1013         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1014             # Working around negative timestamps in Windows
1015             # (see http://bugs.python.org/issue1646728)
1016             if info_dict['timestamp'] < 0 and os.name == 'nt':
1017                 info_dict['timestamp'] = 0
1018             upload_date = datetime.datetime.utcfromtimestamp(
1019                 info_dict['timestamp'])
1020             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1021
1022         if self.params.get('listsubtitles', False):
1023             if 'automatic_captions' in info_dict:
1024                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1025             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1026             return
1027         info_dict['requested_subtitles'] = self.process_subtitles(
1028             info_dict['id'], info_dict.get('subtitles'),
1029             info_dict.get('automatic_captions'))
1030
1031         # This extractors handle format selection themselves
1032         if info_dict['extractor'] in ['Youku']:
1033             if download:
1034                 self.process_info(info_dict)
1035             return info_dict
1036
1037         # We now pick which formats have to be downloaded
1038         if info_dict.get('formats') is None:
1039             # There's only one format available
1040             formats = [info_dict]
1041         else:
1042             formats = info_dict['formats']
1043
1044         if not formats:
1045             raise ExtractorError('No video formats found!')
1046
1047         # We check that all the formats have the format and format_id fields
1048         for i, format in enumerate(formats):
1049             if 'url' not in format:
1050                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1051
1052             if format.get('format_id') is None:
1053                 format['format_id'] = compat_str(i)
1054             if format.get('format') is None:
1055                 format['format'] = '{id} - {res}{note}'.format(
1056                     id=format['format_id'],
1057                     res=self.format_resolution(format),
1058                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1059                 )
1060             # Automatically determine file extension if missing
1061             if 'ext' not in format:
1062                 format['ext'] = determine_ext(format['url']).lower()
1063             # Add HTTP headers, so that external programs can use them from the
1064             # json output
1065             full_format_info = info_dict.copy()
1066             full_format_info.update(format)
1067             format['http_headers'] = self._calc_headers(full_format_info)
1068
1069         # TODO Central sorting goes here
1070
1071         if formats[0] is not info_dict:
1072             # only set the 'formats' fields if the original info_dict list them
1073             # otherwise we end up with a circular reference, the first (and unique)
1074             # element in the 'formats' field in info_dict is info_dict itself,
1075             # wich can't be exported to json
1076             info_dict['formats'] = formats
1077         if self.params.get('listformats'):
1078             self.list_formats(info_dict)
1079             return
1080         if self.params.get('list_thumbnails'):
1081             self.list_thumbnails(info_dict)
1082             return
1083
1084         req_format = self.params.get('format')
1085         if req_format is None:
1086             req_format_list = []
1087             if info_dict['extractor'] in ['youtube', 'ted'] and FFmpegMergerPP(self).available:
1088                 req_format_list.append('bestvideo+bestaudio')
1089             req_format_list.append('best')
1090             req_format = '/'.join(req_format_list)
1091         formats_to_download = []
1092         if req_format == 'all':
1093             formats_to_download = formats
1094         else:
1095             for rfstr in req_format.split(','):
1096                 # We can accept formats requested in the format: 34/5/best, we pick
1097                 # the first that is available, starting from left
1098                 req_formats = rfstr.split('/')
1099                 for rf in req_formats:
1100                     if re.match(r'.+?\+.+?', rf) is not None:
1101                         # Two formats have been requested like '137+139'
1102                         format_1, format_2 = rf.split('+')
1103                         formats_info = (self.select_format(format_1, formats),
1104                                         self.select_format(format_2, formats))
1105                         if all(formats_info):
1106                             # The first format must contain the video and the
1107                             # second the audio
1108                             if formats_info[0].get('vcodec') == 'none':
1109                                 self.report_error('The first format must '
1110                                                   'contain the video, try using '
1111                                                   '"-f %s+%s"' % (format_2, format_1))
1112                                 return
1113                             output_ext = (
1114                                 formats_info[0]['ext']
1115                                 if self.params.get('merge_output_format') is None
1116                                 else self.params['merge_output_format'])
1117                             selected_format = {
1118                                 'requested_formats': formats_info,
1119                                 'format': '%s+%s' % (formats_info[0].get('format'),
1120                                                      formats_info[1].get('format')),
1121                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1122                                                         formats_info[1].get('format_id')),
1123                                 'width': formats_info[0].get('width'),
1124                                 'height': formats_info[0].get('height'),
1125                                 'resolution': formats_info[0].get('resolution'),
1126                                 'fps': formats_info[0].get('fps'),
1127                                 'vcodec': formats_info[0].get('vcodec'),
1128                                 'vbr': formats_info[0].get('vbr'),
1129                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1130                                 'acodec': formats_info[1].get('acodec'),
1131                                 'abr': formats_info[1].get('abr'),
1132                                 'ext': output_ext,
1133                             }
1134                         else:
1135                             selected_format = None
1136                     else:
1137                         selected_format = self.select_format(rf, formats)
1138                     if selected_format is not None:
1139                         formats_to_download.append(selected_format)
1140                         break
1141         if not formats_to_download:
1142             raise ExtractorError('requested format not available',
1143                                  expected=True)
1144
1145         if download:
1146             if len(formats_to_download) > 1:
1147                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1148             for format in formats_to_download:
1149                 new_info = dict(info_dict)
1150                 new_info.update(format)
1151                 self.process_info(new_info)
1152         # We update the info dict with the best quality format (backwards compatibility)
1153         info_dict.update(formats_to_download[-1])
1154         return info_dict
1155
1156     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1157         """Select the requested subtitles and their format"""
1158         available_subs = {}
1159         if normal_subtitles and self.params.get('writesubtitles'):
1160             available_subs.update(normal_subtitles)
1161         if automatic_captions and self.params.get('writeautomaticsub'):
1162             for lang, cap_info in automatic_captions.items():
1163                 if lang not in available_subs:
1164                     available_subs[lang] = cap_info
1165
1166         if (not self.params.get('writesubtitles') and not
1167                 self.params.get('writeautomaticsub') or not
1168                 available_subs):
1169             return None
1170
1171         if self.params.get('allsubtitles', False):
1172             requested_langs = available_subs.keys()
1173         else:
1174             if self.params.get('subtitleslangs', False):
1175                 requested_langs = self.params.get('subtitleslangs')
1176             elif 'en' in available_subs:
1177                 requested_langs = ['en']
1178             else:
1179                 requested_langs = [list(available_subs.keys())[0]]
1180
1181         formats_query = self.params.get('subtitlesformat', 'best')
1182         formats_preference = formats_query.split('/') if formats_query else []
1183         subs = {}
1184         for lang in requested_langs:
1185             formats = available_subs.get(lang)
1186             if formats is None:
1187                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1188                 continue
1189             for ext in formats_preference:
1190                 if ext == 'best':
1191                     f = formats[-1]
1192                     break
1193                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1194                 if matches:
1195                     f = matches[-1]
1196                     break
1197             else:
1198                 f = formats[-1]
1199                 self.report_warning(
1200                     'No subtitle format found matching "%s" for language %s, '
1201                     'using %s' % (formats_query, lang, f['ext']))
1202             subs[lang] = f
1203         return subs
1204
1205     def process_info(self, info_dict):
1206         """Process a single resolved IE result."""
1207
1208         assert info_dict.get('_type', 'video') == 'video'
1209
1210         max_downloads = self.params.get('max_downloads')
1211         if max_downloads is not None:
1212             if self._num_downloads >= int(max_downloads):
1213                 raise MaxDownloadsReached()
1214
1215         info_dict['fulltitle'] = info_dict['title']
1216         if len(info_dict['title']) > 200:
1217             info_dict['title'] = info_dict['title'][:197] + '...'
1218
1219         if 'format' not in info_dict:
1220             info_dict['format'] = info_dict['ext']
1221
1222         reason = self._match_entry(info_dict, incomplete=False)
1223         if reason is not None:
1224             self.to_screen('[download] ' + reason)
1225             return
1226
1227         self._num_downloads += 1
1228
1229         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1230
1231         # Forced printings
1232         if self.params.get('forcetitle', False):
1233             self.to_stdout(info_dict['fulltitle'])
1234         if self.params.get('forceid', False):
1235             self.to_stdout(info_dict['id'])
1236         if self.params.get('forceurl', False):
1237             if info_dict.get('requested_formats') is not None:
1238                 for f in info_dict['requested_formats']:
1239                     self.to_stdout(f['url'] + f.get('play_path', ''))
1240             else:
1241                 # For RTMP URLs, also include the playpath
1242                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1243         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1244             self.to_stdout(info_dict['thumbnail'])
1245         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1246             self.to_stdout(info_dict['description'])
1247         if self.params.get('forcefilename', False) and filename is not None:
1248             self.to_stdout(filename)
1249         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1250             self.to_stdout(formatSeconds(info_dict['duration']))
1251         if self.params.get('forceformat', False):
1252             self.to_stdout(info_dict['format'])
1253         if self.params.get('forcejson', False):
1254             self.to_stdout(json.dumps(info_dict))
1255
1256         # Do nothing else if in simulate mode
1257         if self.params.get('simulate', False):
1258             return
1259
1260         if filename is None:
1261             return
1262
1263         try:
1264             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1265             if dn and not os.path.exists(dn):
1266                 os.makedirs(dn)
1267         except (OSError, IOError) as err:
1268             self.report_error('unable to create directory ' + compat_str(err))
1269             return
1270
1271         if self.params.get('writedescription', False):
1272             descfn = filename + '.description'
1273             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1274                 self.to_screen('[info] Video description is already present')
1275             elif info_dict.get('description') is None:
1276                 self.report_warning('There\'s no description to write.')
1277             else:
1278                 try:
1279                     self.to_screen('[info] Writing video description to: ' + descfn)
1280                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1281                         descfile.write(info_dict['description'])
1282                 except (OSError, IOError):
1283                     self.report_error('Cannot write description file ' + descfn)
1284                     return
1285
1286         if self.params.get('writeannotations', False):
1287             annofn = filename + '.annotations.xml'
1288             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1289                 self.to_screen('[info] Video annotations are already present')
1290             else:
1291                 try:
1292                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1293                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1294                         annofile.write(info_dict['annotations'])
1295                 except (KeyError, TypeError):
1296                     self.report_warning('There are no annotations to write.')
1297                 except (OSError, IOError):
1298                     self.report_error('Cannot write annotations file: ' + annofn)
1299                     return
1300
1301         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1302                                        self.params.get('writeautomaticsub')])
1303
1304         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1305             # subtitles download errors are already managed as troubles in relevant IE
1306             # that way it will silently go on when used with unsupporting IE
1307             subtitles = info_dict['requested_subtitles']
1308             ie = self.get_info_extractor(info_dict['extractor_key'])
1309             for sub_lang, sub_info in subtitles.items():
1310                 sub_format = sub_info['ext']
1311                 if sub_info.get('data') is not None:
1312                     sub_data = sub_info['data']
1313                 else:
1314                     try:
1315                         sub_data = ie._download_webpage(
1316                             sub_info['url'], info_dict['id'], note=False)
1317                     except ExtractorError as err:
1318                         self.report_warning('Unable to download subtitle for "%s": %s' %
1319                                             (sub_lang, compat_str(err.cause)))
1320                         continue
1321                 try:
1322                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1323                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1324                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1325                     else:
1326                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1327                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1328                             subfile.write(sub_data)
1329                 except (OSError, IOError):
1330                     self.report_error('Cannot write subtitles file ' + sub_filename)
1331                     return
1332
1333         if self.params.get('writeinfojson', False):
1334             infofn = os.path.splitext(filename)[0] + '.info.json'
1335             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1336                 self.to_screen('[info] Video description metadata is already present')
1337             else:
1338                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1339                 try:
1340                     write_json_file(info_dict, infofn)
1341                 except (OSError, IOError):
1342                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1343                     return
1344
1345         self._write_thumbnails(info_dict, filename)
1346
1347         if not self.params.get('skip_download', False):
1348             try:
1349                 def dl(name, info):
1350                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1351                     for ph in self._progress_hooks:
1352                         fd.add_progress_hook(ph)
1353                     if self.params.get('verbose'):
1354                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1355                     return fd.download(name, info)
1356
1357                 if info_dict.get('requested_formats') is not None:
1358                     downloaded = []
1359                     success = True
1360                     merger = FFmpegMergerPP(self)
1361                     if not merger.available:
1362                         postprocessors = []
1363                         self.report_warning('You have requested multiple '
1364                                             'formats but ffmpeg or avconv are not installed.'
1365                                             ' The formats won\'t be merged')
1366                     else:
1367                         postprocessors = [merger]
1368
1369                     def compatible_formats(formats):
1370                         video, audio = formats
1371                         # Check extension
1372                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1373                         if video_ext and audio_ext:
1374                             COMPATIBLE_EXTS = (
1375                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1376                                 ('webm')
1377                             )
1378                             for exts in COMPATIBLE_EXTS:
1379                                 if video_ext in exts and audio_ext in exts:
1380                                     return True
1381                         # TODO: Check acodec/vcodec
1382                         return False
1383
1384                     requested_formats = info_dict['requested_formats']
1385                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1386                         filename = os.path.splitext(filename)[0] + '.mkv'
1387                         self.report_warning('You have requested formats incompatible for merge. '
1388                                             'The formats will be merged into mkv')
1389                     if os.path.exists(encodeFilename(filename)):
1390                         self.to_screen(
1391                             '[download] %s has already been downloaded and '
1392                             'merged' % filename)
1393                     else:
1394                         for f in requested_formats:
1395                             new_info = dict(info_dict)
1396                             new_info.update(f)
1397                             fname = self.prepare_filename(new_info)
1398                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1399                             downloaded.append(fname)
1400                             partial_success = dl(fname, new_info)
1401                             success = success and partial_success
1402                         info_dict['__postprocessors'] = postprocessors
1403                         info_dict['__files_to_merge'] = downloaded
1404                 else:
1405                     # Just a single file
1406                     success = dl(filename, info_dict)
1407             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1408                 self.report_error('unable to download video data: %s' % str(err))
1409                 return
1410             except (OSError, IOError) as err:
1411                 raise UnavailableVideoError(err)
1412             except (ContentTooShortError, ) as err:
1413                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1414                 return
1415
1416             if success:
1417                 # Fixup content
1418                 fixup_policy = self.params.get('fixup')
1419                 if fixup_policy is None:
1420                     fixup_policy = 'detect_or_warn'
1421
1422                 stretched_ratio = info_dict.get('stretched_ratio')
1423                 if stretched_ratio is not None and stretched_ratio != 1:
1424                     if fixup_policy == 'warn':
1425                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1426                             info_dict['id'], stretched_ratio))
1427                     elif fixup_policy == 'detect_or_warn':
1428                         stretched_pp = FFmpegFixupStretchedPP(self)
1429                         if stretched_pp.available:
1430                             info_dict.setdefault('__postprocessors', [])
1431                             info_dict['__postprocessors'].append(stretched_pp)
1432                         else:
1433                             self.report_warning(
1434                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1435                                     info_dict['id'], stretched_ratio))
1436                     else:
1437                         assert fixup_policy in ('ignore', 'never')
1438
1439                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1440                     if fixup_policy == 'warn':
1441                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1442                             info_dict['id']))
1443                     elif fixup_policy == 'detect_or_warn':
1444                         fixup_pp = FFmpegFixupM4aPP(self)
1445                         if fixup_pp.available:
1446                             info_dict.setdefault('__postprocessors', [])
1447                             info_dict['__postprocessors'].append(fixup_pp)
1448                         else:
1449                             self.report_warning(
1450                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1451                                     info_dict['id']))
1452                     else:
1453                         assert fixup_policy in ('ignore', 'never')
1454
1455                 try:
1456                     self.post_process(filename, info_dict)
1457                 except (PostProcessingError) as err:
1458                     self.report_error('postprocessing: %s' % str(err))
1459                     return
1460                 self.record_download_archive(info_dict)
1461
1462     def download(self, url_list):
1463         """Download a given list of URLs."""
1464         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1465         if (len(url_list) > 1 and
1466                 '%' not in outtmpl and
1467                 self.params.get('max_downloads') != 1):
1468             raise SameFileError(outtmpl)
1469
1470         for url in url_list:
1471             try:
1472                 # It also downloads the videos
1473                 res = self.extract_info(url)
1474             except UnavailableVideoError:
1475                 self.report_error('unable to download video')
1476             except MaxDownloadsReached:
1477                 self.to_screen('[info] Maximum number of downloaded files reached.')
1478                 raise
1479             else:
1480                 if self.params.get('dump_single_json', False):
1481                     self.to_stdout(json.dumps(res))
1482
1483         return self._download_retcode
1484
1485     def download_with_info_file(self, info_filename):
1486         with contextlib.closing(fileinput.FileInput(
1487                 [info_filename], mode='r',
1488                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1489             # FileInput doesn't have a read method, we can't call json.load
1490             info = json.loads('\n'.join(f))
1491         try:
1492             self.process_ie_result(info, download=True)
1493         except DownloadError:
1494             webpage_url = info.get('webpage_url')
1495             if webpage_url is not None:
1496                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1497                 return self.download([webpage_url])
1498             else:
1499                 raise
1500         return self._download_retcode
1501
1502     def post_process(self, filename, ie_info):
1503         """Run all the postprocessors on the given file."""
1504         info = dict(ie_info)
1505         info['filepath'] = filename
1506         pps_chain = []
1507         if ie_info.get('__postprocessors') is not None:
1508             pps_chain.extend(ie_info['__postprocessors'])
1509         pps_chain.extend(self._pps)
1510         for pp in pps_chain:
1511             try:
1512                 files_to_delete, info = pp.run(info)
1513             except PostProcessingError as e:
1514                 self.report_error(e.msg)
1515             if files_to_delete and not self.params.get('keepvideo', False):
1516                 for old_filename in files_to_delete:
1517                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1518                     try:
1519                         os.remove(encodeFilename(old_filename))
1520                     except (IOError, OSError):
1521                         self.report_warning('Unable to remove downloaded original file')
1522
1523     def _make_archive_id(self, info_dict):
1524         # Future-proof against any change in case
1525         # and backwards compatibility with prior versions
1526         extractor = info_dict.get('extractor_key')
1527         if extractor is None:
1528             if 'id' in info_dict:
1529                 extractor = info_dict.get('ie_key')  # key in a playlist
1530         if extractor is None:
1531             return None  # Incomplete video information
1532         return extractor.lower() + ' ' + info_dict['id']
1533
1534     def in_download_archive(self, info_dict):
1535         fn = self.params.get('download_archive')
1536         if fn is None:
1537             return False
1538
1539         vid_id = self._make_archive_id(info_dict)
1540         if vid_id is None:
1541             return False  # Incomplete video information
1542
1543         try:
1544             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1545                 for line in archive_file:
1546                     if line.strip() == vid_id:
1547                         return True
1548         except IOError as ioe:
1549             if ioe.errno != errno.ENOENT:
1550                 raise
1551         return False
1552
1553     def record_download_archive(self, info_dict):
1554         fn = self.params.get('download_archive')
1555         if fn is None:
1556             return
1557         vid_id = self._make_archive_id(info_dict)
1558         assert vid_id
1559         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1560             archive_file.write(vid_id + '\n')
1561
1562     @staticmethod
1563     def format_resolution(format, default='unknown'):
1564         if format.get('vcodec') == 'none':
1565             return 'audio only'
1566         if format.get('resolution') is not None:
1567             return format['resolution']
1568         if format.get('height') is not None:
1569             if format.get('width') is not None:
1570                 res = '%sx%s' % (format['width'], format['height'])
1571             else:
1572                 res = '%sp' % format['height']
1573         elif format.get('width') is not None:
1574             res = '?x%d' % format['width']
1575         else:
1576             res = default
1577         return res
1578
1579     def _format_note(self, fdict):
1580         res = ''
1581         if fdict.get('ext') in ['f4f', 'f4m']:
1582             res += '(unsupported) '
1583         if fdict.get('format_note') is not None:
1584             res += fdict['format_note'] + ' '
1585         if fdict.get('tbr') is not None:
1586             res += '%4dk ' % fdict['tbr']
1587         if fdict.get('container') is not None:
1588             if res:
1589                 res += ', '
1590             res += '%s container' % fdict['container']
1591         if (fdict.get('vcodec') is not None and
1592                 fdict.get('vcodec') != 'none'):
1593             if res:
1594                 res += ', '
1595             res += fdict['vcodec']
1596             if fdict.get('vbr') is not None:
1597                 res += '@'
1598         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1599             res += 'video@'
1600         if fdict.get('vbr') is not None:
1601             res += '%4dk' % fdict['vbr']
1602         if fdict.get('fps') is not None:
1603             res += ', %sfps' % fdict['fps']
1604         if fdict.get('acodec') is not None:
1605             if res:
1606                 res += ', '
1607             if fdict['acodec'] == 'none':
1608                 res += 'video only'
1609             else:
1610                 res += '%-5s' % fdict['acodec']
1611         elif fdict.get('abr') is not None:
1612             if res:
1613                 res += ', '
1614             res += 'audio'
1615         if fdict.get('abr') is not None:
1616             res += '@%3dk' % fdict['abr']
1617         if fdict.get('asr') is not None:
1618             res += ' (%5dHz)' % fdict['asr']
1619         if fdict.get('filesize') is not None:
1620             if res:
1621                 res += ', '
1622             res += format_bytes(fdict['filesize'])
1623         elif fdict.get('filesize_approx') is not None:
1624             if res:
1625                 res += ', '
1626             res += '~' + format_bytes(fdict['filesize_approx'])
1627         return res
1628
1629     def list_formats(self, info_dict):
1630         formats = info_dict.get('formats', [info_dict])
1631         table = [
1632             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1633             for f in formats
1634             if f.get('preference') is None or f['preference'] >= -1000]
1635         if len(formats) > 1:
1636             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1637
1638         header_line = ['format code', 'extension', 'resolution', 'note']
1639         self.to_screen(
1640             '[info] Available formats for %s:\n%s' %
1641             (info_dict['id'], render_table(header_line, table)))
1642
1643     def list_thumbnails(self, info_dict):
1644         thumbnails = info_dict.get('thumbnails')
1645         if not thumbnails:
1646             tn_url = info_dict.get('thumbnail')
1647             if tn_url:
1648                 thumbnails = [{'id': '0', 'url': tn_url}]
1649             else:
1650                 self.to_screen(
1651                     '[info] No thumbnails present for %s' % info_dict['id'])
1652                 return
1653
1654         self.to_screen(
1655             '[info] Thumbnails for %s:' % info_dict['id'])
1656         self.to_screen(render_table(
1657             ['ID', 'width', 'height', 'URL'],
1658             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1659
1660     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1661         if not subtitles:
1662             self.to_screen('%s has no %s' % (video_id, name))
1663             return
1664         self.to_screen(
1665             'Available %s for %s:' % (name, video_id))
1666         self.to_screen(render_table(
1667             ['Language', 'formats'],
1668             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1669                 for lang, formats in subtitles.items()]))
1670
1671     def urlopen(self, req):
1672         """ Start an HTTP download """
1673
1674         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1675         # always respected by websites, some tend to give out URLs with non percent-encoded
1676         # non-ASCII characters (see telemb.py, ard.py [#3412])
1677         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1678         # To work around aforementioned issue we will replace request's original URL with
1679         # percent-encoded one
1680         req_is_string = isinstance(req, compat_basestring)
1681         url = req if req_is_string else req.get_full_url()
1682         url_escaped = escape_url(url)
1683
1684         # Substitute URL if any change after escaping
1685         if url != url_escaped:
1686             if req_is_string:
1687                 req = url_escaped
1688             else:
1689                 req = compat_urllib_request.Request(
1690                     url_escaped, data=req.data, headers=req.headers,
1691                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1692
1693         return self._opener.open(req, timeout=self._socket_timeout)
1694
1695     def print_debug_header(self):
1696         if not self.params.get('verbose'):
1697             return
1698
1699         if type('') is not compat_str:
1700             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1701             self.report_warning(
1702                 'Your Python is broken! Update to a newer and supported version')
1703
1704         stdout_encoding = getattr(
1705             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1706         encoding_str = (
1707             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1708                 locale.getpreferredencoding(),
1709                 sys.getfilesystemencoding(),
1710                 stdout_encoding,
1711                 self.get_encoding()))
1712         write_string(encoding_str, encoding=None)
1713
1714         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1715         try:
1716             sp = subprocess.Popen(
1717                 ['git', 'rev-parse', '--short', 'HEAD'],
1718                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1719                 cwd=os.path.dirname(os.path.abspath(__file__)))
1720             out, err = sp.communicate()
1721             out = out.decode().strip()
1722             if re.match('[0-9a-f]+', out):
1723                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1724         except Exception:
1725             try:
1726                 sys.exc_clear()
1727             except Exception:
1728                 pass
1729         self._write_string('[debug] Python version %s - %s\n' % (
1730             platform.python_version(), platform_name()))
1731
1732         exe_versions = FFmpegPostProcessor.get_versions(self)
1733         exe_versions['rtmpdump'] = rtmpdump_version()
1734         exe_str = ', '.join(
1735             '%s %s' % (exe, v)
1736             for exe, v in sorted(exe_versions.items())
1737             if v
1738         )
1739         if not exe_str:
1740             exe_str = 'none'
1741         self._write_string('[debug] exe versions: %s\n' % exe_str)
1742
1743         proxy_map = {}
1744         for handler in self._opener.handlers:
1745             if hasattr(handler, 'proxies'):
1746                 proxy_map.update(handler.proxies)
1747         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1748
1749         if self.params.get('call_home', False):
1750             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1751             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1752             latest_version = self.urlopen(
1753                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1754             if version_tuple(latest_version) > version_tuple(__version__):
1755                 self.report_warning(
1756                     'You are using an outdated version (newest version: %s)! '
1757                     'See https://yt-dl.org/update if you need help updating.' %
1758                     latest_version)
1759
1760     def _setup_opener(self):
1761         timeout_val = self.params.get('socket_timeout')
1762         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1763
1764         opts_cookiefile = self.params.get('cookiefile')
1765         opts_proxy = self.params.get('proxy')
1766
1767         if opts_cookiefile is None:
1768             self.cookiejar = compat_cookiejar.CookieJar()
1769         else:
1770             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1771                 opts_cookiefile)
1772             if os.access(opts_cookiefile, os.R_OK):
1773                 self.cookiejar.load()
1774
1775         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1776             self.cookiejar)
1777         if opts_proxy is not None:
1778             if opts_proxy == '':
1779                 proxies = {}
1780             else:
1781                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1782         else:
1783             proxies = compat_urllib_request.getproxies()
1784             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1785             if 'http' in proxies and 'https' not in proxies:
1786                 proxies['https'] = proxies['http']
1787         proxy_handler = PerRequestProxyHandler(proxies)
1788
1789         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1790         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1791         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1792         opener = compat_urllib_request.build_opener(
1793             proxy_handler, https_handler, cookie_processor, ydlh)
1794
1795         # Delete the default user-agent header, which would otherwise apply in
1796         # cases where our custom HTTP handler doesn't come into play
1797         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1798         opener.addheaders = []
1799         self._opener = opener
1800
1801     def encode(self, s):
1802         if isinstance(s, bytes):
1803             return s  # Already encoded
1804
1805         try:
1806             return s.encode(self.get_encoding())
1807         except UnicodeEncodeError as err:
1808             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1809             raise
1810
1811     def get_encoding(self):
1812         encoding = self.params.get('encoding')
1813         if encoding is None:
1814             encoding = preferredencoding()
1815         return encoding
1816
1817     def _write_thumbnails(self, info_dict, filename):
1818         if self.params.get('writethumbnail', False):
1819             thumbnails = info_dict.get('thumbnails')
1820             if thumbnails:
1821                 thumbnails = [thumbnails[-1]]
1822         elif self.params.get('write_all_thumbnails', False):
1823             thumbnails = info_dict.get('thumbnails')
1824         else:
1825             return
1826
1827         if not thumbnails:
1828             # No thumbnails present, so return immediately
1829             return
1830
1831         for t in thumbnails:
1832             thumb_ext = determine_ext(t['url'], 'jpg')
1833             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1834             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1835             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1836
1837             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1838                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1839                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1840             else:
1841                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1842                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1843                 try:
1844                     uf = self.urlopen(t['url'])
1845                     with open(thumb_filename, 'wb') as thumbf:
1846                         shutil.copyfileobj(uf, thumbf)
1847                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1848                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1849                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1850                     self.report_warning('Unable to download thumbnail "%s": %s' %
1851                                         (t['url'], compat_str(err)))