[Lecture2Go] Add new extractor
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     sanitize_path,
65     std_headers,
66     subtitles_filename,
67     takewhile_inclusive,
68     UnavailableVideoError,
69     url_basename,
70     version_tuple,
71     write_json_file,
72     write_string,
73     YoutubeDLHandler,
74     prepend_extension,
75     args_to_str,
76     age_restricted,
77 )
78 from .cache import Cache
79 from .extractor import get_info_extractor, gen_extractors
80 from .downloader import get_suitable_downloader
81 from .downloader.rtmp import rtmpdump_version
82 from .postprocessor import (
83     FFmpegFixupM4aPP,
84     FFmpegFixupStretchedPP,
85     FFmpegMergerPP,
86     FFmpegPostProcessor,
87     get_postprocessor,
88 )
89 from .version import __version__
90
91
92 class YoutubeDL(object):
93     """YoutubeDL class.
94
95     YoutubeDL objects are the ones responsible for downloading the
96     actual video file and writing it to disk if the user has requested
97     it, among some other tasks. In most cases there should be one per
98     program. As, given a video URL, the downloader doesn't know how to
99     extract all the needed information, task that InfoExtractors do, it
100     has to pass the URL to one of them.
101
102     For this, YoutubeDL objects have a method that allows
103     InfoExtractors to be registered in a given order. When it is passed
104     a URL, the YoutubeDL object handles it to the first InfoExtractor it
105     finds that reports being able to handle it. The InfoExtractor extracts
106     all the information about the video or videos the URL refers to, and
107     YoutubeDL process the extracted information, possibly using a File
108     Downloader to download the video.
109
110     YoutubeDL objects accept a lot of parameters. In order not to saturate
111     the object constructor with arguments, it receives a dictionary of
112     options instead. These options are available through the params
113     attribute for the InfoExtractors to use. The YoutubeDL also
114     registers itself as the downloader in charge for the InfoExtractors
115     that are added to it, so this is a "mutual registration".
116
117     Available options:
118
119     username:          Username for authentication purposes.
120     password:          Password for authentication purposes.
121     videopassword:     Password for accessing a video.
122     usenetrc:          Use netrc for authentication instead.
123     verbose:           Print additional info to stdout.
124     quiet:             Do not print messages to stdout.
125     no_warnings:       Do not print out anything for warnings.
126     forceurl:          Force printing final URL.
127     forcetitle:        Force printing title.
128     forceid:           Force printing ID.
129     forcethumbnail:    Force printing thumbnail URL.
130     forcedescription:  Force printing description.
131     forcefilename:     Force printing final filename.
132     forceduration:     Force printing duration.
133     forcejson:         Force printing info_dict as JSON.
134     dump_single_json:  Force printing the info_dict of the whole playlist
135                        (or video) as a single JSON line.
136     simulate:          Do not download the video files.
137     format:            Video format code. See options.py for more information.
138     format_limit:      Highest quality format to try.
139     outtmpl:           Template for output names.
140     restrictfilenames: Do not allow "&" and spaces in file names
141     ignoreerrors:      Do not stop on download errors.
142     nooverwrites:      Prevent overwriting files.
143     playliststart:     Playlist item to start at.
144     playlistend:       Playlist item to end at.
145     playlist_items:    Specific indices of playlist to download.
146     playlistreverse:   Download playlist items in reverse order.
147     matchtitle:        Download only matching titles.
148     rejecttitle:       Reject downloads for matching titles.
149     logger:            Log messages to a logging.Logger instance.
150     logtostderr:       Log messages to stderr instead of stdout.
151     writedescription:  Write the video description to a .description file
152     writeinfojson:     Write the video description to a .info.json file
153     writeannotations:  Write the video annotations to a .annotations.xml file
154     writethumbnail:    Write the thumbnail image to a file
155     write_all_thumbnails:  Write all thumbnail formats to files
156     writesubtitles:    Write the video subtitles to a file
157     writeautomaticsub: Write the automatic subtitles to a file
158     allsubtitles:      Downloads all the subtitles of the video
159                        (requires writesubtitles or writeautomaticsub)
160     listsubtitles:     Lists all available subtitles for the video
161     subtitlesformat:   The format code for subtitles
162     subtitleslangs:    List of languages of the subtitles to download
163     keepvideo:         Keep the video file after post-processing
164     daterange:         A DateRange object, download only if the upload_date is in the range.
165     skip_download:     Skip the actual download of the video file
166     cachedir:          Location of the cache files in the filesystem.
167                        False to disable filesystem cache.
168     noplaylist:        Download single video instead of a playlist if in doubt.
169     age_limit:         An integer representing the user's age in years.
170                        Unsuitable videos for the given age are skipped.
171     min_views:         An integer representing the minimum view count the video
172                        must have in order to not be skipped.
173                        Videos without view count information are always
174                        downloaded. None for no limit.
175     max_views:         An integer representing the maximum view count.
176                        Videos that are more popular than that are not
177                        downloaded.
178                        Videos without view count information are always
179                        downloaded. None for no limit.
180     download_archive:  File name of a file where all downloads are recorded.
181                        Videos already present in the file are not downloaded
182                        again.
183     cookiefile:        File name where cookies should be read from and dumped to.
184     nocheckcertificate:Do not verify SSL certificates
185     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
186                        At the moment, this is only supported by YouTube.
187     proxy:             URL of the proxy server to use
188     cn_verification_proxy:  URL of the proxy to use for IP address verification
189                        on Chinese sites. (Experimental)
190     socket_timeout:    Time to wait for unresponsive hosts, in seconds
191     bidi_workaround:   Work around buggy terminals without bidirectional text
192                        support, using fribidi
193     debug_printtraffic:Print out sent and received HTTP traffic
194     include_ads:       Download ads as well
195     default_search:    Prepend this string if an input url is not valid.
196                        'auto' for elaborate guessing
197     encoding:          Use this encoding instead of the system-specified.
198     extract_flat:      Do not resolve URLs, return the immediate result.
199                        Pass in 'in_playlist' to only show this behavior for
200                        playlist items.
201     postprocessors:    A list of dictionaries, each with an entry
202                        * key:  The name of the postprocessor. See
203                                youtube_dl/postprocessor/__init__.py for a list.
204                        as well as any further keyword arguments for the
205                        postprocessor.
206     progress_hooks:    A list of functions that get called on download
207                        progress, with a dictionary with the entries
208                        * status: One of "downloading", "error", or "finished".
209                                  Check this first and ignore unknown values.
210
211                        If status is one of "downloading", or "finished", the
212                        following properties may also be present:
213                        * filename: The final filename (always present)
214                        * tmpfilename: The filename we're currently writing to
215                        * downloaded_bytes: Bytes on disk
216                        * total_bytes: Size of the whole file, None if unknown
217                        * total_bytes_estimate: Guess of the eventual file size,
218                                                None if unavailable.
219                        * elapsed: The number of seconds since download started.
220                        * eta: The estimated time in seconds, None if unknown
221                        * speed: The download speed in bytes/second, None if
222                                 unknown
223                        * fragment_index: The counter of the currently
224                                          downloaded video fragment.
225                        * fragment_count: The number of fragments (= individual
226                                          files that will be merged)
227
228                        Progress hooks are guaranteed to be called at least once
229                        (with status "finished") if the download is successful.
230     merge_output_format: Extension to use when merging formats.
231     fixup:             Automatically correct known faults of the file.
232                        One of:
233                        - "never": do nothing
234                        - "warn": only emit a warning
235                        - "detect_or_warn": check whether we can do anything
236                                            about it, warn otherwise (default)
237     source_address:    (Experimental) Client-side IP address to bind to.
238     call_home:         Boolean, true iff we are allowed to contact the
239                        youtube-dl servers for debugging.
240     sleep_interval:    Number of seconds to sleep before each download.
241     listformats:       Print an overview of available video formats and exit.
242     list_thumbnails:   Print a table of all thumbnails and exit.
243     match_filter:      A function that gets called with the info_dict of
244                        every video.
245                        If it returns a message, the video is ignored.
246                        If it returns None, the video is downloaded.
247                        match_filter_func in utils.py is one example for this.
248     no_color:          Do not emit color codes in output.
249
250     The following options determine which downloader is picked:
251     external_downloader: Executable of the external downloader to call.
252                        None or unset for standard (built-in) downloader.
253     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
254
255     The following parameters are not used by YoutubeDL itself, they are used by
256     the downloader (see youtube_dl/downloader/common.py):
257     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
258     noresizebuffer, retries, continuedl, noprogress, consoletitle,
259     xattr_set_filesize, external_downloader_args.
260
261     The following options are used by the post processors:
262     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
263                        otherwise prefer avconv.
264     exec_cmd:          Arbitrary command to run after downloading
265     """
266
267     params = None
268     _ies = []
269     _pps = []
270     _download_retcode = None
271     _num_downloads = None
272     _screen_file = None
273
274     def __init__(self, params=None, auto_init=True):
275         """Create a FileDownloader object with the given options."""
276         if params is None:
277             params = {}
278         self._ies = []
279         self._ies_instances = {}
280         self._pps = []
281         self._progress_hooks = []
282         self._download_retcode = 0
283         self._num_downloads = 0
284         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
285         self._err_file = sys.stderr
286         self.params = params
287         self.cache = Cache(self)
288
289         if params.get('bidi_workaround', False):
290             try:
291                 import pty
292                 master, slave = pty.openpty()
293                 width = compat_get_terminal_size().columns
294                 if width is None:
295                     width_args = []
296                 else:
297                     width_args = ['-w', str(width)]
298                 sp_kwargs = dict(
299                     stdin=subprocess.PIPE,
300                     stdout=slave,
301                     stderr=self._err_file)
302                 try:
303                     self._output_process = subprocess.Popen(
304                         ['bidiv'] + width_args, **sp_kwargs
305                     )
306                 except OSError:
307                     self._output_process = subprocess.Popen(
308                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
309                 self._output_channel = os.fdopen(master, 'rb')
310             except OSError as ose:
311                 if ose.errno == 2:
312                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
313                 else:
314                     raise
315
316         if (sys.version_info >= (3,) and sys.platform != 'win32' and
317                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
318                 not params.get('restrictfilenames', False)):
319             # On Python 3, the Unicode filesystem API will throw errors (#1474)
320             self.report_warning(
321                 'Assuming --restrict-filenames since file system encoding '
322                 'cannot encode all characters. '
323                 'Set the LC_ALL environment variable to fix this.')
324             self.params['restrictfilenames'] = True
325
326         if isinstance(params.get('outtmpl'), bytes):
327             self.report_warning(
328                 'Parameter outtmpl is bytes, but should be a unicode string. '
329                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
330
331         self._setup_opener()
332
333         if auto_init:
334             self.print_debug_header()
335             self.add_default_info_extractors()
336
337         for pp_def_raw in self.params.get('postprocessors', []):
338             pp_class = get_postprocessor(pp_def_raw['key'])
339             pp_def = dict(pp_def_raw)
340             del pp_def['key']
341             pp = pp_class(self, **compat_kwargs(pp_def))
342             self.add_post_processor(pp)
343
344         for ph in self.params.get('progress_hooks', []):
345             self.add_progress_hook(ph)
346
347     def warn_if_short_id(self, argv):
348         # short YouTube ID starting with dash?
349         idxs = [
350             i for i, a in enumerate(argv)
351             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
352         if idxs:
353             correct_argv = (
354                 ['youtube-dl'] +
355                 [a for i, a in enumerate(argv) if i not in idxs] +
356                 ['--'] + [argv[i] for i in idxs]
357             )
358             self.report_warning(
359                 'Long argument string detected. '
360                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
361                 args_to_str(correct_argv))
362
363     def add_info_extractor(self, ie):
364         """Add an InfoExtractor object to the end of the list."""
365         self._ies.append(ie)
366         self._ies_instances[ie.ie_key()] = ie
367         ie.set_downloader(self)
368
369     def get_info_extractor(self, ie_key):
370         """
371         Get an instance of an IE with name ie_key, it will try to get one from
372         the _ies list, if there's no instance it will create a new one and add
373         it to the extractor list.
374         """
375         ie = self._ies_instances.get(ie_key)
376         if ie is None:
377             ie = get_info_extractor(ie_key)()
378             self.add_info_extractor(ie)
379         return ie
380
381     def add_default_info_extractors(self):
382         """
383         Add the InfoExtractors returned by gen_extractors to the end of the list
384         """
385         for ie in gen_extractors():
386             self.add_info_extractor(ie)
387
388     def add_post_processor(self, pp):
389         """Add a PostProcessor object to the end of the chain."""
390         self._pps.append(pp)
391         pp.set_downloader(self)
392
393     def add_progress_hook(self, ph):
394         """Add the progress hook (currently only for the file downloader)"""
395         self._progress_hooks.append(ph)
396
397     def _bidi_workaround(self, message):
398         if not hasattr(self, '_output_channel'):
399             return message
400
401         assert hasattr(self, '_output_process')
402         assert isinstance(message, compat_str)
403         line_count = message.count('\n') + 1
404         self._output_process.stdin.write((message + '\n').encode('utf-8'))
405         self._output_process.stdin.flush()
406         res = ''.join(self._output_channel.readline().decode('utf-8')
407                       for _ in range(line_count))
408         return res[:-len('\n')]
409
410     def to_screen(self, message, skip_eol=False):
411         """Print message to stdout if not in quiet mode."""
412         return self.to_stdout(message, skip_eol, check_quiet=True)
413
414     def _write_string(self, s, out=None):
415         write_string(s, out=out, encoding=self.params.get('encoding'))
416
417     def to_stdout(self, message, skip_eol=False, check_quiet=False):
418         """Print message to stdout if not in quiet mode."""
419         if self.params.get('logger'):
420             self.params['logger'].debug(message)
421         elif not check_quiet or not self.params.get('quiet', False):
422             message = self._bidi_workaround(message)
423             terminator = ['\n', ''][skip_eol]
424             output = message + terminator
425
426             self._write_string(output, self._screen_file)
427
428     def to_stderr(self, message):
429         """Print message to stderr."""
430         assert isinstance(message, compat_str)
431         if self.params.get('logger'):
432             self.params['logger'].error(message)
433         else:
434             message = self._bidi_workaround(message)
435             output = message + '\n'
436             self._write_string(output, self._err_file)
437
438     def to_console_title(self, message):
439         if not self.params.get('consoletitle', False):
440             return
441         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
442             # c_wchar_p() might not be necessary if `message` is
443             # already of type unicode()
444             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
445         elif 'TERM' in os.environ:
446             self._write_string('\033]0;%s\007' % message, self._screen_file)
447
448     def save_console_title(self):
449         if not self.params.get('consoletitle', False):
450             return
451         if 'TERM' in os.environ:
452             # Save the title on stack
453             self._write_string('\033[22;0t', self._screen_file)
454
455     def restore_console_title(self):
456         if not self.params.get('consoletitle', False):
457             return
458         if 'TERM' in os.environ:
459             # Restore the title from stack
460             self._write_string('\033[23;0t', self._screen_file)
461
462     def __enter__(self):
463         self.save_console_title()
464         return self
465
466     def __exit__(self, *args):
467         self.restore_console_title()
468
469         if self.params.get('cookiefile') is not None:
470             self.cookiejar.save()
471
472     def trouble(self, message=None, tb=None):
473         """Determine action to take when a download problem appears.
474
475         Depending on if the downloader has been configured to ignore
476         download errors or not, this method may throw an exception or
477         not when errors are found, after printing the message.
478
479         tb, if given, is additional traceback information.
480         """
481         if message is not None:
482             self.to_stderr(message)
483         if self.params.get('verbose'):
484             if tb is None:
485                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
486                     tb = ''
487                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
488                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
489                     tb += compat_str(traceback.format_exc())
490                 else:
491                     tb_data = traceback.format_list(traceback.extract_stack())
492                     tb = ''.join(tb_data)
493             self.to_stderr(tb)
494         if not self.params.get('ignoreerrors', False):
495             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
496                 exc_info = sys.exc_info()[1].exc_info
497             else:
498                 exc_info = sys.exc_info()
499             raise DownloadError(message, exc_info)
500         self._download_retcode = 1
501
502     def report_warning(self, message):
503         '''
504         Print the message to stderr, it will be prefixed with 'WARNING:'
505         If stderr is a tty file the 'WARNING:' will be colored
506         '''
507         if self.params.get('logger') is not None:
508             self.params['logger'].warning(message)
509         else:
510             if self.params.get('no_warnings'):
511                 return
512             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
513                 _msg_header = '\033[0;33mWARNING:\033[0m'
514             else:
515                 _msg_header = 'WARNING:'
516             warning_message = '%s %s' % (_msg_header, message)
517             self.to_stderr(warning_message)
518
519     def report_error(self, message, tb=None):
520         '''
521         Do the same as trouble, but prefixes the message with 'ERROR:', colored
522         in red if stderr is a tty file.
523         '''
524         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
525             _msg_header = '\033[0;31mERROR:\033[0m'
526         else:
527             _msg_header = 'ERROR:'
528         error_message = '%s %s' % (_msg_header, message)
529         self.trouble(error_message, tb)
530
531     def report_file_already_downloaded(self, file_name):
532         """Report file has already been fully downloaded."""
533         try:
534             self.to_screen('[download] %s has already been downloaded' % file_name)
535         except UnicodeEncodeError:
536             self.to_screen('[download] The file has already been downloaded')
537
538     def prepare_filename(self, info_dict):
539         """Generate the output filename."""
540         try:
541             template_dict = dict(info_dict)
542
543             template_dict['epoch'] = int(time.time())
544             autonumber_size = self.params.get('autonumber_size')
545             if autonumber_size is None:
546                 autonumber_size = 5
547             autonumber_templ = '%0' + str(autonumber_size) + 'd'
548             template_dict['autonumber'] = autonumber_templ % self._num_downloads
549             if template_dict.get('playlist_index') is not None:
550                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
551             if template_dict.get('resolution') is None:
552                 if template_dict.get('width') and template_dict.get('height'):
553                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
554                 elif template_dict.get('height'):
555                     template_dict['resolution'] = '%sp' % template_dict['height']
556                 elif template_dict.get('width'):
557                     template_dict['resolution'] = '?x%d' % template_dict['width']
558
559             sanitize = lambda k, v: sanitize_filename(
560                 compat_str(v),
561                 restricted=self.params.get('restrictfilenames'),
562                 is_id=(k == 'id'))
563             template_dict = dict((k, sanitize(k, v))
564                                  for k, v in template_dict.items()
565                                  if v is not None)
566             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
567
568             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
569             tmpl = compat_expanduser(outtmpl)
570             filename = tmpl % template_dict
571             # Temporary fix for #4787
572             # 'Treat' all problem characters by passing filename through preferredencoding
573             # to workaround encoding issues with subprocess on python2 @ Windows
574             if sys.version_info < (3, 0) and sys.platform == 'win32':
575                 filename = encodeFilename(filename, True).decode(preferredencoding())
576             return filename
577         except ValueError as err:
578             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
579             return None
580
581     def _match_entry(self, info_dict, incomplete):
582         """ Returns None iff the file should be downloaded """
583
584         video_title = info_dict.get('title', info_dict.get('id', 'video'))
585         if 'title' in info_dict:
586             # This can happen when we're just evaluating the playlist
587             title = info_dict['title']
588             matchtitle = self.params.get('matchtitle', False)
589             if matchtitle:
590                 if not re.search(matchtitle, title, re.IGNORECASE):
591                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
592             rejecttitle = self.params.get('rejecttitle', False)
593             if rejecttitle:
594                 if re.search(rejecttitle, title, re.IGNORECASE):
595                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
596         date = info_dict.get('upload_date', None)
597         if date is not None:
598             dateRange = self.params.get('daterange', DateRange())
599             if date not in dateRange:
600                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
601         view_count = info_dict.get('view_count', None)
602         if view_count is not None:
603             min_views = self.params.get('min_views')
604             if min_views is not None and view_count < min_views:
605                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
606             max_views = self.params.get('max_views')
607             if max_views is not None and view_count > max_views:
608                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
609         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
610             return 'Skipping "%s" because it is age restricted' % video_title
611         if self.in_download_archive(info_dict):
612             return '%s has already been recorded in archive' % video_title
613
614         if not incomplete:
615             match_filter = self.params.get('match_filter')
616             if match_filter is not None:
617                 ret = match_filter(info_dict)
618                 if ret is not None:
619                     return ret
620
621         return None
622
623     @staticmethod
624     def add_extra_info(info_dict, extra_info):
625         '''Set the keys from extra_info in info dict if they are missing'''
626         for key, value in extra_info.items():
627             info_dict.setdefault(key, value)
628
629     def extract_info(self, url, download=True, ie_key=None, extra_info={},
630                      process=True):
631         '''
632         Returns a list with a dictionary for each video we find.
633         If 'download', also downloads the videos.
634         extra_info is a dict containing the extra values to add to each result
635         '''
636
637         if ie_key:
638             ies = [self.get_info_extractor(ie_key)]
639         else:
640             ies = self._ies
641
642         for ie in ies:
643             if not ie.suitable(url):
644                 continue
645
646             if not ie.working():
647                 self.report_warning('The program functionality for this site has been marked as broken, '
648                                     'and will probably not work.')
649
650             try:
651                 ie_result = ie.extract(url)
652                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
653                     break
654                 if isinstance(ie_result, list):
655                     # Backwards compatibility: old IE result format
656                     ie_result = {
657                         '_type': 'compat_list',
658                         'entries': ie_result,
659                     }
660                 self.add_default_extra_info(ie_result, ie, url)
661                 if process:
662                     return self.process_ie_result(ie_result, download, extra_info)
663                 else:
664                     return ie_result
665             except ExtractorError as de:  # An error we somewhat expected
666                 self.report_error(compat_str(de), de.format_traceback())
667                 break
668             except MaxDownloadsReached:
669                 raise
670             except Exception as e:
671                 if self.params.get('ignoreerrors', False):
672                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
673                     break
674                 else:
675                     raise
676         else:
677             self.report_error('no suitable InfoExtractor for URL %s' % url)
678
679     def add_default_extra_info(self, ie_result, ie, url):
680         self.add_extra_info(ie_result, {
681             'extractor': ie.IE_NAME,
682             'webpage_url': url,
683             'webpage_url_basename': url_basename(url),
684             'extractor_key': ie.ie_key(),
685         })
686
687     def process_ie_result(self, ie_result, download=True, extra_info={}):
688         """
689         Take the result of the ie(may be modified) and resolve all unresolved
690         references (URLs, playlist items).
691
692         It will also download the videos if 'download'.
693         Returns the resolved ie_result.
694         """
695
696         result_type = ie_result.get('_type', 'video')
697
698         if result_type in ('url', 'url_transparent'):
699             extract_flat = self.params.get('extract_flat', False)
700             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
701                     extract_flat is True):
702                 if self.params.get('forcejson', False):
703                     self.to_stdout(json.dumps(ie_result))
704                 return ie_result
705
706         if result_type == 'video':
707             self.add_extra_info(ie_result, extra_info)
708             return self.process_video_result(ie_result, download=download)
709         elif result_type == 'url':
710             # We have to add extra_info to the results because it may be
711             # contained in a playlist
712             return self.extract_info(ie_result['url'],
713                                      download,
714                                      ie_key=ie_result.get('ie_key'),
715                                      extra_info=extra_info)
716         elif result_type == 'url_transparent':
717             # Use the information from the embedding page
718             info = self.extract_info(
719                 ie_result['url'], ie_key=ie_result.get('ie_key'),
720                 extra_info=extra_info, download=False, process=False)
721
722             force_properties = dict(
723                 (k, v) for k, v in ie_result.items() if v is not None)
724             for f in ('_type', 'url'):
725                 if f in force_properties:
726                     del force_properties[f]
727             new_result = info.copy()
728             new_result.update(force_properties)
729
730             assert new_result.get('_type') != 'url_transparent'
731
732             return self.process_ie_result(
733                 new_result, download=download, extra_info=extra_info)
734         elif result_type == 'playlist' or result_type == 'multi_video':
735             # We process each entry in the playlist
736             playlist = ie_result.get('title', None) or ie_result.get('id', None)
737             self.to_screen('[download] Downloading playlist: %s' % playlist)
738
739             playlist_results = []
740
741             playliststart = self.params.get('playliststart', 1) - 1
742             playlistend = self.params.get('playlistend', None)
743             # For backwards compatibility, interpret -1 as whole list
744             if playlistend == -1:
745                 playlistend = None
746
747             playlistitems_str = self.params.get('playlist_items', None)
748             playlistitems = None
749             if playlistitems_str is not None:
750                 def iter_playlistitems(format):
751                     for string_segment in format.split(','):
752                         if '-' in string_segment:
753                             start, end = string_segment.split('-')
754                             for item in range(int(start), int(end) + 1):
755                                 yield int(item)
756                         else:
757                             yield int(string_segment)
758                 playlistitems = iter_playlistitems(playlistitems_str)
759
760             ie_entries = ie_result['entries']
761             if isinstance(ie_entries, list):
762                 n_all_entries = len(ie_entries)
763                 if playlistitems:
764                     entries = [ie_entries[i - 1] for i in playlistitems]
765                 else:
766                     entries = ie_entries[playliststart:playlistend]
767                 n_entries = len(entries)
768                 self.to_screen(
769                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
770                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
771             elif isinstance(ie_entries, PagedList):
772                 if playlistitems:
773                     entries = []
774                     for item in playlistitems:
775                         entries.extend(ie_entries.getslice(
776                             item - 1, item
777                         ))
778                 else:
779                     entries = ie_entries.getslice(
780                         playliststart, playlistend)
781                 n_entries = len(entries)
782                 self.to_screen(
783                     "[%s] playlist %s: Downloading %d videos" %
784                     (ie_result['extractor'], playlist, n_entries))
785             else:  # iterable
786                 if playlistitems:
787                     entry_list = list(ie_entries)
788                     entries = [entry_list[i - 1] for i in playlistitems]
789                 else:
790                     entries = list(itertools.islice(
791                         ie_entries, playliststart, playlistend))
792                 n_entries = len(entries)
793                 self.to_screen(
794                     "[%s] playlist %s: Downloading %d videos" %
795                     (ie_result['extractor'], playlist, n_entries))
796
797             if self.params.get('playlistreverse', False):
798                 entries = entries[::-1]
799
800             for i, entry in enumerate(entries, 1):
801                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
802                 extra = {
803                     'n_entries': n_entries,
804                     'playlist': playlist,
805                     'playlist_id': ie_result.get('id'),
806                     'playlist_title': ie_result.get('title'),
807                     'playlist_index': i + playliststart,
808                     'extractor': ie_result['extractor'],
809                     'webpage_url': ie_result['webpage_url'],
810                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
811                     'extractor_key': ie_result['extractor_key'],
812                 }
813
814                 reason = self._match_entry(entry, incomplete=True)
815                 if reason is not None:
816                     self.to_screen('[download] ' + reason)
817                     continue
818
819                 entry_result = self.process_ie_result(entry,
820                                                       download=download,
821                                                       extra_info=extra)
822                 playlist_results.append(entry_result)
823             ie_result['entries'] = playlist_results
824             return ie_result
825         elif result_type == 'compat_list':
826             self.report_warning(
827                 'Extractor %s returned a compat_list result. '
828                 'It needs to be updated.' % ie_result.get('extractor'))
829
830             def _fixup(r):
831                 self.add_extra_info(
832                     r,
833                     {
834                         'extractor': ie_result['extractor'],
835                         'webpage_url': ie_result['webpage_url'],
836                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
837                         'extractor_key': ie_result['extractor_key'],
838                     }
839                 )
840                 return r
841             ie_result['entries'] = [
842                 self.process_ie_result(_fixup(r), download, extra_info)
843                 for r in ie_result['entries']
844             ]
845             return ie_result
846         else:
847             raise Exception('Invalid result type: %s' % result_type)
848
849     def _apply_format_filter(self, format_spec, available_formats):
850         " Returns a tuple of the remaining format_spec and filtered formats "
851
852         OPERATORS = {
853             '<': operator.lt,
854             '<=': operator.le,
855             '>': operator.gt,
856             '>=': operator.ge,
857             '=': operator.eq,
858             '!=': operator.ne,
859         }
860         operator_rex = re.compile(r'''(?x)\s*\[
861             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
862             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
863             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
864             \]$
865             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
866         m = operator_rex.search(format_spec)
867         if m:
868             try:
869                 comparison_value = int(m.group('value'))
870             except ValueError:
871                 comparison_value = parse_filesize(m.group('value'))
872                 if comparison_value is None:
873                     comparison_value = parse_filesize(m.group('value') + 'B')
874                 if comparison_value is None:
875                     raise ValueError(
876                         'Invalid value %r in format specification %r' % (
877                             m.group('value'), format_spec))
878             op = OPERATORS[m.group('op')]
879
880         if not m:
881             STR_OPERATORS = {
882                 '=': operator.eq,
883                 '!=': operator.ne,
884             }
885             str_operator_rex = re.compile(r'''(?x)\s*\[
886                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
887                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
888                 \s*(?P<value>[a-zA-Z0-9_-]+)
889                 \s*\]$
890                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
891             m = str_operator_rex.search(format_spec)
892             if m:
893                 comparison_value = m.group('value')
894                 op = STR_OPERATORS[m.group('op')]
895
896         if not m:
897             raise ValueError('Invalid format specification %r' % format_spec)
898
899         def _filter(f):
900             actual_value = f.get(m.group('key'))
901             if actual_value is None:
902                 return m.group('none_inclusive')
903             return op(actual_value, comparison_value)
904         new_formats = [f for f in available_formats if _filter(f)]
905
906         new_format_spec = format_spec[:-len(m.group(0))]
907         if not new_format_spec:
908             new_format_spec = 'best'
909
910         return (new_format_spec, new_formats)
911
912     def select_format(self, format_spec, available_formats):
913         while format_spec.endswith(']'):
914             format_spec, available_formats = self._apply_format_filter(
915                 format_spec, available_formats)
916         if not available_formats:
917             return None
918
919         if format_spec == 'best' or format_spec is None:
920             return available_formats[-1]
921         elif format_spec == 'worst':
922             audiovideo_formats = [
923                 f for f in available_formats
924                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
925             if audiovideo_formats:
926                 return audiovideo_formats[0]
927             return available_formats[0]
928         elif format_spec == 'bestaudio':
929             audio_formats = [
930                 f for f in available_formats
931                 if f.get('vcodec') == 'none']
932             if audio_formats:
933                 return audio_formats[-1]
934         elif format_spec == 'worstaudio':
935             audio_formats = [
936                 f for f in available_formats
937                 if f.get('vcodec') == 'none']
938             if audio_formats:
939                 return audio_formats[0]
940         elif format_spec == 'bestvideo':
941             video_formats = [
942                 f for f in available_formats
943                 if f.get('acodec') == 'none']
944             if video_formats:
945                 return video_formats[-1]
946         elif format_spec == 'worstvideo':
947             video_formats = [
948                 f for f in available_formats
949                 if f.get('acodec') == 'none']
950             if video_formats:
951                 return video_formats[0]
952         else:
953             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
954             if format_spec in extensions:
955                 filter_f = lambda f: f['ext'] == format_spec
956             else:
957                 filter_f = lambda f: f['format_id'] == format_spec
958             matches = list(filter(filter_f, available_formats))
959             if matches:
960                 return matches[-1]
961         return None
962
963     def _calc_headers(self, info_dict):
964         res = std_headers.copy()
965
966         add_headers = info_dict.get('http_headers')
967         if add_headers:
968             res.update(add_headers)
969
970         cookies = self._calc_cookies(info_dict)
971         if cookies:
972             res['Cookie'] = cookies
973
974         return res
975
976     def _calc_cookies(self, info_dict):
977         pr = compat_urllib_request.Request(info_dict['url'])
978         self.cookiejar.add_cookie_header(pr)
979         return pr.get_header('Cookie')
980
981     def process_video_result(self, info_dict, download=True):
982         assert info_dict.get('_type', 'video') == 'video'
983
984         if 'id' not in info_dict:
985             raise ExtractorError('Missing "id" field in extractor result')
986         if 'title' not in info_dict:
987             raise ExtractorError('Missing "title" field in extractor result')
988
989         if 'playlist' not in info_dict:
990             # It isn't part of a playlist
991             info_dict['playlist'] = None
992             info_dict['playlist_index'] = None
993
994         thumbnails = info_dict.get('thumbnails')
995         if thumbnails is None:
996             thumbnail = info_dict.get('thumbnail')
997             if thumbnail:
998                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
999         if thumbnails:
1000             thumbnails.sort(key=lambda t: (
1001                 t.get('preference'), t.get('width'), t.get('height'),
1002                 t.get('id'), t.get('url')))
1003             for i, t in enumerate(thumbnails):
1004                 if 'width' in t and 'height' in t:
1005                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1006                 if t.get('id') is None:
1007                     t['id'] = '%d' % i
1008
1009         if thumbnails and 'thumbnail' not in info_dict:
1010             info_dict['thumbnail'] = thumbnails[-1]['url']
1011
1012         if 'display_id' not in info_dict and 'id' in info_dict:
1013             info_dict['display_id'] = info_dict['id']
1014
1015         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1016             # Working around negative timestamps in Windows
1017             # (see http://bugs.python.org/issue1646728)
1018             if info_dict['timestamp'] < 0 and os.name == 'nt':
1019                 info_dict['timestamp'] = 0
1020             upload_date = datetime.datetime.utcfromtimestamp(
1021                 info_dict['timestamp'])
1022             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1023
1024         if self.params.get('listsubtitles', False):
1025             if 'automatic_captions' in info_dict:
1026                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1027             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1028             return
1029         info_dict['requested_subtitles'] = self.process_subtitles(
1030             info_dict['id'], info_dict.get('subtitles'),
1031             info_dict.get('automatic_captions'))
1032
1033         # This extractors handle format selection themselves
1034         if info_dict['extractor'] in ['Youku']:
1035             if download:
1036                 self.process_info(info_dict)
1037             return info_dict
1038
1039         # We now pick which formats have to be downloaded
1040         if info_dict.get('formats') is None:
1041             # There's only one format available
1042             formats = [info_dict]
1043         else:
1044             formats = info_dict['formats']
1045
1046         if not formats:
1047             raise ExtractorError('No video formats found!')
1048
1049         # We check that all the formats have the format and format_id fields
1050         for i, format in enumerate(formats):
1051             if 'url' not in format:
1052                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1053
1054             if format.get('format_id') is None:
1055                 format['format_id'] = compat_str(i)
1056             if format.get('format') is None:
1057                 format['format'] = '{id} - {res}{note}'.format(
1058                     id=format['format_id'],
1059                     res=self.format_resolution(format),
1060                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1061                 )
1062             # Automatically determine file extension if missing
1063             if 'ext' not in format:
1064                 format['ext'] = determine_ext(format['url']).lower()
1065             # Add HTTP headers, so that external programs can use them from the
1066             # json output
1067             full_format_info = info_dict.copy()
1068             full_format_info.update(format)
1069             format['http_headers'] = self._calc_headers(full_format_info)
1070
1071         format_limit = self.params.get('format_limit', None)
1072         if format_limit:
1073             formats = list(takewhile_inclusive(
1074                 lambda f: f['format_id'] != format_limit, formats
1075             ))
1076
1077         # TODO Central sorting goes here
1078
1079         if formats[0] is not info_dict:
1080             # only set the 'formats' fields if the original info_dict list them
1081             # otherwise we end up with a circular reference, the first (and unique)
1082             # element in the 'formats' field in info_dict is info_dict itself,
1083             # wich can't be exported to json
1084             info_dict['formats'] = formats
1085         if self.params.get('listformats'):
1086             self.list_formats(info_dict)
1087             return
1088         if self.params.get('list_thumbnails'):
1089             self.list_thumbnails(info_dict)
1090             return
1091
1092         req_format = self.params.get('format')
1093         if req_format is None:
1094             req_format = 'best'
1095         formats_to_download = []
1096         if req_format == 'all':
1097             formats_to_download = formats
1098         else:
1099             for rfstr in req_format.split(','):
1100                 # We can accept formats requested in the format: 34/5/best, we pick
1101                 # the first that is available, starting from left
1102                 req_formats = rfstr.split('/')
1103                 for rf in req_formats:
1104                     if re.match(r'.+?\+.+?', rf) is not None:
1105                         # Two formats have been requested like '137+139'
1106                         format_1, format_2 = rf.split('+')
1107                         formats_info = (self.select_format(format_1, formats),
1108                                         self.select_format(format_2, formats))
1109                         if all(formats_info):
1110                             # The first format must contain the video and the
1111                             # second the audio
1112                             if formats_info[0].get('vcodec') == 'none':
1113                                 self.report_error('The first format must '
1114                                                   'contain the video, try using '
1115                                                   '"-f %s+%s"' % (format_2, format_1))
1116                                 return
1117                             output_ext = (
1118                                 formats_info[0]['ext']
1119                                 if self.params.get('merge_output_format') is None
1120                                 else self.params['merge_output_format'])
1121                             selected_format = {
1122                                 'requested_formats': formats_info,
1123                                 'format': '%s+%s' % (formats_info[0].get('format'),
1124                                                      formats_info[1].get('format')),
1125                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1126                                                         formats_info[1].get('format_id')),
1127                                 'width': formats_info[0].get('width'),
1128                                 'height': formats_info[0].get('height'),
1129                                 'resolution': formats_info[0].get('resolution'),
1130                                 'fps': formats_info[0].get('fps'),
1131                                 'vcodec': formats_info[0].get('vcodec'),
1132                                 'vbr': formats_info[0].get('vbr'),
1133                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1134                                 'acodec': formats_info[1].get('acodec'),
1135                                 'abr': formats_info[1].get('abr'),
1136                                 'ext': output_ext,
1137                             }
1138                         else:
1139                             selected_format = None
1140                     else:
1141                         selected_format = self.select_format(rf, formats)
1142                     if selected_format is not None:
1143                         formats_to_download.append(selected_format)
1144                         break
1145         if not formats_to_download:
1146             raise ExtractorError('requested format not available',
1147                                  expected=True)
1148
1149         if download:
1150             if len(formats_to_download) > 1:
1151                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1152             for format in formats_to_download:
1153                 new_info = dict(info_dict)
1154                 new_info.update(format)
1155                 self.process_info(new_info)
1156         # We update the info dict with the best quality format (backwards compatibility)
1157         info_dict.update(formats_to_download[-1])
1158         return info_dict
1159
1160     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1161         """Select the requested subtitles and their format"""
1162         available_subs = {}
1163         if normal_subtitles and self.params.get('writesubtitles'):
1164             available_subs.update(normal_subtitles)
1165         if automatic_captions and self.params.get('writeautomaticsub'):
1166             for lang, cap_info in automatic_captions.items():
1167                 if lang not in available_subs:
1168                     available_subs[lang] = cap_info
1169
1170         if (not self.params.get('writesubtitles') and not
1171                 self.params.get('writeautomaticsub') or not
1172                 available_subs):
1173             return None
1174
1175         if self.params.get('allsubtitles', False):
1176             requested_langs = available_subs.keys()
1177         else:
1178             if self.params.get('subtitleslangs', False):
1179                 requested_langs = self.params.get('subtitleslangs')
1180             elif 'en' in available_subs:
1181                 requested_langs = ['en']
1182             else:
1183                 requested_langs = [list(available_subs.keys())[0]]
1184
1185         formats_query = self.params.get('subtitlesformat', 'best')
1186         formats_preference = formats_query.split('/') if formats_query else []
1187         subs = {}
1188         for lang in requested_langs:
1189             formats = available_subs.get(lang)
1190             if formats is None:
1191                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1192                 continue
1193             for ext in formats_preference:
1194                 if ext == 'best':
1195                     f = formats[-1]
1196                     break
1197                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1198                 if matches:
1199                     f = matches[-1]
1200                     break
1201             else:
1202                 f = formats[-1]
1203                 self.report_warning(
1204                     'No subtitle format found matching "%s" for language %s, '
1205                     'using %s' % (formats_query, lang, f['ext']))
1206             subs[lang] = f
1207         return subs
1208
1209     def process_info(self, info_dict):
1210         """Process a single resolved IE result."""
1211
1212         assert info_dict.get('_type', 'video') == 'video'
1213
1214         max_downloads = self.params.get('max_downloads')
1215         if max_downloads is not None:
1216             if self._num_downloads >= int(max_downloads):
1217                 raise MaxDownloadsReached()
1218
1219         info_dict['fulltitle'] = info_dict['title']
1220         if len(info_dict['title']) > 200:
1221             info_dict['title'] = info_dict['title'][:197] + '...'
1222
1223         if 'format' not in info_dict:
1224             info_dict['format'] = info_dict['ext']
1225
1226         reason = self._match_entry(info_dict, incomplete=False)
1227         if reason is not None:
1228             self.to_screen('[download] ' + reason)
1229             return
1230
1231         self._num_downloads += 1
1232
1233         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1234
1235         # Forced printings
1236         if self.params.get('forcetitle', False):
1237             self.to_stdout(info_dict['fulltitle'])
1238         if self.params.get('forceid', False):
1239             self.to_stdout(info_dict['id'])
1240         if self.params.get('forceurl', False):
1241             if info_dict.get('requested_formats') is not None:
1242                 for f in info_dict['requested_formats']:
1243                     self.to_stdout(f['url'] + f.get('play_path', ''))
1244             else:
1245                 # For RTMP URLs, also include the playpath
1246                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1247         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1248             self.to_stdout(info_dict['thumbnail'])
1249         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1250             self.to_stdout(info_dict['description'])
1251         if self.params.get('forcefilename', False) and filename is not None:
1252             self.to_stdout(filename)
1253         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1254             self.to_stdout(formatSeconds(info_dict['duration']))
1255         if self.params.get('forceformat', False):
1256             self.to_stdout(info_dict['format'])
1257         if self.params.get('forcejson', False):
1258             self.to_stdout(json.dumps(info_dict))
1259
1260         # Do nothing else if in simulate mode
1261         if self.params.get('simulate', False):
1262             return
1263
1264         if filename is None:
1265             return
1266
1267         try:
1268             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1269             if dn and not os.path.exists(dn):
1270                 os.makedirs(dn)
1271         except (OSError, IOError) as err:
1272             self.report_error('unable to create directory ' + compat_str(err))
1273             return
1274
1275         if self.params.get('writedescription', False):
1276             descfn = filename + '.description'
1277             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1278                 self.to_screen('[info] Video description is already present')
1279             elif info_dict.get('description') is None:
1280                 self.report_warning('There\'s no description to write.')
1281             else:
1282                 try:
1283                     self.to_screen('[info] Writing video description to: ' + descfn)
1284                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1285                         descfile.write(info_dict['description'])
1286                 except (OSError, IOError):
1287                     self.report_error('Cannot write description file ' + descfn)
1288                     return
1289
1290         if self.params.get('writeannotations', False):
1291             annofn = filename + '.annotations.xml'
1292             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1293                 self.to_screen('[info] Video annotations are already present')
1294             else:
1295                 try:
1296                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1297                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1298                         annofile.write(info_dict['annotations'])
1299                 except (KeyError, TypeError):
1300                     self.report_warning('There are no annotations to write.')
1301                 except (OSError, IOError):
1302                     self.report_error('Cannot write annotations file: ' + annofn)
1303                     return
1304
1305         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1306                                        self.params.get('writeautomaticsub')])
1307
1308         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1309             # subtitles download errors are already managed as troubles in relevant IE
1310             # that way it will silently go on when used with unsupporting IE
1311             subtitles = info_dict['requested_subtitles']
1312             ie = self.get_info_extractor(info_dict['extractor_key'])
1313             for sub_lang, sub_info in subtitles.items():
1314                 sub_format = sub_info['ext']
1315                 if sub_info.get('data') is not None:
1316                     sub_data = sub_info['data']
1317                 else:
1318                     try:
1319                         sub_data = ie._download_webpage(
1320                             sub_info['url'], info_dict['id'], note=False)
1321                     except ExtractorError as err:
1322                         self.report_warning('Unable to download subtitle for "%s": %s' %
1323                                             (sub_lang, compat_str(err.cause)))
1324                         continue
1325                 try:
1326                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1327                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1328                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1329                     else:
1330                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1331                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1332                             subfile.write(sub_data)
1333                 except (OSError, IOError):
1334                     self.report_error('Cannot write subtitles file ' + sub_filename)
1335                     return
1336
1337         if self.params.get('writeinfojson', False):
1338             infofn = os.path.splitext(filename)[0] + '.info.json'
1339             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1340                 self.to_screen('[info] Video description metadata is already present')
1341             else:
1342                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1343                 try:
1344                     write_json_file(info_dict, infofn)
1345                 except (OSError, IOError):
1346                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1347                     return
1348
1349         self._write_thumbnails(info_dict, filename)
1350
1351         if not self.params.get('skip_download', False):
1352             try:
1353                 def dl(name, info):
1354                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1355                     for ph in self._progress_hooks:
1356                         fd.add_progress_hook(ph)
1357                     if self.params.get('verbose'):
1358                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1359                     return fd.download(name, info)
1360
1361                 if info_dict.get('requested_formats') is not None:
1362                     downloaded = []
1363                     success = True
1364                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1365                     if not merger.available:
1366                         postprocessors = []
1367                         self.report_warning('You have requested multiple '
1368                                             'formats but ffmpeg or avconv are not installed.'
1369                                             ' The formats won\'t be merged')
1370                     else:
1371                         postprocessors = [merger]
1372                     for f in info_dict['requested_formats']:
1373                         new_info = dict(info_dict)
1374                         new_info.update(f)
1375                         fname = self.prepare_filename(new_info)
1376                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1377                         downloaded.append(fname)
1378                         partial_success = dl(fname, new_info)
1379                         success = success and partial_success
1380                     info_dict['__postprocessors'] = postprocessors
1381                     info_dict['__files_to_merge'] = downloaded
1382                 else:
1383                     # Just a single file
1384                     success = dl(filename, info_dict)
1385             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1386                 self.report_error('unable to download video data: %s' % str(err))
1387                 return
1388             except (OSError, IOError) as err:
1389                 raise UnavailableVideoError(err)
1390             except (ContentTooShortError, ) as err:
1391                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1392                 return
1393
1394             if success:
1395                 # Fixup content
1396                 fixup_policy = self.params.get('fixup')
1397                 if fixup_policy is None:
1398                     fixup_policy = 'detect_or_warn'
1399
1400                 stretched_ratio = info_dict.get('stretched_ratio')
1401                 if stretched_ratio is not None and stretched_ratio != 1:
1402                     if fixup_policy == 'warn':
1403                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1404                             info_dict['id'], stretched_ratio))
1405                     elif fixup_policy == 'detect_or_warn':
1406                         stretched_pp = FFmpegFixupStretchedPP(self)
1407                         if stretched_pp.available:
1408                             info_dict.setdefault('__postprocessors', [])
1409                             info_dict['__postprocessors'].append(stretched_pp)
1410                         else:
1411                             self.report_warning(
1412                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1413                                     info_dict['id'], stretched_ratio))
1414                     else:
1415                         assert fixup_policy in ('ignore', 'never')
1416
1417                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1418                     if fixup_policy == 'warn':
1419                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1420                             info_dict['id']))
1421                     elif fixup_policy == 'detect_or_warn':
1422                         fixup_pp = FFmpegFixupM4aPP(self)
1423                         if fixup_pp.available:
1424                             info_dict.setdefault('__postprocessors', [])
1425                             info_dict['__postprocessors'].append(fixup_pp)
1426                         else:
1427                             self.report_warning(
1428                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1429                                     info_dict['id']))
1430                     else:
1431                         assert fixup_policy in ('ignore', 'never')
1432
1433                 try:
1434                     self.post_process(filename, info_dict)
1435                 except (PostProcessingError) as err:
1436                     self.report_error('postprocessing: %s' % str(err))
1437                     return
1438                 self.record_download_archive(info_dict)
1439
1440     def download(self, url_list):
1441         """Download a given list of URLs."""
1442         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1443         if (len(url_list) > 1 and
1444                 '%' not in outtmpl and
1445                 self.params.get('max_downloads') != 1):
1446             raise SameFileError(outtmpl)
1447
1448         for url in url_list:
1449             try:
1450                 # It also downloads the videos
1451                 res = self.extract_info(url)
1452             except UnavailableVideoError:
1453                 self.report_error('unable to download video')
1454             except MaxDownloadsReached:
1455                 self.to_screen('[info] Maximum number of downloaded files reached.')
1456                 raise
1457             else:
1458                 if self.params.get('dump_single_json', False):
1459                     self.to_stdout(json.dumps(res))
1460
1461         return self._download_retcode
1462
1463     def download_with_info_file(self, info_filename):
1464         with contextlib.closing(fileinput.FileInput(
1465                 [info_filename], mode='r',
1466                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1467             # FileInput doesn't have a read method, we can't call json.load
1468             info = json.loads('\n'.join(f))
1469         try:
1470             self.process_ie_result(info, download=True)
1471         except DownloadError:
1472             webpage_url = info.get('webpage_url')
1473             if webpage_url is not None:
1474                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1475                 return self.download([webpage_url])
1476             else:
1477                 raise
1478         return self._download_retcode
1479
1480     def post_process(self, filename, ie_info):
1481         """Run all the postprocessors on the given file."""
1482         info = dict(ie_info)
1483         info['filepath'] = filename
1484         pps_chain = []
1485         if ie_info.get('__postprocessors') is not None:
1486             pps_chain.extend(ie_info['__postprocessors'])
1487         pps_chain.extend(self._pps)
1488         for pp in pps_chain:
1489             keep_video = None
1490             old_filename = info['filepath']
1491             try:
1492                 keep_video_wish, info = pp.run(info)
1493                 if keep_video_wish is not None:
1494                     if keep_video_wish:
1495                         keep_video = keep_video_wish
1496                     elif keep_video is None:
1497                         # No clear decision yet, let IE decide
1498                         keep_video = keep_video_wish
1499             except PostProcessingError as e:
1500                 self.report_error(e.msg)
1501             if keep_video is False and not self.params.get('keepvideo', False):
1502                 try:
1503                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1504                     os.remove(encodeFilename(old_filename))
1505                 except (IOError, OSError):
1506                     self.report_warning('Unable to remove downloaded video file')
1507
1508     def _make_archive_id(self, info_dict):
1509         # Future-proof against any change in case
1510         # and backwards compatibility with prior versions
1511         extractor = info_dict.get('extractor_key')
1512         if extractor is None:
1513             if 'id' in info_dict:
1514                 extractor = info_dict.get('ie_key')  # key in a playlist
1515         if extractor is None:
1516             return None  # Incomplete video information
1517         return extractor.lower() + ' ' + info_dict['id']
1518
1519     def in_download_archive(self, info_dict):
1520         fn = self.params.get('download_archive')
1521         if fn is None:
1522             return False
1523
1524         vid_id = self._make_archive_id(info_dict)
1525         if vid_id is None:
1526             return False  # Incomplete video information
1527
1528         try:
1529             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1530                 for line in archive_file:
1531                     if line.strip() == vid_id:
1532                         return True
1533         except IOError as ioe:
1534             if ioe.errno != errno.ENOENT:
1535                 raise
1536         return False
1537
1538     def record_download_archive(self, info_dict):
1539         fn = self.params.get('download_archive')
1540         if fn is None:
1541             return
1542         vid_id = self._make_archive_id(info_dict)
1543         assert vid_id
1544         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1545             archive_file.write(vid_id + '\n')
1546
1547     @staticmethod
1548     def format_resolution(format, default='unknown'):
1549         if format.get('vcodec') == 'none':
1550             return 'audio only'
1551         if format.get('resolution') is not None:
1552             return format['resolution']
1553         if format.get('height') is not None:
1554             if format.get('width') is not None:
1555                 res = '%sx%s' % (format['width'], format['height'])
1556             else:
1557                 res = '%sp' % format['height']
1558         elif format.get('width') is not None:
1559             res = '?x%d' % format['width']
1560         else:
1561             res = default
1562         return res
1563
1564     def _format_note(self, fdict):
1565         res = ''
1566         if fdict.get('ext') in ['f4f', 'f4m']:
1567             res += '(unsupported) '
1568         if fdict.get('format_note') is not None:
1569             res += fdict['format_note'] + ' '
1570         if fdict.get('tbr') is not None:
1571             res += '%4dk ' % fdict['tbr']
1572         if fdict.get('container') is not None:
1573             if res:
1574                 res += ', '
1575             res += '%s container' % fdict['container']
1576         if (fdict.get('vcodec') is not None and
1577                 fdict.get('vcodec') != 'none'):
1578             if res:
1579                 res += ', '
1580             res += fdict['vcodec']
1581             if fdict.get('vbr') is not None:
1582                 res += '@'
1583         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1584             res += 'video@'
1585         if fdict.get('vbr') is not None:
1586             res += '%4dk' % fdict['vbr']
1587         if fdict.get('fps') is not None:
1588             res += ', %sfps' % fdict['fps']
1589         if fdict.get('acodec') is not None:
1590             if res:
1591                 res += ', '
1592             if fdict['acodec'] == 'none':
1593                 res += 'video only'
1594             else:
1595                 res += '%-5s' % fdict['acodec']
1596         elif fdict.get('abr') is not None:
1597             if res:
1598                 res += ', '
1599             res += 'audio'
1600         if fdict.get('abr') is not None:
1601             res += '@%3dk' % fdict['abr']
1602         if fdict.get('asr') is not None:
1603             res += ' (%5dHz)' % fdict['asr']
1604         if fdict.get('filesize') is not None:
1605             if res:
1606                 res += ', '
1607             res += format_bytes(fdict['filesize'])
1608         elif fdict.get('filesize_approx') is not None:
1609             if res:
1610                 res += ', '
1611             res += '~' + format_bytes(fdict['filesize_approx'])
1612         return res
1613
1614     def list_formats(self, info_dict):
1615         formats = info_dict.get('formats', [info_dict])
1616         table = [
1617             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1618             for f in formats
1619             if f.get('preference') is None or f['preference'] >= -1000]
1620         if len(formats) > 1:
1621             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1622
1623         header_line = ['format code', 'extension', 'resolution', 'note']
1624         self.to_screen(
1625             '[info] Available formats for %s:\n%s' %
1626             (info_dict['id'], render_table(header_line, table)))
1627
1628     def list_thumbnails(self, info_dict):
1629         thumbnails = info_dict.get('thumbnails')
1630         if not thumbnails:
1631             tn_url = info_dict.get('thumbnail')
1632             if tn_url:
1633                 thumbnails = [{'id': '0', 'url': tn_url}]
1634             else:
1635                 self.to_screen(
1636                     '[info] No thumbnails present for %s' % info_dict['id'])
1637                 return
1638
1639         self.to_screen(
1640             '[info] Thumbnails for %s:' % info_dict['id'])
1641         self.to_screen(render_table(
1642             ['ID', 'width', 'height', 'URL'],
1643             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1644
1645     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1646         if not subtitles:
1647             self.to_screen('%s has no %s' % (video_id, name))
1648             return
1649         self.to_screen(
1650             'Available %s for %s:' % (name, video_id))
1651         self.to_screen(render_table(
1652             ['Language', 'formats'],
1653             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1654                 for lang, formats in subtitles.items()]))
1655
1656     def urlopen(self, req):
1657         """ Start an HTTP download """
1658
1659         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1660         # always respected by websites, some tend to give out URLs with non percent-encoded
1661         # non-ASCII characters (see telemb.py, ard.py [#3412])
1662         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1663         # To work around aforementioned issue we will replace request's original URL with
1664         # percent-encoded one
1665         req_is_string = isinstance(req, compat_basestring)
1666         url = req if req_is_string else req.get_full_url()
1667         url_escaped = escape_url(url)
1668
1669         # Substitute URL if any change after escaping
1670         if url != url_escaped:
1671             if req_is_string:
1672                 req = url_escaped
1673             else:
1674                 req = compat_urllib_request.Request(
1675                     url_escaped, data=req.data, headers=req.headers,
1676                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1677
1678         return self._opener.open(req, timeout=self._socket_timeout)
1679
1680     def print_debug_header(self):
1681         if not self.params.get('verbose'):
1682             return
1683
1684         if type('') is not compat_str:
1685             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1686             self.report_warning(
1687                 'Your Python is broken! Update to a newer and supported version')
1688
1689         stdout_encoding = getattr(
1690             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1691         encoding_str = (
1692             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1693                 locale.getpreferredencoding(),
1694                 sys.getfilesystemencoding(),
1695                 stdout_encoding,
1696                 self.get_encoding()))
1697         write_string(encoding_str, encoding=None)
1698
1699         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1700         try:
1701             sp = subprocess.Popen(
1702                 ['git', 'rev-parse', '--short', 'HEAD'],
1703                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1704                 cwd=os.path.dirname(os.path.abspath(__file__)))
1705             out, err = sp.communicate()
1706             out = out.decode().strip()
1707             if re.match('[0-9a-f]+', out):
1708                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1709         except Exception:
1710             try:
1711                 sys.exc_clear()
1712             except Exception:
1713                 pass
1714         self._write_string('[debug] Python version %s - %s\n' % (
1715             platform.python_version(), platform_name()))
1716
1717         exe_versions = FFmpegPostProcessor.get_versions(self)
1718         exe_versions['rtmpdump'] = rtmpdump_version()
1719         exe_str = ', '.join(
1720             '%s %s' % (exe, v)
1721             for exe, v in sorted(exe_versions.items())
1722             if v
1723         )
1724         if not exe_str:
1725             exe_str = 'none'
1726         self._write_string('[debug] exe versions: %s\n' % exe_str)
1727
1728         proxy_map = {}
1729         for handler in self._opener.handlers:
1730             if hasattr(handler, 'proxies'):
1731                 proxy_map.update(handler.proxies)
1732         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1733
1734         if self.params.get('call_home', False):
1735             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1736             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1737             latest_version = self.urlopen(
1738                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1739             if version_tuple(latest_version) > version_tuple(__version__):
1740                 self.report_warning(
1741                     'You are using an outdated version (newest version: %s)! '
1742                     'See https://yt-dl.org/update if you need help updating.' %
1743                     latest_version)
1744
1745     def _setup_opener(self):
1746         timeout_val = self.params.get('socket_timeout')
1747         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1748
1749         opts_cookiefile = self.params.get('cookiefile')
1750         opts_proxy = self.params.get('proxy')
1751
1752         if opts_cookiefile is None:
1753             self.cookiejar = compat_cookiejar.CookieJar()
1754         else:
1755             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1756                 opts_cookiefile)
1757             if os.access(opts_cookiefile, os.R_OK):
1758                 self.cookiejar.load()
1759
1760         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1761             self.cookiejar)
1762         if opts_proxy is not None:
1763             if opts_proxy == '':
1764                 proxies = {}
1765             else:
1766                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1767         else:
1768             proxies = compat_urllib_request.getproxies()
1769             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1770             if 'http' in proxies and 'https' not in proxies:
1771                 proxies['https'] = proxies['http']
1772         proxy_handler = PerRequestProxyHandler(proxies)
1773
1774         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1775         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1776         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1777         opener = compat_urllib_request.build_opener(
1778             proxy_handler, https_handler, cookie_processor, ydlh)
1779
1780         # Delete the default user-agent header, which would otherwise apply in
1781         # cases where our custom HTTP handler doesn't come into play
1782         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1783         opener.addheaders = []
1784         self._opener = opener
1785
1786     def encode(self, s):
1787         if isinstance(s, bytes):
1788             return s  # Already encoded
1789
1790         try:
1791             return s.encode(self.get_encoding())
1792         except UnicodeEncodeError as err:
1793             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1794             raise
1795
1796     def get_encoding(self):
1797         encoding = self.params.get('encoding')
1798         if encoding is None:
1799             encoding = preferredencoding()
1800         return encoding
1801
1802     def _write_thumbnails(self, info_dict, filename):
1803         if self.params.get('writethumbnail', False):
1804             thumbnails = info_dict.get('thumbnails')
1805             if thumbnails:
1806                 thumbnails = [thumbnails[-1]]
1807         elif self.params.get('write_all_thumbnails', False):
1808             thumbnails = info_dict.get('thumbnails')
1809         else:
1810             return
1811
1812         if not thumbnails:
1813             # No thumbnails present, so return immediately
1814             return
1815
1816         for t in thumbnails:
1817             thumb_ext = determine_ext(t['url'], 'jpg')
1818             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1819             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1820             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1821
1822             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1823                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1824                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1825             else:
1826                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1827                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1828                 try:
1829                     uf = self.urlopen(t['url'])
1830                     with open(thumb_filename, 'wb') as thumbf:
1831                         shutil.copyfileobj(uf, thumbf)
1832                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1833                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1834                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1835                     self.report_warning('Unable to download thumbnail "%s": %s' %
1836                                         (t['url'], compat_str(err)))