Merge pull request #7519 from barlik/master
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_tokenize_tokenize,
38     compat_urllib_error,
39     compat_urllib_request,
40     compat_urllib_request_DataHandler,
41 )
42 from .utils import (
43     ContentTooShortError,
44     date_from_str,
45     DateRange,
46     DEFAULT_OUTTMPL,
47     determine_ext,
48     DownloadError,
49     encodeFilename,
50     ExtractorError,
51     format_bytes,
52     formatSeconds,
53     locked_file,
54     make_HTTPS_handler,
55     MaxDownloadsReached,
56     PagedList,
57     parse_filesize,
58     PerRequestProxyHandler,
59     PostProcessingError,
60     platform_name,
61     preferredencoding,
62     render_table,
63     SameFileError,
64     sanitize_filename,
65     sanitize_path,
66     std_headers,
67     subtitles_filename,
68     UnavailableVideoError,
69     url_basename,
70     version_tuple,
71     write_json_file,
72     write_string,
73     YoutubeDLCookieProcessor,
74     YoutubeDLHandler,
75     prepend_extension,
76     replace_extension,
77     args_to_str,
78     age_restricted,
79 )
80 from .cache import Cache
81 from .extractor import get_info_extractor, gen_extractors
82 from .downloader import get_suitable_downloader
83 from .downloader.rtmp import rtmpdump_version
84 from .postprocessor import (
85     FFmpegFixupM4aPP,
86     FFmpegFixupStretchedPP,
87     FFmpegMergerPP,
88     FFmpegPostProcessor,
89     get_postprocessor,
90 )
91 from .version import __version__
92
93
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    force_generic_extractor: Force downloader to use the generic extractor
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails:  Write all thumbnail formats to files
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    cn_verification_proxy:  URL of the proxy to use for IP address verification
                       on Chinese sites. (Experimental)
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see youtube_dl/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args.

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    postprocessor_args: A list of additional command-line arguments for the
                        postprocessor.
    """

    # Class-level placeholders; the real per-instance values are set in __init__.
    params = None  # dictionary of options (see class docstring)
    _ies = []  # registered InfoExtractors, in priority order
    _pps = []  # registered PostProcessors, in chain order
    _download_retcode = None  # process return code (0 ok, 1 after errors)
    _num_downloads = None  # number of downloads performed in this session
    _screen_file = None  # stream for screen output (stdout or stderr)
276
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    Dictionary of options (see the class docstring for the
                   full list); None means all defaults.
        auto_init: If True, print the debug header and register the default
                   info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # logtostderr (truthy -> index 1) selects stderr for screen output.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                # Route screen output through an external bidi filter
                # (bidiv, falling back to fribidi) connected via a pty.
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # errno 2 (ENOENT): neither bidi executable is installed.
                if ose.errno == 2:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors: 'key' names the class,
        # the remaining entries become constructor keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
353
354     def warn_if_short_id(self, argv):
355         # short YouTube ID starting with dash?
356         idxs = [
357             i for i, a in enumerate(argv)
358             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
359         if idxs:
360             correct_argv = (
361                 ['youtube-dl'] +
362                 [a for i, a in enumerate(argv) if i not in idxs] +
363                 ['--'] + [argv[i] for i in idxs]
364             )
365             self.report_warning(
366                 'Long argument string detected. '
367                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
368                 args_to_str(correct_argv))
369
370     def add_info_extractor(self, ie):
371         """Add an InfoExtractor object to the end of the list."""
372         self._ies.append(ie)
373         self._ies_instances[ie.ie_key()] = ie
374         ie.set_downloader(self)
375
376     def get_info_extractor(self, ie_key):
377         """
378         Get an instance of an IE with name ie_key, it will try to get one from
379         the _ies list, if there's no instance it will create a new one and add
380         it to the extractor list.
381         """
382         ie = self._ies_instances.get(ie_key)
383         if ie is None:
384             ie = get_info_extractor(ie_key)()
385             self.add_info_extractor(ie)
386         return ie
387
388     def add_default_info_extractors(self):
389         """
390         Add the InfoExtractors returned by gen_extractors to the end of the list
391         """
392         for ie in gen_extractors():
393             self.add_info_extractor(ie)
394
395     def add_post_processor(self, pp):
396         """Add a PostProcessor object to the end of the chain."""
397         self._pps.append(pp)
398         pp.set_downloader(self)
399
400     def add_progress_hook(self, ph):
401         """Add the progress hook (currently only for the file downloader)"""
402         self._progress_hooks.append(ph)
403
404     def _bidi_workaround(self, message):
405         if not hasattr(self, '_output_channel'):
406             return message
407
408         assert hasattr(self, '_output_process')
409         assert isinstance(message, compat_str)
410         line_count = message.count('\n') + 1
411         self._output_process.stdin.write((message + '\n').encode('utf-8'))
412         self._output_process.stdin.flush()
413         res = ''.join(self._output_channel.readline().decode('utf-8')
414                       for _ in range(line_count))
415         return res[:-len('\n')]
416
417     def to_screen(self, message, skip_eol=False):
418         """Print message to stdout if not in quiet mode."""
419         return self.to_stdout(message, skip_eol, check_quiet=True)
420
421     def _write_string(self, s, out=None):
422         write_string(s, out=out, encoding=self.params.get('encoding'))
423
424     def to_stdout(self, message, skip_eol=False, check_quiet=False):
425         """Print message to stdout if not in quiet mode."""
426         if self.params.get('logger'):
427             self.params['logger'].debug(message)
428         elif not check_quiet or not self.params.get('quiet', False):
429             message = self._bidi_workaround(message)
430             terminator = ['\n', ''][skip_eol]
431             output = message + terminator
432
433             self._write_string(output, self._screen_file)
434
435     def to_stderr(self, message):
436         """Print message to stderr."""
437         assert isinstance(message, compat_str)
438         if self.params.get('logger'):
439             self.params['logger'].error(message)
440         else:
441             message = self._bidi_workaround(message)
442             output = message + '\n'
443             self._write_string(output, self._err_file)
444
445     def to_console_title(self, message):
446         if not self.params.get('consoletitle', False):
447             return
448         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
449             # c_wchar_p() might not be necessary if `message` is
450             # already of type unicode()
451             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
452         elif 'TERM' in os.environ:
453             self._write_string('\033]0;%s\007' % message, self._screen_file)
454
455     def save_console_title(self):
456         if not self.params.get('consoletitle', False):
457             return
458         if 'TERM' in os.environ:
459             # Save the title on stack
460             self._write_string('\033[22;0t', self._screen_file)
461
462     def restore_console_title(self):
463         if not self.params.get('consoletitle', False):
464             return
465         if 'TERM' in os.environ:
466             # Restore the title from stack
467             self._write_string('\033[23;0t', self._screen_file)
468
469     def __enter__(self):
470         self.save_console_title()
471         return self
472
473     def __exit__(self, *args):
474         self.restore_console_title()
475
476         if self.params.get('cookiefile') is not None:
477             self.cookiejar.save()
478
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Wrapper exceptions (e.g. those carrying the original
                    # exc_info attribute) get the wrapped traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an exception handler: show the call stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped exception's
            # exc_info (when present) so the original cause is preserved.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
508
509     def report_warning(self, message):
510         '''
511         Print the message to stderr, it will be prefixed with 'WARNING:'
512         If stderr is a tty file the 'WARNING:' will be colored
513         '''
514         if self.params.get('logger') is not None:
515             self.params['logger'].warning(message)
516         else:
517             if self.params.get('no_warnings'):
518                 return
519             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
520                 _msg_header = '\033[0;33mWARNING:\033[0m'
521             else:
522                 _msg_header = 'WARNING:'
523             warning_message = '%s %s' % (_msg_header, message)
524             self.to_stderr(warning_message)
525
526     def report_error(self, message, tb=None):
527         '''
528         Do the same as trouble, but prefixes the message with 'ERROR:', colored
529         in red if stderr is a tty file.
530         '''
531         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
532             _msg_header = '\033[0;31mERROR:\033[0m'
533         else:
534             _msg_header = 'ERROR:'
535         error_message = '%s %s' % (_msg_header, message)
536         self.trouble(error_message, tb)
537
538     def report_file_already_downloaded(self, file_name):
539         """Report file has already been fully downloaded."""
540         try:
541             self.to_screen('[download] %s has already been downloaded' % file_name)
542         except UnicodeEncodeError:
543             self.to_screen('[download] The file has already been downloaded')
544
    def prepare_filename(self, info_dict):
        """Generate the output filename by expanding the outtmpl template
        with the (sanitized) fields of info_dict.

        Returns the sanitized path, or None when the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            # Zero-pad the playlist index to the width of the playlist size.
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            # Derive a 'resolution' field from width/height when it is missing.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            # Sanitize every value for filesystem use; the 'id' field gets the
            # special is_id treatment of sanitize_filename.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            # Fields missing from info_dict expand to 'NA' instead of raising.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
587
588     def _match_entry(self, info_dict, incomplete):
589         """ Returns None iff the file should be downloaded """
590
591         video_title = info_dict.get('title', info_dict.get('id', 'video'))
592         if 'title' in info_dict:
593             # This can happen when we're just evaluating the playlist
594             title = info_dict['title']
595             matchtitle = self.params.get('matchtitle', False)
596             if matchtitle:
597                 if not re.search(matchtitle, title, re.IGNORECASE):
598                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
599             rejecttitle = self.params.get('rejecttitle', False)
600             if rejecttitle:
601                 if re.search(rejecttitle, title, re.IGNORECASE):
602                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
603         date = info_dict.get('upload_date', None)
604         if date is not None:
605             dateRange = self.params.get('daterange', DateRange())
606             if date not in dateRange:
607                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
608         view_count = info_dict.get('view_count', None)
609         if view_count is not None:
610             min_views = self.params.get('min_views')
611             if min_views is not None and view_count < min_views:
612                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
613             max_views = self.params.get('max_views')
614             if max_views is not None and view_count > max_views:
615                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
616         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
617             return 'Skipping "%s" because it is age restricted' % video_title
618         if self.in_download_archive(info_dict):
619             return '%s has already been recorded in archive' % video_title
620
621         if not incomplete:
622             match_filter = self.params.get('match_filter')
623             if match_filter is not None:
624                 ret = match_filter(info_dict)
625                 if ret is not None:
626                     return ret
627
628         return None
629
630     @staticmethod
631     def add_extra_info(info_dict, extra_info):
632         '''Set the keys from extra_info in info dict if they are missing'''
633         for key, value in extra_info.items():
634             info_dict.setdefault(key, value)
635
636     def extract_info(self, url, download=True, ie_key=None, extra_info={},
637                      process=True, force_generic_extractor=False):
638         '''
639         Returns a list with a dictionary for each video we find.
640         If 'download', also downloads the videos.
641         extra_info is a dict containing the extra values to add to each result
642         '''
643
644         if not ie_key and force_generic_extractor:
645             ie_key = 'Generic'
646
647         if ie_key:
648             ies = [self.get_info_extractor(ie_key)]
649         else:
650             ies = self._ies
651
652         for ie in ies:
653             if not ie.suitable(url):
654                 continue
655
656             if not ie.working():
657                 self.report_warning('The program functionality for this site has been marked as broken, '
658                                     'and will probably not work.')
659
660             try:
661                 ie_result = ie.extract(url)
662                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
663                     break
664                 if isinstance(ie_result, list):
665                     # Backwards compatibility: old IE result format
666                     ie_result = {
667                         '_type': 'compat_list',
668                         'entries': ie_result,
669                     }
670                 self.add_default_extra_info(ie_result, ie, url)
671                 if process:
672                     return self.process_ie_result(ie_result, download, extra_info)
673                 else:
674                     return ie_result
675             except ExtractorError as de:  # An error we somewhat expected
676                 self.report_error(compat_str(de), de.format_traceback())
677                 break
678             except MaxDownloadsReached:
679                 raise
680             except Exception as e:
681                 if self.params.get('ignoreerrors', False):
682                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
683                     break
684                 else:
685                     raise
686         else:
687             self.report_error('no suitable InfoExtractor for URL %s' % url)
688
689     def add_default_extra_info(self, ie_result, ie, url):
690         self.add_extra_info(ie_result, {
691             'extractor': ie.IE_NAME,
692             'webpage_url': url,
693             'webpage_url_basename': url_basename(url),
694             'extractor_key': ie.ie_key(),
695         })
696
697     def process_ie_result(self, ie_result, download=True, extra_info={}):
698         """
699         Take the result of the ie(may be modified) and resolve all unresolved
700         references (URLs, playlist items).
701
702         It will also download the videos if 'download'.
703         Returns the resolved ie_result.
704         """
705
706         result_type = ie_result.get('_type', 'video')
707
708         if result_type in ('url', 'url_transparent'):
709             extract_flat = self.params.get('extract_flat', False)
710             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
711                     extract_flat is True):
712                 if self.params.get('forcejson', False):
713                     self.to_stdout(json.dumps(ie_result))
714                 return ie_result
715
716         if result_type == 'video':
717             self.add_extra_info(ie_result, extra_info)
718             return self.process_video_result(ie_result, download=download)
719         elif result_type == 'url':
720             # We have to add extra_info to the results because it may be
721             # contained in a playlist
722             return self.extract_info(ie_result['url'],
723                                      download,
724                                      ie_key=ie_result.get('ie_key'),
725                                      extra_info=extra_info)
726         elif result_type == 'url_transparent':
727             # Use the information from the embedding page
728             info = self.extract_info(
729                 ie_result['url'], ie_key=ie_result.get('ie_key'),
730                 extra_info=extra_info, download=False, process=False)
731
732             force_properties = dict(
733                 (k, v) for k, v in ie_result.items() if v is not None)
734             for f in ('_type', 'url'):
735                 if f in force_properties:
736                     del force_properties[f]
737             new_result = info.copy()
738             new_result.update(force_properties)
739
740             assert new_result.get('_type') != 'url_transparent'
741
742             return self.process_ie_result(
743                 new_result, download=download, extra_info=extra_info)
744         elif result_type == 'playlist' or result_type == 'multi_video':
745             # We process each entry in the playlist
746             playlist = ie_result.get('title', None) or ie_result.get('id', None)
747             self.to_screen('[download] Downloading playlist: %s' % playlist)
748
749             playlist_results = []
750
751             playliststart = self.params.get('playliststart', 1) - 1
752             playlistend = self.params.get('playlistend', None)
753             # For backwards compatibility, interpret -1 as whole list
754             if playlistend == -1:
755                 playlistend = None
756
757             playlistitems_str = self.params.get('playlist_items', None)
758             playlistitems = None
759             if playlistitems_str is not None:
760                 def iter_playlistitems(format):
761                     for string_segment in format.split(','):
762                         if '-' in string_segment:
763                             start, end = string_segment.split('-')
764                             for item in range(int(start), int(end) + 1):
765                                 yield int(item)
766                         else:
767                             yield int(string_segment)
768                 playlistitems = iter_playlistitems(playlistitems_str)
769
770             ie_entries = ie_result['entries']
771             if isinstance(ie_entries, list):
772                 n_all_entries = len(ie_entries)
773                 if playlistitems:
774                     entries = [
775                         ie_entries[i - 1] for i in playlistitems
776                         if -n_all_entries <= i - 1 < n_all_entries]
777                 else:
778                     entries = ie_entries[playliststart:playlistend]
779                 n_entries = len(entries)
780                 self.to_screen(
781                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
782                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
783             elif isinstance(ie_entries, PagedList):
784                 if playlistitems:
785                     entries = []
786                     for item in playlistitems:
787                         entries.extend(ie_entries.getslice(
788                             item - 1, item
789                         ))
790                 else:
791                     entries = ie_entries.getslice(
792                         playliststart, playlistend)
793                 n_entries = len(entries)
794                 self.to_screen(
795                     "[%s] playlist %s: Downloading %d videos" %
796                     (ie_result['extractor'], playlist, n_entries))
797             else:  # iterable
798                 if playlistitems:
799                     entry_list = list(ie_entries)
800                     entries = [entry_list[i - 1] for i in playlistitems]
801                 else:
802                     entries = list(itertools.islice(
803                         ie_entries, playliststart, playlistend))
804                 n_entries = len(entries)
805                 self.to_screen(
806                     "[%s] playlist %s: Downloading %d videos" %
807                     (ie_result['extractor'], playlist, n_entries))
808
809             if self.params.get('playlistreverse', False):
810                 entries = entries[::-1]
811
812             for i, entry in enumerate(entries, 1):
813                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
814                 extra = {
815                     'n_entries': n_entries,
816                     'playlist': playlist,
817                     'playlist_id': ie_result.get('id'),
818                     'playlist_title': ie_result.get('title'),
819                     'playlist_index': i + playliststart,
820                     'extractor': ie_result['extractor'],
821                     'webpage_url': ie_result['webpage_url'],
822                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
823                     'extractor_key': ie_result['extractor_key'],
824                 }
825
826                 reason = self._match_entry(entry, incomplete=True)
827                 if reason is not None:
828                     self.to_screen('[download] ' + reason)
829                     continue
830
831                 entry_result = self.process_ie_result(entry,
832                                                       download=download,
833                                                       extra_info=extra)
834                 playlist_results.append(entry_result)
835             ie_result['entries'] = playlist_results
836             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
837             return ie_result
838         elif result_type == 'compat_list':
839             self.report_warning(
840                 'Extractor %s returned a compat_list result. '
841                 'It needs to be updated.' % ie_result.get('extractor'))
842
843             def _fixup(r):
844                 self.add_extra_info(
845                     r,
846                     {
847                         'extractor': ie_result['extractor'],
848                         'webpage_url': ie_result['webpage_url'],
849                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
850                         'extractor_key': ie_result['extractor_key'],
851                     }
852                 )
853                 return r
854             ie_result['entries'] = [
855                 self.process_ie_result(_fixup(r), download, extra_info)
856                 for r in ie_result['entries']
857             ]
858             return ie_result
859         else:
860             raise Exception('Invalid result type: %s' % result_type)
861
862     def _build_format_filter(self, filter_spec):
863         " Returns a function to filter the formats according to the filter_spec "
864
865         OPERATORS = {
866             '<': operator.lt,
867             '<=': operator.le,
868             '>': operator.gt,
869             '>=': operator.ge,
870             '=': operator.eq,
871             '!=': operator.ne,
872         }
873         operator_rex = re.compile(r'''(?x)\s*
874             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
875             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
876             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
877             $
878             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
879         m = operator_rex.search(filter_spec)
880         if m:
881             try:
882                 comparison_value = int(m.group('value'))
883             except ValueError:
884                 comparison_value = parse_filesize(m.group('value'))
885                 if comparison_value is None:
886                     comparison_value = parse_filesize(m.group('value') + 'B')
887                 if comparison_value is None:
888                     raise ValueError(
889                         'Invalid value %r in format specification %r' % (
890                             m.group('value'), filter_spec))
891             op = OPERATORS[m.group('op')]
892
893         if not m:
894             STR_OPERATORS = {
895                 '=': operator.eq,
896                 '!=': operator.ne,
897             }
898             str_operator_rex = re.compile(r'''(?x)
899                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
900                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
901                 \s*(?P<value>[a-zA-Z0-9_-]+)
902                 \s*$
903                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
904             m = str_operator_rex.search(filter_spec)
905             if m:
906                 comparison_value = m.group('value')
907                 op = STR_OPERATORS[m.group('op')]
908
909         if not m:
910             raise ValueError('Invalid filter specification %r' % filter_spec)
911
912         def _filter(f):
913             actual_value = f.get(m.group('key'))
914             if actual_value is None:
915                 return m.group('none_inclusive')
916             return op(actual_value, comparison_value)
917         return _filter
918
    def build_format_selector(self, format_spec):
        """Compile the -f format specification string into a selector
        function mapping a list of format dicts to the chosen format(s).

        Grammar: single names ('best', '22', 'mp4'), fallback 'a/b',
        multi-select 'a,b', merge 'a+b', parenthesised groups and
        bracketed filters like '[height<=480]'.  The spec is lexed with
        the stdlib tokenizer, parsed into FormatSelector trees, then
        compiled to nested generator functions.  Raises SyntaxError on
        an invalid spec.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError whose message points a
            # caret at column start[1] of the spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Collect the raw token strings of a '[...]' filter up to the
            # closing ']' and return them re-joined as one string.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush any accumulated name before the bracket.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Glue adjacent names/numbers/unknown ops into one NAME,
                    # e.g. 'mp4-baseline-16x9'.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of one (sub-)expression; the inside_*
            # flags say which delimiter terminates the current context.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare filter applies to 'best' by default.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a FormatSelector tree (or list of them) into a
            # generator function over the available formats.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # First alternative that yields anything wins.
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    # Formats are assumed sorted worst-to-best, so index 0
                    # is 'worst' and -1 is 'best'.
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec is an extension or a format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into one synthetic
                    # format dict for the merger postprocessor.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Wrap the selector so each '[...]' filter narrows the format
            # list before selection.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with one-token push-back
            # (restore_last_token), needed by the recursive parser.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1175
1176     def _calc_headers(self, info_dict):
1177         res = std_headers.copy()
1178
1179         add_headers = info_dict.get('http_headers')
1180         if add_headers:
1181             res.update(add_headers)
1182
1183         cookies = self._calc_cookies(info_dict)
1184         if cookies:
1185             res['Cookie'] = cookies
1186
1187         return res
1188
1189     def _calc_cookies(self, info_dict):
1190         pr = compat_urllib_request.Request(info_dict['url'])
1191         self.cookiejar.add_cookie_header(pr)
1192         return pr.get_header('Cookie')
1193
    def process_video_result(self, info_dict, download=True):
        """Post-process a single-video result in place: fill in defaults
        (playlist fields, thumbnails, upload_date, subtitle extensions),
        normalize the formats list, then select and optionally download
        the requested format(s).  Returns the mutated info_dict (or None
        for the pure listing modes)."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a lone 'thumbnail' into a one-element 'thumbnails' list,
        # then sort worst-to-best and assign ids/resolution strings.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Last element is the best one after the sort above.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Guess missing subtitle extensions from their URLs.
        subtitles = info_dict.get('subtitles')
        if subtitles:
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if 'ext' not in subtitle_format:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            # Listing mode: nothing to download, no result to return.
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate duplicates by appending '-<n>'.
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default format: prefer merged bestvideo+bestaudio when ffmpeg
            # can merge and we are not streaming to stdout or a live stream.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted'] and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)
        formats_to_download = list(format_selector(formats))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1341
1342     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1343         """Select the requested subtitles and their format"""
1344         available_subs = {}
1345         if normal_subtitles and self.params.get('writesubtitles'):
1346             available_subs.update(normal_subtitles)
1347         if automatic_captions and self.params.get('writeautomaticsub'):
1348             for lang, cap_info in automatic_captions.items():
1349                 if lang not in available_subs:
1350                     available_subs[lang] = cap_info
1351
1352         if (not self.params.get('writesubtitles') and not
1353                 self.params.get('writeautomaticsub') or not
1354                 available_subs):
1355             return None
1356
1357         if self.params.get('allsubtitles', False):
1358             requested_langs = available_subs.keys()
1359         else:
1360             if self.params.get('subtitleslangs', False):
1361                 requested_langs = self.params.get('subtitleslangs')
1362             elif 'en' in available_subs:
1363                 requested_langs = ['en']
1364             else:
1365                 requested_langs = [list(available_subs.keys())[0]]
1366
1367         formats_query = self.params.get('subtitlesformat', 'best')
1368         formats_preference = formats_query.split('/') if formats_query else []
1369         subs = {}
1370         for lang in requested_langs:
1371             formats = available_subs.get(lang)
1372             if formats is None:
1373                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1374                 continue
1375             for ext in formats_preference:
1376                 if ext == 'best':
1377                     f = formats[-1]
1378                     break
1379                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1380                 if matches:
1381                     f = matches[-1]
1382                     break
1383             else:
1384                 f = formats[-1]
1385                 self.report_warning(
1386                     'No subtitle format found matching "%s" for language %s, '
1387                     'using %s' % (formats_query, lang, f['ext']))
1388             subs[lang] = f
1389         return subs
1390
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Runs everything that happens to one video after extraction and
        format selection: forced printing (--get-*), writing the
        description/annotations/subtitles/info-JSON/thumbnail side files,
        the download itself (including downloading and merging multiple
        requested formats), post-download fixups, postprocessing, and
        recording the video in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the full title available; the working title is truncated so it
        # remains usable inside generated file names.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # User-supplied filters (title match, date range, archive, ...) may
        # reject the video; `reason` is a human-readable explanation.
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings (--get-title, --get-url, --dump-json, ...)
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not a string: nothing to write.
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    # Subtitle content was already extracted inline.
                    sub_data = sub_info['data']
                else:
                    try:
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, compat_str(err.cause)))
                        continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub_data)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    # Internal 'requested_*' keys are stripped before dumping.
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                def dl(name, info):
                    # Download a single format into `name`, wiring up the
                    # registered progress hooks.
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # A video+audio merge was requested: download every
                    # component format separately, then merge with ffmpeg.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # Whether the (video, audio) pair can share a container
                        # without remuxing into mkv.
                        video, audio = formats
                        # Check extension
                        # NOTE(review): the names look swapped (video_ext is
                        # read from `audio` and vice versa); the membership
                        # test below is symmetric, so the result is
                        # unaffected - confirm before relying on the names.
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                                # NOTE(review): ('webm') is a plain string, not
                                # a 1-tuple, so `in` performs substring
                                # matching here ('web' in 'webm' is True) -
                                # presumably meant to be ('webm',).
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        for f in requested_formats:
                            # Each component goes to an 'f<format_id>'-prefixed
                            # temporary file; the merger combines them later.
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success:
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                # Repair anamorphic (non-square-pixel) video if possible.
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Repair malformed DASH m4a containers if possible.
                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id']))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                self.record_download_archive(info_dict)
1654
1655     def download(self, url_list):
1656         """Download a given list of URLs."""
1657         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1658         if (len(url_list) > 1 and
1659                 '%' not in outtmpl and
1660                 self.params.get('max_downloads') != 1):
1661             raise SameFileError(outtmpl)
1662
1663         for url in url_list:
1664             try:
1665                 # It also downloads the videos
1666                 res = self.extract_info(
1667                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1668             except UnavailableVideoError:
1669                 self.report_error('unable to download video')
1670             except MaxDownloadsReached:
1671                 self.to_screen('[info] Maximum number of downloaded files reached.')
1672                 raise
1673             else:
1674                 if self.params.get('dump_single_json', False):
1675                     self.to_stdout(json.dumps(res))
1676
1677         return self._download_retcode
1678
1679     def download_with_info_file(self, info_filename):
1680         with contextlib.closing(fileinput.FileInput(
1681                 [info_filename], mode='r',
1682                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1683             # FileInput doesn't have a read method, we can't call json.load
1684             info = self.filter_requested_info(json.loads('\n'.join(f)))
1685         try:
1686             self.process_ie_result(info, download=True)
1687         except DownloadError:
1688             webpage_url = info.get('webpage_url')
1689             if webpage_url is not None:
1690                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1691                 return self.download([webpage_url])
1692             else:
1693                 raise
1694         return self._download_retcode
1695
1696     @staticmethod
1697     def filter_requested_info(info_dict):
1698         return dict(
1699             (k, v) for k, v in info_dict.items()
1700             if k not in ['requested_formats', 'requested_subtitles'])
1701
1702     def post_process(self, filename, ie_info):
1703         """Run all the postprocessors on the given file."""
1704         info = dict(ie_info)
1705         info['filepath'] = filename
1706         pps_chain = []
1707         if ie_info.get('__postprocessors') is not None:
1708             pps_chain.extend(ie_info['__postprocessors'])
1709         pps_chain.extend(self._pps)
1710         for pp in pps_chain:
1711             files_to_delete = []
1712             try:
1713                 files_to_delete, info = pp.run(info)
1714             except PostProcessingError as e:
1715                 self.report_error(e.msg)
1716             if files_to_delete and not self.params.get('keepvideo', False):
1717                 for old_filename in files_to_delete:
1718                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1719                     try:
1720                         os.remove(encodeFilename(old_filename))
1721                     except (IOError, OSError):
1722                         self.report_warning('Unable to remove downloaded original file')
1723
1724     def _make_archive_id(self, info_dict):
1725         # Future-proof against any change in case
1726         # and backwards compatibility with prior versions
1727         extractor = info_dict.get('extractor_key')
1728         if extractor is None:
1729             if 'id' in info_dict:
1730                 extractor = info_dict.get('ie_key')  # key in a playlist
1731         if extractor is None:
1732             return None  # Incomplete video information
1733         return extractor.lower() + ' ' + info_dict['id']
1734
1735     def in_download_archive(self, info_dict):
1736         fn = self.params.get('download_archive')
1737         if fn is None:
1738             return False
1739
1740         vid_id = self._make_archive_id(info_dict)
1741         if vid_id is None:
1742             return False  # Incomplete video information
1743
1744         try:
1745             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1746                 for line in archive_file:
1747                     if line.strip() == vid_id:
1748                         return True
1749         except IOError as ioe:
1750             if ioe.errno != errno.ENOENT:
1751                 raise
1752         return False
1753
1754     def record_download_archive(self, info_dict):
1755         fn = self.params.get('download_archive')
1756         if fn is None:
1757             return
1758         vid_id = self._make_archive_id(info_dict)
1759         assert vid_id
1760         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1761             archive_file.write(vid_id + '\n')
1762
1763     @staticmethod
1764     def format_resolution(format, default='unknown'):
1765         if format.get('vcodec') == 'none':
1766             return 'audio only'
1767         if format.get('resolution') is not None:
1768             return format['resolution']
1769         if format.get('height') is not None:
1770             if format.get('width') is not None:
1771                 res = '%sx%s' % (format['width'], format['height'])
1772             else:
1773                 res = '%sp' % format['height']
1774         elif format.get('width') is not None:
1775             res = '?x%d' % format['width']
1776         else:
1777             res = default
1778         return res
1779
1780     def _format_note(self, fdict):
1781         res = ''
1782         if fdict.get('ext') in ['f4f', 'f4m']:
1783             res += '(unsupported) '
1784         if fdict.get('format_note') is not None:
1785             res += fdict['format_note'] + ' '
1786         if fdict.get('tbr') is not None:
1787             res += '%4dk ' % fdict['tbr']
1788         if fdict.get('container') is not None:
1789             if res:
1790                 res += ', '
1791             res += '%s container' % fdict['container']
1792         if (fdict.get('vcodec') is not None and
1793                 fdict.get('vcodec') != 'none'):
1794             if res:
1795                 res += ', '
1796             res += fdict['vcodec']
1797             if fdict.get('vbr') is not None:
1798                 res += '@'
1799         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1800             res += 'video@'
1801         if fdict.get('vbr') is not None:
1802             res += '%4dk' % fdict['vbr']
1803         if fdict.get('fps') is not None:
1804             res += ', %sfps' % fdict['fps']
1805         if fdict.get('acodec') is not None:
1806             if res:
1807                 res += ', '
1808             if fdict['acodec'] == 'none':
1809                 res += 'video only'
1810             else:
1811                 res += '%-5s' % fdict['acodec']
1812         elif fdict.get('abr') is not None:
1813             if res:
1814                 res += ', '
1815             res += 'audio'
1816         if fdict.get('abr') is not None:
1817             res += '@%3dk' % fdict['abr']
1818         if fdict.get('asr') is not None:
1819             res += ' (%5dHz)' % fdict['asr']
1820         if fdict.get('filesize') is not None:
1821             if res:
1822                 res += ', '
1823             res += format_bytes(fdict['filesize'])
1824         elif fdict.get('filesize_approx') is not None:
1825             if res:
1826                 res += ', '
1827             res += '~' + format_bytes(fdict['filesize_approx'])
1828         return res
1829
1830     def list_formats(self, info_dict):
1831         formats = info_dict.get('formats', [info_dict])
1832         table = [
1833             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1834             for f in formats
1835             if f.get('preference') is None or f['preference'] >= -1000]
1836         if len(formats) > 1:
1837             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1838
1839         header_line = ['format code', 'extension', 'resolution', 'note']
1840         self.to_screen(
1841             '[info] Available formats for %s:\n%s' %
1842             (info_dict['id'], render_table(header_line, table)))
1843
1844     def list_thumbnails(self, info_dict):
1845         thumbnails = info_dict.get('thumbnails')
1846         if not thumbnails:
1847             tn_url = info_dict.get('thumbnail')
1848             if tn_url:
1849                 thumbnails = [{'id': '0', 'url': tn_url}]
1850             else:
1851                 self.to_screen(
1852                     '[info] No thumbnails present for %s' % info_dict['id'])
1853                 return
1854
1855         self.to_screen(
1856             '[info] Thumbnails for %s:' % info_dict['id'])
1857         self.to_screen(render_table(
1858             ['ID', 'width', 'height', 'URL'],
1859             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1860
1861     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1862         if not subtitles:
1863             self.to_screen('%s has no %s' % (video_id, name))
1864             return
1865         self.to_screen(
1866             'Available %s for %s:' % (name, video_id))
1867         self.to_screen(render_table(
1868             ['Language', 'formats'],
1869             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1870                 for lang, formats in subtitles.items()]))
1871
1872     def urlopen(self, req):
1873         """ Start an HTTP download """
1874         return self._opener.open(req, timeout=self._socket_timeout)
1875
    def print_debug_header(self):
        """Write debugging information (versions, encodings, proxies) to the
        debug output; no-op unless verbose mode is enabled."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by an object without .encoding.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best-effort: report the git commit when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only; clears the ignored exception so it does not
                # linger in sys.exc_info().
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: contact yt-dl.org to report the public IP address and
            # check whether a newer release is available.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1940
1941     def _setup_opener(self):
1942         timeout_val = self.params.get('socket_timeout')
1943         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1944
1945         opts_cookiefile = self.params.get('cookiefile')
1946         opts_proxy = self.params.get('proxy')
1947
1948         if opts_cookiefile is None:
1949             self.cookiejar = compat_cookiejar.CookieJar()
1950         else:
1951             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1952                 opts_cookiefile)
1953             if os.access(opts_cookiefile, os.R_OK):
1954                 self.cookiejar.load()
1955
1956         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1957         if opts_proxy is not None:
1958             if opts_proxy == '':
1959                 proxies = {}
1960             else:
1961                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1962         else:
1963             proxies = compat_urllib_request.getproxies()
1964             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1965             if 'http' in proxies and 'https' not in proxies:
1966                 proxies['https'] = proxies['http']
1967         proxy_handler = PerRequestProxyHandler(proxies)
1968
1969         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1970         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1971         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1972         data_handler = compat_urllib_request_DataHandler()
1973         opener = compat_urllib_request.build_opener(
1974             proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
1975
1976         # Delete the default user-agent header, which would otherwise apply in
1977         # cases where our custom HTTP handler doesn't come into play
1978         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1979         opener.addheaders = []
1980         self._opener = opener
1981
1982     def encode(self, s):
1983         if isinstance(s, bytes):
1984             return s  # Already encoded
1985
1986         try:
1987             return s.encode(self.get_encoding())
1988         except UnicodeEncodeError as err:
1989             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1990             raise
1991
1992     def get_encoding(self):
1993         encoding = self.params.get('encoding')
1994         if encoding is None:
1995             encoding = preferredencoding()
1996         return encoding
1997
1998     def _write_thumbnails(self, info_dict, filename):
1999         if self.params.get('writethumbnail', False):
2000             thumbnails = info_dict.get('thumbnails')
2001             if thumbnails:
2002                 thumbnails = [thumbnails[-1]]
2003         elif self.params.get('write_all_thumbnails', False):
2004             thumbnails = info_dict.get('thumbnails')
2005         else:
2006             return
2007
2008         if not thumbnails:
2009             # No thumbnails present, so return immediately
2010             return
2011
2012         for t in thumbnails:
2013             thumb_ext = determine_ext(t['url'], 'jpg')
2014             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2015             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2016             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2017
2018             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2019                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2020                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2021             else:
2022                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2023                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2024                 try:
2025                     uf = self.urlopen(t['url'])
2026                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2027                         shutil.copyfileobj(uf, thumbf)
2028                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2029                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2030                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2031                     self.report_warning('Unable to download thumbnail "%s": %s' %
2032                                         (t['url'], compat_str(err)))