Add experimental geo restriction bypass mechanism
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_os_name,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41     compat_urllib_request_DataHandler,
42 )
43 from .utils import (
44     age_restricted,
45     args_to_str,
46     ContentTooShortError,
47     date_from_str,
48     DateRange,
49     DEFAULT_OUTTMPL,
50     determine_ext,
51     determine_protocol,
52     DownloadError,
53     encode_compat_str,
54     encodeFilename,
55     error_to_compat_str,
56     ExtractorError,
57     format_bytes,
58     formatSeconds,
59     GeoRestrictedError,
60     ISO3166Utils,
61     locked_file,
62     make_HTTPS_handler,
63     MaxDownloadsReached,
64     PagedList,
65     parse_filesize,
66     PerRequestProxyHandler,
67     platform_name,
68     PostProcessingError,
69     preferredencoding,
70     prepend_extension,
71     register_socks_protocols,
72     render_table,
73     replace_extension,
74     SameFileError,
75     sanitize_filename,
76     sanitize_path,
77     sanitize_url,
78     sanitized_Request,
79     std_headers,
80     subtitles_filename,
81     UnavailableVideoError,
82     url_basename,
83     version_tuple,
84     write_json_file,
85     write_string,
86     YoutubeDLCookieProcessor,
87     YoutubeDLHandler,
88 )
89 from .cache import Cache
90 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
91 from .downloader import get_suitable_downloader
92 from .downloader.rtmp import rtmpdump_version
93 from .postprocessor import (
94     FFmpegFixupM3u8PP,
95     FFmpegFixupM4aPP,
96     FFmpegFixupStretchedPP,
97     FFmpegMergerPP,
98     FFmpegPostProcessor,
99     get_postprocessor,
100 )
101 from .version import __version__
102
103 if compat_os_name == 'nt':
104     import ctypes
105
106
107 class YoutubeDL(object):
108     """YoutubeDL class.
109
110     YoutubeDL objects are the ones responsible for downloading the
111     actual video file and writing it to disk if the user has requested
112     it, among some other tasks. In most cases there should be one per
113     program. As, given a video URL, the downloader doesn't know how to
114     extract all the needed information, task that InfoExtractors do, it
115     has to pass the URL to one of them.
116
117     For this, YoutubeDL objects have a method that allows
118     InfoExtractors to be registered in a given order. When it is passed
119     a URL, the YoutubeDL object hands it to the first InfoExtractor it
120     finds that reports being able to handle it. The InfoExtractor extracts
121     all the information about the video or videos the URL refers to, and
122     YoutubeDL processes the extracted information, possibly using a File
123     Downloader to download the video.
124
125     YoutubeDL objects accept a lot of parameters. In order not to saturate
126     the object constructor with arguments, it receives a dictionary of
127     options instead. These options are available through the params
128     attribute for the InfoExtractors to use. The YoutubeDL also
129     registers itself as the downloader in charge for the InfoExtractors
130     that are added to it, so this is a "mutual registration".
131
132     Available options:
133
134     username:          Username for authentication purposes.
135     password:          Password for authentication purposes.
136     videopassword:     Password for accessing a video.
137     ap_mso:            Adobe Pass multiple-system operator identifier.
138     ap_username:       Multiple-system operator account username.
139     ap_password:       Multiple-system operator account password.
140     usenetrc:          Use netrc for authentication instead.
141     verbose:           Print additional info to stdout.
142     quiet:             Do not print messages to stdout.
143     no_warnings:       Do not print out anything for warnings.
144     forceurl:          Force printing final URL.
145     forcetitle:        Force printing title.
146     forceid:           Force printing ID.
147     forcethumbnail:    Force printing thumbnail URL.
148     forcedescription:  Force printing description.
149     forcefilename:     Force printing final filename.
150     forceduration:     Force printing duration.
151     forcejson:         Force printing info_dict as JSON.
152     dump_single_json:  Force printing the info_dict of the whole playlist
153                        (or video) as a single JSON line.
154     simulate:          Do not download the video files.
155     format:            Video format code. See options.py for more information.
156     outtmpl:           Template for output names.
157     restrictfilenames: Do not allow "&" and spaces in file names
158     ignoreerrors:      Do not stop on download errors.
159     force_generic_extractor: Force downloader to use the generic extractor
160     nooverwrites:      Prevent overwriting files.
161     playliststart:     Playlist item to start at.
162     playlistend:       Playlist item to end at.
163     playlist_items:    Specific indices of playlist to download.
164     playlistreverse:   Download playlist items in reverse order.
165     playlistrandom:    Download playlist items in random order.
166     matchtitle:        Download only matching titles.
167     rejecttitle:       Reject downloads for matching titles.
168     logger:            Log messages to a logging.Logger instance.
169     logtostderr:       Log messages to stderr instead of stdout.
170     writedescription:  Write the video description to a .description file
171     writeinfojson:     Write the video metadata to a .info.json file
172     writeannotations:  Write the video annotations to a .annotations.xml file
173     writethumbnail:    Write the thumbnail image to a file
174     write_all_thumbnails:  Write all thumbnail formats to files
175     writesubtitles:    Write the video subtitles to a file
176     writeautomaticsub: Write the automatically generated subtitles to a file
177     allsubtitles:      Downloads all the subtitles of the video
178                        (requires writesubtitles or writeautomaticsub)
179     listsubtitles:     Lists all available subtitles for the video
180     subtitlesformat:   The format code for subtitles
181     subtitleslangs:    List of languages of the subtitles to download
182     keepvideo:         Keep the video file after post-processing
183     daterange:         A DateRange object, download only if the upload_date is in the range.
184     skip_download:     Skip the actual download of the video file
185     cachedir:          Location of the cache files in the filesystem.
186                        False to disable filesystem cache.
187     noplaylist:        Download single video instead of a playlist if in doubt.
188     age_limit:         An integer representing the user's age in years.
189                        Unsuitable videos for the given age are skipped.
190     min_views:         An integer representing the minimum view count the video
191                        must have in order to not be skipped.
192                        Videos without view count information are always
193                        downloaded. None for no limit.
194     max_views:         An integer representing the maximum view count.
195                        Videos that are more popular than that are not
196                        downloaded.
197                        Videos without view count information are always
198                        downloaded. None for no limit.
199     download_archive:  File name of a file where all downloads are recorded.
200                        Videos already present in the file are not downloaded
201                        again.
202     cookiefile:        File name where cookies should be read from and dumped to.
203     nocheckcertificate:Do not verify SSL certificates
204     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
205                        At the moment, this is only supported by YouTube.
206     proxy:             URL of the proxy server to use
207     geo_verification_proxy:  URL of the proxy to use for IP address verification
208                        on geo-restricted sites. (Experimental)
209     socket_timeout:    Time to wait for unresponsive hosts, in seconds
210     bidi_workaround:   Work around buggy terminals without bidirectional text
211                        support, using fribidi
212     debug_printtraffic:Print out sent and received HTTP traffic
213     include_ads:       Download ads as well
214     default_search:    Prepend this string if an input url is not valid.
215                        'auto' for elaborate guessing
216     encoding:          Use this encoding instead of the system-specified.
217     extract_flat:      Do not resolve URLs, return the immediate result.
218                        Pass in 'in_playlist' to only show this behavior for
219                        playlist items.
220     postprocessors:    A list of dictionaries, each with an entry
221                        * key:  The name of the postprocessor. See
222                                youtube_dl/postprocessor/__init__.py for a list.
223                        as well as any further keyword arguments for the
224                        postprocessor.
225     progress_hooks:    A list of functions that get called on download
226                        progress, with a dictionary with the entries
227                        * status: One of "downloading", "error", or "finished".
228                                  Check this first and ignore unknown values.
229
230                        If status is one of "downloading", or "finished", the
231                        following properties may also be present:
232                        * filename: The final filename (always present)
233                        * tmpfilename: The filename we're currently writing to
234                        * downloaded_bytes: Bytes on disk
235                        * total_bytes: Size of the whole file, None if unknown
236                        * total_bytes_estimate: Guess of the eventual file size,
237                                                None if unavailable.
238                        * elapsed: The number of seconds since download started.
239                        * eta: The estimated time in seconds, None if unknown
240                        * speed: The download speed in bytes/second, None if
241                                 unknown
242                        * fragment_index: The counter of the currently
243                                          downloaded video fragment.
244                        * fragment_count: The number of fragments (= individual
245                                          files that will be merged)
246
247                        Progress hooks are guaranteed to be called at least once
248                        (with status "finished") if the download is successful.
249     merge_output_format: Extension to use when merging formats.
250     fixup:             Automatically correct known faults of the file.
251                        One of:
252                        - "never": do nothing
253                        - "warn": only emit a warning
254                        - "detect_or_warn": check whether we can do anything
255                                            about it, warn otherwise (default)
256     source_address:    (Experimental) Client-side IP address to bind to.
257     call_home:         Boolean, true iff we are allowed to contact the
258                        youtube-dl servers for debugging.
259     sleep_interval:    Number of seconds to sleep before each download when
260                        used alone or a lower bound of a range for randomized
261                        sleep before each download (minimum possible number
262                        of seconds to sleep) when used along with
263                        max_sleep_interval.
264     max_sleep_interval:Upper bound of a range for randomized sleep before each
265                        download (maximum possible number of seconds to sleep).
266                        Must only be used along with sleep_interval.
267                        Actual sleep time will be a random float from range
268                        [sleep_interval; max_sleep_interval].
269     listformats:       Print an overview of available video formats and exit.
270     list_thumbnails:   Print a table of all thumbnails and exit.
271     match_filter:      A function that gets called with the info_dict of
272                        every video.
273                        If it returns a message, the video is ignored.
274                        If it returns None, the video is downloaded.
275                        match_filter_func in utils.py is one example for this.
276     no_color:          Do not emit color codes in output.
277     bypass_geo_restriction:
278                        Bypass geographic restriction via faking X-Forwarded-For
279                        HTTP header (experimental)
280     bypass_geo_restriction_as_country:
281                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
282                        explicit geographic restriction bypassing via faking
283                        X-Forwarded-For HTTP header (experimental)
284
285     The following options determine which downloader is picked:
286     external_downloader: Executable of the external downloader to call.
287                        None or unset for standard (built-in) downloader.
288     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
289                        if True, otherwise use ffmpeg/avconv if False, otherwise
290                        use downloader suggested by extractor if None.
291
292     The following parameters are not used by YoutubeDL itself, they are used by
293     the downloader (see youtube_dl/downloader/common.py):
294     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
295     noresizebuffer, retries, continuedl, noprogress, consoletitle,
296     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
297
298     The following options are used by the post processors:
299     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
300                        otherwise prefer avconv.
301     postprocessor_args: A list of additional command-line arguments for the
302                         postprocessor.
303     """
304
    # Class-level placeholders; __init__ rebinds every one of these names on
    # the instance, so these values only apply before initialisation.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
311
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.

        params is a dictionary of options (see the class docstring); its
        entries are applied on top of the built-in defaults and stored in
        self.params.
        If auto_init is true, the debug header is printed and the default
        info extractors are registered.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Index by the flag: a false 'logtostderr' selects stdout, a true one stderr
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        # Deprecated option: warn, and use its value only when the replacement
        # option has not been given
        if self.params.get('cn_verification_proxy') is not None:
            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        if params.get('bidi_workaround', False):
            try:
                import pty
                # The helper process writes to the slave end of the pty; the
                # master end is kept as self._output_channel for reading back
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                # Try bidiv first and fall back to fribidi if starting it
                # fails with OSError
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # A missing executable only disables the workaround; any other
                # OS error is propagated
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            # Work on a copy so that removing 'key' does not modify the
            # definition supplied by the caller
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
395
396     def warn_if_short_id(self, argv):
397         # short YouTube ID starting with dash?
398         idxs = [
399             i for i, a in enumerate(argv)
400             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
401         if idxs:
402             correct_argv = (
403                 ['youtube-dl'] +
404                 [a for i, a in enumerate(argv) if i not in idxs] +
405                 ['--'] + [argv[i] for i in idxs]
406             )
407             self.report_warning(
408                 'Long argument string detected. '
409                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
410                 args_to_str(correct_argv))
411
412     def add_info_extractor(self, ie):
413         """Add an InfoExtractor object to the end of the list."""
414         self._ies.append(ie)
415         if not isinstance(ie, type):
416             self._ies_instances[ie.ie_key()] = ie
417             ie.set_downloader(self)
418
419     def get_info_extractor(self, ie_key):
420         """
421         Get an instance of an IE with name ie_key, it will try to get one from
422         the _ies list, if there's no instance it will create a new one and add
423         it to the extractor list.
424         """
425         ie = self._ies_instances.get(ie_key)
426         if ie is None:
427             ie = get_info_extractor(ie_key)()
428             self.add_info_extractor(ie)
429         return ie
430
431     def add_default_info_extractors(self):
432         """
433         Add the InfoExtractors returned by gen_extractors to the end of the list
434         """
435         for ie in gen_extractor_classes():
436             self.add_info_extractor(ie)
437
438     def add_post_processor(self, pp):
439         """Add a PostProcessor object to the end of the chain."""
440         self._pps.append(pp)
441         pp.set_downloader(self)
442
443     def add_progress_hook(self, ph):
444         """Add the progress hook (currently only for the file downloader)"""
445         self._progress_hooks.append(ph)
446
447     def _bidi_workaround(self, message):
448         if not hasattr(self, '_output_channel'):
449             return message
450
451         assert hasattr(self, '_output_process')
452         assert isinstance(message, compat_str)
453         line_count = message.count('\n') + 1
454         self._output_process.stdin.write((message + '\n').encode('utf-8'))
455         self._output_process.stdin.flush()
456         res = ''.join(self._output_channel.readline().decode('utf-8')
457                       for _ in range(line_count))
458         return res[:-len('\n')]
459
460     def to_screen(self, message, skip_eol=False):
461         """Print message to stdout if not in quiet mode."""
462         return self.to_stdout(message, skip_eol, check_quiet=True)
463
464     def _write_string(self, s, out=None):
465         write_string(s, out=out, encoding=self.params.get('encoding'))
466
467     def to_stdout(self, message, skip_eol=False, check_quiet=False):
468         """Print message to stdout if not in quiet mode."""
469         if self.params.get('logger'):
470             self.params['logger'].debug(message)
471         elif not check_quiet or not self.params.get('quiet', False):
472             message = self._bidi_workaround(message)
473             terminator = ['\n', ''][skip_eol]
474             output = message + terminator
475
476             self._write_string(output, self._screen_file)
477
478     def to_stderr(self, message):
479         """Print message to stderr."""
480         assert isinstance(message, compat_str)
481         if self.params.get('logger'):
482             self.params['logger'].error(message)
483         else:
484             message = self._bidi_workaround(message)
485             output = message + '\n'
486             self._write_string(output, self._err_file)
487
488     def to_console_title(self, message):
489         if not self.params.get('consoletitle', False):
490             return
491         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
492             # c_wchar_p() might not be necessary if `message` is
493             # already of type unicode()
494             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
495         elif 'TERM' in os.environ:
496             self._write_string('\033]0;%s\007' % message, self._screen_file)
497
498     def save_console_title(self):
499         if not self.params.get('consoletitle', False):
500             return
501         if 'TERM' in os.environ:
502             # Save the title on stack
503             self._write_string('\033[22;0t', self._screen_file)
504
505     def restore_console_title(self):
506         if not self.params.get('consoletitle', False):
507             return
508         if 'TERM' in os.environ:
509             # Restore the title from stack
510             self._write_string('\033[23;0t', self._screen_file)
511
512     def __enter__(self):
513         self.save_console_title()
514         return self
515
516     def __exit__(self, *args):
517         self.restore_console_title()
518
519         if self.params.get('cookiefile') is not None:
520             self.cookiejar.save()
521
522     def trouble(self, message=None, tb=None):
523         """Determine action to take when a download problem appears.
524
525         Depending on if the downloader has been configured to ignore
526         download errors or not, this method may throw an exception or
527         not when errors are found, after printing the message.
528
529         tb, if given, is additional traceback information.
530         """
531         if message is not None:
532             self.to_stderr(message)
533         if self.params.get('verbose'):
534             if tb is None:
535                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
536                     tb = ''
537                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
538                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
539                     tb += encode_compat_str(traceback.format_exc())
540                 else:
541                     tb_data = traceback.format_list(traceback.extract_stack())
542                     tb = ''.join(tb_data)
543             self.to_stderr(tb)
544         if not self.params.get('ignoreerrors', False):
545             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
546                 exc_info = sys.exc_info()[1].exc_info
547             else:
548                 exc_info = sys.exc_info()
549             raise DownloadError(message, exc_info)
550         self._download_retcode = 1
551
552     def report_warning(self, message):
553         '''
554         Print the message to stderr, it will be prefixed with 'WARNING:'
555         If stderr is a tty file the 'WARNING:' will be colored
556         '''
557         if self.params.get('logger') is not None:
558             self.params['logger'].warning(message)
559         else:
560             if self.params.get('no_warnings'):
561                 return
562             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
563                 _msg_header = '\033[0;33mWARNING:\033[0m'
564             else:
565                 _msg_header = 'WARNING:'
566             warning_message = '%s %s' % (_msg_header, message)
567             self.to_stderr(warning_message)
568
569     def report_error(self, message, tb=None):
570         '''
571         Do the same as trouble, but prefixes the message with 'ERROR:', colored
572         in red if stderr is a tty file.
573         '''
574         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
575             _msg_header = '\033[0;31mERROR:\033[0m'
576         else:
577             _msg_header = 'ERROR:'
578         error_message = '%s %s' % (_msg_header, message)
579         self.trouble(error_message, tb)
580
581     def report_file_already_downloaded(self, file_name):
582         """Report file has already been fully downloaded."""
583         try:
584             self.to_screen('[download] %s has already been downloaded' % file_name)
585         except UnicodeEncodeError:
586             self.to_screen('[download] The file has already been downloaded')
587
588     def prepare_filename(self, info_dict):
589         """Generate the output filename."""
590         try:
591             template_dict = dict(info_dict)
592
593             template_dict['epoch'] = int(time.time())
594             autonumber_size = self.params.get('autonumber_size')
595             if autonumber_size is None:
596                 autonumber_size = 5
597             autonumber_templ = '%0' + str(autonumber_size) + 'd'
598             template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
599             if template_dict.get('playlist_index') is not None:
600                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
601             if template_dict.get('resolution') is None:
602                 if template_dict.get('width') and template_dict.get('height'):
603                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
604                 elif template_dict.get('height'):
605                     template_dict['resolution'] = '%sp' % template_dict['height']
606                 elif template_dict.get('width'):
607                     template_dict['resolution'] = '%dx?' % template_dict['width']
608
609             sanitize = lambda k, v: sanitize_filename(
610                 compat_str(v),
611                 restricted=self.params.get('restrictfilenames'),
612                 is_id=(k == 'id'))
613             template_dict = dict((k, sanitize(k, v))
614                                  for k, v in template_dict.items()
615                                  if v is not None and not isinstance(v, (list, tuple, dict)))
616             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
617
618             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
619             tmpl = compat_expanduser(outtmpl)
620             filename = tmpl % template_dict
621             # Temporary fix for #4787
622             # 'Treat' all problem characters by passing filename through preferredencoding
623             # to workaround encoding issues with subprocess on python2 @ Windows
624             if sys.version_info < (3, 0) and sys.platform == 'win32':
625                 filename = encodeFilename(filename, True).decode(preferredencoding())
626             return sanitize_path(filename)
627         except ValueError as err:
628             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
629             return None
630
631     def _match_entry(self, info_dict, incomplete):
632         """ Returns None iff the file should be downloaded """
633
634         video_title = info_dict.get('title', info_dict.get('id', 'video'))
635         if 'title' in info_dict:
636             # This can happen when we're just evaluating the playlist
637             title = info_dict['title']
638             matchtitle = self.params.get('matchtitle', False)
639             if matchtitle:
640                 if not re.search(matchtitle, title, re.IGNORECASE):
641                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
642             rejecttitle = self.params.get('rejecttitle', False)
643             if rejecttitle:
644                 if re.search(rejecttitle, title, re.IGNORECASE):
645                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
646         date = info_dict.get('upload_date')
647         if date is not None:
648             dateRange = self.params.get('daterange', DateRange())
649             if date not in dateRange:
650                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
651         view_count = info_dict.get('view_count')
652         if view_count is not None:
653             min_views = self.params.get('min_views')
654             if min_views is not None and view_count < min_views:
655                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
656             max_views = self.params.get('max_views')
657             if max_views is not None and view_count > max_views:
658                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
659         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
660             return 'Skipping "%s" because it is age restricted' % video_title
661         if self.in_download_archive(info_dict):
662             return '%s has already been recorded in archive' % video_title
663
664         if not incomplete:
665             match_filter = self.params.get('match_filter')
666             if match_filter is not None:
667                 ret = match_filter(info_dict)
668                 if ret is not None:
669                     return ret
670
671         return None
672
673     @staticmethod
674     def add_extra_info(info_dict, extra_info):
675         '''Set the keys from extra_info in info dict if they are missing'''
676         for key, value in extra_info.items():
677             info_dict.setdefault(key, value)
678
679     def extract_info(self, url, download=True, ie_key=None, extra_info={},
680                      process=True, force_generic_extractor=False):
681         '''
682         Returns a list with a dictionary for each video we find.
683         If 'download', also downloads the videos.
684         extra_info is a dict containing the extra values to add to each result
685         '''
686
687         if not ie_key and force_generic_extractor:
688             ie_key = 'Generic'
689
690         if ie_key:
691             ies = [self.get_info_extractor(ie_key)]
692         else:
693             ies = self._ies
694
695         for ie in ies:
696             if not ie.suitable(url):
697                 continue
698
699             ie = self.get_info_extractor(ie.ie_key())
700             if not ie.working():
701                 self.report_warning('The program functionality for this site has been marked as broken, '
702                                     'and will probably not work.')
703
704             try:
705                 ie_result = ie.extract(url)
706                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
707                     break
708                 if isinstance(ie_result, list):
709                     # Backwards compatibility: old IE result format
710                     ie_result = {
711                         '_type': 'compat_list',
712                         'entries': ie_result,
713                     }
714                 self.add_default_extra_info(ie_result, ie, url)
715                 if process:
716                     return self.process_ie_result(ie_result, download, extra_info)
717                 else:
718                     return ie_result
719             except GeoRestrictedError as e:
720                 msg = e.msg
721                 if e.countries:
722                     msg += '\nThis video is available in %s.' % ', '.join(
723                         map(ISO3166Utils.short2full, e.countries))
724                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
725                 self.report_error(msg)
726                 break
727             except ExtractorError as e:  # An error we somewhat expected
728                 self.report_error(compat_str(e), e.format_traceback())
729                 break
730             except MaxDownloadsReached:
731                 raise
732             except Exception as e:
733                 if self.params.get('ignoreerrors', False):
734                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
735                     break
736                 else:
737                     raise
738         else:
739             self.report_error('no suitable InfoExtractor for URL %s' % url)
740
741     def add_default_extra_info(self, ie_result, ie, url):
742         self.add_extra_info(ie_result, {
743             'extractor': ie.IE_NAME,
744             'webpage_url': url,
745             'webpage_url_basename': url_basename(url),
746             'extractor_key': ie.ie_key(),
747         })
748
749     def process_ie_result(self, ie_result, download=True, extra_info={}):
750         """
751         Take the result of the ie(may be modified) and resolve all unresolved
752         references (URLs, playlist items).
753
754         It will also download the videos if 'download'.
755         Returns the resolved ie_result.
756         """
757         result_type = ie_result.get('_type', 'video')
758
759         if result_type in ('url', 'url_transparent'):
760             ie_result['url'] = sanitize_url(ie_result['url'])
761             extract_flat = self.params.get('extract_flat', False)
762             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
763                     extract_flat is True):
764                 if self.params.get('forcejson', False):
765                     self.to_stdout(json.dumps(ie_result))
766                 return ie_result
767
768         if result_type == 'video':
769             self.add_extra_info(ie_result, extra_info)
770             return self.process_video_result(ie_result, download=download)
771         elif result_type == 'url':
772             # We have to add extra_info to the results because it may be
773             # contained in a playlist
774             return self.extract_info(ie_result['url'],
775                                      download,
776                                      ie_key=ie_result.get('ie_key'),
777                                      extra_info=extra_info)
778         elif result_type == 'url_transparent':
779             # Use the information from the embedding page
780             info = self.extract_info(
781                 ie_result['url'], ie_key=ie_result.get('ie_key'),
782                 extra_info=extra_info, download=False, process=False)
783
784             force_properties = dict(
785                 (k, v) for k, v in ie_result.items() if v is not None)
786             for f in ('_type', 'url', 'ie_key'):
787                 if f in force_properties:
788                     del force_properties[f]
789             new_result = info.copy()
790             new_result.update(force_properties)
791
792             assert new_result.get('_type') != 'url_transparent'
793
794             return self.process_ie_result(
795                 new_result, download=download, extra_info=extra_info)
796         elif result_type == 'playlist' or result_type == 'multi_video':
797             # We process each entry in the playlist
798             playlist = ie_result.get('title') or ie_result.get('id')
799             self.to_screen('[download] Downloading playlist: %s' % playlist)
800
801             playlist_results = []
802
803             playliststart = self.params.get('playliststart', 1) - 1
804             playlistend = self.params.get('playlistend')
805             # For backwards compatibility, interpret -1 as whole list
806             if playlistend == -1:
807                 playlistend = None
808
809             playlistitems_str = self.params.get('playlist_items')
810             playlistitems = None
811             if playlistitems_str is not None:
812                 def iter_playlistitems(format):
813                     for string_segment in format.split(','):
814                         if '-' in string_segment:
815                             start, end = string_segment.split('-')
816                             for item in range(int(start), int(end) + 1):
817                                 yield int(item)
818                         else:
819                             yield int(string_segment)
820                 playlistitems = iter_playlistitems(playlistitems_str)
821
822             ie_entries = ie_result['entries']
823             if isinstance(ie_entries, list):
824                 n_all_entries = len(ie_entries)
825                 if playlistitems:
826                     entries = [
827                         ie_entries[i - 1] for i in playlistitems
828                         if -n_all_entries <= i - 1 < n_all_entries]
829                 else:
830                     entries = ie_entries[playliststart:playlistend]
831                 n_entries = len(entries)
832                 self.to_screen(
833                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
834                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
835             elif isinstance(ie_entries, PagedList):
836                 if playlistitems:
837                     entries = []
838                     for item in playlistitems:
839                         entries.extend(ie_entries.getslice(
840                             item - 1, item
841                         ))
842                 else:
843                     entries = ie_entries.getslice(
844                         playliststart, playlistend)
845                 n_entries = len(entries)
846                 self.to_screen(
847                     '[%s] playlist %s: Downloading %d videos' %
848                     (ie_result['extractor'], playlist, n_entries))
849             else:  # iterable
850                 if playlistitems:
851                     entry_list = list(ie_entries)
852                     entries = [entry_list[i - 1] for i in playlistitems]
853                 else:
854                     entries = list(itertools.islice(
855                         ie_entries, playliststart, playlistend))
856                 n_entries = len(entries)
857                 self.to_screen(
858                     '[%s] playlist %s: Downloading %d videos' %
859                     (ie_result['extractor'], playlist, n_entries))
860
861             if self.params.get('playlistreverse', False):
862                 entries = entries[::-1]
863
864             if self.params.get('playlistrandom', False):
865                 random.shuffle(entries)
866
867             for i, entry in enumerate(entries, 1):
868                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
869                 extra = {
870                     'n_entries': n_entries,
871                     'playlist': playlist,
872                     'playlist_id': ie_result.get('id'),
873                     'playlist_title': ie_result.get('title'),
874                     'playlist_index': i + playliststart,
875                     'extractor': ie_result['extractor'],
876                     'webpage_url': ie_result['webpage_url'],
877                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
878                     'extractor_key': ie_result['extractor_key'],
879                 }
880
881                 reason = self._match_entry(entry, incomplete=True)
882                 if reason is not None:
883                     self.to_screen('[download] ' + reason)
884                     continue
885
886                 entry_result = self.process_ie_result(entry,
887                                                       download=download,
888                                                       extra_info=extra)
889                 playlist_results.append(entry_result)
890             ie_result['entries'] = playlist_results
891             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
892             return ie_result
893         elif result_type == 'compat_list':
894             self.report_warning(
895                 'Extractor %s returned a compat_list result. '
896                 'It needs to be updated.' % ie_result.get('extractor'))
897
898             def _fixup(r):
899                 self.add_extra_info(
900                     r,
901                     {
902                         'extractor': ie_result['extractor'],
903                         'webpage_url': ie_result['webpage_url'],
904                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
905                         'extractor_key': ie_result['extractor_key'],
906                     }
907                 )
908                 return r
909             ie_result['entries'] = [
910                 self.process_ie_result(_fixup(r), download, extra_info)
911                 for r in ie_result['entries']
912             ]
913             return ie_result
914         else:
915             raise Exception('Invalid result type: %s' % result_type)
916
917     def _build_format_filter(self, filter_spec):
918         " Returns a function to filter the formats according to the filter_spec "
919
920         OPERATORS = {
921             '<': operator.lt,
922             '<=': operator.le,
923             '>': operator.gt,
924             '>=': operator.ge,
925             '=': operator.eq,
926             '!=': operator.ne,
927         }
928         operator_rex = re.compile(r'''(?x)\s*
929             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
930             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
931             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
932             $
933             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
934         m = operator_rex.search(filter_spec)
935         if m:
936             try:
937                 comparison_value = int(m.group('value'))
938             except ValueError:
939                 comparison_value = parse_filesize(m.group('value'))
940                 if comparison_value is None:
941                     comparison_value = parse_filesize(m.group('value') + 'B')
942                 if comparison_value is None:
943                     raise ValueError(
944                         'Invalid value %r in format specification %r' % (
945                             m.group('value'), filter_spec))
946             op = OPERATORS[m.group('op')]
947
948         if not m:
949             STR_OPERATORS = {
950                 '=': operator.eq,
951                 '!=': operator.ne,
952                 '^=': lambda attr, value: attr.startswith(value),
953                 '$=': lambda attr, value: attr.endswith(value),
954                 '*=': lambda attr, value: value in attr,
955             }
956             str_operator_rex = re.compile(r'''(?x)
957                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
958                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
959                 \s*(?P<value>[a-zA-Z0-9._-]+)
960                 \s*$
961                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
962             m = str_operator_rex.search(filter_spec)
963             if m:
964                 comparison_value = m.group('value')
965                 op = STR_OPERATORS[m.group('op')]
966
967         if not m:
968             raise ValueError('Invalid filter specification %r' % filter_spec)
969
970         def _filter(f):
971             actual_value = f.get(m.group('key'))
972             if actual_value is None:
973                 return m.group('none_inclusive')
974             return op(actual_value, comparison_value)
975         return _filter
976
    def build_format_selector(self, format_spec):
        """
        Compile the format specification string into a selector function.

        format_spec is tokenized with the Python tokenizer, parsed into a tree
        of FormatSelector tuples and converted into a function that takes a
        ctx dict (its 'formats' and 'incomplete_formats' keys are read) and
        returns an iterable of the selected formats.
        Raises SyntaxError for a malformed specification.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError showing format_spec with a caret
            # under column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Concatenate the token strings up to the closing ']' and return
            # them as the filter specification (implicitly returns None if
            # the tokens run out before a ']' is seen)
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # Pending joined string and its position; last_line is never
            # reassigned, so joined tokens are yielded with line None
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Names, numbers and unused operators are glued together
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive parser: returns the list of FormatSelector nodes read
            # from tokens. The inside_* flags tell which operators end the
            # current sub-expression; such an operator is pushed back with
            # restore_last_token() so that the caller sees it again.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a parsed selector (a FormatSelector or a list of them)
            # into a function ctx -> iterable of formats
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                # A list yields the results of all of its selectors in order;
                # it is returned directly, no filters are applied at this level
                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                # Result of the first alternative that selects anything
                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # 'worst' takes the first candidate, anything else the
                        # last one (presumably formats are sorted from worst
                        # to best - verify against the caller)
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        # Audio-only formats are those whose vcodec is 'none'
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        # Video-only formats are those whose acodec is 'none'
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # A known extension selects by 'ext', anything else is
                        # taken as a format_id; the last match is yielded
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into a single format dict;
                    # reports an error and returns None for an invalid pair
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    # The 'merge_output_format' param overrides the extension
                    # of the video format
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                # Every (video, audio) combination of the two sub-selections
                # is merged; each side works on its own deep copy of ctx
                def selector_function(ctx):
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the selector's filters to a copy of ctx, so that the
                # caller's ctx is left untouched, then run the selection
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        # The tokenizer reads lines of bytes, so feed it the encoded spec
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list that allows stepping back one token
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 name of the iterator protocol method
            next = __next__

            def restore_last_token(self):
                # Make the next iteration step return the last token again
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1241
1242     def _calc_headers(self, info_dict):
1243         res = std_headers.copy()
1244
1245         add_headers = info_dict.get('http_headers')
1246         if add_headers:
1247             res.update(add_headers)
1248
1249         cookies = self._calc_cookies(info_dict)
1250         if cookies:
1251             res['Cookie'] = cookies
1252
1253         return res
1254
1255     def _calc_cookies(self, info_dict):
1256         pr = sanitized_Request(info_dict['url'])
1257         self.cookiejar.add_cookie_header(pr)
1258         return pr.get_header('Cookie')
1259
    def process_video_result(self, info_dict, download=True):
        """Normalize a single video result, select its formats and optionally download.

        info_dict is modified in place: missing derived fields (thumbnails,
        display_id, upload_date, chapter/season/episode titles, per-format
        format_id/format/ext/protocol/http_headers, requested_subtitles) are
        filled in before format selection takes place.

        When download is true, every selected format is merged into a copy
        of info_dict and handed to process_info().

        Returns info_dict updated with the last selected format, or None when
        one of the listing options (list_thumbnails, listsubtitles,
        listformats) is set and only the listing is produced.

        Raises ExtractorError if "id" or "title" is missing, if there are no
        formats, if a format has no "url", or if the format selector yields
        nothing.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if not isinstance(info_dict['id'], compat_str):
            self.report_warning('"id" field is not a string - forcing string conversion')
            info_dict['id'] = compat_str(info_dict['id'])

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # A lone 'thumbnail' URL is promoted to a one-element 'thumbnails' list
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Ascending sort with missing values ranked lowest, so the last
            # element is the one with the highest (preference, width, height,
            # id, url) key
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    # Fall back to the position in the sorted list
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        # Prefer an explicit 'thumbnail'; otherwise use the last (highest
        # sorted) entry of 'thumbnails'
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                # Best effort: upload_date simply stays unset
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        subtitles = info_dict.get('subtitles')
        if subtitles:
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        # NOTE(review): this reads subtitle_format['url'] unconditionally,
                        # so an entry with neither 'ext' nor 'url' raises KeyError
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # Maps each format_id to the list of formats carrying it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            format['url'] = sanitize_url(format['url'])

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate by appending the index within the clashing group
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            # (format-level values override the video-level ones in the copy)
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # No explicit request: build the default selector. The merged
            # 'bestvideo+bestaudio' choice is only offered when not writing to
            # stdout ('-'), the video is not live and the merger is usable.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each selected format is processed on its own copy of the info
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1461
1462     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1463         """Select the requested subtitles and their format"""
1464         available_subs = {}
1465         if normal_subtitles and self.params.get('writesubtitles'):
1466             available_subs.update(normal_subtitles)
1467         if automatic_captions and self.params.get('writeautomaticsub'):
1468             for lang, cap_info in automatic_captions.items():
1469                 if lang not in available_subs:
1470                     available_subs[lang] = cap_info
1471
1472         if (not self.params.get('writesubtitles') and not
1473                 self.params.get('writeautomaticsub') or not
1474                 available_subs):
1475             return None
1476
1477         if self.params.get('allsubtitles', False):
1478             requested_langs = available_subs.keys()
1479         else:
1480             if self.params.get('subtitleslangs', False):
1481                 requested_langs = self.params.get('subtitleslangs')
1482             elif 'en' in available_subs:
1483                 requested_langs = ['en']
1484             else:
1485                 requested_langs = [list(available_subs.keys())[0]]
1486
1487         formats_query = self.params.get('subtitlesformat', 'best')
1488         formats_preference = formats_query.split('/') if formats_query else []
1489         subs = {}
1490         for lang in requested_langs:
1491             formats = available_subs.get(lang)
1492             if formats is None:
1493                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1494                 continue
1495             for ext in formats_preference:
1496                 if ext == 'best':
1497                     f = formats[-1]
1498                     break
1499                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1500                 if matches:
1501                     f = matches[-1]
1502                     break
1503             else:
1504                 f = formats[-1]
1505                 self.report_warning(
1506                     'No subtitle format found matching "%s" for language %s, '
1507                     'using %s' % (formats_query, lang, f['ext']))
1508             subs[lang] = f
1509         return subs
1510
1511     def process_info(self, info_dict):
1512         """Process a single resolved IE result."""
1513
1514         assert info_dict.get('_type', 'video') == 'video'
1515
1516         max_downloads = self.params.get('max_downloads')
1517         if max_downloads is not None:
1518             if self._num_downloads >= int(max_downloads):
1519                 raise MaxDownloadsReached()
1520
1521         info_dict['fulltitle'] = info_dict['title']
1522         if len(info_dict['title']) > 200:
1523             info_dict['title'] = info_dict['title'][:197] + '...'
1524
1525         if 'format' not in info_dict:
1526             info_dict['format'] = info_dict['ext']
1527
1528         reason = self._match_entry(info_dict, incomplete=False)
1529         if reason is not None:
1530             self.to_screen('[download] ' + reason)
1531             return
1532
1533         self._num_downloads += 1
1534
1535         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1536
1537         # Forced printings
1538         if self.params.get('forcetitle', False):
1539             self.to_stdout(info_dict['fulltitle'])
1540         if self.params.get('forceid', False):
1541             self.to_stdout(info_dict['id'])
1542         if self.params.get('forceurl', False):
1543             if info_dict.get('requested_formats') is not None:
1544                 for f in info_dict['requested_formats']:
1545                     self.to_stdout(f['url'] + f.get('play_path', ''))
1546             else:
1547                 # For RTMP URLs, also include the playpath
1548                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1549         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1550             self.to_stdout(info_dict['thumbnail'])
1551         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1552             self.to_stdout(info_dict['description'])
1553         if self.params.get('forcefilename', False) and filename is not None:
1554             self.to_stdout(filename)
1555         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1556             self.to_stdout(formatSeconds(info_dict['duration']))
1557         if self.params.get('forceformat', False):
1558             self.to_stdout(info_dict['format'])
1559         if self.params.get('forcejson', False):
1560             self.to_stdout(json.dumps(info_dict))
1561
1562         # Do nothing else if in simulate mode
1563         if self.params.get('simulate', False):
1564             return
1565
1566         if filename is None:
1567             return
1568
1569         try:
1570             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1571             if dn and not os.path.exists(dn):
1572                 os.makedirs(dn)
1573         except (OSError, IOError) as err:
1574             self.report_error('unable to create directory ' + error_to_compat_str(err))
1575             return
1576
1577         if self.params.get('writedescription', False):
1578             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1579             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1580                 self.to_screen('[info] Video description is already present')
1581             elif info_dict.get('description') is None:
1582                 self.report_warning('There\'s no description to write.')
1583             else:
1584                 try:
1585                     self.to_screen('[info] Writing video description to: ' + descfn)
1586                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1587                         descfile.write(info_dict['description'])
1588                 except (OSError, IOError):
1589                     self.report_error('Cannot write description file ' + descfn)
1590                     return
1591
1592         if self.params.get('writeannotations', False):
1593             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1594             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1595                 self.to_screen('[info] Video annotations are already present')
1596             else:
1597                 try:
1598                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1599                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1600                         annofile.write(info_dict['annotations'])
1601                 except (KeyError, TypeError):
1602                     self.report_warning('There are no annotations to write.')
1603                 except (OSError, IOError):
1604                     self.report_error('Cannot write annotations file: ' + annofn)
1605                     return
1606
1607         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1608                                        self.params.get('writeautomaticsub')])
1609
1610         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1611             # subtitles download errors are already managed as troubles in relevant IE
1612             # that way it will silently go on when used with unsupporting IE
1613             subtitles = info_dict['requested_subtitles']
1614             ie = self.get_info_extractor(info_dict['extractor_key'])
1615             for sub_lang, sub_info in subtitles.items():
1616                 sub_format = sub_info['ext']
1617                 if sub_info.get('data') is not None:
1618                     sub_data = sub_info['data']
1619                 else:
1620                     try:
1621                         sub_data = ie._download_webpage(
1622                             sub_info['url'], info_dict['id'], note=False)
1623                     except ExtractorError as err:
1624                         self.report_warning('Unable to download subtitle for "%s": %s' %
1625                                             (sub_lang, error_to_compat_str(err.cause)))
1626                         continue
1627                 try:
1628                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1629                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1630                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1631                     else:
1632                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1633                         # Use newline='' to prevent conversion of newline characters
1634                         # See https://github.com/rg3/youtube-dl/issues/10268
1635                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1636                             subfile.write(sub_data)
1637                 except (OSError, IOError):
1638                     self.report_error('Cannot write subtitles file ' + sub_filename)
1639                     return
1640
1641         if self.params.get('writeinfojson', False):
1642             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1643             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1644                 self.to_screen('[info] Video description metadata is already present')
1645             else:
1646                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1647                 try:
1648                     write_json_file(self.filter_requested_info(info_dict), infofn)
1649                 except (OSError, IOError):
1650                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1651                     return
1652
1653         self._write_thumbnails(info_dict, filename)
1654
1655         if not self.params.get('skip_download', False):
1656             try:
1657                 def dl(name, info):
1658                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1659                     for ph in self._progress_hooks:
1660                         fd.add_progress_hook(ph)
1661                     if self.params.get('verbose'):
1662                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1663                     return fd.download(name, info)
1664
1665                 if info_dict.get('requested_formats') is not None:
1666                     downloaded = []
1667                     success = True
1668                     merger = FFmpegMergerPP(self)
1669                     if not merger.available:
1670                         postprocessors = []
1671                         self.report_warning('You have requested multiple '
1672                                             'formats but ffmpeg or avconv are not installed.'
1673                                             ' The formats won\'t be merged.')
1674                     else:
1675                         postprocessors = [merger]
1676
1677                     def compatible_formats(formats):
1678                         video, audio = formats
1679                         # Check extension
1680                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1681                         if video_ext and audio_ext:
1682                             COMPATIBLE_EXTS = (
1683                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1684                                 ('webm')
1685                             )
1686                             for exts in COMPATIBLE_EXTS:
1687                                 if video_ext in exts and audio_ext in exts:
1688                                     return True
1689                         # TODO: Check acodec/vcodec
1690                         return False
1691
1692                     filename_real_ext = os.path.splitext(filename)[1][1:]
1693                     filename_wo_ext = (
1694                         os.path.splitext(filename)[0]
1695                         if filename_real_ext == info_dict['ext']
1696                         else filename)
1697                     requested_formats = info_dict['requested_formats']
1698                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1699                         info_dict['ext'] = 'mkv'
1700                         self.report_warning(
1701                             'Requested formats are incompatible for merge and will be merged into mkv.')
1702                     # Ensure filename always has a correct extension for successful merge
1703                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1704                     if os.path.exists(encodeFilename(filename)):
1705                         self.to_screen(
1706                             '[download] %s has already been downloaded and '
1707                             'merged' % filename)
1708                     else:
1709                         for f in requested_formats:
1710                             new_info = dict(info_dict)
1711                             new_info.update(f)
1712                             fname = self.prepare_filename(new_info)
1713                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1714                             downloaded.append(fname)
1715                             partial_success = dl(fname, new_info)
1716                             success = success and partial_success
1717                         info_dict['__postprocessors'] = postprocessors
1718                         info_dict['__files_to_merge'] = downloaded
1719                 else:
1720                     # Just a single file
1721                     success = dl(filename, info_dict)
1722             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1723                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1724                 return
1725             except (OSError, IOError) as err:
1726                 raise UnavailableVideoError(err)
1727             except (ContentTooShortError, ) as err:
1728                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1729                 return
1730
1731             if success and filename != '-':
1732                 # Fixup content
1733                 fixup_policy = self.params.get('fixup')
1734                 if fixup_policy is None:
1735                     fixup_policy = 'detect_or_warn'
1736
1737                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1738
1739                 stretched_ratio = info_dict.get('stretched_ratio')
1740                 if stretched_ratio is not None and stretched_ratio != 1:
1741                     if fixup_policy == 'warn':
1742                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1743                             info_dict['id'], stretched_ratio))
1744                     elif fixup_policy == 'detect_or_warn':
1745                         stretched_pp = FFmpegFixupStretchedPP(self)
1746                         if stretched_pp.available:
1747                             info_dict.setdefault('__postprocessors', [])
1748                             info_dict['__postprocessors'].append(stretched_pp)
1749                         else:
1750                             self.report_warning(
1751                                 '%s: Non-uniform pixel ratio (%s). %s'
1752                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1753                     else:
1754                         assert fixup_policy in ('ignore', 'never')
1755
1756                 if (info_dict.get('requested_formats') is None and
1757                         info_dict.get('container') == 'm4a_dash'):
1758                     if fixup_policy == 'warn':
1759                         self.report_warning(
1760                             '%s: writing DASH m4a. '
1761                             'Only some players support this container.'
1762                             % info_dict['id'])
1763                     elif fixup_policy == 'detect_or_warn':
1764                         fixup_pp = FFmpegFixupM4aPP(self)
1765                         if fixup_pp.available:
1766                             info_dict.setdefault('__postprocessors', [])
1767                             info_dict['__postprocessors'].append(fixup_pp)
1768                         else:
1769                             self.report_warning(
1770                                 '%s: writing DASH m4a. '
1771                                 'Only some players support this container. %s'
1772                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1773                     else:
1774                         assert fixup_policy in ('ignore', 'never')
1775
1776                 if (info_dict.get('protocol') == 'm3u8_native' or
1777                         info_dict.get('protocol') == 'm3u8' and
1778                         self.params.get('hls_prefer_native')):
1779                     if fixup_policy == 'warn':
1780                         self.report_warning('%s: malformated aac bitstream.' % (
1781                             info_dict['id']))
1782                     elif fixup_policy == 'detect_or_warn':
1783                         fixup_pp = FFmpegFixupM3u8PP(self)
1784                         if fixup_pp.available:
1785                             info_dict.setdefault('__postprocessors', [])
1786                             info_dict['__postprocessors'].append(fixup_pp)
1787                         else:
1788                             self.report_warning(
1789                                 '%s: malformated aac bitstream. %s'
1790                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1791                     else:
1792                         assert fixup_policy in ('ignore', 'never')
1793
1794                 try:
1795                     self.post_process(filename, info_dict)
1796                 except (PostProcessingError) as err:
1797                     self.report_error('postprocessing: %s' % str(err))
1798                     return
1799                 self.record_download_archive(info_dict)
1800
1801     def download(self, url_list):
1802         """Download a given list of URLs."""
1803         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1804         if (len(url_list) > 1 and
1805                 '%' not in outtmpl and
1806                 self.params.get('max_downloads') != 1):
1807             raise SameFileError(outtmpl)
1808
1809         for url in url_list:
1810             try:
1811                 # It also downloads the videos
1812                 res = self.extract_info(
1813                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1814             except UnavailableVideoError:
1815                 self.report_error('unable to download video')
1816             except MaxDownloadsReached:
1817                 self.to_screen('[info] Maximum number of downloaded files reached.')
1818                 raise
1819             else:
1820                 if self.params.get('dump_single_json', False):
1821                     self.to_stdout(json.dumps(res))
1822
1823         return self._download_retcode
1824
1825     def download_with_info_file(self, info_filename):
1826         with contextlib.closing(fileinput.FileInput(
1827                 [info_filename], mode='r',
1828                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1829             # FileInput doesn't have a read method, we can't call json.load
1830             info = self.filter_requested_info(json.loads('\n'.join(f)))
1831         try:
1832             self.process_ie_result(info, download=True)
1833         except DownloadError:
1834             webpage_url = info.get('webpage_url')
1835             if webpage_url is not None:
1836                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1837                 return self.download([webpage_url])
1838             else:
1839                 raise
1840         return self._download_retcode
1841
1842     @staticmethod
1843     def filter_requested_info(info_dict):
1844         return dict(
1845             (k, v) for k, v in info_dict.items()
1846             if k not in ['requested_formats', 'requested_subtitles'])
1847
1848     def post_process(self, filename, ie_info):
1849         """Run all the postprocessors on the given file."""
1850         info = dict(ie_info)
1851         info['filepath'] = filename
1852         pps_chain = []
1853         if ie_info.get('__postprocessors') is not None:
1854             pps_chain.extend(ie_info['__postprocessors'])
1855         pps_chain.extend(self._pps)
1856         for pp in pps_chain:
1857             files_to_delete = []
1858             try:
1859                 files_to_delete, info = pp.run(info)
1860             except PostProcessingError as e:
1861                 self.report_error(e.msg)
1862             if files_to_delete and not self.params.get('keepvideo', False):
1863                 for old_filename in files_to_delete:
1864                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1865                     try:
1866                         os.remove(encodeFilename(old_filename))
1867                     except (IOError, OSError):
1868                         self.report_warning('Unable to remove downloaded original file')
1869
1870     def _make_archive_id(self, info_dict):
1871         # Future-proof against any change in case
1872         # and backwards compatibility with prior versions
1873         extractor = info_dict.get('extractor_key')
1874         if extractor is None:
1875             if 'id' in info_dict:
1876                 extractor = info_dict.get('ie_key')  # key in a playlist
1877         if extractor is None:
1878             return None  # Incomplete video information
1879         return extractor.lower() + ' ' + info_dict['id']
1880
1881     def in_download_archive(self, info_dict):
1882         fn = self.params.get('download_archive')
1883         if fn is None:
1884             return False
1885
1886         vid_id = self._make_archive_id(info_dict)
1887         if vid_id is None:
1888             return False  # Incomplete video information
1889
1890         try:
1891             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1892                 for line in archive_file:
1893                     if line.strip() == vid_id:
1894                         return True
1895         except IOError as ioe:
1896             if ioe.errno != errno.ENOENT:
1897                 raise
1898         return False
1899
1900     def record_download_archive(self, info_dict):
1901         fn = self.params.get('download_archive')
1902         if fn is None:
1903             return
1904         vid_id = self._make_archive_id(info_dict)
1905         assert vid_id
1906         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1907             archive_file.write(vid_id + '\n')
1908
1909     @staticmethod
1910     def format_resolution(format, default='unknown'):
1911         if format.get('vcodec') == 'none':
1912             return 'audio only'
1913         if format.get('resolution') is not None:
1914             return format['resolution']
1915         if format.get('height') is not None:
1916             if format.get('width') is not None:
1917                 res = '%sx%s' % (format['width'], format['height'])
1918             else:
1919                 res = '%sp' % format['height']
1920         elif format.get('width') is not None:
1921             res = '%dx?' % format['width']
1922         else:
1923             res = default
1924         return res
1925
1926     def _format_note(self, fdict):
1927         res = ''
1928         if fdict.get('ext') in ['f4f', 'f4m']:
1929             res += '(unsupported) '
1930         if fdict.get('language'):
1931             if res:
1932                 res += ' '
1933             res += '[%s] ' % fdict['language']
1934         if fdict.get('format_note') is not None:
1935             res += fdict['format_note'] + ' '
1936         if fdict.get('tbr') is not None:
1937             res += '%4dk ' % fdict['tbr']
1938         if fdict.get('container') is not None:
1939             if res:
1940                 res += ', '
1941             res += '%s container' % fdict['container']
1942         if (fdict.get('vcodec') is not None and
1943                 fdict.get('vcodec') != 'none'):
1944             if res:
1945                 res += ', '
1946             res += fdict['vcodec']
1947             if fdict.get('vbr') is not None:
1948                 res += '@'
1949         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1950             res += 'video@'
1951         if fdict.get('vbr') is not None:
1952             res += '%4dk' % fdict['vbr']
1953         if fdict.get('fps') is not None:
1954             if res:
1955                 res += ', '
1956             res += '%sfps' % fdict['fps']
1957         if fdict.get('acodec') is not None:
1958             if res:
1959                 res += ', '
1960             if fdict['acodec'] == 'none':
1961                 res += 'video only'
1962             else:
1963                 res += '%-5s' % fdict['acodec']
1964         elif fdict.get('abr') is not None:
1965             if res:
1966                 res += ', '
1967             res += 'audio'
1968         if fdict.get('abr') is not None:
1969             res += '@%3dk' % fdict['abr']
1970         if fdict.get('asr') is not None:
1971             res += ' (%5dHz)' % fdict['asr']
1972         if fdict.get('filesize') is not None:
1973             if res:
1974                 res += ', '
1975             res += format_bytes(fdict['filesize'])
1976         elif fdict.get('filesize_approx') is not None:
1977             if res:
1978                 res += ', '
1979             res += '~' + format_bytes(fdict['filesize_approx'])
1980         return res
1981
1982     def list_formats(self, info_dict):
1983         formats = info_dict.get('formats', [info_dict])
1984         table = [
1985             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1986             for f in formats
1987             if f.get('preference') is None or f['preference'] >= -1000]
1988         if len(formats) > 1:
1989             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1990
1991         header_line = ['format code', 'extension', 'resolution', 'note']
1992         self.to_screen(
1993             '[info] Available formats for %s:\n%s' %
1994             (info_dict['id'], render_table(header_line, table)))
1995
1996     def list_thumbnails(self, info_dict):
1997         thumbnails = info_dict.get('thumbnails')
1998         if not thumbnails:
1999             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2000             return
2001
2002         self.to_screen(
2003             '[info] Thumbnails for %s:' % info_dict['id'])
2004         self.to_screen(render_table(
2005             ['ID', 'width', 'height', 'URL'],
2006             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2007
2008     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2009         if not subtitles:
2010             self.to_screen('%s has no %s' % (video_id, name))
2011             return
2012         self.to_screen(
2013             'Available %s for %s:' % (name, video_id))
2014         self.to_screen(render_table(
2015             ['Language', 'formats'],
2016             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2017                 for lang, formats in subtitles.items()]))
2018
2019     def urlopen(self, req):
2020         """ Start an HTTP download """
2021         if isinstance(req, compat_basestring):
2022             req = sanitized_Request(req)
2023         return self._opener.open(req, timeout=self._socket_timeout)
2024
2025     def print_debug_header(self):
2026         if not self.params.get('verbose'):
2027             return
2028
2029         if type('') is not compat_str:
2030             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
2031             self.report_warning(
2032                 'Your Python is broken! Update to a newer and supported version')
2033
2034         stdout_encoding = getattr(
2035             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2036         encoding_str = (
2037             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2038                 locale.getpreferredencoding(),
2039                 sys.getfilesystemencoding(),
2040                 stdout_encoding,
2041                 self.get_encoding()))
2042         write_string(encoding_str, encoding=None)
2043
2044         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2045         if _LAZY_LOADER:
2046             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2047         try:
2048             sp = subprocess.Popen(
2049                 ['git', 'rev-parse', '--short', 'HEAD'],
2050                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2051                 cwd=os.path.dirname(os.path.abspath(__file__)))
2052             out, err = sp.communicate()
2053             out = out.decode().strip()
2054             if re.match('[0-9a-f]+', out):
2055                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2056         except Exception:
2057             try:
2058                 sys.exc_clear()
2059             except Exception:
2060                 pass
2061         self._write_string('[debug] Python version %s - %s\n' % (
2062             platform.python_version(), platform_name()))
2063
2064         exe_versions = FFmpegPostProcessor.get_versions(self)
2065         exe_versions['rtmpdump'] = rtmpdump_version()
2066         exe_str = ', '.join(
2067             '%s %s' % (exe, v)
2068             for exe, v in sorted(exe_versions.items())
2069             if v
2070         )
2071         if not exe_str:
2072             exe_str = 'none'
2073         self._write_string('[debug] exe versions: %s\n' % exe_str)
2074
2075         proxy_map = {}
2076         for handler in self._opener.handlers:
2077             if hasattr(handler, 'proxies'):
2078                 proxy_map.update(handler.proxies)
2079         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2080
2081         if self.params.get('call_home', False):
2082             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2083             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2084             latest_version = self.urlopen(
2085                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2086             if version_tuple(latest_version) > version_tuple(__version__):
2087                 self.report_warning(
2088                     'You are using an outdated version (newest version: %s)! '
2089                     'See https://yt-dl.org/update if you need help updating.' %
2090                     latest_version)
2091
    def _setup_opener(self):
        """Build the URL opener and store it as self._opener.

        Also sets self._socket_timeout (600 unless the 'socket_timeout' param
        is given) and self.cookiejar (backed by the 'cookiefile' param when
        set).  The proxy map comes from the 'proxy' param or, when that is
        unset, from compat_urllib_request.getproxies().
        """
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = compat_expanduser(opts_cookiefile)
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load existing cookies when the file is readable
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit 'proxy' param takes precedence over getproxies();
            # an empty string results in an empty proxy map
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        # Replacement for FileHandler.file_open that rejects every request
        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2144
2145     def encode(self, s):
2146         if isinstance(s, bytes):
2147             return s  # Already encoded
2148
2149         try:
2150             return s.encode(self.get_encoding())
2151         except UnicodeEncodeError as err:
2152             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2153             raise
2154
2155     def get_encoding(self):
2156         encoding = self.params.get('encoding')
2157         if encoding is None:
2158             encoding = preferredencoding()
2159         return encoding
2160
2161     def _write_thumbnails(self, info_dict, filename):
2162         if self.params.get('writethumbnail', False):
2163             thumbnails = info_dict.get('thumbnails')
2164             if thumbnails:
2165                 thumbnails = [thumbnails[-1]]
2166         elif self.params.get('write_all_thumbnails', False):
2167             thumbnails = info_dict.get('thumbnails')
2168         else:
2169             return
2170
2171         if not thumbnails:
2172             # No thumbnails present, so return immediately
2173             return
2174
2175         for t in thumbnails:
2176             thumb_ext = determine_ext(t['url'], 'jpg')
2177             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2178             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2179             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2180
2181             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2182                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2183                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2184             else:
2185                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2186                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2187                 try:
2188                     uf = self.urlopen(t['url'])
2189                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2190                         shutil.copyfileobj(uf, thumbf)
2191                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2192                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2193                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2194                     self.report_warning('Unable to download thumbnail "%s": %s' %
2195                                         (t['url'], error_to_compat_str(err)))