Add faked X-Forwarded-For to formats' HTTP headers
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_os_name,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41     compat_urllib_request_DataHandler,
42 )
43 from .utils import (
44     age_restricted,
45     args_to_str,
46     ContentTooShortError,
47     date_from_str,
48     DateRange,
49     DEFAULT_OUTTMPL,
50     determine_ext,
51     determine_protocol,
52     DownloadError,
53     encode_compat_str,
54     encodeFilename,
55     error_to_compat_str,
56     ExtractorError,
57     format_bytes,
58     formatSeconds,
59     GeoRestrictedError,
60     ISO3166Utils,
61     locked_file,
62     make_HTTPS_handler,
63     MaxDownloadsReached,
64     PagedList,
65     parse_filesize,
66     PerRequestProxyHandler,
67     platform_name,
68     PostProcessingError,
69     preferredencoding,
70     prepend_extension,
71     register_socks_protocols,
72     render_table,
73     replace_extension,
74     SameFileError,
75     sanitize_filename,
76     sanitize_path,
77     sanitize_url,
78     sanitized_Request,
79     std_headers,
80     subtitles_filename,
81     UnavailableVideoError,
82     url_basename,
83     version_tuple,
84     write_json_file,
85     write_string,
86     YoutubeDLCookieProcessor,
87     YoutubeDLHandler,
88 )
89 from .cache import Cache
90 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
91 from .downloader import get_suitable_downloader
92 from .downloader.rtmp import rtmpdump_version
93 from .postprocessor import (
94     FFmpegFixupM3u8PP,
95     FFmpegFixupM4aPP,
96     FFmpegFixupStretchedPP,
97     FFmpegMergerPP,
98     FFmpegPostProcessor,
99     get_postprocessor,
100 )
101 from .version import __version__
102
103 if compat_os_name == 'nt':
104     import ctypes
105
106
107 class YoutubeDL(object):
108     """YoutubeDL class.
109
110     YoutubeDL objects are the ones responsible of downloading the
111     actual video file and writing it to disk if the user has requested
112     it, among some other tasks. In most cases there should be one per
113     program. As, given a video URL, the downloader doesn't know how to
114     extract all the needed information, task that InfoExtractors do, it
115     has to pass the URL to one of them.
116
117     For this, YoutubeDL objects have a method that allows
118     InfoExtractors to be registered in a given order. When it is passed
119     a URL, the YoutubeDL object handles it to the first InfoExtractor it
120     finds that reports being able to handle it. The InfoExtractor extracts
121     all the information about the video or videos the URL refers to, and
122     YoutubeDL process the extracted information, possibly using a File
123     Downloader to download the video.
124
125     YoutubeDL objects accept a lot of parameters. In order not to saturate
126     the object constructor with arguments, it receives a dictionary of
127     options instead. These options are available through the params
128     attribute for the InfoExtractors to use. The YoutubeDL also
129     registers itself as the downloader in charge for the InfoExtractors
130     that are added to it, so this is a "mutual registration".
131
132     Available options:
133
134     username:          Username for authentication purposes.
135     password:          Password for authentication purposes.
136     videopassword:     Password for accessing a video.
137     ap_mso:            Adobe Pass multiple-system operator identifier.
138     ap_username:       Multiple-system operator account username.
139     ap_password:       Multiple-system operator account password.
140     usenetrc:          Use netrc for authentication instead.
141     verbose:           Print additional info to stdout.
142     quiet:             Do not print messages to stdout.
143     no_warnings:       Do not print out anything for warnings.
144     forceurl:          Force printing final URL.
145     forcetitle:        Force printing title.
146     forceid:           Force printing ID.
147     forcethumbnail:    Force printing thumbnail URL.
148     forcedescription:  Force printing description.
149     forcefilename:     Force printing final filename.
150     forceduration:     Force printing duration.
151     forcejson:         Force printing info_dict as JSON.
152     dump_single_json:  Force printing the info_dict of the whole playlist
153                        (or video) as a single JSON line.
154     simulate:          Do not download the video files.
155     format:            Video format code. See options.py for more information.
156     outtmpl:           Template for output names.
157     restrictfilenames: Do not allow "&" and spaces in file names
158     ignoreerrors:      Do not stop on download errors.
159     force_generic_extractor: Force downloader to use the generic extractor
160     nooverwrites:      Prevent overwriting files.
161     playliststart:     Playlist item to start at.
162     playlistend:       Playlist item to end at.
163     playlist_items:    Specific indices of playlist to download.
164     playlistreverse:   Download playlist items in reverse order.
165     playlistrandom:    Download playlist items in random order.
166     matchtitle:        Download only matching titles.
167     rejecttitle:       Reject downloads for matching titles.
168     logger:            Log messages to a logging.Logger instance.
169     logtostderr:       Log messages to stderr instead of stdout.
170     writedescription:  Write the video description to a .description file
171     writeinfojson:     Write the video description to a .info.json file
172     writeannotations:  Write the video annotations to a .annotations.xml file
173     writethumbnail:    Write the thumbnail image to a file
174     write_all_thumbnails:  Write all thumbnail formats to files
175     writesubtitles:    Write the video subtitles to a file
176     writeautomaticsub: Write the automatically generated subtitles to a file
177     allsubtitles:      Downloads all the subtitles of the video
178                        (requires writesubtitles or writeautomaticsub)
179     listsubtitles:     Lists all available subtitles for the video
180     subtitlesformat:   The format code for subtitles
181     subtitleslangs:    List of languages of the subtitles to download
182     keepvideo:         Keep the video file after post-processing
183     daterange:         A DateRange object, download only if the upload_date is in the range.
184     skip_download:     Skip the actual download of the video file
185     cachedir:          Location of the cache files in the filesystem.
186                        False to disable filesystem cache.
187     noplaylist:        Download single video instead of a playlist if in doubt.
188     age_limit:         An integer representing the user's age in years.
189                        Unsuitable videos for the given age are skipped.
190     min_views:         An integer representing the minimum view count the video
191                        must have in order to not be skipped.
192                        Videos without view count information are always
193                        downloaded. None for no limit.
194     max_views:         An integer representing the maximum view count.
195                        Videos that are more popular than that are not
196                        downloaded.
197                        Videos without view count information are always
198                        downloaded. None for no limit.
199     download_archive:  File name of a file where all downloads are recorded.
200                        Videos already present in the file are not downloaded
201                        again.
202     cookiefile:        File name where cookies should be read from and dumped to.
203     nocheckcertificate:Do not verify SSL certificates
204     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
205                        At the moment, this is only supported by YouTube.
206     proxy:             URL of the proxy server to use
207     geo_verification_proxy:  URL of the proxy to use for IP address verification
208                        on geo-restricted sites. (Experimental)
209     socket_timeout:    Time to wait for unresponsive hosts, in seconds
210     bidi_workaround:   Work around buggy terminals without bidirectional text
211                        support, using fribidi
212     debug_printtraffic:Print out sent and received HTTP traffic
213     include_ads:       Download ads as well
214     default_search:    Prepend this string if an input url is not valid.
215                        'auto' for elaborate guessing
216     encoding:          Use this encoding instead of the system-specified.
217     extract_flat:      Do not resolve URLs, return the immediate result.
218                        Pass in 'in_playlist' to only show this behavior for
219                        playlist items.
220     postprocessors:    A list of dictionaries, each with an entry
221                        * key:  The name of the postprocessor. See
222                                youtube_dl/postprocessor/__init__.py for a list.
223                        as well as any further keyword arguments for the
224                        postprocessor.
225     progress_hooks:    A list of functions that get called on download
226                        progress, with a dictionary with the entries
227                        * status: One of "downloading", "error", or "finished".
228                                  Check this first and ignore unknown values.
229
230                        If status is one of "downloading", or "finished", the
231                        following properties may also be present:
232                        * filename: The final filename (always present)
233                        * tmpfilename: The filename we're currently writing to
234                        * downloaded_bytes: Bytes on disk
235                        * total_bytes: Size of the whole file, None if unknown
236                        * total_bytes_estimate: Guess of the eventual file size,
237                                                None if unavailable.
238                        * elapsed: The number of seconds since download started.
239                        * eta: The estimated time in seconds, None if unknown
240                        * speed: The download speed in bytes/second, None if
241                                 unknown
242                        * fragment_index: The counter of the currently
243                                          downloaded video fragment.
244                        * fragment_count: The number of fragments (= individual
245                                          files that will be merged)
246
247                        Progress hooks are guaranteed to be called at least once
248                        (with status "finished") if the download is successful.
249     merge_output_format: Extension to use when merging formats.
250     fixup:             Automatically correct known faults of the file.
251                        One of:
252                        - "never": do nothing
253                        - "warn": only emit a warning
254                        - "detect_or_warn": check whether we can do anything
255                                            about it, warn otherwise (default)
256     source_address:    (Experimental) Client-side IP address to bind to.
257     call_home:         Boolean, true iff we are allowed to contact the
258                        youtube-dl servers for debugging.
259     sleep_interval:    Number of seconds to sleep before each download when
260                        used alone or a lower bound of a range for randomized
261                        sleep before each download (minimum possible number
262                        of seconds to sleep) when used along with
263                        max_sleep_interval.
264     max_sleep_interval:Upper bound of a range for randomized sleep before each
265                        download (maximum possible number of seconds to sleep).
266                        Must only be used along with sleep_interval.
267                        Actual sleep time will be a random float from range
268                        [sleep_interval; max_sleep_interval].
269     listformats:       Print an overview of available video formats and exit.
270     list_thumbnails:   Print a table of all thumbnails and exit.
271     match_filter:      A function that gets called with the info_dict of
272                        every video.
273                        If it returns a message, the video is ignored.
274                        If it returns None, the video is downloaded.
275                        match_filter_func in utils.py is one example for this.
276     no_color:          Do not emit color codes in output.
277     bypass_geo_restriction:
278                        Bypass geographic restriction via faking X-Forwarded-For
279                        HTTP header (experimental)
280     bypass_geo_restriction_as_country:
281                        Two-letter ISO 3166-2 country code that will be used for
282                        explicit geographic restriction bypassing via faking
283                        X-Forwarded-For HTTP header (experimental)
284
285     The following options determine which downloader is picked:
286     external_downloader: Executable of the external downloader to call.
287                        None or unset for standard (built-in) downloader.
288     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
289                        if True, otherwise use ffmpeg/avconv if False, otherwise
290                        use downloader suggested by extractor if None.
291
292     The following parameters are not used by YoutubeDL itself, they are used by
293     the downloader (see youtube_dl/downloader/common.py):
294     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
295     noresizebuffer, retries, continuedl, noprogress, consoletitle,
296     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
297
298     The following options are used by the post processors:
299     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
300                        otherwise prefer avconv.
301     postprocessor_args: A list of additional command-line arguments for the
302                         postprocessor.
303     """
304
    # Class-level placeholders for documentation purposes only; every one of
    # these is rebound to a real per-instance value in __init__.
    params = None            # dict of options (see class docstring)
    _ies = []                # registered InfoExtractors (classes or instances)
    _pps = []                # registered PostProcessors
    _download_retcode = None # process return code accumulated over downloads
    _num_downloads = None    # number of files downloaded so far
    _screen_file = None      # stream for screen output (stdout or stderr)
311
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    Dictionary of options (see the class docstring); None
                   means "all defaults".
        auto_init: When True, print the debug header and register the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        # Per-instance state; shadows the class-level placeholders above.
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # 'logtostderr' redirects screen output to stderr (index 1).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        # --cn-verification-proxy is superseded by --geo-verification-proxy;
        # keep honoring the old option but warn about the deprecation.
        if self.params.get('cn_verification_proxy') is not None:
            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Route screen output through an external bidi filter
                # ('bidiv', falling back to 'fribidi') via a pseudo-terminal.
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        # Build the urllib opener (proxies, cookies, debug handlers, ...).
        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; every key
        # besides 'key' is forwarded as a constructor keyword argument.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
395
396     def warn_if_short_id(self, argv):
397         # short YouTube ID starting with dash?
398         idxs = [
399             i for i, a in enumerate(argv)
400             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
401         if idxs:
402             correct_argv = (
403                 ['youtube-dl'] +
404                 [a for i, a in enumerate(argv) if i not in idxs] +
405                 ['--'] + [argv[i] for i in idxs]
406             )
407             self.report_warning(
408                 'Long argument string detected. '
409                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
410                 args_to_str(correct_argv))
411
412     def add_info_extractor(self, ie):
413         """Add an InfoExtractor object to the end of the list."""
414         self._ies.append(ie)
415         if not isinstance(ie, type):
416             self._ies_instances[ie.ie_key()] = ie
417             ie.set_downloader(self)
418
419     def get_info_extractor(self, ie_key):
420         """
421         Get an instance of an IE with name ie_key, it will try to get one from
422         the _ies list, if there's no instance it will create a new one and add
423         it to the extractor list.
424         """
425         ie = self._ies_instances.get(ie_key)
426         if ie is None:
427             ie = get_info_extractor(ie_key)()
428             self.add_info_extractor(ie)
429         return ie
430
431     def add_default_info_extractors(self):
432         """
433         Add the InfoExtractors returned by gen_extractors to the end of the list
434         """
435         for ie in gen_extractor_classes():
436             self.add_info_extractor(ie)
437
438     def add_post_processor(self, pp):
439         """Add a PostProcessor object to the end of the chain."""
440         self._pps.append(pp)
441         pp.set_downloader(self)
442
443     def add_progress_hook(self, ph):
444         """Add the progress hook (currently only for the file downloader)"""
445         self._progress_hooks.append(ph)
446
447     def _bidi_workaround(self, message):
448         if not hasattr(self, '_output_channel'):
449             return message
450
451         assert hasattr(self, '_output_process')
452         assert isinstance(message, compat_str)
453         line_count = message.count('\n') + 1
454         self._output_process.stdin.write((message + '\n').encode('utf-8'))
455         self._output_process.stdin.flush()
456         res = ''.join(self._output_channel.readline().decode('utf-8')
457                       for _ in range(line_count))
458         return res[:-len('\n')]
459
460     def to_screen(self, message, skip_eol=False):
461         """Print message to stdout if not in quiet mode."""
462         return self.to_stdout(message, skip_eol, check_quiet=True)
463
464     def _write_string(self, s, out=None):
465         write_string(s, out=out, encoding=self.params.get('encoding'))
466
467     def to_stdout(self, message, skip_eol=False, check_quiet=False):
468         """Print message to stdout if not in quiet mode."""
469         if self.params.get('logger'):
470             self.params['logger'].debug(message)
471         elif not check_quiet or not self.params.get('quiet', False):
472             message = self._bidi_workaround(message)
473             terminator = ['\n', ''][skip_eol]
474             output = message + terminator
475
476             self._write_string(output, self._screen_file)
477
478     def to_stderr(self, message):
479         """Print message to stderr."""
480         assert isinstance(message, compat_str)
481         if self.params.get('logger'):
482             self.params['logger'].error(message)
483         else:
484             message = self._bidi_workaround(message)
485             output = message + '\n'
486             self._write_string(output, self._err_file)
487
488     def to_console_title(self, message):
489         if not self.params.get('consoletitle', False):
490             return
491         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
492             # c_wchar_p() might not be necessary if `message` is
493             # already of type unicode()
494             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
495         elif 'TERM' in os.environ:
496             self._write_string('\033]0;%s\007' % message, self._screen_file)
497
498     def save_console_title(self):
499         if not self.params.get('consoletitle', False):
500             return
501         if 'TERM' in os.environ:
502             # Save the title on stack
503             self._write_string('\033[22;0t', self._screen_file)
504
505     def restore_console_title(self):
506         if not self.params.get('consoletitle', False):
507             return
508         if 'TERM' in os.environ:
509             # Restore the title from stack
510             self._write_string('\033[23;0t', self._screen_file)
511
512     def __enter__(self):
513         self.save_console_title()
514         return self
515
516     def __exit__(self, *args):
517         self.restore_console_title()
518
519         if self.params.get('cookiefile') is not None:
520             self.cookiejar.save()
521
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # If the active exception carries its own exc_info (e.g. a
                    # wrapped extractor error), include that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an exception handler: dump the current stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError with the most specific exception
            # info available (preferring a wrapped exc_info if present).
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
551
552     def report_warning(self, message):
553         '''
554         Print the message to stderr, it will be prefixed with 'WARNING:'
555         If stderr is a tty file the 'WARNING:' will be colored
556         '''
557         if self.params.get('logger') is not None:
558             self.params['logger'].warning(message)
559         else:
560             if self.params.get('no_warnings'):
561                 return
562             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
563                 _msg_header = '\033[0;33mWARNING:\033[0m'
564             else:
565                 _msg_header = 'WARNING:'
566             warning_message = '%s %s' % (_msg_header, message)
567             self.to_stderr(warning_message)
568
569     def report_error(self, message, tb=None):
570         '''
571         Do the same as trouble, but prefixes the message with 'ERROR:', colored
572         in red if stderr is a tty file.
573         '''
574         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
575             _msg_header = '\033[0;31mERROR:\033[0m'
576         else:
577             _msg_header = 'ERROR:'
578         error_message = '%s %s' % (_msg_header, message)
579         self.trouble(error_message, tb)
580
581     def report_file_already_downloaded(self, file_name):
582         """Report file has already been fully downloaded."""
583         try:
584             self.to_screen('[download] %s has already been downloaded' % file_name)
585         except UnicodeEncodeError:
586             self.to_screen('[download] The file has already been downloaded')
587
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the configured outtmpl with the (sanitized) fields of
        info_dict; returns None and reports an error when the template
        is invalid.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the playlist index to the width of the playlist size.
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Synthesize a resolution string from whichever dimensions are known.
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize every remaining scalar field for filesystem use; the
            # 'id' field gets the more permissive is_id treatment.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Fields referenced by the template but missing from the info
            # dict expand to the literal string 'NA'.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
630
    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded.

        Otherwise returns a human-readable string explaining why the video
        is skipped. When incomplete is true, filters that need the full
        info dict (match_filter) are not applied.
        """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # The title can be absent when we're just evaluating flat
            # playlist entries; only then are the title filters skipped.
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        if not incomplete:
            # User-supplied callback; any non-None return value is the
            # skip reason.
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None
672
673     @staticmethod
674     def add_extra_info(info_dict, extra_info):
675         '''Set the keys from extra_info in info dict if they are missing'''
676         for key, value in extra_info.items():
677             info_dict.setdefault(key, value)
678
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            # A specific extractor was requested: only try that one
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # Re-fetch by key to get the registered (possibly cached) instance
            ie = self.get_info_extractor(ie.ie_key())
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except GeoRestrictedError as e:
                # Give the user an actionable hint (country list + proxy tip)
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
                break
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate: this is the normal way to stop after --max-downloads
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no extractor accepted this URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
740
741     def add_default_extra_info(self, ie_result, ie, url):
742         self.add_extra_info(ie_result, {
743             'extractor': ie.IE_NAME,
744             'webpage_url': url,
745             'webpage_url_basename': url_basename(url),
746             'extractor_key': ie.ie_key(),
747         })
748
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist, URL results inside a playlist are returned
            # as-is instead of being resolved (and possibly downloaded)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the embedded
            # one's, except for the fields that describe the reference itself
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # A url_transparent pointing at another url_transparent would
            # recurse forever; the inner extraction must have resolved further
            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand a '1-3,7' style spec into individual 1-based indices
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                # NOTE: this is a generator, consumed at most once below
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Paged lists fetch lazily; request only the slices we need
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # Arbitrary iterables have no random access: materialize
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            # Geo-bypass IP faked during playlist extraction, propagated to
            # each entry so its formats carry the same X-Forwarded-For
            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Legacy entries lack the default metadata; add it in place
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
922
923     def _build_format_filter(self, filter_spec):
924         " Returns a function to filter the formats according to the filter_spec "
925
926         OPERATORS = {
927             '<': operator.lt,
928             '<=': operator.le,
929             '>': operator.gt,
930             '>=': operator.ge,
931             '=': operator.eq,
932             '!=': operator.ne,
933         }
934         operator_rex = re.compile(r'''(?x)\s*
935             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
936             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
937             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
938             $
939             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
940         m = operator_rex.search(filter_spec)
941         if m:
942             try:
943                 comparison_value = int(m.group('value'))
944             except ValueError:
945                 comparison_value = parse_filesize(m.group('value'))
946                 if comparison_value is None:
947                     comparison_value = parse_filesize(m.group('value') + 'B')
948                 if comparison_value is None:
949                     raise ValueError(
950                         'Invalid value %r in format specification %r' % (
951                             m.group('value'), filter_spec))
952             op = OPERATORS[m.group('op')]
953
954         if not m:
955             STR_OPERATORS = {
956                 '=': operator.eq,
957                 '!=': operator.ne,
958                 '^=': lambda attr, value: attr.startswith(value),
959                 '$=': lambda attr, value: attr.endswith(value),
960                 '*=': lambda attr, value: value in attr,
961             }
962             str_operator_rex = re.compile(r'''(?x)
963                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
964                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
965                 \s*(?P<value>[a-zA-Z0-9._-]+)
966                 \s*$
967                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
968             m = str_operator_rex.search(filter_spec)
969             if m:
970                 comparison_value = m.group('value')
971                 op = STR_OPERATORS[m.group('op')]
972
973         if not m:
974             raise ValueError('Invalid filter specification %r' % filter_spec)
975
976         def _filter(f):
977             actual_value = f.get(m.group('key'))
978             if actual_value is None:
979                 return m.group('none_inclusive')
980             return op(actual_value, comparison_value)
981         return _filter
982
    def build_format_selector(self, format_spec):
        """Compile a format selection spec (e.g. 'bestvideo+bestaudio/best')
        into a function that maps a context dict ({'formats': ...,
        'incomplete_formats': ...}) to an iterable of selected formats.

        The spec is tokenized with the Python tokenizer and parsed by a small
        recursive-descent parser into FormatSelector trees, which are then
        compiled into nested selector closures."""
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Selector node types
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Collect everything up to the closing ']' as one raw filter string
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Glue adjacent tokens into one NAME token
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Parse a comma-separated list of selectors; the inside_* flags
            # tell us which operators end the current (nested) selection
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        # a/b: try a first, fall back to b
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # '[...]' filter; a bare filter implicitly means 'best[...]'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # a+b: merge video of a with audio of b
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a FormatSelector tree (or list of trees) into a callable
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Concatenate the formats chosen by each sub-selector
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # First alternative that yields anything wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # formats are assumed sorted worst-to-best
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec is an extension or a format_id
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Build a synthetic merged format dict from a (video, audio) pair
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # deepcopy so one side's filtering can't affect the other
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the node's '[...]' filters before selecting
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with one-token lookbehind (restore_last_token),
            # which the parser uses to "unread" a terminator
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 iterator protocol compatibility
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1247
1248     def _calc_headers(self, info_dict):
1249         res = std_headers.copy()
1250
1251         add_headers = info_dict.get('http_headers')
1252         if add_headers:
1253             res.update(add_headers)
1254
1255         cookies = self._calc_cookies(info_dict)
1256         if cookies:
1257             res['Cookie'] = cookies
1258
1259         if 'X-Forwarded-For' not in res:
1260             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1261             if x_forwarded_for_ip:
1262                 res['X-Forwarded-For'] = x_forwarded_for_ip
1263
1264         return res
1265
1266     def _calc_cookies(self, info_dict):
1267         pr = sanitized_Request(info_dict['url'])
1268         self.cookiejar.add_cookie_header(pr)
1269         return pr.get_header('Cookie')
1270
    def process_video_result(self, info_dict, download=True):
        """Post-process a single extracted video result.

        Validates mandatory fields ('id', 'title'), fills in derived ones
        (thumbnails, display_id, upload_date, auto-generated chapter/season/
        episode titles, requested subtitles), normalizes every entry of
        info_dict['formats'] (url, format_id, ext, protocol, http_headers),
        applies the format selector and, when download is True, hands every
        selected format to process_info().

        Returns info_dict updated with the last (best) selected format, or
        None when a listing option (list_thumbnails, listsubtitles,
        listformats) short-circuits processing.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if not isinstance(info_dict['id'], compat_str):
            self.report_warning('"id" field is not a string - forcing string conversion')
            info_dict['id'] = compat_str(info_dict['id'])

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a single 'thumbnail' value into the list form
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort worst-to-best: explicit preference first, then dimensions;
            # missing values (-1 / '') sort before any real ones
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # List is sorted worst-to-best, so the last entry is the best one
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        subtitles = info_dict.get('subtitles')
        if subtitles:
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # Maps format_id -> list of formats sharing it, to detect collisions
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            format['url'] = sanitize_url(format['url'])

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff: the faked source IP has already
        # been baked into each format's http_headers by _calc_headers above
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default selection: prefer merged bestvideo+bestaudio when output
            # is a real file (not stdout / live) and a merger is available
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1475
1476     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1477         """Select the requested subtitles and their format"""
1478         available_subs = {}
1479         if normal_subtitles and self.params.get('writesubtitles'):
1480             available_subs.update(normal_subtitles)
1481         if automatic_captions and self.params.get('writeautomaticsub'):
1482             for lang, cap_info in automatic_captions.items():
1483                 if lang not in available_subs:
1484                     available_subs[lang] = cap_info
1485
1486         if (not self.params.get('writesubtitles') and not
1487                 self.params.get('writeautomaticsub') or not
1488                 available_subs):
1489             return None
1490
1491         if self.params.get('allsubtitles', False):
1492             requested_langs = available_subs.keys()
1493         else:
1494             if self.params.get('subtitleslangs', False):
1495                 requested_langs = self.params.get('subtitleslangs')
1496             elif 'en' in available_subs:
1497                 requested_langs = ['en']
1498             else:
1499                 requested_langs = [list(available_subs.keys())[0]]
1500
1501         formats_query = self.params.get('subtitlesformat', 'best')
1502         formats_preference = formats_query.split('/') if formats_query else []
1503         subs = {}
1504         for lang in requested_langs:
1505             formats = available_subs.get(lang)
1506             if formats is None:
1507                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1508                 continue
1509             for ext in formats_preference:
1510                 if ext == 'best':
1511                     f = formats[-1]
1512                     break
1513                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1514                 if matches:
1515                     f = matches[-1]
1516                     break
1517             else:
1518                 f = formats[-1]
1519                 self.report_warning(
1520                     'No subtitle format found matching "%s" for language %s, '
1521                     'using %s' % (formats_query, lang, f['ext']))
1522             subs[lang] = f
1523         return subs
1524
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Applies match filters and forced printings, then (unless simulating)
        writes the requested side files (description, annotations, subtitles,
        info JSON, thumbnails), downloads the selected format(s) — merging
        multi-format requests via ffmpeg/avconv — applies fixup
        postprocessors and finally records the video in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the full title around, but cap the working title at 200 chars
        # (197 + '...') to keep generated filenames manageable
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # A non-None reason means this video is skipped (matching filters etc.)
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    sub_data = sub_info['data']
                else:
                    try:
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err.cause)))
                        continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/rg3/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_data)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                # Run the actual downloader for one (sub)format, with progress
                # hooks attached
                def dl(name, info):
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    # Whether the (video, audio) pair can share one container
                    # without remuxing into mkv
                    def compatible_formats(formats):
                        video, audio = formats
                        # NOTE(review): the names below are swapped (video_ext
                        # receives audio's ext and vice versa); the result is
                        # unaffected because the membership check further down
                        # is symmetric in the two variables
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                            # NOTE(review): ('webm') is a plain string, not a
                            # 1-tuple, so `ext in ('webm')` is a substring test
                            # ('web' or 'm' would also match) — likely meant
                            # ('webm',)
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        # Download each requested format into its own
                        # 'f<format_id>'-prefixed file; the merger combines
                        # them afterwards
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                if (info_dict.get('requested_formats') is None and
                        info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Due to precedence this reads: m3u8_native, or (m3u8 with the
                # native HLS downloader preferred)
                if (info_dict.get('protocol') == 'm3u8_native' or
                        info_dict.get('protocol') == 'm3u8' and
                        self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformated aac bitstream.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformated aac bitstream. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                self.record_download_archive(info_dict)
1814
1815     def download(self, url_list):
1816         """Download a given list of URLs."""
1817         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1818         if (len(url_list) > 1 and
1819                 '%' not in outtmpl and
1820                 self.params.get('max_downloads') != 1):
1821             raise SameFileError(outtmpl)
1822
1823         for url in url_list:
1824             try:
1825                 # It also downloads the videos
1826                 res = self.extract_info(
1827                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1828             except UnavailableVideoError:
1829                 self.report_error('unable to download video')
1830             except MaxDownloadsReached:
1831                 self.to_screen('[info] Maximum number of downloaded files reached.')
1832                 raise
1833             else:
1834                 if self.params.get('dump_single_json', False):
1835                     self.to_stdout(json.dumps(res))
1836
1837         return self._download_retcode
1838
1839     def download_with_info_file(self, info_filename):
1840         with contextlib.closing(fileinput.FileInput(
1841                 [info_filename], mode='r',
1842                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1843             # FileInput doesn't have a read method, we can't call json.load
1844             info = self.filter_requested_info(json.loads('\n'.join(f)))
1845         try:
1846             self.process_ie_result(info, download=True)
1847         except DownloadError:
1848             webpage_url = info.get('webpage_url')
1849             if webpage_url is not None:
1850                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1851                 return self.download([webpage_url])
1852             else:
1853                 raise
1854         return self._download_retcode
1855
1856     @staticmethod
1857     def filter_requested_info(info_dict):
1858         return dict(
1859             (k, v) for k, v in info_dict.items()
1860             if k not in ['requested_formats', 'requested_subtitles'])
1861
1862     def post_process(self, filename, ie_info):
1863         """Run all the postprocessors on the given file."""
1864         info = dict(ie_info)
1865         info['filepath'] = filename
1866         pps_chain = []
1867         if ie_info.get('__postprocessors') is not None:
1868             pps_chain.extend(ie_info['__postprocessors'])
1869         pps_chain.extend(self._pps)
1870         for pp in pps_chain:
1871             files_to_delete = []
1872             try:
1873                 files_to_delete, info = pp.run(info)
1874             except PostProcessingError as e:
1875                 self.report_error(e.msg)
1876             if files_to_delete and not self.params.get('keepvideo', False):
1877                 for old_filename in files_to_delete:
1878                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1879                     try:
1880                         os.remove(encodeFilename(old_filename))
1881                     except (IOError, OSError):
1882                         self.report_warning('Unable to remove downloaded original file')
1883
1884     def _make_archive_id(self, info_dict):
1885         # Future-proof against any change in case
1886         # and backwards compatibility with prior versions
1887         extractor = info_dict.get('extractor_key')
1888         if extractor is None:
1889             if 'id' in info_dict:
1890                 extractor = info_dict.get('ie_key')  # key in a playlist
1891         if extractor is None:
1892             return None  # Incomplete video information
1893         return extractor.lower() + ' ' + info_dict['id']
1894
1895     def in_download_archive(self, info_dict):
1896         fn = self.params.get('download_archive')
1897         if fn is None:
1898             return False
1899
1900         vid_id = self._make_archive_id(info_dict)
1901         if vid_id is None:
1902             return False  # Incomplete video information
1903
1904         try:
1905             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1906                 for line in archive_file:
1907                     if line.strip() == vid_id:
1908                         return True
1909         except IOError as ioe:
1910             if ioe.errno != errno.ENOENT:
1911                 raise
1912         return False
1913
1914     def record_download_archive(self, info_dict):
1915         fn = self.params.get('download_archive')
1916         if fn is None:
1917             return
1918         vid_id = self._make_archive_id(info_dict)
1919         assert vid_id
1920         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1921             archive_file.write(vid_id + '\n')
1922
1923     @staticmethod
1924     def format_resolution(format, default='unknown'):
1925         if format.get('vcodec') == 'none':
1926             return 'audio only'
1927         if format.get('resolution') is not None:
1928             return format['resolution']
1929         if format.get('height') is not None:
1930             if format.get('width') is not None:
1931                 res = '%sx%s' % (format['width'], format['height'])
1932             else:
1933                 res = '%sp' % format['height']
1934         elif format.get('width') is not None:
1935             res = '%dx?' % format['width']
1936         else:
1937             res = default
1938         return res
1939
    def _format_note(self, fdict):
        """Build a short human-readable note describing a format dict.

        Aggregates whatever metadata is present (container, codecs, bitrates,
        fps, sample rate, filesize, ...) into one string; absent fields are
        skipped.  The append order below is the display contract — do not
        reorder these checks.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # Total bitrate, right-aligned to 4 digits (trailing space intended)
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' glues the codec name to the video bitrate appended below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video bitrate known but codec unknown
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                # acodec == 'none' marks a video-only format
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            # Audio sample rate
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1995
1996     def list_formats(self, info_dict):
1997         formats = info_dict.get('formats', [info_dict])
1998         table = [
1999             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2000             for f in formats
2001             if f.get('preference') is None or f['preference'] >= -1000]
2002         if len(formats) > 1:
2003             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2004
2005         header_line = ['format code', 'extension', 'resolution', 'note']
2006         self.to_screen(
2007             '[info] Available formats for %s:\n%s' %
2008             (info_dict['id'], render_table(header_line, table)))
2009
2010     def list_thumbnails(self, info_dict):
2011         thumbnails = info_dict.get('thumbnails')
2012         if not thumbnails:
2013             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2014             return
2015
2016         self.to_screen(
2017             '[info] Thumbnails for %s:' % info_dict['id'])
2018         self.to_screen(render_table(
2019             ['ID', 'width', 'height', 'URL'],
2020             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2021
2022     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2023         if not subtitles:
2024             self.to_screen('%s has no %s' % (video_id, name))
2025             return
2026         self.to_screen(
2027             'Available %s for %s:' % (name, video_id))
2028         self.to_screen(render_table(
2029             ['Language', 'formats'],
2030             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2031                 for lang, formats in subtitles.items()]))
2032
2033     def urlopen(self, req):
2034         """ Start an HTTP download """
2035         if isinstance(req, compat_basestring):
2036             req = sanitized_Request(req)
2037         return self._opener.open(req, timeout=self._socket_timeout)
2038
    def print_debug_header(self):
        """Write version/encoding/environment diagnostics (only with --verbose)."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may be replaced and lack an 'encoding' attribute entirely.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Report the git commit when running from a source checkout; silently
        # skip if git is unavailable or this is an installed copy.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear() only exists on Python 2; ignore its absence.
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of external helper programs (ffmpeg/avconv family, rtmpdump)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy settings from every opener handler that
        # exposes a 'proxies' mapping.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        # Opt-in network check: report public IP and warn about outdated versions.
        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2105
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S)/data: requests.

        Wires together cookie handling, per-request proxies, the custom HTTPS
        handler, and an explicitly disabled file:// handler, then stores the
        result in self._opener.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds when none is configured.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file: keep cookies in memory only for this session.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = compat_expanduser(opts_cookiefile)
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load if the file is readable; it may not exist yet.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely.
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy configuration.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2158
2159     def encode(self, s):
2160         if isinstance(s, bytes):
2161             return s  # Already encoded
2162
2163         try:
2164             return s.encode(self.get_encoding())
2165         except UnicodeEncodeError as err:
2166             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2167             raise
2168
2169     def get_encoding(self):
2170         encoding = self.params.get('encoding')
2171         if encoding is None:
2172             encoding = preferredencoding()
2173         return encoding
2174
2175     def _write_thumbnails(self, info_dict, filename):
2176         if self.params.get('writethumbnail', False):
2177             thumbnails = info_dict.get('thumbnails')
2178             if thumbnails:
2179                 thumbnails = [thumbnails[-1]]
2180         elif self.params.get('write_all_thumbnails', False):
2181             thumbnails = info_dict.get('thumbnails')
2182         else:
2183             return
2184
2185         if not thumbnails:
2186             # No thumbnails present, so return immediately
2187             return
2188
2189         for t in thumbnails:
2190             thumb_ext = determine_ext(t['url'], 'jpg')
2191             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2192             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2193             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2194
2195             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2196                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2197                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2198             else:
2199                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2200                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2201                 try:
2202                     uf = self.urlopen(t['url'])
2203                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2204                         shutil.copyfileobj(uf, thumbf)
2205                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2206                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2207                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2208                     self.report_warning('Unable to download thumbnail "%s": %s' %
2209                                         (t['url'], error_to_compat_str(err)))