]> git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py
ce4b72fd3191f05d8035eba5c34fff342affae82
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     sanitize_path,
65     std_headers,
66     subtitles_filename,
67     takewhile_inclusive,
68     UnavailableVideoError,
69     url_basename,
70     version_tuple,
71     write_json_file,
72     write_string,
73     YoutubeDLHandler,
74     prepend_extension,
75     args_to_str,
76     age_restricted,
77 )
78 from .cache import Cache
79 from .extractor import get_info_extractor, gen_extractors
80 from .downloader import get_suitable_downloader
81 from .downloader.rtmp import rtmpdump_version
82 from .postprocessor import (
83     FFmpegFixupM4aPP,
84     FFmpegFixupStretchedPP,
85     FFmpegMergerPP,
86     FFmpegPostProcessor,
87     get_postprocessor,
88 )
89 from .version import __version__
90
91
92 class YoutubeDL(object):
93     """YoutubeDL class.
94
95     YoutubeDL objects are the ones responsible for downloading the
96     actual video file and writing it to disk if the user has requested
97     it, among some other tasks. In most cases there should be one per
98     program. As, given a video URL, the downloader doesn't know how to
99     extract all the needed information, task that InfoExtractors do, it
100     has to pass the URL to one of them.
101
102     For this, YoutubeDL objects have a method that allows
103     InfoExtractors to be registered in a given order. When it is passed
104     a URL, the YoutubeDL object hands it to the first InfoExtractor it
105     finds that reports being able to handle it. The InfoExtractor extracts
106     all the information about the video or videos the URL refers to, and
107     YoutubeDL processes the extracted information, possibly using a File
108     Downloader to download the video.
109
110     YoutubeDL objects accept a lot of parameters. In order not to saturate
111     the object constructor with arguments, it receives a dictionary of
112     options instead. These options are available through the params
113     attribute for the InfoExtractors to use. The YoutubeDL also
114     registers itself as the downloader in charge for the InfoExtractors
115     that are added to it, so this is a "mutual registration".
116
117     Available options:
118
119     username:          Username for authentication purposes.
120     password:          Password for authentication purposes.
121     videopassword:     Password for accessing a video.
122     usenetrc:          Use netrc for authentication instead.
123     verbose:           Print additional info to stdout.
124     quiet:             Do not print messages to stdout.
125     no_warnings:       Do not print out anything for warnings.
126     forceurl:          Force printing final URL.
127     forcetitle:        Force printing title.
128     forceid:           Force printing ID.
129     forcethumbnail:    Force printing thumbnail URL.
130     forcedescription:  Force printing description.
131     forcefilename:     Force printing final filename.
132     forceduration:     Force printing duration.
133     forcejson:         Force printing info_dict as JSON.
134     dump_single_json:  Force printing the info_dict of the whole playlist
135                        (or video) as a single JSON line.
136     simulate:          Do not download the video files.
137     format:            Video format code. See options.py for more information.
138     format_limit:      Highest quality format to try.
139     outtmpl:           Template for output names.
140     restrictfilenames: Do not allow "&" and spaces in file names
141     ignoreerrors:      Do not stop on download errors.
142     nooverwrites:      Prevent overwriting files.
143     playliststart:     Playlist item to start at.
144     playlistend:       Playlist item to end at.
145     playlist_items:    Specific indices of playlist to download.
146     playlistreverse:   Download playlist items in reverse order.
147     matchtitle:        Download only matching titles.
148     rejecttitle:       Reject downloads for matching titles.
149     logger:            Log messages to a logging.Logger instance.
150     logtostderr:       Log messages to stderr instead of stdout.
151     writedescription:  Write the video description to a .description file
152     writeinfojson:     Write the video metadata to a .info.json file
153     writeannotations:  Write the video annotations to a .annotations.xml file
154     writethumbnail:    Write the thumbnail image to a file
155     write_all_thumbnails:  Write all thumbnail formats to files
156     writesubtitles:    Write the video subtitles to a file
157     writeautomaticsub: Write the automatic subtitles to a file
158     allsubtitles:      Downloads all the subtitles of the video
159                        (requires writesubtitles or writeautomaticsub)
160     listsubtitles:     Lists all available subtitles for the video
161     subtitlesformat:   The format code for subtitles
162     subtitleslangs:    List of languages of the subtitles to download
163     keepvideo:         Keep the video file after post-processing
164     daterange:         A DateRange object, download only if the upload_date is in the range.
165     skip_download:     Skip the actual download of the video file
166     cachedir:          Location of the cache files in the filesystem.
167                        False to disable filesystem cache.
168     noplaylist:        Download single video instead of a playlist if in doubt.
169     age_limit:         An integer representing the user's age in years.
170                        Unsuitable videos for the given age are skipped.
171     min_views:         An integer representing the minimum view count the video
172                        must have in order to not be skipped.
173                        Videos without view count information are always
174                        downloaded. None for no limit.
175     max_views:         An integer representing the maximum view count.
176                        Videos that are more popular than that are not
177                        downloaded.
178                        Videos without view count information are always
179                        downloaded. None for no limit.
180     download_archive:  File name of a file where all downloads are recorded.
181                        Videos already present in the file are not downloaded
182                        again.
183     cookiefile:        File name where cookies should be read from and dumped to.
184     nocheckcertificate:Do not verify SSL certificates
185     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
186                        At the moment, this is only supported by YouTube.
187     proxy:             URL of the proxy server to use
188     cn_verification_proxy:  URL of the proxy to use for IP address verification
189                        on Chinese sites. (Experimental)
190     socket_timeout:    Time to wait for unresponsive hosts, in seconds
191     bidi_workaround:   Work around buggy terminals without bidirectional text
192                        support, using fribidi
193     debug_printtraffic:Print out sent and received HTTP traffic
194     include_ads:       Download ads as well
195     default_search:    Prepend this string if an input url is not valid.
196                        'auto' for elaborate guessing
197     encoding:          Use this encoding instead of the system-specified.
198     extract_flat:      Do not resolve URLs, return the immediate result.
199                        Pass in 'in_playlist' to only show this behavior for
200                        playlist items.
201     postprocessors:    A list of dictionaries, each with an entry
202                        * key:  The name of the postprocessor. See
203                                youtube_dl/postprocessor/__init__.py for a list.
204                        as well as any further keyword arguments for the
205                        postprocessor.
206     progress_hooks:    A list of functions that get called on download
207                        progress, with a dictionary with the entries
208                        * status: One of "downloading", "error", or "finished".
209                                  Check this first and ignore unknown values.
210
211                        If status is one of "downloading", or "finished", the
212                        following properties may also be present:
213                        * filename: The final filename (always present)
214                        * tmpfilename: The filename we're currently writing to
215                        * downloaded_bytes: Bytes on disk
216                        * total_bytes: Size of the whole file, None if unknown
217                        * total_bytes_estimate: Guess of the eventual file size,
218                                                None if unavailable.
219                        * elapsed: The number of seconds since download started.
220                        * eta: The estimated time in seconds, None if unknown
221                        * speed: The download speed in bytes/second, None if
222                                 unknown
223                        * fragment_index: The counter of the currently
224                                          downloaded video fragment.
225                        * fragment_count: The number of fragments (= individual
226                                          files that will be merged)
227
228                        Progress hooks are guaranteed to be called at least once
229                        (with status "finished") if the download is successful.
230     merge_output_format: Extension to use when merging formats.
231     fixup:             Automatically correct known faults of the file.
232                        One of:
233                        - "never": do nothing
234                        - "warn": only emit a warning
235                        - "detect_or_warn": check whether we can do anything
236                                            about it, warn otherwise (default)
237     source_address:    (Experimental) Client-side IP address to bind to.
238     call_home:         Boolean, true iff we are allowed to contact the
239                        youtube-dl servers for debugging.
240     sleep_interval:    Number of seconds to sleep before each download.
241     listformats:       Print an overview of available video formats and exit.
242     list_thumbnails:   Print a table of all thumbnails and exit.
243     match_filter:      A function that gets called with the info_dict of
244                        every video.
245                        If it returns a message, the video is ignored.
246                        If it returns None, the video is downloaded.
247                        match_filter_func in utils.py is one example for this.
248     no_color:          Do not emit color codes in output.
249
250     The following options determine which downloader is picked:
251     external_downloader: Executable of the external downloader to call.
252                        None or unset for standard (built-in) downloader.
253     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
254
255     The following parameters are not used by YoutubeDL itself, they are used by
256     the downloader (see youtube_dl/downloader/common.py):
257     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
258     noresizebuffer, retries, continuedl, noprogress, consoletitle,
259     xattr_set_filesize, external_downloader_args.
260
261     The following options are used by the post processors:
262     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
263                        otherwise prefer avconv.
264     exec_cmd:          Arbitrary command to run after downloading
265     """
266
267     params = None
268     _ies = []
269     _pps = []
270     _download_retcode = None
271     _num_downloads = None
272     _screen_file = None
273
274     def __init__(self, params=None, auto_init=True):
275         """Create a FileDownloader object with the given options."""
276         if params is None:
277             params = {}
278         self._ies = []
279         self._ies_instances = {}
280         self._pps = []
281         self._progress_hooks = []
282         self._download_retcode = 0
283         self._num_downloads = 0
284         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
285         self._err_file = sys.stderr
286         self.params = params
287         self.cache = Cache(self)
288
289         if params.get('bidi_workaround', False):
290             try:
291                 import pty
292                 master, slave = pty.openpty()
293                 width = compat_get_terminal_size().columns
294                 if width is None:
295                     width_args = []
296                 else:
297                     width_args = ['-w', str(width)]
298                 sp_kwargs = dict(
299                     stdin=subprocess.PIPE,
300                     stdout=slave,
301                     stderr=self._err_file)
302                 try:
303                     self._output_process = subprocess.Popen(
304                         ['bidiv'] + width_args, **sp_kwargs
305                     )
306                 except OSError:
307                     self._output_process = subprocess.Popen(
308                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
309                 self._output_channel = os.fdopen(master, 'rb')
310             except OSError as ose:
311                 if ose.errno == 2:
312                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
313                 else:
314                     raise
315
316         if (sys.version_info >= (3,) and sys.platform != 'win32' and
317                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
318                 not params.get('restrictfilenames', False)):
319             # On Python 3, the Unicode filesystem API will throw errors (#1474)
320             self.report_warning(
321                 'Assuming --restrict-filenames since file system encoding '
322                 'cannot encode all characters. '
323                 'Set the LC_ALL environment variable to fix this.')
324             self.params['restrictfilenames'] = True
325
326         if isinstance(params.get('outtmpl'), bytes):
327             self.report_warning(
328                 'Parameter outtmpl is bytes, but should be a unicode string. '
329                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
330
331         self._setup_opener()
332
333         if auto_init:
334             self.print_debug_header()
335             self.add_default_info_extractors()
336
337         for pp_def_raw in self.params.get('postprocessors', []):
338             pp_class = get_postprocessor(pp_def_raw['key'])
339             pp_def = dict(pp_def_raw)
340             del pp_def['key']
341             pp = pp_class(self, **compat_kwargs(pp_def))
342             self.add_post_processor(pp)
343
344         for ph in self.params.get('progress_hooks', []):
345             self.add_progress_hook(ph)
346
347     def warn_if_short_id(self, argv):
348         # short YouTube ID starting with dash?
349         idxs = [
350             i for i, a in enumerate(argv)
351             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
352         if idxs:
353             correct_argv = (
354                 ['youtube-dl'] +
355                 [a for i, a in enumerate(argv) if i not in idxs] +
356                 ['--'] + [argv[i] for i in idxs]
357             )
358             self.report_warning(
359                 'Long argument string detected. '
360                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
361                 args_to_str(correct_argv))
362
363     def add_info_extractor(self, ie):
364         """Add an InfoExtractor object to the end of the list."""
365         self._ies.append(ie)
366         self._ies_instances[ie.ie_key()] = ie
367         ie.set_downloader(self)
368
369     def get_info_extractor(self, ie_key):
370         """
371         Get an instance of an IE with name ie_key, it will try to get one from
372         the _ies list, if there's no instance it will create a new one and add
373         it to the extractor list.
374         """
375         ie = self._ies_instances.get(ie_key)
376         if ie is None:
377             ie = get_info_extractor(ie_key)()
378             self.add_info_extractor(ie)
379         return ie
380
381     def add_default_info_extractors(self):
382         """
383         Add the InfoExtractors returned by gen_extractors to the end of the list
384         """
385         for ie in gen_extractors():
386             self.add_info_extractor(ie)
387
388     def add_post_processor(self, pp):
389         """Add a PostProcessor object to the end of the chain."""
390         self._pps.append(pp)
391         pp.set_downloader(self)
392
393     def add_progress_hook(self, ph):
394         """Add the progress hook (currently only for the file downloader)"""
395         self._progress_hooks.append(ph)
396
397     def _bidi_workaround(self, message):
398         if not hasattr(self, '_output_channel'):
399             return message
400
401         assert hasattr(self, '_output_process')
402         assert isinstance(message, compat_str)
403         line_count = message.count('\n') + 1
404         self._output_process.stdin.write((message + '\n').encode('utf-8'))
405         self._output_process.stdin.flush()
406         res = ''.join(self._output_channel.readline().decode('utf-8')
407                       for _ in range(line_count))
408         return res[:-len('\n')]
409
410     def to_screen(self, message, skip_eol=False):
411         """Print message to stdout if not in quiet mode."""
412         return self.to_stdout(message, skip_eol, check_quiet=True)
413
414     def _write_string(self, s, out=None):
415         write_string(s, out=out, encoding=self.params.get('encoding'))
416
417     def to_stdout(self, message, skip_eol=False, check_quiet=False):
418         """Print message to stdout if not in quiet mode."""
419         if self.params.get('logger'):
420             self.params['logger'].debug(message)
421         elif not check_quiet or not self.params.get('quiet', False):
422             message = self._bidi_workaround(message)
423             terminator = ['\n', ''][skip_eol]
424             output = message + terminator
425
426             self._write_string(output, self._screen_file)
427
428     def to_stderr(self, message):
429         """Print message to stderr."""
430         assert isinstance(message, compat_str)
431         if self.params.get('logger'):
432             self.params['logger'].error(message)
433         else:
434             message = self._bidi_workaround(message)
435             output = message + '\n'
436             self._write_string(output, self._err_file)
437
438     def to_console_title(self, message):
439         if not self.params.get('consoletitle', False):
440             return
441         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
442             # c_wchar_p() might not be necessary if `message` is
443             # already of type unicode()
444             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
445         elif 'TERM' in os.environ:
446             self._write_string('\033]0;%s\007' % message, self._screen_file)
447
448     def save_console_title(self):
449         if not self.params.get('consoletitle', False):
450             return
451         if 'TERM' in os.environ:
452             # Save the title on stack
453             self._write_string('\033[22;0t', self._screen_file)
454
455     def restore_console_title(self):
456         if not self.params.get('consoletitle', False):
457             return
458         if 'TERM' in os.environ:
459             # Restore the title from stack
460             self._write_string('\033[23;0t', self._screen_file)
461
462     def __enter__(self):
463         self.save_console_title()
464         return self
465
466     def __exit__(self, *args):
467         self.restore_console_title()
468
469         if self.params.get('cookiefile') is not None:
470             self.cookiejar.save()
471
472     def trouble(self, message=None, tb=None):
473         """Determine action to take when a download problem appears.
474
475         Depending on if the downloader has been configured to ignore
476         download errors or not, this method may throw an exception or
477         not when errors are found, after printing the message.
478
479         tb, if given, is additional traceback information.
480         """
481         if message is not None:
482             self.to_stderr(message)
483         if self.params.get('verbose'):
484             if tb is None:
485                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
486                     tb = ''
487                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
488                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
489                     tb += compat_str(traceback.format_exc())
490                 else:
491                     tb_data = traceback.format_list(traceback.extract_stack())
492                     tb = ''.join(tb_data)
493             self.to_stderr(tb)
494         if not self.params.get('ignoreerrors', False):
495             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
496                 exc_info = sys.exc_info()[1].exc_info
497             else:
498                 exc_info = sys.exc_info()
499             raise DownloadError(message, exc_info)
500         self._download_retcode = 1
501
502     def report_warning(self, message):
503         '''
504         Print the message to stderr, it will be prefixed with 'WARNING:'
505         If stderr is a tty file the 'WARNING:' will be colored
506         '''
507         if self.params.get('logger') is not None:
508             self.params['logger'].warning(message)
509         else:
510             if self.params.get('no_warnings'):
511                 return
512             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
513                 _msg_header = '\033[0;33mWARNING:\033[0m'
514             else:
515                 _msg_header = 'WARNING:'
516             warning_message = '%s %s' % (_msg_header, message)
517             self.to_stderr(warning_message)
518
519     def report_error(self, message, tb=None):
520         '''
521         Do the same as trouble, but prefixes the message with 'ERROR:', colored
522         in red if stderr is a tty file.
523         '''
524         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
525             _msg_header = '\033[0;31mERROR:\033[0m'
526         else:
527             _msg_header = 'ERROR:'
528         error_message = '%s %s' % (_msg_header, message)
529         self.trouble(error_message, tb)
530
531     def report_file_already_downloaded(self, file_name):
532         """Report file has already been fully downloaded."""
533         try:
534             self.to_screen('[download] %s has already been downloaded' % file_name)
535         except UnicodeEncodeError:
536             self.to_screen('[download] The file has already been downloaded')
537
538     def prepare_filename(self, info_dict):
539         """Generate the output filename."""
540         try:
541             template_dict = dict(info_dict)
542
543             template_dict['epoch'] = int(time.time())
544             autonumber_size = self.params.get('autonumber_size')
545             if autonumber_size is None:
546                 autonumber_size = 5
547             autonumber_templ = '%0' + str(autonumber_size) + 'd'
548             template_dict['autonumber'] = autonumber_templ % self._num_downloads
549             if template_dict.get('playlist_index') is not None:
550                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
551             if template_dict.get('resolution') is None:
552                 if template_dict.get('width') and template_dict.get('height'):
553                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
554                 elif template_dict.get('height'):
555                     template_dict['resolution'] = '%sp' % template_dict['height']
556                 elif template_dict.get('width'):
557                     template_dict['resolution'] = '?x%d' % template_dict['width']
558
559             sanitize = lambda k, v: sanitize_filename(
560                 compat_str(v),
561                 restricted=self.params.get('restrictfilenames'),
562                 is_id=(k == 'id'))
563             template_dict = dict((k, sanitize(k, v))
564                                  for k, v in template_dict.items()
565                                  if v is not None)
566             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
567
568             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
569             tmpl = compat_expanduser(outtmpl)
570             filename = tmpl % template_dict
571             # Temporary fix for #4787
572             # 'Treat' all problem characters by passing filename through preferredencoding
573             # to workaround encoding issues with subprocess on python2 @ Windows
574             if sys.version_info < (3, 0) and sys.platform == 'win32':
575                 filename = encodeFilename(filename, True).decode(preferredencoding())
576             return filename
577         except ValueError as err:
578             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
579             return None
580
581     def _match_entry(self, info_dict, incomplete):
582         """ Returns None iff the file should be downloaded """
583
584         video_title = info_dict.get('title', info_dict.get('id', 'video'))
585         if 'title' in info_dict:
586             # This can happen when we're just evaluating the playlist
587             title = info_dict['title']
588             matchtitle = self.params.get('matchtitle', False)
589             if matchtitle:
590                 if not re.search(matchtitle, title, re.IGNORECASE):
591                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
592             rejecttitle = self.params.get('rejecttitle', False)
593             if rejecttitle:
594                 if re.search(rejecttitle, title, re.IGNORECASE):
595                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
596         date = info_dict.get('upload_date', None)
597         if date is not None:
598             dateRange = self.params.get('daterange', DateRange())
599             if date not in dateRange:
600                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
601         view_count = info_dict.get('view_count', None)
602         if view_count is not None:
603             min_views = self.params.get('min_views')
604             if min_views is not None and view_count < min_views:
605                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
606             max_views = self.params.get('max_views')
607             if max_views is not None and view_count > max_views:
608                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
609         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
610             return 'Skipping "%s" because it is age restricted' % video_title
611         if self.in_download_archive(info_dict):
612             return '%s has already been recorded in archive' % video_title
613
614         if not incomplete:
615             match_filter = self.params.get('match_filter')
616             if match_filter is not None:
617                 ret = match_filter(info_dict)
618                 if ret is not None:
619                     return ret
620
621         return None
622
623     @staticmethod
624     def add_extra_info(info_dict, extra_info):
625         '''Set the keys from extra_info in info dict if they are missing'''
626         for key, value in extra_info.items():
627             info_dict.setdefault(key, value)
628
629     def extract_info(self, url, download=True, ie_key=None, extra_info={},
630                      process=True):
631         '''
632         Returns a list with a dictionary for each video we find.
633         If 'download', also downloads the videos.
634         extra_info is a dict containing the extra values to add to each result
635         '''
636
637         if ie_key:
638             ies = [self.get_info_extractor(ie_key)]
639         else:
640             ies = self._ies
641
642         for ie in ies:
643             if not ie.suitable(url):
644                 continue
645
646             if not ie.working():
647                 self.report_warning('The program functionality for this site has been marked as broken, '
648                                     'and will probably not work.')
649
650             try:
651                 ie_result = ie.extract(url)
652                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
653                     break
654                 if isinstance(ie_result, list):
655                     # Backwards compatibility: old IE result format
656                     ie_result = {
657                         '_type': 'compat_list',
658                         'entries': ie_result,
659                     }
660                 self.add_default_extra_info(ie_result, ie, url)
661                 if process:
662                     return self.process_ie_result(ie_result, download, extra_info)
663                 else:
664                     return ie_result
665             except ExtractorError as de:  # An error we somewhat expected
666                 self.report_error(compat_str(de), de.format_traceback())
667                 break
668             except MaxDownloadsReached:
669                 raise
670             except Exception as e:
671                 if self.params.get('ignoreerrors', False):
672                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
673                     break
674                 else:
675                     raise
676         else:
677             self.report_error('no suitable InfoExtractor for URL %s' % url)
678
679     def add_default_extra_info(self, ie_result, ie, url):
680         self.add_extra_info(ie_result, {
681             'extractor': ie.IE_NAME,
682             'webpage_url': url,
683             'webpage_url_basename': url_basename(url),
684             'extractor_key': ie.ie_key(),
685         })
686
687     def process_ie_result(self, ie_result, download=True, extra_info={}):
688         """
689         Take the result of the ie(may be modified) and resolve all unresolved
690         references (URLs, playlist items).
691
692         It will also download the videos if 'download'.
693         Returns the resolved ie_result.
694         """
695
696         result_type = ie_result.get('_type', 'video')
697
698         if result_type in ('url', 'url_transparent'):
699             extract_flat = self.params.get('extract_flat', False)
700             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
701                     extract_flat is True):
702                 if self.params.get('forcejson', False):
703                     self.to_stdout(json.dumps(ie_result))
704                 return ie_result
705
706         if result_type == 'video':
707             self.add_extra_info(ie_result, extra_info)
708             return self.process_video_result(ie_result, download=download)
709         elif result_type == 'url':
710             # We have to add extra_info to the results because it may be
711             # contained in a playlist
712             return self.extract_info(ie_result['url'],
713                                      download,
714                                      ie_key=ie_result.get('ie_key'),
715                                      extra_info=extra_info)
716         elif result_type == 'url_transparent':
717             # Use the information from the embedding page
718             info = self.extract_info(
719                 ie_result['url'], ie_key=ie_result.get('ie_key'),
720                 extra_info=extra_info, download=False, process=False)
721
722             force_properties = dict(
723                 (k, v) for k, v in ie_result.items() if v is not None)
724             for f in ('_type', 'url'):
725                 if f in force_properties:
726                     del force_properties[f]
727             new_result = info.copy()
728             new_result.update(force_properties)
729
730             assert new_result.get('_type') != 'url_transparent'
731
732             return self.process_ie_result(
733                 new_result, download=download, extra_info=extra_info)
734         elif result_type == 'playlist' or result_type == 'multi_video':
735             # We process each entry in the playlist
736             playlist = ie_result.get('title', None) or ie_result.get('id', None)
737             self.to_screen('[download] Downloading playlist: %s' % playlist)
738
739             playlist_results = []
740
741             playliststart = self.params.get('playliststart', 1) - 1
742             playlistend = self.params.get('playlistend', None)
743             # For backwards compatibility, interpret -1 as whole list
744             if playlistend == -1:
745                 playlistend = None
746
747             playlistitems_str = self.params.get('playlist_items', None)
748             playlistitems = None
749             if playlistitems_str is not None:
750                 def iter_playlistitems(format):
751                     for string_segment in format.split(','):
752                         if '-' in string_segment:
753                             start, end = string_segment.split('-')
754                             for item in range(int(start), int(end) + 1):
755                                 yield int(item)
756                         else:
757                             yield int(string_segment)
758                 playlistitems = iter_playlistitems(playlistitems_str)
759
760             ie_entries = ie_result['entries']
761             if isinstance(ie_entries, list):
762                 n_all_entries = len(ie_entries)
763                 if playlistitems:
764                     entries = [ie_entries[i - 1] for i in playlistitems]
765                 else:
766                     entries = ie_entries[playliststart:playlistend]
767                 n_entries = len(entries)
768                 self.to_screen(
769                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
770                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
771             elif isinstance(ie_entries, PagedList):
772                 if playlistitems:
773                     entries = []
774                     for item in playlistitems:
775                         entries.extend(ie_entries.getslice(
776                             item - 1, item
777                         ))
778                 else:
779                     entries = ie_entries.getslice(
780                         playliststart, playlistend)
781                 n_entries = len(entries)
782                 self.to_screen(
783                     "[%s] playlist %s: Downloading %d videos" %
784                     (ie_result['extractor'], playlist, n_entries))
785             else:  # iterable
786                 if playlistitems:
787                     entry_list = list(ie_entries)
788                     entries = [entry_list[i - 1] for i in playlistitems]
789                 else:
790                     entries = list(itertools.islice(
791                         ie_entries, playliststart, playlistend))
792                 n_entries = len(entries)
793                 self.to_screen(
794                     "[%s] playlist %s: Downloading %d videos" %
795                     (ie_result['extractor'], playlist, n_entries))
796
797             if self.params.get('playlistreverse', False):
798                 entries = entries[::-1]
799
800             for i, entry in enumerate(entries, 1):
801                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
802                 extra = {
803                     'n_entries': n_entries,
804                     'playlist': playlist,
805                     'playlist_id': ie_result.get('id'),
806                     'playlist_title': ie_result.get('title'),
807                     'playlist_index': i + playliststart,
808                     'extractor': ie_result['extractor'],
809                     'webpage_url': ie_result['webpage_url'],
810                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
811                     'extractor_key': ie_result['extractor_key'],
812                 }
813
814                 reason = self._match_entry(entry, incomplete=True)
815                 if reason is not None:
816                     self.to_screen('[download] ' + reason)
817                     continue
818
819                 entry_result = self.process_ie_result(entry,
820                                                       download=download,
821                                                       extra_info=extra)
822                 playlist_results.append(entry_result)
823             ie_result['entries'] = playlist_results
824             return ie_result
825         elif result_type == 'compat_list':
826             self.report_warning(
827                 'Extractor %s returned a compat_list result. '
828                 'It needs to be updated.' % ie_result.get('extractor'))
829
830             def _fixup(r):
831                 self.add_extra_info(
832                     r,
833                     {
834                         'extractor': ie_result['extractor'],
835                         'webpage_url': ie_result['webpage_url'],
836                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
837                         'extractor_key': ie_result['extractor_key'],
838                     }
839                 )
840                 return r
841             ie_result['entries'] = [
842                 self.process_ie_result(_fixup(r), download, extra_info)
843                 for r in ie_result['entries']
844             ]
845             return ie_result
846         else:
847             raise Exception('Invalid result type: %s' % result_type)
848
849     def _apply_format_filter(self, format_spec, available_formats):
850         " Returns a tuple of the remaining format_spec and filtered formats "
851
852         OPERATORS = {
853             '<': operator.lt,
854             '<=': operator.le,
855             '>': operator.gt,
856             '>=': operator.ge,
857             '=': operator.eq,
858             '!=': operator.ne,
859         }
860         operator_rex = re.compile(r'''(?x)\s*\[
861             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
862             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
863             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
864             \]$
865             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
866         m = operator_rex.search(format_spec)
867         if m:
868             try:
869                 comparison_value = int(m.group('value'))
870             except ValueError:
871                 comparison_value = parse_filesize(m.group('value'))
872                 if comparison_value is None:
873                     comparison_value = parse_filesize(m.group('value') + 'B')
874                 if comparison_value is None:
875                     raise ValueError(
876                         'Invalid value %r in format specification %r' % (
877                             m.group('value'), format_spec))
878             op = OPERATORS[m.group('op')]
879
880         if not m:
881             STR_OPERATORS = {
882                 '=': operator.eq,
883                 '!=': operator.ne,
884             }
885             str_operator_rex = re.compile(r'''(?x)\s*\[
886                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
887                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
888                 \s*(?P<value>[a-zA-Z0-9_-]+)
889                 \s*\]$
890                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
891             m = str_operator_rex.search(format_spec)
892             if m:
893                 comparison_value = m.group('value')
894                 op = STR_OPERATORS[m.group('op')]
895
896         if not m:
897             raise ValueError('Invalid format specification %r' % format_spec)
898
899         def _filter(f):
900             actual_value = f.get(m.group('key'))
901             if actual_value is None:
902                 return m.group('none_inclusive')
903             return op(actual_value, comparison_value)
904         new_formats = [f for f in available_formats if _filter(f)]
905
906         new_format_spec = format_spec[:-len(m.group(0))]
907         if not new_format_spec:
908             new_format_spec = 'best'
909
910         return (new_format_spec, new_formats)
911
912     def select_format(self, format_spec, available_formats):
913         while format_spec.endswith(']'):
914             format_spec, available_formats = self._apply_format_filter(
915                 format_spec, available_formats)
916         if not available_formats:
917             return None
918
919         if format_spec == 'best' or format_spec is None:
920             return available_formats[-1]
921         elif format_spec == 'worst':
922             return available_formats[0]
923         elif format_spec == 'bestaudio':
924             audio_formats = [
925                 f for f in available_formats
926                 if f.get('vcodec') == 'none']
927             if audio_formats:
928                 return audio_formats[-1]
929         elif format_spec == 'worstaudio':
930             audio_formats = [
931                 f for f in available_formats
932                 if f.get('vcodec') == 'none']
933             if audio_formats:
934                 return audio_formats[0]
935         elif format_spec == 'bestvideo':
936             video_formats = [
937                 f for f in available_formats
938                 if f.get('acodec') == 'none']
939             if video_formats:
940                 return video_formats[-1]
941         elif format_spec == 'worstvideo':
942             video_formats = [
943                 f for f in available_formats
944                 if f.get('acodec') == 'none']
945             if video_formats:
946                 return video_formats[0]
947         else:
948             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
949             if format_spec in extensions:
950                 filter_f = lambda f: f['ext'] == format_spec
951             else:
952                 filter_f = lambda f: f['format_id'] == format_spec
953             matches = list(filter(filter_f, available_formats))
954             if matches:
955                 return matches[-1]
956         return None
957
958     def _calc_headers(self, info_dict):
959         res = std_headers.copy()
960
961         add_headers = info_dict.get('http_headers')
962         if add_headers:
963             res.update(add_headers)
964
965         cookies = self._calc_cookies(info_dict)
966         if cookies:
967             res['Cookie'] = cookies
968
969         return res
970
971     def _calc_cookies(self, info_dict):
972         pr = compat_urllib_request.Request(info_dict['url'])
973         self.cookiejar.add_cookie_header(pr)
974         return pr.get_header('Cookie')
975
976     def process_video_result(self, info_dict, download=True):
977         assert info_dict.get('_type', 'video') == 'video'
978
979         if 'id' not in info_dict:
980             raise ExtractorError('Missing "id" field in extractor result')
981         if 'title' not in info_dict:
982             raise ExtractorError('Missing "title" field in extractor result')
983
984         if 'playlist' not in info_dict:
985             # It isn't part of a playlist
986             info_dict['playlist'] = None
987             info_dict['playlist_index'] = None
988
989         thumbnails = info_dict.get('thumbnails')
990         if thumbnails is None:
991             thumbnail = info_dict.get('thumbnail')
992             if thumbnail:
993                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
994         if thumbnails:
995             thumbnails.sort(key=lambda t: (
996                 t.get('preference'), t.get('width'), t.get('height'),
997                 t.get('id'), t.get('url')))
998             for i, t in enumerate(thumbnails):
999                 if 'width' in t and 'height' in t:
1000                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1001                 if t.get('id') is None:
1002                     t['id'] = '%d' % i
1003
1004         if thumbnails and 'thumbnail' not in info_dict:
1005             info_dict['thumbnail'] = thumbnails[-1]['url']
1006
1007         if 'display_id' not in info_dict and 'id' in info_dict:
1008             info_dict['display_id'] = info_dict['id']
1009
1010         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1011             # Working around negative timestamps in Windows
1012             # (see http://bugs.python.org/issue1646728)
1013             if info_dict['timestamp'] < 0 and os.name == 'nt':
1014                 info_dict['timestamp'] = 0
1015             upload_date = datetime.datetime.utcfromtimestamp(
1016                 info_dict['timestamp'])
1017             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1018
1019         if self.params.get('listsubtitles', False):
1020             if 'automatic_captions' in info_dict:
1021                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1022             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1023             return
1024         info_dict['requested_subtitles'] = self.process_subtitles(
1025             info_dict['id'], info_dict.get('subtitles'),
1026             info_dict.get('automatic_captions'))
1027
1028         # This extractors handle format selection themselves
1029         if info_dict['extractor'] in ['Youku']:
1030             if download:
1031                 self.process_info(info_dict)
1032             return info_dict
1033
1034         # We now pick which formats have to be downloaded
1035         if info_dict.get('formats') is None:
1036             # There's only one format available
1037             formats = [info_dict]
1038         else:
1039             formats = info_dict['formats']
1040
1041         if not formats:
1042             raise ExtractorError('No video formats found!')
1043
1044         # We check that all the formats have the format and format_id fields
1045         for i, format in enumerate(formats):
1046             if 'url' not in format:
1047                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1048
1049             if format.get('format_id') is None:
1050                 format['format_id'] = compat_str(i)
1051             if format.get('format') is None:
1052                 format['format'] = '{id} - {res}{note}'.format(
1053                     id=format['format_id'],
1054                     res=self.format_resolution(format),
1055                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1056                 )
1057             # Automatically determine file extension if missing
1058             if 'ext' not in format:
1059                 format['ext'] = determine_ext(format['url']).lower()
1060             # Add HTTP headers, so that external programs can use them from the
1061             # json output
1062             full_format_info = info_dict.copy()
1063             full_format_info.update(format)
1064             format['http_headers'] = self._calc_headers(full_format_info)
1065
1066         format_limit = self.params.get('format_limit', None)
1067         if format_limit:
1068             formats = list(takewhile_inclusive(
1069                 lambda f: f['format_id'] != format_limit, formats
1070             ))
1071
1072         # TODO Central sorting goes here
1073
1074         if formats[0] is not info_dict:
1075             # only set the 'formats' fields if the original info_dict list them
1076             # otherwise we end up with a circular reference, the first (and unique)
1077             # element in the 'formats' field in info_dict is info_dict itself,
1078             # wich can't be exported to json
1079             info_dict['formats'] = formats
1080         if self.params.get('listformats'):
1081             self.list_formats(info_dict)
1082             return
1083         if self.params.get('list_thumbnails'):
1084             self.list_thumbnails(info_dict)
1085             return
1086
1087         req_format = self.params.get('format')
1088         if req_format is None:
1089             req_format = 'best'
1090         formats_to_download = []
1091         if req_format == 'all':
1092             formats_to_download = formats
1093         else:
1094             for rfstr in req_format.split(','):
1095                 # We can accept formats requested in the format: 34/5/best, we pick
1096                 # the first that is available, starting from left
1097                 req_formats = rfstr.split('/')
1098                 for rf in req_formats:
1099                     if re.match(r'.+?\+.+?', rf) is not None:
1100                         # Two formats have been requested like '137+139'
1101                         format_1, format_2 = rf.split('+')
1102                         formats_info = (self.select_format(format_1, formats),
1103                                         self.select_format(format_2, formats))
1104                         if all(formats_info):
1105                             # The first format must contain the video and the
1106                             # second the audio
1107                             if formats_info[0].get('vcodec') == 'none':
1108                                 self.report_error('The first format must '
1109                                                   'contain the video, try using '
1110                                                   '"-f %s+%s"' % (format_2, format_1))
1111                                 return
1112                             output_ext = (
1113                                 formats_info[0]['ext']
1114                                 if self.params.get('merge_output_format') is None
1115                                 else self.params['merge_output_format'])
1116                             selected_format = {
1117                                 'requested_formats': formats_info,
1118                                 'format': '%s+%s' % (formats_info[0].get('format'),
1119                                                      formats_info[1].get('format')),
1120                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1121                                                         formats_info[1].get('format_id')),
1122                                 'width': formats_info[0].get('width'),
1123                                 'height': formats_info[0].get('height'),
1124                                 'resolution': formats_info[0].get('resolution'),
1125                                 'fps': formats_info[0].get('fps'),
1126                                 'vcodec': formats_info[0].get('vcodec'),
1127                                 'vbr': formats_info[0].get('vbr'),
1128                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1129                                 'acodec': formats_info[1].get('acodec'),
1130                                 'abr': formats_info[1].get('abr'),
1131                                 'ext': output_ext,
1132                             }
1133                         else:
1134                             selected_format = None
1135                     else:
1136                         selected_format = self.select_format(rf, formats)
1137                     if selected_format is not None:
1138                         formats_to_download.append(selected_format)
1139                         break
1140         if not formats_to_download:
1141             raise ExtractorError('requested format not available',
1142                                  expected=True)
1143
1144         if download:
1145             if len(formats_to_download) > 1:
1146                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1147             for format in formats_to_download:
1148                 new_info = dict(info_dict)
1149                 new_info.update(format)
1150                 self.process_info(new_info)
1151         # We update the info dict with the best quality format (backwards compatibility)
1152         info_dict.update(formats_to_download[-1])
1153         return info_dict
1154
1155     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1156         """Select the requested subtitles and their format"""
1157         available_subs = {}
1158         if normal_subtitles and self.params.get('writesubtitles'):
1159             available_subs.update(normal_subtitles)
1160         if automatic_captions and self.params.get('writeautomaticsub'):
1161             for lang, cap_info in automatic_captions.items():
1162                 if lang not in available_subs:
1163                     available_subs[lang] = cap_info
1164
1165         if (not self.params.get('writesubtitles') and not
1166                 self.params.get('writeautomaticsub') or not
1167                 available_subs):
1168             return None
1169
1170         if self.params.get('allsubtitles', False):
1171             requested_langs = available_subs.keys()
1172         else:
1173             if self.params.get('subtitleslangs', False):
1174                 requested_langs = self.params.get('subtitleslangs')
1175             elif 'en' in available_subs:
1176                 requested_langs = ['en']
1177             else:
1178                 requested_langs = [list(available_subs.keys())[0]]
1179
1180         formats_query = self.params.get('subtitlesformat', 'best')
1181         formats_preference = formats_query.split('/') if formats_query else []
1182         subs = {}
1183         for lang in requested_langs:
1184             formats = available_subs.get(lang)
1185             if formats is None:
1186                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1187                 continue
1188             for ext in formats_preference:
1189                 if ext == 'best':
1190                     f = formats[-1]
1191                     break
1192                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1193                 if matches:
1194                     f = matches[-1]
1195                     break
1196             else:
1197                 f = formats[-1]
1198                 self.report_warning(
1199                     'No subtitle format found matching "%s" for language %s, '
1200                     'using %s' % (formats_query, lang, f['ext']))
1201             subs[lang] = f
1202         return subs
1203
1204     def process_info(self, info_dict):
1205         """Process a single resolved IE result."""
1206
1207         assert info_dict.get('_type', 'video') == 'video'
1208
1209         max_downloads = self.params.get('max_downloads')
1210         if max_downloads is not None:
1211             if self._num_downloads >= int(max_downloads):
1212                 raise MaxDownloadsReached()
1213
1214         info_dict['fulltitle'] = info_dict['title']
1215         if len(info_dict['title']) > 200:
1216             info_dict['title'] = info_dict['title'][:197] + '...'
1217
1218         if 'format' not in info_dict:
1219             info_dict['format'] = info_dict['ext']
1220
1221         reason = self._match_entry(info_dict, incomplete=False)
1222         if reason is not None:
1223             self.to_screen('[download] ' + reason)
1224             return
1225
1226         self._num_downloads += 1
1227
1228         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1229
1230         # Forced printings
1231         if self.params.get('forcetitle', False):
1232             self.to_stdout(info_dict['fulltitle'])
1233         if self.params.get('forceid', False):
1234             self.to_stdout(info_dict['id'])
1235         if self.params.get('forceurl', False):
1236             if info_dict.get('requested_formats') is not None:
1237                 for f in info_dict['requested_formats']:
1238                     self.to_stdout(f['url'] + f.get('play_path', ''))
1239             else:
1240                 # For RTMP URLs, also include the playpath
1241                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1242         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1243             self.to_stdout(info_dict['thumbnail'])
1244         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1245             self.to_stdout(info_dict['description'])
1246         if self.params.get('forcefilename', False) and filename is not None:
1247             self.to_stdout(filename)
1248         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1249             self.to_stdout(formatSeconds(info_dict['duration']))
1250         if self.params.get('forceformat', False):
1251             self.to_stdout(info_dict['format'])
1252         if self.params.get('forcejson', False):
1253             self.to_stdout(json.dumps(info_dict))
1254
1255         # Do nothing else if in simulate mode
1256         if self.params.get('simulate', False):
1257             return
1258
1259         if filename is None:
1260             return
1261
1262         try:
1263             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1264             if dn and not os.path.exists(dn):
1265                 os.makedirs(dn)
1266         except (OSError, IOError) as err:
1267             self.report_error('unable to create directory ' + compat_str(err))
1268             return
1269
1270         if self.params.get('writedescription', False):
1271             descfn = filename + '.description'
1272             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1273                 self.to_screen('[info] Video description is already present')
1274             elif info_dict.get('description') is None:
1275                 self.report_warning('There\'s no description to write.')
1276             else:
1277                 try:
1278                     self.to_screen('[info] Writing video description to: ' + descfn)
1279                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1280                         descfile.write(info_dict['description'])
1281                 except (OSError, IOError):
1282                     self.report_error('Cannot write description file ' + descfn)
1283                     return
1284
1285         if self.params.get('writeannotations', False):
1286             annofn = filename + '.annotations.xml'
1287             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1288                 self.to_screen('[info] Video annotations are already present')
1289             else:
1290                 try:
1291                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1292                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1293                         annofile.write(info_dict['annotations'])
1294                 except (KeyError, TypeError):
1295                     self.report_warning('There are no annotations to write.')
1296                 except (OSError, IOError):
1297                     self.report_error('Cannot write annotations file: ' + annofn)
1298                     return
1299
1300         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1301                                        self.params.get('writeautomaticsub')])
1302
1303         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1304             # subtitles download errors are already managed as troubles in relevant IE
1305             # that way it will silently go on when used with unsupporting IE
1306             subtitles = info_dict['requested_subtitles']
1307             ie = self.get_info_extractor(info_dict['extractor_key'])
1308             for sub_lang, sub_info in subtitles.items():
1309                 sub_format = sub_info['ext']
1310                 if sub_info.get('data') is not None:
1311                     sub_data = sub_info['data']
1312                 else:
1313                     try:
1314                         sub_data = ie._download_webpage(
1315                             sub_info['url'], info_dict['id'], note=False)
1316                     except ExtractorError as err:
1317                         self.report_warning('Unable to download subtitle for "%s": %s' %
1318                                             (sub_lang, compat_str(err.cause)))
1319                         continue
1320                 try:
1321                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1322                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1323                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1324                     else:
1325                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1326                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1327                             subfile.write(sub_data)
1328                 except (OSError, IOError):
1329                     self.report_error('Cannot write subtitles file ' + sub_filename)
1330                     return
1331
1332         if self.params.get('writeinfojson', False):
1333             infofn = os.path.splitext(filename)[0] + '.info.json'
1334             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1335                 self.to_screen('[info] Video description metadata is already present')
1336             else:
1337                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1338                 try:
1339                     write_json_file(info_dict, infofn)
1340                 except (OSError, IOError):
1341                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1342                     return
1343
1344         self._write_thumbnails(info_dict, filename)
1345
1346         if not self.params.get('skip_download', False):
1347             try:
1348                 def dl(name, info):
1349                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1350                     for ph in self._progress_hooks:
1351                         fd.add_progress_hook(ph)
1352                     if self.params.get('verbose'):
1353                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1354                     return fd.download(name, info)
1355
1356                 if info_dict.get('requested_formats') is not None:
1357                     downloaded = []
1358                     success = True
1359                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1360                     if not merger.available:
1361                         postprocessors = []
1362                         self.report_warning('You have requested multiple '
1363                                             'formats but ffmpeg or avconv are not installed.'
1364                                             ' The formats won\'t be merged')
1365                     else:
1366                         postprocessors = [merger]
1367                     for f in info_dict['requested_formats']:
1368                         new_info = dict(info_dict)
1369                         new_info.update(f)
1370                         fname = self.prepare_filename(new_info)
1371                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1372                         downloaded.append(fname)
1373                         partial_success = dl(fname, new_info)
1374                         success = success and partial_success
1375                     info_dict['__postprocessors'] = postprocessors
1376                     info_dict['__files_to_merge'] = downloaded
1377                 else:
1378                     # Just a single file
1379                     success = dl(filename, info_dict)
1380             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1381                 self.report_error('unable to download video data: %s' % str(err))
1382                 return
1383             except (OSError, IOError) as err:
1384                 raise UnavailableVideoError(err)
1385             except (ContentTooShortError, ) as err:
1386                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1387                 return
1388
1389             if success:
1390                 # Fixup content
1391                 fixup_policy = self.params.get('fixup')
1392                 if fixup_policy is None:
1393                     fixup_policy = 'detect_or_warn'
1394
1395                 stretched_ratio = info_dict.get('stretched_ratio')
1396                 if stretched_ratio is not None and stretched_ratio != 1:
1397                     if fixup_policy == 'warn':
1398                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1399                             info_dict['id'], stretched_ratio))
1400                     elif fixup_policy == 'detect_or_warn':
1401                         stretched_pp = FFmpegFixupStretchedPP(self)
1402                         if stretched_pp.available:
1403                             info_dict.setdefault('__postprocessors', [])
1404                             info_dict['__postprocessors'].append(stretched_pp)
1405                         else:
1406                             self.report_warning(
1407                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1408                                     info_dict['id'], stretched_ratio))
1409                     else:
1410                         assert fixup_policy in ('ignore', 'never')
1411
1412                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1413                     if fixup_policy == 'warn':
1414                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1415                             info_dict['id']))
1416                     elif fixup_policy == 'detect_or_warn':
1417                         fixup_pp = FFmpegFixupM4aPP(self)
1418                         if fixup_pp.available:
1419                             info_dict.setdefault('__postprocessors', [])
1420                             info_dict['__postprocessors'].append(fixup_pp)
1421                         else:
1422                             self.report_warning(
1423                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1424                                     info_dict['id']))
1425                     else:
1426                         assert fixup_policy in ('ignore', 'never')
1427
1428                 try:
1429                     self.post_process(filename, info_dict)
1430                 except (PostProcessingError) as err:
1431                     self.report_error('postprocessing: %s' % str(err))
1432                     return
1433                 self.record_download_archive(info_dict)
1434
1435     def download(self, url_list):
1436         """Download a given list of URLs."""
1437         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1438         if (len(url_list) > 1 and
1439                 '%' not in outtmpl and
1440                 self.params.get('max_downloads') != 1):
1441             raise SameFileError(outtmpl)
1442
1443         for url in url_list:
1444             try:
1445                 # It also downloads the videos
1446                 res = self.extract_info(url)
1447             except UnavailableVideoError:
1448                 self.report_error('unable to download video')
1449             except MaxDownloadsReached:
1450                 self.to_screen('[info] Maximum number of downloaded files reached.')
1451                 raise
1452             else:
1453                 if self.params.get('dump_single_json', False):
1454                     self.to_stdout(json.dumps(res))
1455
1456         return self._download_retcode
1457
1458     def download_with_info_file(self, info_filename):
1459         with contextlib.closing(fileinput.FileInput(
1460                 [info_filename], mode='r',
1461                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1462             # FileInput doesn't have a read method, we can't call json.load
1463             info = json.loads('\n'.join(f))
1464         try:
1465             self.process_ie_result(info, download=True)
1466         except DownloadError:
1467             webpage_url = info.get('webpage_url')
1468             if webpage_url is not None:
1469                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1470                 return self.download([webpage_url])
1471             else:
1472                 raise
1473         return self._download_retcode
1474
1475     def post_process(self, filename, ie_info):
1476         """Run all the postprocessors on the given file."""
1477         info = dict(ie_info)
1478         info['filepath'] = filename
1479         pps_chain = []
1480         if ie_info.get('__postprocessors') is not None:
1481             pps_chain.extend(ie_info['__postprocessors'])
1482         pps_chain.extend(self._pps)
1483         for pp in pps_chain:
1484             keep_video = None
1485             old_filename = info['filepath']
1486             try:
1487                 keep_video_wish, info = pp.run(info)
1488                 if keep_video_wish is not None:
1489                     if keep_video_wish:
1490                         keep_video = keep_video_wish
1491                     elif keep_video is None:
1492                         # No clear decision yet, let IE decide
1493                         keep_video = keep_video_wish
1494             except PostProcessingError as e:
1495                 self.report_error(e.msg)
1496             if keep_video is False and not self.params.get('keepvideo', False):
1497                 try:
1498                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1499                     os.remove(encodeFilename(old_filename))
1500                 except (IOError, OSError):
1501                     self.report_warning('Unable to remove downloaded video file')
1502
1503     def _make_archive_id(self, info_dict):
1504         # Future-proof against any change in case
1505         # and backwards compatibility with prior versions
1506         extractor = info_dict.get('extractor_key')
1507         if extractor is None:
1508             if 'id' in info_dict:
1509                 extractor = info_dict.get('ie_key')  # key in a playlist
1510         if extractor is None:
1511             return None  # Incomplete video information
1512         return extractor.lower() + ' ' + info_dict['id']
1513
1514     def in_download_archive(self, info_dict):
1515         fn = self.params.get('download_archive')
1516         if fn is None:
1517             return False
1518
1519         vid_id = self._make_archive_id(info_dict)
1520         if vid_id is None:
1521             return False  # Incomplete video information
1522
1523         try:
1524             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1525                 for line in archive_file:
1526                     if line.strip() == vid_id:
1527                         return True
1528         except IOError as ioe:
1529             if ioe.errno != errno.ENOENT:
1530                 raise
1531         return False
1532
1533     def record_download_archive(self, info_dict):
1534         fn = self.params.get('download_archive')
1535         if fn is None:
1536             return
1537         vid_id = self._make_archive_id(info_dict)
1538         assert vid_id
1539         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1540             archive_file.write(vid_id + '\n')
1541
1542     @staticmethod
1543     def format_resolution(format, default='unknown'):
1544         if format.get('vcodec') == 'none':
1545             return 'audio only'
1546         if format.get('resolution') is not None:
1547             return format['resolution']
1548         if format.get('height') is not None:
1549             if format.get('width') is not None:
1550                 res = '%sx%s' % (format['width'], format['height'])
1551             else:
1552                 res = '%sp' % format['height']
1553         elif format.get('width') is not None:
1554             res = '?x%d' % format['width']
1555         else:
1556             res = default
1557         return res
1558
1559     def _format_note(self, fdict):
1560         res = ''
1561         if fdict.get('ext') in ['f4f', 'f4m']:
1562             res += '(unsupported) '
1563         if fdict.get('format_note') is not None:
1564             res += fdict['format_note'] + ' '
1565         if fdict.get('tbr') is not None:
1566             res += '%4dk ' % fdict['tbr']
1567         if fdict.get('container') is not None:
1568             if res:
1569                 res += ', '
1570             res += '%s container' % fdict['container']
1571         if (fdict.get('vcodec') is not None and
1572                 fdict.get('vcodec') != 'none'):
1573             if res:
1574                 res += ', '
1575             res += fdict['vcodec']
1576             if fdict.get('vbr') is not None:
1577                 res += '@'
1578         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1579             res += 'video@'
1580         if fdict.get('vbr') is not None:
1581             res += '%4dk' % fdict['vbr']
1582         if fdict.get('fps') is not None:
1583             res += ', %sfps' % fdict['fps']
1584         if fdict.get('acodec') is not None:
1585             if res:
1586                 res += ', '
1587             if fdict['acodec'] == 'none':
1588                 res += 'video only'
1589             else:
1590                 res += '%-5s' % fdict['acodec']
1591         elif fdict.get('abr') is not None:
1592             if res:
1593                 res += ', '
1594             res += 'audio'
1595         if fdict.get('abr') is not None:
1596             res += '@%3dk' % fdict['abr']
1597         if fdict.get('asr') is not None:
1598             res += ' (%5dHz)' % fdict['asr']
1599         if fdict.get('filesize') is not None:
1600             if res:
1601                 res += ', '
1602             res += format_bytes(fdict['filesize'])
1603         elif fdict.get('filesize_approx') is not None:
1604             if res:
1605                 res += ', '
1606             res += '~' + format_bytes(fdict['filesize_approx'])
1607         return res
1608
1609     def list_formats(self, info_dict):
1610         formats = info_dict.get('formats', [info_dict])
1611         table = [
1612             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1613             for f in formats
1614             if f.get('preference') is None or f['preference'] >= -1000]
1615         if len(formats) > 1:
1616             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1617
1618         header_line = ['format code', 'extension', 'resolution', 'note']
1619         self.to_screen(
1620             '[info] Available formats for %s:\n%s' %
1621             (info_dict['id'], render_table(header_line, table)))
1622
1623     def list_thumbnails(self, info_dict):
1624         thumbnails = info_dict.get('thumbnails')
1625         if not thumbnails:
1626             tn_url = info_dict.get('thumbnail')
1627             if tn_url:
1628                 thumbnails = [{'id': '0', 'url': tn_url}]
1629             else:
1630                 self.to_screen(
1631                     '[info] No thumbnails present for %s' % info_dict['id'])
1632                 return
1633
1634         self.to_screen(
1635             '[info] Thumbnails for %s:' % info_dict['id'])
1636         self.to_screen(render_table(
1637             ['ID', 'width', 'height', 'URL'],
1638             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1639
1640     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1641         if not subtitles:
1642             self.to_screen('%s has no %s' % (video_id, name))
1643             return
1644         self.to_screen(
1645             'Available %s for %s:' % (name, video_id))
1646         self.to_screen(render_table(
1647             ['Language', 'formats'],
1648             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1649                 for lang, formats in subtitles.items()]))
1650
1651     def urlopen(self, req):
1652         """ Start an HTTP download """
1653
1654         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1655         # always respected by websites, some tend to give out URLs with non percent-encoded
1656         # non-ASCII characters (see telemb.py, ard.py [#3412])
1657         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1658         # To work around aforementioned issue we will replace request's original URL with
1659         # percent-encoded one
1660         req_is_string = isinstance(req, compat_basestring)
1661         url = req if req_is_string else req.get_full_url()
1662         url_escaped = escape_url(url)
1663
1664         # Substitute URL if any change after escaping
1665         if url != url_escaped:
1666             if req_is_string:
1667                 req = url_escaped
1668             else:
1669                 req = compat_urllib_request.Request(
1670                     url_escaped, data=req.data, headers=req.headers,
1671                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1672
1673         return self._opener.open(req, timeout=self._socket_timeout)
1674
1675     def print_debug_header(self):
1676         if not self.params.get('verbose'):
1677             return
1678
1679         if type('') is not compat_str:
1680             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1681             self.report_warning(
1682                 'Your Python is broken! Update to a newer and supported version')
1683
1684         stdout_encoding = getattr(
1685             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1686         encoding_str = (
1687             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1688                 locale.getpreferredencoding(),
1689                 sys.getfilesystemencoding(),
1690                 stdout_encoding,
1691                 self.get_encoding()))
1692         write_string(encoding_str, encoding=None)
1693
1694         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1695         try:
1696             sp = subprocess.Popen(
1697                 ['git', 'rev-parse', '--short', 'HEAD'],
1698                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1699                 cwd=os.path.dirname(os.path.abspath(__file__)))
1700             out, err = sp.communicate()
1701             out = out.decode().strip()
1702             if re.match('[0-9a-f]+', out):
1703                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1704         except Exception:
1705             try:
1706                 sys.exc_clear()
1707             except Exception:
1708                 pass
1709         self._write_string('[debug] Python version %s - %s\n' % (
1710             platform.python_version(), platform_name()))
1711
1712         exe_versions = FFmpegPostProcessor.get_versions(self)
1713         exe_versions['rtmpdump'] = rtmpdump_version()
1714         exe_str = ', '.join(
1715             '%s %s' % (exe, v)
1716             for exe, v in sorted(exe_versions.items())
1717             if v
1718         )
1719         if not exe_str:
1720             exe_str = 'none'
1721         self._write_string('[debug] exe versions: %s\n' % exe_str)
1722
1723         proxy_map = {}
1724         for handler in self._opener.handlers:
1725             if hasattr(handler, 'proxies'):
1726                 proxy_map.update(handler.proxies)
1727         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1728
1729         if self.params.get('call_home', False):
1730             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1731             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1732             latest_version = self.urlopen(
1733                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1734             if version_tuple(latest_version) > version_tuple(__version__):
1735                 self.report_warning(
1736                     'You are using an outdated version (newest version: %s)! '
1737                     'See https://yt-dl.org/update if you need help updating.' %
1738                     latest_version)
1739
1740     def _setup_opener(self):
1741         timeout_val = self.params.get('socket_timeout')
1742         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1743
1744         opts_cookiefile = self.params.get('cookiefile')
1745         opts_proxy = self.params.get('proxy')
1746
1747         if opts_cookiefile is None:
1748             self.cookiejar = compat_cookiejar.CookieJar()
1749         else:
1750             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1751                 opts_cookiefile)
1752             if os.access(opts_cookiefile, os.R_OK):
1753                 self.cookiejar.load()
1754
1755         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1756             self.cookiejar)
1757         if opts_proxy is not None:
1758             if opts_proxy == '':
1759                 proxies = {}
1760             else:
1761                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1762         else:
1763             proxies = compat_urllib_request.getproxies()
1764             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1765             if 'http' in proxies and 'https' not in proxies:
1766                 proxies['https'] = proxies['http']
1767         proxy_handler = PerRequestProxyHandler(proxies)
1768
1769         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1770         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1771         # The ssl context is only available in python 2.7.9 and 3.x
1772         if hasattr(https_handler, '_context'):
1773             ctx = https_handler._context
1774             # get_ca_certs is unavailable prior to python 3.4
1775             if hasattr(ctx, 'get_ca_certs') and len(ctx.get_ca_certs()) == 0:
1776                 self.report_warning(
1777                     'No ssl certificates were loaded, urls that use https '
1778                     'won\'t work')
1779         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1780         opener = compat_urllib_request.build_opener(
1781             proxy_handler, https_handler, cookie_processor, ydlh)
1782
1783         # Delete the default user-agent header, which would otherwise apply in
1784         # cases where our custom HTTP handler doesn't come into play
1785         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1786         opener.addheaders = []
1787         self._opener = opener
1788
1789     def encode(self, s):
1790         if isinstance(s, bytes):
1791             return s  # Already encoded
1792
1793         try:
1794             return s.encode(self.get_encoding())
1795         except UnicodeEncodeError as err:
1796             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1797             raise
1798
1799     def get_encoding(self):
1800         encoding = self.params.get('encoding')
1801         if encoding is None:
1802             encoding = preferredencoding()
1803         return encoding
1804
1805     def _write_thumbnails(self, info_dict, filename):
1806         if self.params.get('writethumbnail', False):
1807             thumbnails = info_dict.get('thumbnails')
1808             if thumbnails:
1809                 thumbnails = [thumbnails[-1]]
1810         elif self.params.get('write_all_thumbnails', False):
1811             thumbnails = info_dict.get('thumbnails')
1812         else:
1813             return
1814
1815         if not thumbnails:
1816             # No thumbnails present, so return immediately
1817             return
1818
1819         for t in thumbnails:
1820             thumb_ext = determine_ext(t['url'], 'jpg')
1821             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1822             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1823             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1824
1825             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1826                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1827                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1828             else:
1829                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1830                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1831                 try:
1832                     uf = self.urlopen(t['url'])
1833                     with open(thumb_filename, 'wb') as thumbf:
1834                         shutil.copyfileobj(uf, thumbf)
1835                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1836                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1837                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1838                     self.report_warning('Unable to download thumbnail "%s": %s' %
1839                                         (t['url'], compat_str(err)))