Merge branch 'subtitles-rework'
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
88 class YoutubeDL(object):
89     """YoutubeDL class.
90
91     YoutubeDL objects are the ones responsible of downloading the
92     actual video file and writing it to disk if the user has requested
93     it, among some other tasks. In most cases there should be one per
94     program. As, given a video URL, the downloader doesn't know how to
95     extract all the needed information, task that InfoExtractors do, it
96     has to pass the URL to one of them.
97
98     For this, YoutubeDL objects have a method that allows
99     InfoExtractors to be registered in a given order. When it is passed
100     a URL, the YoutubeDL object hands it to the first InfoExtractor it
101     finds that reports being able to handle it. The InfoExtractor extracts
102     all the information about the video or videos the URL refers to, and
103     YoutubeDL process the extracted information, possibly using a File
104     Downloader to download the video.
105
106     YoutubeDL objects accept a lot of parameters. In order not to saturate
107     the object constructor with arguments, it receives a dictionary of
108     options instead. These options are available through the params
109     attribute for the InfoExtractors to use. The YoutubeDL also
110     registers itself as the downloader in charge for the InfoExtractors
111     that are added to it, so this is a "mutual registration".
112
113     Available options:
114
115     username:          Username for authentication purposes.
116     password:          Password for authentication purposes.
117     videopassword:     Password for accessing a video.
118     usenetrc:          Use netrc for authentication instead.
119     verbose:           Print additional info to stdout.
120     quiet:             Do not print messages to stdout.
121     no_warnings:       Do not print out anything for warnings.
122     forceurl:          Force printing final URL.
123     forcetitle:        Force printing title.
124     forceid:           Force printing ID.
125     forcethumbnail:    Force printing thumbnail URL.
126     forcedescription:  Force printing description.
127     forcefilename:     Force printing final filename.
128     forceduration:     Force printing duration.
129     forcejson:         Force printing info_dict as JSON.
130     dump_single_json:  Force printing the info_dict of the whole playlist
131                        (or video) as a single JSON line.
132     simulate:          Do not download the video files.
133     format:            Video format code. See options.py for more information.
134     format_limit:      Highest quality format to try.
135     outtmpl:           Template for output names.
136     restrictfilenames: Do not allow "&" and spaces in file names
137     ignoreerrors:      Do not stop on download errors.
138     nooverwrites:      Prevent overwriting files.
139     playliststart:     Playlist item to start at.
140     playlistend:       Playlist item to end at.
141     playlist_items:    Specific indices of playlist to download.
142     playlistreverse:   Download playlist items in reverse order.
143     matchtitle:        Download only matching titles.
144     rejecttitle:       Reject downloads for matching titles.
145     logger:            Log messages to a logging.Logger instance.
146     logtostderr:       Log messages to stderr instead of stdout.
147     writedescription:  Write the video description to a .description file
148     writeinfojson:     Write the video description to a .info.json file
149     writeannotations:  Write the video annotations to a .annotations.xml file
150     writethumbnail:    Write the thumbnail image to a file
151     write_all_thumbnails:  Write all thumbnail formats to files
152     writesubtitles:    Write the video subtitles to a file
153     writeautomaticsub: Write the automatic subtitles to a file
154     allsubtitles:      Downloads all the subtitles of the video
155                        (requires writesubtitles or writeautomaticsub)
156     listsubtitles:     Lists all available subtitles for the video
157     subtitlesformat:   The format code for subtitles
158     subtitleslangs:    List of languages of the subtitles to download
159     keepvideo:         Keep the video file after post-processing
160     daterange:         A DateRange object, download only if the upload_date is in the range.
161     skip_download:     Skip the actual download of the video file
162     cachedir:          Location of the cache files in the filesystem.
163                        False to disable filesystem cache.
164     noplaylist:        Download single video instead of a playlist if in doubt.
165     age_limit:         An integer representing the user's age in years.
166                        Unsuitable videos for the given age are skipped.
167     min_views:         An integer representing the minimum view count the video
168                        must have in order to not be skipped.
169                        Videos without view count information are always
170                        downloaded. None for no limit.
171     max_views:         An integer representing the maximum view count.
172                        Videos that are more popular than that are not
173                        downloaded.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     download_archive:  File name of a file where all downloads are recorded.
177                        Videos already present in the file are not downloaded
178                        again.
179     cookiefile:        File name where cookies should be read from and dumped to.
180     nocheckcertificate:Do not verify SSL certificates
181     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
182                        At the moment, this is only supported by YouTube.
183     proxy:             URL of the proxy server to use
184     socket_timeout:    Time to wait for unresponsive hosts, in seconds
185     bidi_workaround:   Work around buggy terminals without bidirectional text
186                        support, using fribidi
187     debug_printtraffic:Print out sent and received HTTP traffic
188     include_ads:       Download ads as well
189     default_search:    Prepend this string if an input url is not valid.
190                        'auto' for elaborate guessing
191     encoding:          Use this encoding instead of the system-specified.
192     extract_flat:      Do not resolve URLs, return the immediate result.
193                        Pass in 'in_playlist' to only show this behavior for
194                        playlist items.
195     postprocessors:    A list of dictionaries, each with an entry
196                        * key:  The name of the postprocessor. See
197                                youtube_dl/postprocessor/__init__.py for a list.
198                        as well as any further keyword arguments for the
199                        postprocessor.
200     progress_hooks:    A list of functions that get called on download
201                        progress, with a dictionary with the entries
202                        * status: One of "downloading", "error", or "finished".
203                                  Check this first and ignore unknown values.
204
205                        If status is one of "downloading", or "finished", the
206                        following properties may also be present:
207                        * filename: The final filename (always present)
208                        * tmpfilename: The filename we're currently writing to
209                        * downloaded_bytes: Bytes on disk
210                        * total_bytes: Size of the whole file, None if unknown
211                        * total_bytes_estimate: Guess of the eventual file size,
212                                                None if unavailable.
213                        * elapsed: The number of seconds since download started.
214                        * eta: The estimated time in seconds, None if unknown
215                        * speed: The download speed in bytes/second, None if
216                                 unknown
217                        * fragment_index: The counter of the currently
218                                          downloaded video fragment.
219                        * fragment_count: The number of fragments (= individual
220                                          files that will be merged)
221
222                        Progress hooks are guaranteed to be called at least once
223                        (with status "finished") if the download is successful.
224     merge_output_format: Extension to use when merging formats.
225     fixup:             Automatically correct known faults of the file.
226                        One of:
227                        - "never": do nothing
228                        - "warn": only emit a warning
229                        - "detect_or_warn": check whether we can do anything
230                                            about it, warn otherwise (default)
231     source_address:    (Experimental) Client-side IP address to bind to.
232     call_home:         Boolean, true iff we are allowed to contact the
233                        youtube-dl servers for debugging.
234     sleep_interval:    Number of seconds to sleep before each download.
235     listformats:       Print an overview of available video formats and exit.
236     list_thumbnails:   Print a table of all thumbnails and exit.
237     match_filter:      A function that gets called with the info_dict of
238                        every video.
239                        If it returns a message, the video is ignored.
240                        If it returns None, the video is downloaded.
241                        match_filter_func in utils.py is one example for this.
242     no_color:          Do not emit color codes in output.
243
244     The following options determine which downloader is picked:
245     external_downloader: Executable of the external downloader to call.
246                        None or unset for standard (built-in) downloader.
247     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
248
249     The following parameters are not used by YoutubeDL itself, they are used by
250     the FileDownloader:
251     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
252     noresizebuffer, retries, continuedl, noprogress, consoletitle,
253     xattr_set_filesize.
254
255     The following options are used by the post processors:
256     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
257                        otherwise prefer avconv.
258     exec_cmd:          Arbitrary command to run after downloading
259     """
260
    # Class-level defaults; the real values are assigned per instance in
    # __init__.  Kept here so the attributes exist on the class itself.
    params = None            # options dictionary
    _ies = []                # registered InfoExtractor instances, in order
    _pps = []                # registered PostProcessor chain
    _download_retcode = None  # process return code (1 after any error)
    _num_downloads = None    # counter used by the %(autonumber)s template
    _screen_file = None      # stdout or stderr, per the logtostderr option
267
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dictionary of options (see the class docstring); it is
                   stored as self.params and shared with the InfoExtractors.
        auto_init: whether to print the debug header and register the
                   default InfoExtractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when logtostderr is
        # set (the boolean indexes into the two-element list).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Spawn an external bidi filter; its reshaped output is read
                # back through the pty master in _bidi_workaround().
                master, slave = pty.openpty()
                width = get_term_width()
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is unavailable; fall back to fribidi.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:  # ENOENT: neither helper executable was found
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate the configured postprocessors: 'key' selects the class,
        # every other entry is forwarded as a keyword argument.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
338
339     def warn_if_short_id(self, argv):
340         # short YouTube ID starting with dash?
341         idxs = [
342             i for i, a in enumerate(argv)
343             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
344         if idxs:
345             correct_argv = (
346                 ['youtube-dl'] +
347                 [a for i, a in enumerate(argv) if i not in idxs] +
348                 ['--'] + [argv[i] for i in idxs]
349             )
350             self.report_warning(
351                 'Long argument string detected. '
352                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
353                 args_to_str(correct_argv))
354
355     def add_info_extractor(self, ie):
356         """Add an InfoExtractor object to the end of the list."""
357         self._ies.append(ie)
358         self._ies_instances[ie.ie_key()] = ie
359         ie.set_downloader(self)
360
361     def get_info_extractor(self, ie_key):
362         """
363         Get an instance of an IE with name ie_key, it will try to get one from
364         the _ies list, if there's no instance it will create a new one and add
365         it to the extractor list.
366         """
367         ie = self._ies_instances.get(ie_key)
368         if ie is None:
369             ie = get_info_extractor(ie_key)()
370             self.add_info_extractor(ie)
371         return ie
372
373     def add_default_info_extractors(self):
374         """
375         Add the InfoExtractors returned by gen_extractors to the end of the list
376         """
377         for ie in gen_extractors():
378             self.add_info_extractor(ie)
379
380     def add_post_processor(self, pp):
381         """Add a PostProcessor object to the end of the chain."""
382         self._pps.append(pp)
383         pp.set_downloader(self)
384
385     def add_progress_hook(self, ph):
386         """Add the progress hook (currently only for the file downloader)"""
387         self._progress_hooks.append(ph)
388
389     def _bidi_workaround(self, message):
390         if not hasattr(self, '_output_channel'):
391             return message
392
393         assert hasattr(self, '_output_process')
394         assert isinstance(message, compat_str)
395         line_count = message.count('\n') + 1
396         self._output_process.stdin.write((message + '\n').encode('utf-8'))
397         self._output_process.stdin.flush()
398         res = ''.join(self._output_channel.readline().decode('utf-8')
399                       for _ in range(line_count))
400         return res[:-len('\n')]
401
402     def to_screen(self, message, skip_eol=False):
403         """Print message to stdout if not in quiet mode."""
404         return self.to_stdout(message, skip_eol, check_quiet=True)
405
406     def _write_string(self, s, out=None):
407         write_string(s, out=out, encoding=self.params.get('encoding'))
408
409     def to_stdout(self, message, skip_eol=False, check_quiet=False):
410         """Print message to stdout if not in quiet mode."""
411         if self.params.get('logger'):
412             self.params['logger'].debug(message)
413         elif not check_quiet or not self.params.get('quiet', False):
414             message = self._bidi_workaround(message)
415             terminator = ['\n', ''][skip_eol]
416             output = message + terminator
417
418             self._write_string(output, self._screen_file)
419
420     def to_stderr(self, message):
421         """Print message to stderr."""
422         assert isinstance(message, compat_str)
423         if self.params.get('logger'):
424             self.params['logger'].error(message)
425         else:
426             message = self._bidi_workaround(message)
427             output = message + '\n'
428             self._write_string(output, self._err_file)
429
430     def to_console_title(self, message):
431         if not self.params.get('consoletitle', False):
432             return
433         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
434             # c_wchar_p() might not be necessary if `message` is
435             # already of type unicode()
436             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
437         elif 'TERM' in os.environ:
438             self._write_string('\033]0;%s\007' % message, self._screen_file)
439
440     def save_console_title(self):
441         if not self.params.get('consoletitle', False):
442             return
443         if 'TERM' in os.environ:
444             # Save the title on stack
445             self._write_string('\033[22;0t', self._screen_file)
446
447     def restore_console_title(self):
448         if not self.params.get('consoletitle', False):
449             return
450         if 'TERM' in os.environ:
451             # Restore the title from stack
452             self._write_string('\033[23;0t', self._screen_file)
453
454     def __enter__(self):
455         self.save_console_title()
456         return self
457
458     def __exit__(self, *args):
459         self.restore_console_title()
460
461         if self.params.get('cookiefile') is not None:
462             self.cookiejar.save()
463
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        Raises DownloadError unless 'ignoreerrors' is set, in which case it
        only records a non-zero return code in _download_retcode.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the wrapped traceback when the active exception
                    # carries one (e.g. ExtractorError.exc_info).
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise with the most specific exc_info available.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
493
494     def report_warning(self, message):
495         '''
496         Print the message to stderr, it will be prefixed with 'WARNING:'
497         If stderr is a tty file the 'WARNING:' will be colored
498         '''
499         if self.params.get('logger') is not None:
500             self.params['logger'].warning(message)
501         else:
502             if self.params.get('no_warnings'):
503                 return
504             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
505                 _msg_header = '\033[0;33mWARNING:\033[0m'
506             else:
507                 _msg_header = 'WARNING:'
508             warning_message = '%s %s' % (_msg_header, message)
509             self.to_stderr(warning_message)
510
511     def report_error(self, message, tb=None):
512         '''
513         Do the same as trouble, but prefixes the message with 'ERROR:', colored
514         in red if stderr is a tty file.
515         '''
516         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
517             _msg_header = '\033[0;31mERROR:\033[0m'
518         else:
519             _msg_header = 'ERROR:'
520         error_message = '%s %s' % (_msg_header, message)
521         self.trouble(error_message, tb)
522
523     def report_file_already_downloaded(self, file_name):
524         """Report file has already been fully downloaded."""
525         try:
526             self.to_screen('[download] %s has already been downloaded' % file_name)
527         except UnicodeEncodeError:
528             self.to_screen('[download] The file has already been downloaded')
529
530     def prepare_filename(self, info_dict):
531         """Generate the output filename."""
532         try:
533             template_dict = dict(info_dict)
534
535             template_dict['epoch'] = int(time.time())
536             autonumber_size = self.params.get('autonumber_size')
537             if autonumber_size is None:
538                 autonumber_size = 5
539             autonumber_templ = '%0' + str(autonumber_size) + 'd'
540             template_dict['autonumber'] = autonumber_templ % self._num_downloads
541             if template_dict.get('playlist_index') is not None:
542                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
543             if template_dict.get('resolution') is None:
544                 if template_dict.get('width') and template_dict.get('height'):
545                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
546                 elif template_dict.get('height'):
547                     template_dict['resolution'] = '%sp' % template_dict['height']
548                 elif template_dict.get('width'):
549                     template_dict['resolution'] = '?x%d' % template_dict['width']
550
551             sanitize = lambda k, v: sanitize_filename(
552                 compat_str(v),
553                 restricted=self.params.get('restrictfilenames'),
554                 is_id=(k == 'id'))
555             template_dict = dict((k, sanitize(k, v))
556                                  for k, v in template_dict.items()
557                                  if v is not None)
558             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
559
560             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
561             tmpl = compat_expanduser(outtmpl)
562             filename = tmpl % template_dict
563             # Temporary fix for #4787
564             # 'Treat' all problem characters by passing filename through preferredencoding
565             # to workaround encoding issues with subprocess on python2 @ Windows
566             if sys.version_info < (3, 0) and sys.platform == 'win32':
567                 filename = encodeFilename(filename, True).decode(preferredencoding())
568             return filename
569         except ValueError as err:
570             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
571             return None
572
573     def _match_entry(self, info_dict, incomplete):
574         """ Returns None iff the file should be downloaded """
575
576         video_title = info_dict.get('title', info_dict.get('id', 'video'))
577         if 'title' in info_dict:
578             # This can happen when we're just evaluating the playlist
579             title = info_dict['title']
580             matchtitle = self.params.get('matchtitle', False)
581             if matchtitle:
582                 if not re.search(matchtitle, title, re.IGNORECASE):
583                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
584             rejecttitle = self.params.get('rejecttitle', False)
585             if rejecttitle:
586                 if re.search(rejecttitle, title, re.IGNORECASE):
587                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
588         date = info_dict.get('upload_date', None)
589         if date is not None:
590             dateRange = self.params.get('daterange', DateRange())
591             if date not in dateRange:
592                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
593         view_count = info_dict.get('view_count', None)
594         if view_count is not None:
595             min_views = self.params.get('min_views')
596             if min_views is not None and view_count < min_views:
597                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
598             max_views = self.params.get('max_views')
599             if max_views is not None and view_count > max_views:
600                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
601         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
602             return 'Skipping "%s" because it is age restricted' % video_title
603         if self.in_download_archive(info_dict):
604             return '%s has already been recorded in archive' % video_title
605
606         if not incomplete:
607             match_filter = self.params.get('match_filter')
608             if match_filter is not None:
609                 ret = match_filter(info_dict)
610                 if ret is not None:
611                     return ret
612
613         return None
614
615     @staticmethod
616     def add_extra_info(info_dict, extra_info):
617         '''Set the keys from extra_info in info dict if they are missing'''
618         for key, value in extra_info.items():
619             info_dict.setdefault(key, value)
620
621     def extract_info(self, url, download=True, ie_key=None, extra_info={},
622                      process=True):
623         '''
624         Returns a list with a dictionary for each video we find.
625         If 'download', also downloads the videos.
626         extra_info is a dict containing the extra values to add to each result
627          '''
628
629         if ie_key:
630             ies = [self.get_info_extractor(ie_key)]
631         else:
632             ies = self._ies
633
634         for ie in ies:
635             if not ie.suitable(url):
636                 continue
637
638             if not ie.working():
639                 self.report_warning('The program functionality for this site has been marked as broken, '
640                                     'and will probably not work.')
641
642             try:
643                 ie_result = ie.extract(url)
644                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
645                     break
646                 if isinstance(ie_result, list):
647                     # Backwards compatibility: old IE result format
648                     ie_result = {
649                         '_type': 'compat_list',
650                         'entries': ie_result,
651                     }
652                 self.add_default_extra_info(ie_result, ie, url)
653                 if process:
654                     return self.process_ie_result(ie_result, download, extra_info)
655                 else:
656                     return ie_result
657             except ExtractorError as de:  # An error we somewhat expected
658                 self.report_error(compat_str(de), de.format_traceback())
659                 break
660             except MaxDownloadsReached:
661                 raise
662             except Exception as e:
663                 if self.params.get('ignoreerrors', False):
664                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
665                     break
666                 else:
667                     raise
668         else:
669             self.report_error('no suitable InfoExtractor for URL %s' % url)
670
671     def add_default_extra_info(self, ie_result, ie, url):
672         self.add_extra_info(ie_result, {
673             'extractor': ie.IE_NAME,
674             'webpage_url': url,
675             'webpage_url_basename': url_basename(url),
676             'extractor_key': ie.ie_key(),
677         })
678
679     def process_ie_result(self, ie_result, download=True, extra_info={}):
680         """
681         Take the result of the ie(may be modified) and resolve all unresolved
682         references (URLs, playlist items).
683
684         It will also download the videos if 'download'.
685         Returns the resolved ie_result.
686         """
687
688         result_type = ie_result.get('_type', 'video')
689
690         if result_type in ('url', 'url_transparent'):
691             extract_flat = self.params.get('extract_flat', False)
692             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
693                     extract_flat is True):
694                 if self.params.get('forcejson', False):
695                     self.to_stdout(json.dumps(ie_result))
696                 return ie_result
697
698         if result_type == 'video':
699             self.add_extra_info(ie_result, extra_info)
700             return self.process_video_result(ie_result, download=download)
701         elif result_type == 'url':
702             # We have to add extra_info to the results because it may be
703             # contained in a playlist
704             return self.extract_info(ie_result['url'],
705                                      download,
706                                      ie_key=ie_result.get('ie_key'),
707                                      extra_info=extra_info)
708         elif result_type == 'url_transparent':
709             # Use the information from the embedding page
710             info = self.extract_info(
711                 ie_result['url'], ie_key=ie_result.get('ie_key'),
712                 extra_info=extra_info, download=False, process=False)
713
714             force_properties = dict(
715                 (k, v) for k, v in ie_result.items() if v is not None)
716             for f in ('_type', 'url'):
717                 if f in force_properties:
718                     del force_properties[f]
719             new_result = info.copy()
720             new_result.update(force_properties)
721
722             assert new_result.get('_type') != 'url_transparent'
723
724             return self.process_ie_result(
725                 new_result, download=download, extra_info=extra_info)
726         elif result_type == 'playlist' or result_type == 'multi_video':
727             # We process each entry in the playlist
728             playlist = ie_result.get('title', None) or ie_result.get('id', None)
729             self.to_screen('[download] Downloading playlist: %s' % playlist)
730
731             playlist_results = []
732
733             playliststart = self.params.get('playliststart', 1) - 1
734             playlistend = self.params.get('playlistend', None)
735             # For backwards compatibility, interpret -1 as whole list
736             if playlistend == -1:
737                 playlistend = None
738
739             playlistitems_str = self.params.get('playlist_items', None)
740             playlistitems = None
741             if playlistitems_str is not None:
742                 def iter_playlistitems(format):
743                     for string_segment in format.split(','):
744                         if '-' in string_segment:
745                             start, end = string_segment.split('-')
746                             for item in range(int(start), int(end) + 1):
747                                 yield int(item)
748                         else:
749                             yield int(string_segment)
750                 playlistitems = iter_playlistitems(playlistitems_str)
751
752             ie_entries = ie_result['entries']
753             if isinstance(ie_entries, list):
754                 n_all_entries = len(ie_entries)
755                 if playlistitems:
756                     entries = [ie_entries[i - 1] for i in playlistitems]
757                 else:
758                     entries = ie_entries[playliststart:playlistend]
759                 n_entries = len(entries)
760                 self.to_screen(
761                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
762                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
763             elif isinstance(ie_entries, PagedList):
764                 if playlistitems:
765                     entries = []
766                     for item in playlistitems:
767                         entries.extend(ie_entries.getslice(
768                             item - 1, item
769                         ))
770                 else:
771                     entries = ie_entries.getslice(
772                         playliststart, playlistend)
773                 n_entries = len(entries)
774                 self.to_screen(
775                     "[%s] playlist %s: Downloading %d videos" %
776                     (ie_result['extractor'], playlist, n_entries))
777             else:  # iterable
778                 if playlistitems:
779                     entry_list = list(ie_entries)
780                     entries = [entry_list[i - 1] for i in playlistitems]
781                 else:
782                     entries = list(itertools.islice(
783                         ie_entries, playliststart, playlistend))
784                 n_entries = len(entries)
785                 self.to_screen(
786                     "[%s] playlist %s: Downloading %d videos" %
787                     (ie_result['extractor'], playlist, n_entries))
788
789             if self.params.get('playlistreverse', False):
790                 entries = entries[::-1]
791
792             for i, entry in enumerate(entries, 1):
793                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
794                 extra = {
795                     'n_entries': n_entries,
796                     'playlist': playlist,
797                     'playlist_id': ie_result.get('id'),
798                     'playlist_title': ie_result.get('title'),
799                     'playlist_index': i + playliststart,
800                     'extractor': ie_result['extractor'],
801                     'webpage_url': ie_result['webpage_url'],
802                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
803                     'extractor_key': ie_result['extractor_key'],
804                 }
805
806                 reason = self._match_entry(entry, incomplete=True)
807                 if reason is not None:
808                     self.to_screen('[download] ' + reason)
809                     continue
810
811                 entry_result = self.process_ie_result(entry,
812                                                       download=download,
813                                                       extra_info=extra)
814                 playlist_results.append(entry_result)
815             ie_result['entries'] = playlist_results
816             return ie_result
817         elif result_type == 'compat_list':
818             self.report_warning(
819                 'Extractor %s returned a compat_list result. '
820                 'It needs to be updated.' % ie_result.get('extractor'))
821
822             def _fixup(r):
823                 self.add_extra_info(
824                     r,
825                     {
826                         'extractor': ie_result['extractor'],
827                         'webpage_url': ie_result['webpage_url'],
828                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
829                         'extractor_key': ie_result['extractor_key'],
830                     }
831                 )
832                 return r
833             ie_result['entries'] = [
834                 self.process_ie_result(_fixup(r), download, extra_info)
835                 for r in ie_result['entries']
836             ]
837             return ie_result
838         else:
839             raise Exception('Invalid result type: %s' % result_type)
840
841     def _apply_format_filter(self, format_spec, available_formats):
842         " Returns a tuple of the remaining format_spec and filtered formats "
843
844         OPERATORS = {
845             '<': operator.lt,
846             '<=': operator.le,
847             '>': operator.gt,
848             '>=': operator.ge,
849             '=': operator.eq,
850             '!=': operator.ne,
851         }
852         operator_rex = re.compile(r'''(?x)\s*\[
853             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
854             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
855             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
856             \]$
857             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
858         m = operator_rex.search(format_spec)
859         if m:
860             try:
861                 comparison_value = int(m.group('value'))
862             except ValueError:
863                 comparison_value = parse_filesize(m.group('value'))
864                 if comparison_value is None:
865                     comparison_value = parse_filesize(m.group('value') + 'B')
866                 if comparison_value is None:
867                     raise ValueError(
868                         'Invalid value %r in format specification %r' % (
869                             m.group('value'), format_spec))
870             op = OPERATORS[m.group('op')]
871
872         if not m:
873             STR_OPERATORS = {
874                 '=': operator.eq,
875                 '!=': operator.ne,
876             }
877             str_operator_rex = re.compile(r'''(?x)\s*\[
878                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
879                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
880                 \s*(?P<value>[a-zA-Z0-9_-]+)
881                 \s*\]$
882                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
883             m = str_operator_rex.search(format_spec)
884             if m:
885                 comparison_value = m.group('value')
886                 op = STR_OPERATORS[m.group('op')]
887
888         if not m:
889             raise ValueError('Invalid format specification %r' % format_spec)
890
891         def _filter(f):
892             actual_value = f.get(m.group('key'))
893             if actual_value is None:
894                 return m.group('none_inclusive')
895             return op(actual_value, comparison_value)
896         new_formats = [f for f in available_formats if _filter(f)]
897
898         new_format_spec = format_spec[:-len(m.group(0))]
899         if not new_format_spec:
900             new_format_spec = 'best'
901
902         return (new_format_spec, new_formats)
903
904     def select_format(self, format_spec, available_formats):
905         while format_spec.endswith(']'):
906             format_spec, available_formats = self._apply_format_filter(
907                 format_spec, available_formats)
908         if not available_formats:
909             return None
910
911         if format_spec == 'best' or format_spec is None:
912             return available_formats[-1]
913         elif format_spec == 'worst':
914             return available_formats[0]
915         elif format_spec == 'bestaudio':
916             audio_formats = [
917                 f for f in available_formats
918                 if f.get('vcodec') == 'none']
919             if audio_formats:
920                 return audio_formats[-1]
921         elif format_spec == 'worstaudio':
922             audio_formats = [
923                 f for f in available_formats
924                 if f.get('vcodec') == 'none']
925             if audio_formats:
926                 return audio_formats[0]
927         elif format_spec == 'bestvideo':
928             video_formats = [
929                 f for f in available_formats
930                 if f.get('acodec') == 'none']
931             if video_formats:
932                 return video_formats[-1]
933         elif format_spec == 'worstvideo':
934             video_formats = [
935                 f for f in available_formats
936                 if f.get('acodec') == 'none']
937             if video_formats:
938                 return video_formats[0]
939         else:
940             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
941             if format_spec in extensions:
942                 filter_f = lambda f: f['ext'] == format_spec
943             else:
944                 filter_f = lambda f: f['format_id'] == format_spec
945             matches = list(filter(filter_f, available_formats))
946             if matches:
947                 return matches[-1]
948         return None
949
950     def _calc_headers(self, info_dict):
951         res = std_headers.copy()
952
953         add_headers = info_dict.get('http_headers')
954         if add_headers:
955             res.update(add_headers)
956
957         cookies = self._calc_cookies(info_dict)
958         if cookies:
959             res['Cookie'] = cookies
960
961         return res
962
963     def _calc_cookies(self, info_dict):
964         pr = compat_urllib_request.Request(info_dict['url'])
965         self.cookiejar.add_cookie_header(pr)
966         return pr.get_header('Cookie')
967
    def process_video_result(self, info_dict, download=True):
        """Normalize a single resolved video result (fill in defaults for
        playlist fields, thumbnails, upload_date, subtitles, format fields),
        pick the format(s) requested via the 'format' param and, if
        'download', hand each picked format to process_info().

        Returns the info_dict updated with the best selected format, or
        None when only listing (listsubtitles/listformats/list_thumbnails)
        was requested.
        Raises ExtractorError on missing mandatory fields, when no formats
        are present, or when the requested format is unavailable.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a lone 'thumbnail' into a one-element 'thumbnails' list,
        # sort thumbnails worst-to-best and give each one an id/resolution.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Backwards compatibility: expose the best thumbnail directly
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD) from a raw timestamp when missing
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # --list-subs: print the available subtitles and stop processing
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: drop everything better than the named format
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists
            # them; otherwise we end up with a circular reference, the first
            # (and unique) element in the 'formats' field in info_dict being
            # info_dict itself, which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # Comma-separated specs each select one format to download
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # Merged container ext: the video's, unless
                            # --merge-output-format overrides it
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a combined format dict: video fields
                            # from the first half, audio fields from the second
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1147
1148     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1149         """Select the requested subtitles and their format"""
1150         available_subs = {}
1151         if normal_subtitles and self.params.get('writesubtitles'):
1152             available_subs.update(normal_subtitles)
1153         if automatic_captions and self.params.get('writeautomaticsub'):
1154             for lang, cap_info in automatic_captions.items():
1155                 if lang not in available_subs:
1156                     available_subs[lang] = cap_info
1157
1158         if (not self.params.get('writesubtitles') and not
1159                 self.params.get('writeautomaticsub') or not
1160                 available_subs):
1161             return None
1162
1163         if self.params.get('allsubtitles', False):
1164             requested_langs = available_subs.keys()
1165         else:
1166             if self.params.get('subtitleslangs', False):
1167                 requested_langs = self.params.get('subtitleslangs')
1168             elif 'en' in available_subs:
1169                 requested_langs = ['en']
1170             else:
1171                 requested_langs = [list(available_subs.keys())[0]]
1172
1173         formats_query = self.params.get('subtitlesformat', 'best')
1174         formats_preference = formats_query.split('/') if formats_query else []
1175         subs = {}
1176         for lang in requested_langs:
1177             formats = available_subs.get(lang)
1178             if formats is None:
1179                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1180                 continue
1181             for ext in formats_preference:
1182                 if ext == 'best':
1183                     f = formats[-1]
1184                     break
1185                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1186                 if matches:
1187                     f = matches[-1]
1188                     break
1189             else:
1190                 f = formats[-1]
1191                 self.report_warning(
1192                     'No subtitle format found matching "%s" for language %s, '
1193                     'using %s' % (formats_query, lang, f['ext']))
1194             subs[lang] = f
1195         return subs
1196
1197     def process_info(self, info_dict):
1198         """Process a single resolved IE result."""
1199
1200         assert info_dict.get('_type', 'video') == 'video'
1201
1202         max_downloads = self.params.get('max_downloads')
1203         if max_downloads is not None:
1204             if self._num_downloads >= int(max_downloads):
1205                 raise MaxDownloadsReached()
1206
1207         info_dict['fulltitle'] = info_dict['title']
1208         if len(info_dict['title']) > 200:
1209             info_dict['title'] = info_dict['title'][:197] + '...'
1210
1211         # Keep for backwards compatibility
1212         info_dict['stitle'] = info_dict['title']
1213
1214         if 'format' not in info_dict:
1215             info_dict['format'] = info_dict['ext']
1216
1217         reason = self._match_entry(info_dict, incomplete=False)
1218         if reason is not None:
1219             self.to_screen('[download] ' + reason)
1220             return
1221
1222         self._num_downloads += 1
1223
1224         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1225
1226         # Forced printings
1227         if self.params.get('forcetitle', False):
1228             self.to_stdout(info_dict['fulltitle'])
1229         if self.params.get('forceid', False):
1230             self.to_stdout(info_dict['id'])
1231         if self.params.get('forceurl', False):
1232             if info_dict.get('requested_formats') is not None:
1233                 for f in info_dict['requested_formats']:
1234                     self.to_stdout(f['url'] + f.get('play_path', ''))
1235             else:
1236                 # For RTMP URLs, also include the playpath
1237                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1238         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1239             self.to_stdout(info_dict['thumbnail'])
1240         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1241             self.to_stdout(info_dict['description'])
1242         if self.params.get('forcefilename', False) and filename is not None:
1243             self.to_stdout(filename)
1244         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1245             self.to_stdout(formatSeconds(info_dict['duration']))
1246         if self.params.get('forceformat', False):
1247             self.to_stdout(info_dict['format'])
1248         if self.params.get('forcejson', False):
1249             self.to_stdout(json.dumps(info_dict))
1250
1251         # Do nothing else if in simulate mode
1252         if self.params.get('simulate', False):
1253             return
1254
1255         if filename is None:
1256             return
1257
1258         try:
1259             dn = os.path.dirname(encodeFilename(filename))
1260             if dn and not os.path.exists(dn):
1261                 os.makedirs(dn)
1262         except (OSError, IOError) as err:
1263             self.report_error('unable to create directory ' + compat_str(err))
1264             return
1265
1266         if self.params.get('writedescription', False):
1267             descfn = filename + '.description'
1268             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1269                 self.to_screen('[info] Video description is already present')
1270             elif info_dict.get('description') is None:
1271                 self.report_warning('There\'s no description to write.')
1272             else:
1273                 try:
1274                     self.to_screen('[info] Writing video description to: ' + descfn)
1275                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1276                         descfile.write(info_dict['description'])
1277                 except (OSError, IOError):
1278                     self.report_error('Cannot write description file ' + descfn)
1279                     return
1280
1281         if self.params.get('writeannotations', False):
1282             annofn = filename + '.annotations.xml'
1283             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1284                 self.to_screen('[info] Video annotations are already present')
1285             else:
1286                 try:
1287                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1288                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1289                         annofile.write(info_dict['annotations'])
1290                 except (KeyError, TypeError):
1291                     self.report_warning('There are no annotations to write.')
1292                 except (OSError, IOError):
1293                     self.report_error('Cannot write annotations file: ' + annofn)
1294                     return
1295
1296         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1297                                        self.params.get('writeautomaticsub')])
1298
1299         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1300             # subtitles download errors are already managed as troubles in relevant IE
1301             # that way it will silently go on when used with unsupporting IE
1302             subtitles = info_dict['requested_subtitles']
1303             for sub_lang, sub_info in subtitles.items():
1304                 sub_format = sub_info['ext']
1305                 if sub_info.get('data') is not None:
1306                     sub_data = sub_info['data']
1307                 else:
1308                     try:
1309                         uf = self.urlopen(sub_info['url'])
1310                         sub_data = uf.read().decode('utf-8')
1311                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1312                         self.report_warning('Unable to download subtitle for "%s": %s' %
1313                                             (sub_lang, compat_str(err)))
1314                         continue
1315                 try:
1316                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1317                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1318                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1319                     else:
1320                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1321                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1322                             subfile.write(sub_data)
1323                 except (OSError, IOError):
1324                     self.report_error('Cannot write subtitles file ' + sub_filename)
1325                     return
1326
1327         if self.params.get('writeinfojson', False):
1328             infofn = os.path.splitext(filename)[0] + '.info.json'
1329             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1330                 self.to_screen('[info] Video description metadata is already present')
1331             else:
1332                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1333                 try:
1334                     write_json_file(info_dict, infofn)
1335                 except (OSError, IOError):
1336                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1337                     return
1338
1339         self._write_thumbnails(info_dict, filename)
1340
1341         if not self.params.get('skip_download', False):
1342             try:
1343                 def dl(name, info):
1344                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1345                     for ph in self._progress_hooks:
1346                         fd.add_progress_hook(ph)
1347                     if self.params.get('verbose'):
1348                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1349                     return fd.download(name, info)
1350
1351                 if info_dict.get('requested_formats') is not None:
1352                     downloaded = []
1353                     success = True
1354                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1355                     if not merger.available:
1356                         postprocessors = []
1357                         self.report_warning('You have requested multiple '
1358                                             'formats but ffmpeg or avconv are not installed.'
1359                                             ' The formats won\'t be merged')
1360                     else:
1361                         postprocessors = [merger]
1362                     for f in info_dict['requested_formats']:
1363                         new_info = dict(info_dict)
1364                         new_info.update(f)
1365                         fname = self.prepare_filename(new_info)
1366                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1367                         downloaded.append(fname)
1368                         partial_success = dl(fname, new_info)
1369                         success = success and partial_success
1370                     info_dict['__postprocessors'] = postprocessors
1371                     info_dict['__files_to_merge'] = downloaded
1372                 else:
1373                     # Just a single file
1374                     success = dl(filename, info_dict)
1375             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1376                 self.report_error('unable to download video data: %s' % str(err))
1377                 return
1378             except (OSError, IOError) as err:
1379                 raise UnavailableVideoError(err)
1380             except (ContentTooShortError, ) as err:
1381                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1382                 return
1383
1384             if success:
1385                 # Fixup content
1386                 fixup_policy = self.params.get('fixup')
1387                 if fixup_policy is None:
1388                     fixup_policy = 'detect_or_warn'
1389
1390                 stretched_ratio = info_dict.get('stretched_ratio')
1391                 if stretched_ratio is not None and stretched_ratio != 1:
1392                     if fixup_policy == 'warn':
1393                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1394                             info_dict['id'], stretched_ratio))
1395                     elif fixup_policy == 'detect_or_warn':
1396                         stretched_pp = FFmpegFixupStretchedPP(self)
1397                         if stretched_pp.available:
1398                             info_dict.setdefault('__postprocessors', [])
1399                             info_dict['__postprocessors'].append(stretched_pp)
1400                         else:
1401                             self.report_warning(
1402                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1403                                     info_dict['id'], stretched_ratio))
1404                     else:
1405                         assert fixup_policy in ('ignore', 'never')
1406
1407                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1408                     if fixup_policy == 'warn':
1409                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1410                             info_dict['id']))
1411                     elif fixup_policy == 'detect_or_warn':
1412                         fixup_pp = FFmpegFixupM4aPP(self)
1413                         if fixup_pp.available:
1414                             info_dict.setdefault('__postprocessors', [])
1415                             info_dict['__postprocessors'].append(fixup_pp)
1416                         else:
1417                             self.report_warning(
1418                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1419                                     info_dict['id']))
1420                     else:
1421                         assert fixup_policy in ('ignore', 'never')
1422
1423                 try:
1424                     self.post_process(filename, info_dict)
1425                 except (PostProcessingError) as err:
1426                     self.report_error('postprocessing: %s' % str(err))
1427                     return
1428                 self.record_download_archive(info_dict)
1429
1430     def download(self, url_list):
1431         """Download a given list of URLs."""
1432         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1433         if (len(url_list) > 1 and
1434                 '%' not in outtmpl and
1435                 self.params.get('max_downloads') != 1):
1436             raise SameFileError(outtmpl)
1437
1438         for url in url_list:
1439             try:
1440                 # It also downloads the videos
1441                 res = self.extract_info(url)
1442             except UnavailableVideoError:
1443                 self.report_error('unable to download video')
1444             except MaxDownloadsReached:
1445                 self.to_screen('[info] Maximum number of downloaded files reached.')
1446                 raise
1447             else:
1448                 if self.params.get('dump_single_json', False):
1449                     self.to_stdout(json.dumps(res))
1450
1451         return self._download_retcode
1452
1453     def download_with_info_file(self, info_filename):
1454         with io.open(info_filename, 'r', encoding='utf-8') as f:
1455             info = json.load(f)
1456         try:
1457             self.process_ie_result(info, download=True)
1458         except DownloadError:
1459             webpage_url = info.get('webpage_url')
1460             if webpage_url is not None:
1461                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1462                 return self.download([webpage_url])
1463             else:
1464                 raise
1465         return self._download_retcode
1466
1467     def post_process(self, filename, ie_info):
1468         """Run all the postprocessors on the given file."""
1469         info = dict(ie_info)
1470         info['filepath'] = filename
1471         pps_chain = []
1472         if ie_info.get('__postprocessors') is not None:
1473             pps_chain.extend(ie_info['__postprocessors'])
1474         pps_chain.extend(self._pps)
1475         for pp in pps_chain:
1476             keep_video = None
1477             old_filename = info['filepath']
1478             try:
1479                 keep_video_wish, info = pp.run(info)
1480                 if keep_video_wish is not None:
1481                     if keep_video_wish:
1482                         keep_video = keep_video_wish
1483                     elif keep_video is None:
1484                         # No clear decision yet, let IE decide
1485                         keep_video = keep_video_wish
1486             except PostProcessingError as e:
1487                 self.report_error(e.msg)
1488             if keep_video is False and not self.params.get('keepvideo', False):
1489                 try:
1490                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1491                     os.remove(encodeFilename(old_filename))
1492                 except (IOError, OSError):
1493                     self.report_warning('Unable to remove downloaded video file')
1494
1495     def _make_archive_id(self, info_dict):
1496         # Future-proof against any change in case
1497         # and backwards compatibility with prior versions
1498         extractor = info_dict.get('extractor_key')
1499         if extractor is None:
1500             if 'id' in info_dict:
1501                 extractor = info_dict.get('ie_key')  # key in a playlist
1502         if extractor is None:
1503             return None  # Incomplete video information
1504         return extractor.lower() + ' ' + info_dict['id']
1505
1506     def in_download_archive(self, info_dict):
1507         fn = self.params.get('download_archive')
1508         if fn is None:
1509             return False
1510
1511         vid_id = self._make_archive_id(info_dict)
1512         if vid_id is None:
1513             return False  # Incomplete video information
1514
1515         try:
1516             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1517                 for line in archive_file:
1518                     if line.strip() == vid_id:
1519                         return True
1520         except IOError as ioe:
1521             if ioe.errno != errno.ENOENT:
1522                 raise
1523         return False
1524
1525     def record_download_archive(self, info_dict):
1526         fn = self.params.get('download_archive')
1527         if fn is None:
1528             return
1529         vid_id = self._make_archive_id(info_dict)
1530         assert vid_id
1531         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1532             archive_file.write(vid_id + '\n')
1533
1534     @staticmethod
1535     def format_resolution(format, default='unknown'):
1536         if format.get('vcodec') == 'none':
1537             return 'audio only'
1538         if format.get('resolution') is not None:
1539             return format['resolution']
1540         if format.get('height') is not None:
1541             if format.get('width') is not None:
1542                 res = '%sx%s' % (format['width'], format['height'])
1543             else:
1544                 res = '%sp' % format['height']
1545         elif format.get('width') is not None:
1546             res = '?x%d' % format['width']
1547         else:
1548             res = default
1549         return res
1550
1551     def _format_note(self, fdict):
1552         res = ''
1553         if fdict.get('ext') in ['f4f', 'f4m']:
1554             res += '(unsupported) '
1555         if fdict.get('format_note') is not None:
1556             res += fdict['format_note'] + ' '
1557         if fdict.get('tbr') is not None:
1558             res += '%4dk ' % fdict['tbr']
1559         if fdict.get('container') is not None:
1560             if res:
1561                 res += ', '
1562             res += '%s container' % fdict['container']
1563         if (fdict.get('vcodec') is not None and
1564                 fdict.get('vcodec') != 'none'):
1565             if res:
1566                 res += ', '
1567             res += fdict['vcodec']
1568             if fdict.get('vbr') is not None:
1569                 res += '@'
1570         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1571             res += 'video@'
1572         if fdict.get('vbr') is not None:
1573             res += '%4dk' % fdict['vbr']
1574         if fdict.get('fps') is not None:
1575             res += ', %sfps' % fdict['fps']
1576         if fdict.get('acodec') is not None:
1577             if res:
1578                 res += ', '
1579             if fdict['acodec'] == 'none':
1580                 res += 'video only'
1581             else:
1582                 res += '%-5s' % fdict['acodec']
1583         elif fdict.get('abr') is not None:
1584             if res:
1585                 res += ', '
1586             res += 'audio'
1587         if fdict.get('abr') is not None:
1588             res += '@%3dk' % fdict['abr']
1589         if fdict.get('asr') is not None:
1590             res += ' (%5dHz)' % fdict['asr']
1591         if fdict.get('filesize') is not None:
1592             if res:
1593                 res += ', '
1594             res += format_bytes(fdict['filesize'])
1595         elif fdict.get('filesize_approx') is not None:
1596             if res:
1597                 res += ', '
1598             res += '~' + format_bytes(fdict['filesize_approx'])
1599         return res
1600
1601     def list_formats(self, info_dict):
1602         formats = info_dict.get('formats', [info_dict])
1603         table = [
1604             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1605             for f in formats
1606             if f.get('preference') is None or f['preference'] >= -1000]
1607         if len(formats) > 1:
1608             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1609
1610         header_line = ['format code', 'extension', 'resolution', 'note']
1611         self.to_screen(
1612             '[info] Available formats for %s:\n%s' %
1613             (info_dict['id'], render_table(header_line, table)))
1614
1615     def list_thumbnails(self, info_dict):
1616         thumbnails = info_dict.get('thumbnails')
1617         if not thumbnails:
1618             tn_url = info_dict.get('thumbnail')
1619             if tn_url:
1620                 thumbnails = [{'id': '0', 'url': tn_url}]
1621             else:
1622                 self.to_screen(
1623                     '[info] No thumbnails present for %s' % info_dict['id'])
1624                 return
1625
1626         self.to_screen(
1627             '[info] Thumbnails for %s:' % info_dict['id'])
1628         self.to_screen(render_table(
1629             ['ID', 'width', 'height', 'URL'],
1630             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1631
1632     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1633         if not subtitles:
1634             self.to_screen('%s has no %s' % (video_id, name))
1635             return
1636         self.to_screen(
1637             'Available %s for %s:' % (name, video_id))
1638         self.to_screen(render_table(
1639             ['Language', 'formats'],
1640             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1641                 for lang, formats in subtitles.items()]))
1642
1643     def urlopen(self, req):
1644         """ Start an HTTP download """
1645
1646         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1647         # always respected by websites, some tend to give out URLs with non percent-encoded
1648         # non-ASCII characters (see telemb.py, ard.py [#3412])
1649         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1650         # To work around aforementioned issue we will replace request's original URL with
1651         # percent-encoded one
1652         req_is_string = isinstance(req, compat_basestring)
1653         url = req if req_is_string else req.get_full_url()
1654         url_escaped = escape_url(url)
1655
1656         # Substitute URL if any change after escaping
1657         if url != url_escaped:
1658             if req_is_string:
1659                 req = url_escaped
1660             else:
1661                 req = compat_urllib_request.Request(
1662                     url_escaped, data=req.data, headers=req.headers,
1663                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1664
1665         return self._opener.open(req, timeout=self._socket_timeout)
1666
1667     def print_debug_header(self):
1668         if not self.params.get('verbose'):
1669             return
1670
1671         if type('') is not compat_str:
1672             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1673             self.report_warning(
1674                 'Your Python is broken! Update to a newer and supported version')
1675
1676         stdout_encoding = getattr(
1677             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1678         encoding_str = (
1679             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1680                 locale.getpreferredencoding(),
1681                 sys.getfilesystemencoding(),
1682                 stdout_encoding,
1683                 self.get_encoding()))
1684         write_string(encoding_str, encoding=None)
1685
1686         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1687         try:
1688             sp = subprocess.Popen(
1689                 ['git', 'rev-parse', '--short', 'HEAD'],
1690                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1691                 cwd=os.path.dirname(os.path.abspath(__file__)))
1692             out, err = sp.communicate()
1693             out = out.decode().strip()
1694             if re.match('[0-9a-f]+', out):
1695                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1696         except:
1697             try:
1698                 sys.exc_clear()
1699             except:
1700                 pass
1701         self._write_string('[debug] Python version %s - %s\n' % (
1702             platform.python_version(), platform_name()))
1703
1704         exe_versions = FFmpegPostProcessor.get_versions(self)
1705         exe_versions['rtmpdump'] = rtmpdump_version()
1706         exe_str = ', '.join(
1707             '%s %s' % (exe, v)
1708             for exe, v in sorted(exe_versions.items())
1709             if v
1710         )
1711         if not exe_str:
1712             exe_str = 'none'
1713         self._write_string('[debug] exe versions: %s\n' % exe_str)
1714
1715         proxy_map = {}
1716         for handler in self._opener.handlers:
1717             if hasattr(handler, 'proxies'):
1718                 proxy_map.update(handler.proxies)
1719         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1720
1721         if self.params.get('call_home', False):
1722             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1723             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1724             latest_version = self.urlopen(
1725                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1726             if version_tuple(latest_version) > version_tuple(__version__):
1727                 self.report_warning(
1728                     'You are using an outdated version (newest version: %s)! '
1729                     'See https://yt-dl.org/update if you need help updating.' %
1730                     latest_version)
1731
1732     def _setup_opener(self):
1733         timeout_val = self.params.get('socket_timeout')
1734         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1735
1736         opts_cookiefile = self.params.get('cookiefile')
1737         opts_proxy = self.params.get('proxy')
1738
1739         if opts_cookiefile is None:
1740             self.cookiejar = compat_cookiejar.CookieJar()
1741         else:
1742             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1743                 opts_cookiefile)
1744             if os.access(opts_cookiefile, os.R_OK):
1745                 self.cookiejar.load()
1746
1747         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1748             self.cookiejar)
1749         if opts_proxy is not None:
1750             if opts_proxy == '':
1751                 proxies = {}
1752             else:
1753                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1754         else:
1755             proxies = compat_urllib_request.getproxies()
1756             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1757             if 'http' in proxies and 'https' not in proxies:
1758                 proxies['https'] = proxies['http']
1759         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1760
1761         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1762         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1763         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1764         opener = compat_urllib_request.build_opener(
1765             https_handler, proxy_handler, cookie_processor, ydlh)
1766         # Delete the default user-agent header, which would otherwise apply in
1767         # cases where our custom HTTP handler doesn't come into play
1768         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1769         opener.addheaders = []
1770         self._opener = opener
1771
1772     def encode(self, s):
1773         if isinstance(s, bytes):
1774             return s  # Already encoded
1775
1776         try:
1777             return s.encode(self.get_encoding())
1778         except UnicodeEncodeError as err:
1779             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1780             raise
1781
1782     def get_encoding(self):
1783         encoding = self.params.get('encoding')
1784         if encoding is None:
1785             encoding = preferredencoding()
1786         return encoding
1787
1788     def _write_thumbnails(self, info_dict, filename):
1789         if self.params.get('writethumbnail', False):
1790             thumbnails = info_dict.get('thumbnails')
1791             if thumbnails:
1792                 thumbnails = [thumbnails[-1]]
1793         elif self.params.get('write_all_thumbnails', False):
1794             thumbnails = info_dict.get('thumbnails')
1795         else:
1796             return
1797
1798         if not thumbnails:
1799             # No thumbnails present, so return immediately
1800             return
1801
1802         for t in thumbnails:
1803             thumb_ext = determine_ext(t['url'], 'jpg')
1804             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1805             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1806             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1807
1808             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1809                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1810                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1811             else:
1812                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1813                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1814                 try:
1815                     uf = self.urlopen(t['url'])
1816                     with open(thumb_filename, 'wb') as thumbf:
1817                         shutil.copyfileobj(uf, thumbf)
1818                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1819                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1820                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1821                     self.report_warning('Unable to download thumbnail "%s": %s' %
1822                                         (t['url'], compat_str(err)))