[douyutv] Add new extractor
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PostProcessingError,
58     platform_name,
59     preferredencoding,
60     render_table,
61     SameFileError,
62     sanitize_filename,
63     std_headers,
64     subtitles_filename,
65     takewhile_inclusive,
66     UnavailableVideoError,
67     url_basename,
68     version_tuple,
69     write_json_file,
70     write_string,
71     YoutubeDLHandler,
72     prepend_extension,
73     args_to_str,
74     age_restricted,
75 )
76 from .cache import Cache
77 from .extractor import get_info_extractor, gen_extractors
78 from .downloader import get_suitable_downloader
79 from .downloader.rtmp import rtmpdump_version
80 from .postprocessor import (
81     FFmpegFixupM4aPP,
82     FFmpegFixupStretchedPP,
83     FFmpegMergerPP,
84     FFmpegPostProcessor,
85     get_postprocessor,
86 )
87 from .version import __version__
88
89
90 class YoutubeDL(object):
91     """YoutubeDL class.
92
93     YoutubeDL objects are the ones responsible for downloading the
94     actual video file and writing it to disk if the user has requested
95     it, among some other tasks. In most cases there should be one per
96     program. As, given a video URL, the downloader doesn't know how to
97     extract all the needed information, task that InfoExtractors do, it
98     has to pass the URL to one of them.
99
100     For this, YoutubeDL objects have a method that allows
101     InfoExtractors to be registered in a given order. When it is passed
102     a URL, the YoutubeDL object hands it to the first InfoExtractor it
103     finds that reports being able to handle it. The InfoExtractor extracts
104     all the information about the video or videos the URL refers to, and
105     YoutubeDL process the extracted information, possibly using a File
106     Downloader to download the video.
107
108     YoutubeDL objects accept a lot of parameters. In order not to saturate
109     the object constructor with arguments, it receives a dictionary of
110     options instead. These options are available through the params
111     attribute for the InfoExtractors to use. The YoutubeDL also
112     registers itself as the downloader in charge for the InfoExtractors
113     that are added to it, so this is a "mutual registration".
114
115     Available options:
116
117     username:          Username for authentication purposes.
118     password:          Password for authentication purposes.
119     videopassword:     Password for accessing a video.
120     usenetrc:          Use netrc for authentication instead.
121     verbose:           Print additional info to stdout.
122     quiet:             Do not print messages to stdout.
123     no_warnings:       Do not print out anything for warnings.
124     forceurl:          Force printing final URL.
125     forcetitle:        Force printing title.
126     forceid:           Force printing ID.
127     forcethumbnail:    Force printing thumbnail URL.
128     forcedescription:  Force printing description.
129     forcefilename:     Force printing final filename.
130     forceduration:     Force printing duration.
131     forcejson:         Force printing info_dict as JSON.
132     dump_single_json:  Force printing the info_dict of the whole playlist
133                        (or video) as a single JSON line.
134     simulate:          Do not download the video files.
135     format:            Video format code. See options.py for more information.
136     format_limit:      Highest quality format to try.
137     outtmpl:           Template for output names.
138     restrictfilenames: Do not allow "&" and spaces in file names
139     ignoreerrors:      Do not stop on download errors.
140     nooverwrites:      Prevent overwriting files.
141     playliststart:     Playlist item to start at.
142     playlistend:       Playlist item to end at.
143     playlist_items:    Specific indices of playlist to download.
144     playlistreverse:   Download playlist items in reverse order.
145     matchtitle:        Download only matching titles.
146     rejecttitle:       Reject downloads for matching titles.
147     logger:            Log messages to a logging.Logger instance.
148     logtostderr:       Log messages to stderr instead of stdout.
149     writedescription:  Write the video description to a .description file
150     writeinfojson:     Write the video description to a .info.json file
151     writeannotations:  Write the video annotations to a .annotations.xml file
152     writethumbnail:    Write the thumbnail image to a file
153     write_all_thumbnails:  Write all thumbnail formats to files
154     writesubtitles:    Write the video subtitles to a file
155     writeautomaticsub: Write the automatic subtitles to a file
156     allsubtitles:      Downloads all the subtitles of the video
157                        (requires writesubtitles or writeautomaticsub)
158     listsubtitles:     Lists all available subtitles for the video
159     subtitlesformat:   The format code for subtitles
160     subtitleslangs:    List of languages of the subtitles to download
161     keepvideo:         Keep the video file after post-processing
162     daterange:         A DateRange object, download only if the upload_date is in the range.
163     skip_download:     Skip the actual download of the video file
164     cachedir:          Location of the cache files in the filesystem.
165                        False to disable filesystem cache.
166     noplaylist:        Download single video instead of a playlist if in doubt.
167     age_limit:         An integer representing the user's age in years.
168                        Unsuitable videos for the given age are skipped.
169     min_views:         An integer representing the minimum view count the video
170                        must have in order to not be skipped.
171                        Videos without view count information are always
172                        downloaded. None for no limit.
173     max_views:         An integer representing the maximum view count.
174                        Videos that are more popular than that are not
175                        downloaded.
176                        Videos without view count information are always
177                        downloaded. None for no limit.
178     download_archive:  File name of a file where all downloads are recorded.
179                        Videos already present in the file are not downloaded
180                        again.
181     cookiefile:        File name where cookies should be read from and dumped to.
182     nocheckcertificate:Do not verify SSL certificates
183     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
184                        At the moment, this is only supported by YouTube.
185     proxy:             URL of the proxy server to use
186     socket_timeout:    Time to wait for unresponsive hosts, in seconds
187     bidi_workaround:   Work around buggy terminals without bidirectional text
188                        support, using fribidi
189     debug_printtraffic:Print out sent and received HTTP traffic
190     include_ads:       Download ads as well
191     default_search:    Prepend this string if an input url is not valid.
192                        'auto' for elaborate guessing
193     encoding:          Use this encoding instead of the system-specified.
194     extract_flat:      Do not resolve URLs, return the immediate result.
195                        Pass in 'in_playlist' to only show this behavior for
196                        playlist items.
197     postprocessors:    A list of dictionaries, each with an entry
198                        * key:  The name of the postprocessor. See
199                                youtube_dl/postprocessor/__init__.py for a list.
200                        as well as any further keyword arguments for the
201                        postprocessor.
202     progress_hooks:    A list of functions that get called on download
203                        progress, with a dictionary with the entries
204                        * status: One of "downloading", "error", or "finished".
205                                  Check this first and ignore unknown values.
206
207                        If status is one of "downloading", or "finished", the
208                        following properties may also be present:
209                        * filename: The final filename (always present)
210                        * tmpfilename: The filename we're currently writing to
211                        * downloaded_bytes: Bytes on disk
212                        * total_bytes: Size of the whole file, None if unknown
213                        * total_bytes_estimate: Guess of the eventual file size,
214                                                None if unavailable.
215                        * elapsed: The number of seconds since download started.
216                        * eta: The estimated time in seconds, None if unknown
217                        * speed: The download speed in bytes/second, None if
218                                 unknown
219                        * fragment_index: The counter of the currently
220                                          downloaded video fragment.
221                        * fragment_count: The number of fragments (= individual
222                                          files that will be merged)
223
224                        Progress hooks are guaranteed to be called at least once
225                        (with status "finished") if the download is successful.
226     merge_output_format: Extension to use when merging formats.
227     fixup:             Automatically correct known faults of the file.
228                        One of:
229                        - "never": do nothing
230                        - "warn": only emit a warning
231                        - "detect_or_warn": check whether we can do anything
232                                            about it, warn otherwise (default)
233     source_address:    (Experimental) Client-side IP address to bind to.
234     call_home:         Boolean, true iff we are allowed to contact the
235                        youtube-dl servers for debugging.
236     sleep_interval:    Number of seconds to sleep before each download.
237     listformats:       Print an overview of available video formats and exit.
238     list_thumbnails:   Print a table of all thumbnails and exit.
239     match_filter:      A function that gets called with the info_dict of
240                        every video.
241                        If it returns a message, the video is ignored.
242                        If it returns None, the video is downloaded.
243                        match_filter_func in utils.py is one example for this.
244     no_color:          Do not emit color codes in output.
245
246     The following options determine which downloader is picked:
247     external_downloader: Executable of the external downloader to call.
248                        None or unset for standard (built-in) downloader.
249     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
250
251     The following parameters are not used by YoutubeDL itself, they are used by
252     the FileDownloader:
253     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
254     noresizebuffer, retries, continuedl, noprogress, consoletitle,
255     xattr_set_filesize.
256
257     The following options are used by the post processors:
258     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
259                        otherwise prefer avconv.
260     exec_cmd:          Arbitrary command to run after downloading
261     """
262
263     params = None
264     _ies = []
265     _pps = []
266     _download_retcode = None
267     _num_downloads = None
268     _screen_file = None
269
270     def __init__(self, params=None, auto_init=True):
271         """Create a FileDownloader object with the given options."""
272         if params is None:
273             params = {}
274         self._ies = []
275         self._ies_instances = {}
276         self._pps = []
277         self._progress_hooks = []
278         self._download_retcode = 0
279         self._num_downloads = 0
280         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
281         self._err_file = sys.stderr
282         self.params = params
283         self.cache = Cache(self)
284
285         if params.get('bidi_workaround', False):
286             try:
287                 import pty
288                 master, slave = pty.openpty()
289                 width = compat_get_terminal_size().columns
290                 if width is None:
291                     width_args = []
292                 else:
293                     width_args = ['-w', str(width)]
294                 sp_kwargs = dict(
295                     stdin=subprocess.PIPE,
296                     stdout=slave,
297                     stderr=self._err_file)
298                 try:
299                     self._output_process = subprocess.Popen(
300                         ['bidiv'] + width_args, **sp_kwargs
301                     )
302                 except OSError:
303                     self._output_process = subprocess.Popen(
304                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
305                 self._output_channel = os.fdopen(master, 'rb')
306             except OSError as ose:
307                 if ose.errno == 2:
308                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
309                 else:
310                     raise
311
312         if (sys.version_info >= (3,) and sys.platform != 'win32' and
313                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
314                 not params.get('restrictfilenames', False)):
315             # On Python 3, the Unicode filesystem API will throw errors (#1474)
316             self.report_warning(
317                 'Assuming --restrict-filenames since file system encoding '
318                 'cannot encode all characters. '
319                 'Set the LC_ALL environment variable to fix this.')
320             self.params['restrictfilenames'] = True
321
322         if '%(stitle)s' in self.params.get('outtmpl', ''):
323             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
324
325         self._setup_opener()
326
327         if auto_init:
328             self.print_debug_header()
329             self.add_default_info_extractors()
330
331         for pp_def_raw in self.params.get('postprocessors', []):
332             pp_class = get_postprocessor(pp_def_raw['key'])
333             pp_def = dict(pp_def_raw)
334             del pp_def['key']
335             pp = pp_class(self, **compat_kwargs(pp_def))
336             self.add_post_processor(pp)
337
338         for ph in self.params.get('progress_hooks', []):
339             self.add_progress_hook(ph)
340
341     def warn_if_short_id(self, argv):
342         # short YouTube ID starting with dash?
343         idxs = [
344             i for i, a in enumerate(argv)
345             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
346         if idxs:
347             correct_argv = (
348                 ['youtube-dl'] +
349                 [a for i, a in enumerate(argv) if i not in idxs] +
350                 ['--'] + [argv[i] for i in idxs]
351             )
352             self.report_warning(
353                 'Long argument string detected. '
354                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
355                 args_to_str(correct_argv))
356
357     def add_info_extractor(self, ie):
358         """Add an InfoExtractor object to the end of the list."""
359         self._ies.append(ie)
360         self._ies_instances[ie.ie_key()] = ie
361         ie.set_downloader(self)
362
363     def get_info_extractor(self, ie_key):
364         """
365         Get an instance of an IE with name ie_key, it will try to get one from
366         the _ies list, if there's no instance it will create a new one and add
367         it to the extractor list.
368         """
369         ie = self._ies_instances.get(ie_key)
370         if ie is None:
371             ie = get_info_extractor(ie_key)()
372             self.add_info_extractor(ie)
373         return ie
374
375     def add_default_info_extractors(self):
376         """
377         Add the InfoExtractors returned by gen_extractors to the end of the list
378         """
379         for ie in gen_extractors():
380             self.add_info_extractor(ie)
381
382     def add_post_processor(self, pp):
383         """Add a PostProcessor object to the end of the chain."""
384         self._pps.append(pp)
385         pp.set_downloader(self)
386
387     def add_progress_hook(self, ph):
388         """Add the progress hook (currently only for the file downloader)"""
389         self._progress_hooks.append(ph)
390
391     def _bidi_workaround(self, message):
392         if not hasattr(self, '_output_channel'):
393             return message
394
395         assert hasattr(self, '_output_process')
396         assert isinstance(message, compat_str)
397         line_count = message.count('\n') + 1
398         self._output_process.stdin.write((message + '\n').encode('utf-8'))
399         self._output_process.stdin.flush()
400         res = ''.join(self._output_channel.readline().decode('utf-8')
401                       for _ in range(line_count))
402         return res[:-len('\n')]
403
404     def to_screen(self, message, skip_eol=False):
405         """Print message to stdout if not in quiet mode."""
406         return self.to_stdout(message, skip_eol, check_quiet=True)
407
408     def _write_string(self, s, out=None):
409         write_string(s, out=out, encoding=self.params.get('encoding'))
410
411     def to_stdout(self, message, skip_eol=False, check_quiet=False):
412         """Print message to stdout if not in quiet mode."""
413         if self.params.get('logger'):
414             self.params['logger'].debug(message)
415         elif not check_quiet or not self.params.get('quiet', False):
416             message = self._bidi_workaround(message)
417             terminator = ['\n', ''][skip_eol]
418             output = message + terminator
419
420             self._write_string(output, self._screen_file)
421
422     def to_stderr(self, message):
423         """Print message to stderr."""
424         assert isinstance(message, compat_str)
425         if self.params.get('logger'):
426             self.params['logger'].error(message)
427         else:
428             message = self._bidi_workaround(message)
429             output = message + '\n'
430             self._write_string(output, self._err_file)
431
432     def to_console_title(self, message):
433         if not self.params.get('consoletitle', False):
434             return
435         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
436             # c_wchar_p() might not be necessary if `message` is
437             # already of type unicode()
438             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
439         elif 'TERM' in os.environ:
440             self._write_string('\033]0;%s\007' % message, self._screen_file)
441
442     def save_console_title(self):
443         if not self.params.get('consoletitle', False):
444             return
445         if 'TERM' in os.environ:
446             # Save the title on stack
447             self._write_string('\033[22;0t', self._screen_file)
448
449     def restore_console_title(self):
450         if not self.params.get('consoletitle', False):
451             return
452         if 'TERM' in os.environ:
453             # Restore the title from stack
454             self._write_string('\033[23;0t', self._screen_file)
455
456     def __enter__(self):
457         self.save_console_title()
458         return self
459
460     def __exit__(self, *args):
461         self.restore_console_title()
462
463         if self.params.get('cookiefile') is not None:
464             self.cookiejar.save()
465
466     def trouble(self, message=None, tb=None):
467         """Determine action to take when a download problem appears.
468
469         Depending on if the downloader has been configured to ignore
470         download errors or not, this method may throw an exception or
471         not when errors are found, after printing the message.
472
473         tb, if given, is additional traceback information.
474         """
475         if message is not None:
476             self.to_stderr(message)
477         if self.params.get('verbose'):
478             if tb is None:
479                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
480                     tb = ''
481                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
482                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
483                     tb += compat_str(traceback.format_exc())
484                 else:
485                     tb_data = traceback.format_list(traceback.extract_stack())
486                     tb = ''.join(tb_data)
487             self.to_stderr(tb)
488         if not self.params.get('ignoreerrors', False):
489             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
490                 exc_info = sys.exc_info()[1].exc_info
491             else:
492                 exc_info = sys.exc_info()
493             raise DownloadError(message, exc_info)
494         self._download_retcode = 1
495
496     def report_warning(self, message):
497         '''
498         Print the message to stderr, it will be prefixed with 'WARNING:'
499         If stderr is a tty file the 'WARNING:' will be colored
500         '''
501         if self.params.get('logger') is not None:
502             self.params['logger'].warning(message)
503         else:
504             if self.params.get('no_warnings'):
505                 return
506             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
507                 _msg_header = '\033[0;33mWARNING:\033[0m'
508             else:
509                 _msg_header = 'WARNING:'
510             warning_message = '%s %s' % (_msg_header, message)
511             self.to_stderr(warning_message)
512
513     def report_error(self, message, tb=None):
514         '''
515         Do the same as trouble, but prefixes the message with 'ERROR:', colored
516         in red if stderr is a tty file.
517         '''
518         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
519             _msg_header = '\033[0;31mERROR:\033[0m'
520         else:
521             _msg_header = 'ERROR:'
522         error_message = '%s %s' % (_msg_header, message)
523         self.trouble(error_message, tb)
524
525     def report_file_already_downloaded(self, file_name):
526         """Report file has already been fully downloaded."""
527         try:
528             self.to_screen('[download] %s has already been downloaded' % file_name)
529         except UnicodeEncodeError:
530             self.to_screen('[download] The file has already been downloaded')
531
532     def prepare_filename(self, info_dict):
533         """Generate the output filename."""
534         try:
535             template_dict = dict(info_dict)
536
537             template_dict['epoch'] = int(time.time())
538             autonumber_size = self.params.get('autonumber_size')
539             if autonumber_size is None:
540                 autonumber_size = 5
541             autonumber_templ = '%0' + str(autonumber_size) + 'd'
542             template_dict['autonumber'] = autonumber_templ % self._num_downloads
543             if template_dict.get('playlist_index') is not None:
544                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
545             if template_dict.get('resolution') is None:
546                 if template_dict.get('width') and template_dict.get('height'):
547                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
548                 elif template_dict.get('height'):
549                     template_dict['resolution'] = '%sp' % template_dict['height']
550                 elif template_dict.get('width'):
551                     template_dict['resolution'] = '?x%d' % template_dict['width']
552
553             sanitize = lambda k, v: sanitize_filename(
554                 compat_str(v),
555                 restricted=self.params.get('restrictfilenames'),
556                 is_id=(k == 'id'))
557             template_dict = dict((k, sanitize(k, v))
558                                  for k, v in template_dict.items()
559                                  if v is not None)
560             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
561
562             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
563             tmpl = compat_expanduser(outtmpl)
564             filename = tmpl % template_dict
565             # Temporary fix for #4787
566             # 'Treat' all problem characters by passing filename through preferredencoding
567             # to workaround encoding issues with subprocess on python2 @ Windows
568             if sys.version_info < (3, 0) and sys.platform == 'win32':
569                 filename = encodeFilename(filename, True).decode(preferredencoding())
570             return filename
571         except ValueError as err:
572             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
573             return None
574
575     def _match_entry(self, info_dict, incomplete):
576         """ Returns None iff the file should be downloaded """
577
578         video_title = info_dict.get('title', info_dict.get('id', 'video'))
579         if 'title' in info_dict:
580             # This can happen when we're just evaluating the playlist
581             title = info_dict['title']
582             matchtitle = self.params.get('matchtitle', False)
583             if matchtitle:
584                 if not re.search(matchtitle, title, re.IGNORECASE):
585                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
586             rejecttitle = self.params.get('rejecttitle', False)
587             if rejecttitle:
588                 if re.search(rejecttitle, title, re.IGNORECASE):
589                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
590         date = info_dict.get('upload_date', None)
591         if date is not None:
592             dateRange = self.params.get('daterange', DateRange())
593             if date not in dateRange:
594                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
595         view_count = info_dict.get('view_count', None)
596         if view_count is not None:
597             min_views = self.params.get('min_views')
598             if min_views is not None and view_count < min_views:
599                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
600             max_views = self.params.get('max_views')
601             if max_views is not None and view_count > max_views:
602                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
603         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
604             return 'Skipping "%s" because it is age restricted' % video_title
605         if self.in_download_archive(info_dict):
606             return '%s has already been recorded in archive' % video_title
607
608         if not incomplete:
609             match_filter = self.params.get('match_filter')
610             if match_filter is not None:
611                 ret = match_filter(info_dict)
612                 if ret is not None:
613                     return ret
614
615         return None
616
617     @staticmethod
618     def add_extra_info(info_dict, extra_info):
619         '''Set the keys from extra_info in info dict if they are missing'''
620         for key, value in extra_info.items():
621             info_dict.setdefault(key, value)
622
623     def extract_info(self, url, download=True, ie_key=None, extra_info={},
624                      process=True):
625         '''
626         Returns a list with a dictionary for each video we find.
627         If 'download', also downloads the videos.
628         extra_info is a dict containing the extra values to add to each result
629          '''
630
631         if ie_key:
632             ies = [self.get_info_extractor(ie_key)]
633         else:
634             ies = self._ies
635
636         for ie in ies:
637             if not ie.suitable(url):
638                 continue
639
640             if not ie.working():
641                 self.report_warning('The program functionality for this site has been marked as broken, '
642                                     'and will probably not work.')
643
644             try:
645                 ie_result = ie.extract(url)
646                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
647                     break
648                 if isinstance(ie_result, list):
649                     # Backwards compatibility: old IE result format
650                     ie_result = {
651                         '_type': 'compat_list',
652                         'entries': ie_result,
653                     }
654                 self.add_default_extra_info(ie_result, ie, url)
655                 if process:
656                     return self.process_ie_result(ie_result, download, extra_info)
657                 else:
658                     return ie_result
659             except ExtractorError as de:  # An error we somewhat expected
660                 self.report_error(compat_str(de), de.format_traceback())
661                 break
662             except MaxDownloadsReached:
663                 raise
664             except Exception as e:
665                 if self.params.get('ignoreerrors', False):
666                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
667                     break
668                 else:
669                     raise
670         else:
671             self.report_error('no suitable InfoExtractor for URL %s' % url)
672
673     def add_default_extra_info(self, ie_result, ie, url):
674         self.add_extra_info(ie_result, {
675             'extractor': ie.IE_NAME,
676             'webpage_url': url,
677             'webpage_url_basename': url_basename(url),
678             'extractor_key': ie.ie_key(),
679         })
680
681     def process_ie_result(self, ie_result, download=True, extra_info={}):
682         """
683         Take the result of the ie(may be modified) and resolve all unresolved
684         references (URLs, playlist items).
685
686         It will also download the videos if 'download'.
687         Returns the resolved ie_result.
688         """
689
690         result_type = ie_result.get('_type', 'video')
691
692         if result_type in ('url', 'url_transparent'):
693             extract_flat = self.params.get('extract_flat', False)
694             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
695                     extract_flat is True):
696                 if self.params.get('forcejson', False):
697                     self.to_stdout(json.dumps(ie_result))
698                 return ie_result
699
700         if result_type == 'video':
701             self.add_extra_info(ie_result, extra_info)
702             return self.process_video_result(ie_result, download=download)
703         elif result_type == 'url':
704             # We have to add extra_info to the results because it may be
705             # contained in a playlist
706             return self.extract_info(ie_result['url'],
707                                      download,
708                                      ie_key=ie_result.get('ie_key'),
709                                      extra_info=extra_info)
710         elif result_type == 'url_transparent':
711             # Use the information from the embedding page
712             info = self.extract_info(
713                 ie_result['url'], ie_key=ie_result.get('ie_key'),
714                 extra_info=extra_info, download=False, process=False)
715
716             force_properties = dict(
717                 (k, v) for k, v in ie_result.items() if v is not None)
718             for f in ('_type', 'url'):
719                 if f in force_properties:
720                     del force_properties[f]
721             new_result = info.copy()
722             new_result.update(force_properties)
723
724             assert new_result.get('_type') != 'url_transparent'
725
726             return self.process_ie_result(
727                 new_result, download=download, extra_info=extra_info)
728         elif result_type == 'playlist' or result_type == 'multi_video':
729             # We process each entry in the playlist
730             playlist = ie_result.get('title', None) or ie_result.get('id', None)
731             self.to_screen('[download] Downloading playlist: %s' % playlist)
732
733             playlist_results = []
734
735             playliststart = self.params.get('playliststart', 1) - 1
736             playlistend = self.params.get('playlistend', None)
737             # For backwards compatibility, interpret -1 as whole list
738             if playlistend == -1:
739                 playlistend = None
740
741             playlistitems_str = self.params.get('playlist_items', None)
742             playlistitems = None
743             if playlistitems_str is not None:
744                 def iter_playlistitems(format):
745                     for string_segment in format.split(','):
746                         if '-' in string_segment:
747                             start, end = string_segment.split('-')
748                             for item in range(int(start), int(end) + 1):
749                                 yield int(item)
750                         else:
751                             yield int(string_segment)
752                 playlistitems = iter_playlistitems(playlistitems_str)
753
754             ie_entries = ie_result['entries']
755             if isinstance(ie_entries, list):
756                 n_all_entries = len(ie_entries)
757                 if playlistitems:
758                     entries = [ie_entries[i - 1] for i in playlistitems]
759                 else:
760                     entries = ie_entries[playliststart:playlistend]
761                 n_entries = len(entries)
762                 self.to_screen(
763                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
764                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
765             elif isinstance(ie_entries, PagedList):
766                 if playlistitems:
767                     entries = []
768                     for item in playlistitems:
769                         entries.extend(ie_entries.getslice(
770                             item - 1, item
771                         ))
772                 else:
773                     entries = ie_entries.getslice(
774                         playliststart, playlistend)
775                 n_entries = len(entries)
776                 self.to_screen(
777                     "[%s] playlist %s: Downloading %d videos" %
778                     (ie_result['extractor'], playlist, n_entries))
779             else:  # iterable
780                 if playlistitems:
781                     entry_list = list(ie_entries)
782                     entries = [entry_list[i - 1] for i in playlistitems]
783                 else:
784                     entries = list(itertools.islice(
785                         ie_entries, playliststart, playlistend))
786                 n_entries = len(entries)
787                 self.to_screen(
788                     "[%s] playlist %s: Downloading %d videos" %
789                     (ie_result['extractor'], playlist, n_entries))
790
791             if self.params.get('playlistreverse', False):
792                 entries = entries[::-1]
793
794             for i, entry in enumerate(entries, 1):
795                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
796                 extra = {
797                     'n_entries': n_entries,
798                     'playlist': playlist,
799                     'playlist_id': ie_result.get('id'),
800                     'playlist_title': ie_result.get('title'),
801                     'playlist_index': i + playliststart,
802                     'extractor': ie_result['extractor'],
803                     'webpage_url': ie_result['webpage_url'],
804                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
805                     'extractor_key': ie_result['extractor_key'],
806                 }
807
808                 reason = self._match_entry(entry, incomplete=True)
809                 if reason is not None:
810                     self.to_screen('[download] ' + reason)
811                     continue
812
813                 entry_result = self.process_ie_result(entry,
814                                                       download=download,
815                                                       extra_info=extra)
816                 playlist_results.append(entry_result)
817             ie_result['entries'] = playlist_results
818             return ie_result
819         elif result_type == 'compat_list':
820             self.report_warning(
821                 'Extractor %s returned a compat_list result. '
822                 'It needs to be updated.' % ie_result.get('extractor'))
823
824             def _fixup(r):
825                 self.add_extra_info(
826                     r,
827                     {
828                         'extractor': ie_result['extractor'],
829                         'webpage_url': ie_result['webpage_url'],
830                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
831                         'extractor_key': ie_result['extractor_key'],
832                     }
833                 )
834                 return r
835             ie_result['entries'] = [
836                 self.process_ie_result(_fixup(r), download, extra_info)
837                 for r in ie_result['entries']
838             ]
839             return ie_result
840         else:
841             raise Exception('Invalid result type: %s' % result_type)
842
843     def _apply_format_filter(self, format_spec, available_formats):
844         " Returns a tuple of the remaining format_spec and filtered formats "
845
846         OPERATORS = {
847             '<': operator.lt,
848             '<=': operator.le,
849             '>': operator.gt,
850             '>=': operator.ge,
851             '=': operator.eq,
852             '!=': operator.ne,
853         }
854         operator_rex = re.compile(r'''(?x)\s*\[
855             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
856             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
857             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
858             \]$
859             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
860         m = operator_rex.search(format_spec)
861         if m:
862             try:
863                 comparison_value = int(m.group('value'))
864             except ValueError:
865                 comparison_value = parse_filesize(m.group('value'))
866                 if comparison_value is None:
867                     comparison_value = parse_filesize(m.group('value') + 'B')
868                 if comparison_value is None:
869                     raise ValueError(
870                         'Invalid value %r in format specification %r' % (
871                             m.group('value'), format_spec))
872             op = OPERATORS[m.group('op')]
873
874         if not m:
875             STR_OPERATORS = {
876                 '=': operator.eq,
877                 '!=': operator.ne,
878             }
879             str_operator_rex = re.compile(r'''(?x)\s*\[
880                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
881                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
882                 \s*(?P<value>[a-zA-Z0-9_-]+)
883                 \s*\]$
884                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
885             m = str_operator_rex.search(format_spec)
886             if m:
887                 comparison_value = m.group('value')
888                 op = STR_OPERATORS[m.group('op')]
889
890         if not m:
891             raise ValueError('Invalid format specification %r' % format_spec)
892
893         def _filter(f):
894             actual_value = f.get(m.group('key'))
895             if actual_value is None:
896                 return m.group('none_inclusive')
897             return op(actual_value, comparison_value)
898         new_formats = [f for f in available_formats if _filter(f)]
899
900         new_format_spec = format_spec[:-len(m.group(0))]
901         if not new_format_spec:
902             new_format_spec = 'best'
903
904         return (new_format_spec, new_formats)
905
906     def select_format(self, format_spec, available_formats):
907         while format_spec.endswith(']'):
908             format_spec, available_formats = self._apply_format_filter(
909                 format_spec, available_formats)
910         if not available_formats:
911             return None
912
913         if format_spec == 'best' or format_spec is None:
914             return available_formats[-1]
915         elif format_spec == 'worst':
916             return available_formats[0]
917         elif format_spec == 'bestaudio':
918             audio_formats = [
919                 f for f in available_formats
920                 if f.get('vcodec') == 'none']
921             if audio_formats:
922                 return audio_formats[-1]
923         elif format_spec == 'worstaudio':
924             audio_formats = [
925                 f for f in available_formats
926                 if f.get('vcodec') == 'none']
927             if audio_formats:
928                 return audio_formats[0]
929         elif format_spec == 'bestvideo':
930             video_formats = [
931                 f for f in available_formats
932                 if f.get('acodec') == 'none']
933             if video_formats:
934                 return video_formats[-1]
935         elif format_spec == 'worstvideo':
936             video_formats = [
937                 f for f in available_formats
938                 if f.get('acodec') == 'none']
939             if video_formats:
940                 return video_formats[0]
941         else:
942             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
943             if format_spec in extensions:
944                 filter_f = lambda f: f['ext'] == format_spec
945             else:
946                 filter_f = lambda f: f['format_id'] == format_spec
947             matches = list(filter(filter_f, available_formats))
948             if matches:
949                 return matches[-1]
950         return None
951
952     def _calc_headers(self, info_dict):
953         res = std_headers.copy()
954
955         add_headers = info_dict.get('http_headers')
956         if add_headers:
957             res.update(add_headers)
958
959         cookies = self._calc_cookies(info_dict)
960         if cookies:
961             res['Cookie'] = cookies
962
963         return res
964
965     def _calc_cookies(self, info_dict):
966         pr = compat_urllib_request.Request(info_dict['url'])
967         self.cookiejar.add_cookie_header(pr)
968         return pr.get_header('Cookie')
969
    def process_video_result(self, info_dict, download=True):
        """
        Normalize a single resolved 'video' result and apply format selection.

        Fills in defaults (playlist fields, thumbnails, display_id,
        upload_date), resolves requested subtitles, validates/augments the
        'formats' list, then selects formats per the 'format' option.  When
        'download' is true, each selected format is passed to process_info().
        Returns the (mutated) info_dict, or None when a listing option
        (listsubtitles/listformats/list_thumbnails) was handled instead.
        Raises ExtractorError on missing mandatory fields or when no format
        matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a lone 'thumbnail' into a 'thumbnails' list.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort ascending so the last entry is the preferred thumbnail;
            # None sort keys group first.
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Expose the best thumbnail under the legacy single-value key.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # --list-subs short-circuits: print and bail out without downloading.
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: keep formats up to and including the limit id.
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # Comma-separated groups each yield (at most) one download.
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # Merged container defaults to the video's ext
                            # unless --merge-output-format overrides it.
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a combined format dict: video fields
                            # from the first part, audio fields from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1149
1150     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1151         """Select the requested subtitles and their format"""
1152         available_subs = {}
1153         if normal_subtitles and self.params.get('writesubtitles'):
1154             available_subs.update(normal_subtitles)
1155         if automatic_captions and self.params.get('writeautomaticsub'):
1156             for lang, cap_info in automatic_captions.items():
1157                 if lang not in available_subs:
1158                     available_subs[lang] = cap_info
1159
1160         if (not self.params.get('writesubtitles') and not
1161                 self.params.get('writeautomaticsub') or not
1162                 available_subs):
1163             return None
1164
1165         if self.params.get('allsubtitles', False):
1166             requested_langs = available_subs.keys()
1167         else:
1168             if self.params.get('subtitleslangs', False):
1169                 requested_langs = self.params.get('subtitleslangs')
1170             elif 'en' in available_subs:
1171                 requested_langs = ['en']
1172             else:
1173                 requested_langs = [list(available_subs.keys())[0]]
1174
1175         formats_query = self.params.get('subtitlesformat', 'best')
1176         formats_preference = formats_query.split('/') if formats_query else []
1177         subs = {}
1178         for lang in requested_langs:
1179             formats = available_subs.get(lang)
1180             if formats is None:
1181                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1182                 continue
1183             for ext in formats_preference:
1184                 if ext == 'best':
1185                     f = formats[-1]
1186                     break
1187                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1188                 if matches:
1189                     f = matches[-1]
1190                     break
1191             else:
1192                 f = formats[-1]
1193                 self.report_warning(
1194                     'No subtitle format found matching "%s" for language %s, '
1195                     'using %s' % (formats_query, lang, f['ext']))
1196             subs[lang] = f
1197         return subs
1198
1199     def process_info(self, info_dict):
1200         """Process a single resolved IE result."""
1201
1202         assert info_dict.get('_type', 'video') == 'video'
1203
1204         max_downloads = self.params.get('max_downloads')
1205         if max_downloads is not None:
1206             if self._num_downloads >= int(max_downloads):
1207                 raise MaxDownloadsReached()
1208
1209         info_dict['fulltitle'] = info_dict['title']
1210         if len(info_dict['title']) > 200:
1211             info_dict['title'] = info_dict['title'][:197] + '...'
1212
1213         # Keep for backwards compatibility
1214         info_dict['stitle'] = info_dict['title']
1215
1216         if 'format' not in info_dict:
1217             info_dict['format'] = info_dict['ext']
1218
1219         reason = self._match_entry(info_dict, incomplete=False)
1220         if reason is not None:
1221             self.to_screen('[download] ' + reason)
1222             return
1223
1224         self._num_downloads += 1
1225
1226         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1227
1228         # Forced printings
1229         if self.params.get('forcetitle', False):
1230             self.to_stdout(info_dict['fulltitle'])
1231         if self.params.get('forceid', False):
1232             self.to_stdout(info_dict['id'])
1233         if self.params.get('forceurl', False):
1234             if info_dict.get('requested_formats') is not None:
1235                 for f in info_dict['requested_formats']:
1236                     self.to_stdout(f['url'] + f.get('play_path', ''))
1237             else:
1238                 # For RTMP URLs, also include the playpath
1239                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1240         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1241             self.to_stdout(info_dict['thumbnail'])
1242         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1243             self.to_stdout(info_dict['description'])
1244         if self.params.get('forcefilename', False) and filename is not None:
1245             self.to_stdout(filename)
1246         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1247             self.to_stdout(formatSeconds(info_dict['duration']))
1248         if self.params.get('forceformat', False):
1249             self.to_stdout(info_dict['format'])
1250         if self.params.get('forcejson', False):
1251             self.to_stdout(json.dumps(info_dict))
1252
1253         # Do nothing else if in simulate mode
1254         if self.params.get('simulate', False):
1255             return
1256
1257         if filename is None:
1258             return
1259
1260         try:
1261             dn = os.path.dirname(encodeFilename(filename))
1262             if dn and not os.path.exists(dn):
1263                 os.makedirs(dn)
1264         except (OSError, IOError) as err:
1265             self.report_error('unable to create directory ' + compat_str(err))
1266             return
1267
1268         if self.params.get('writedescription', False):
1269             descfn = filename + '.description'
1270             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1271                 self.to_screen('[info] Video description is already present')
1272             elif info_dict.get('description') is None:
1273                 self.report_warning('There\'s no description to write.')
1274             else:
1275                 try:
1276                     self.to_screen('[info] Writing video description to: ' + descfn)
1277                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1278                         descfile.write(info_dict['description'])
1279                 except (OSError, IOError):
1280                     self.report_error('Cannot write description file ' + descfn)
1281                     return
1282
1283         if self.params.get('writeannotations', False):
1284             annofn = filename + '.annotations.xml'
1285             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1286                 self.to_screen('[info] Video annotations are already present')
1287             else:
1288                 try:
1289                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1290                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1291                         annofile.write(info_dict['annotations'])
1292                 except (KeyError, TypeError):
1293                     self.report_warning('There are no annotations to write.')
1294                 except (OSError, IOError):
1295                     self.report_error('Cannot write annotations file: ' + annofn)
1296                     return
1297
1298         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1299                                        self.params.get('writeautomaticsub')])
1300
1301         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1302             # subtitles download errors are already managed as troubles in relevant IE
1303             # that way it will silently go on when used with unsupporting IE
1304             subtitles = info_dict['requested_subtitles']
1305             ie = self.get_info_extractor(info_dict['extractor_key'])
1306             for sub_lang, sub_info in subtitles.items():
1307                 sub_format = sub_info['ext']
1308                 if sub_info.get('data') is not None:
1309                     sub_data = sub_info['data']
1310                 else:
1311                     try:
1312                         sub_data = ie._download_webpage(
1313                             sub_info['url'], info_dict['id'], note=False)
1314                     except ExtractorError as err:
1315                         self.report_warning('Unable to download subtitle for "%s": %s' %
1316                                             (sub_lang, compat_str(err.cause)))
1317                         continue
1318                 try:
1319                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1320                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1321                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1322                     else:
1323                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1324                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1325                             subfile.write(sub_data)
1326                 except (OSError, IOError):
1327                     self.report_error('Cannot write subtitles file ' + sub_filename)
1328                     return
1329
1330         if self.params.get('writeinfojson', False):
1331             infofn = os.path.splitext(filename)[0] + '.info.json'
1332             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1333                 self.to_screen('[info] Video description metadata is already present')
1334             else:
1335                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1336                 try:
1337                     write_json_file(info_dict, infofn)
1338                 except (OSError, IOError):
1339                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1340                     return
1341
1342         self._write_thumbnails(info_dict, filename)
1343
1344         if not self.params.get('skip_download', False):
1345             try:
1346                 def dl(name, info):
1347                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1348                     for ph in self._progress_hooks:
1349                         fd.add_progress_hook(ph)
1350                     if self.params.get('verbose'):
1351                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1352                     return fd.download(name, info)
1353
1354                 if info_dict.get('requested_formats') is not None:
1355                     downloaded = []
1356                     success = True
1357                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1358                     if not merger.available:
1359                         postprocessors = []
1360                         self.report_warning('You have requested multiple '
1361                                             'formats but ffmpeg or avconv are not installed.'
1362                                             ' The formats won\'t be merged')
1363                     else:
1364                         postprocessors = [merger]
1365                     for f in info_dict['requested_formats']:
1366                         new_info = dict(info_dict)
1367                         new_info.update(f)
1368                         fname = self.prepare_filename(new_info)
1369                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1370                         downloaded.append(fname)
1371                         partial_success = dl(fname, new_info)
1372                         success = success and partial_success
1373                     info_dict['__postprocessors'] = postprocessors
1374                     info_dict['__files_to_merge'] = downloaded
1375                 else:
1376                     # Just a single file
1377                     success = dl(filename, info_dict)
1378             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1379                 self.report_error('unable to download video data: %s' % str(err))
1380                 return
1381             except (OSError, IOError) as err:
1382                 raise UnavailableVideoError(err)
1383             except (ContentTooShortError, ) as err:
1384                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1385                 return
1386
1387             if success:
1388                 # Fixup content
1389                 fixup_policy = self.params.get('fixup')
1390                 if fixup_policy is None:
1391                     fixup_policy = 'detect_or_warn'
1392
1393                 stretched_ratio = info_dict.get('stretched_ratio')
1394                 if stretched_ratio is not None and stretched_ratio != 1:
1395                     if fixup_policy == 'warn':
1396                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1397                             info_dict['id'], stretched_ratio))
1398                     elif fixup_policy == 'detect_or_warn':
1399                         stretched_pp = FFmpegFixupStretchedPP(self)
1400                         if stretched_pp.available:
1401                             info_dict.setdefault('__postprocessors', [])
1402                             info_dict['__postprocessors'].append(stretched_pp)
1403                         else:
1404                             self.report_warning(
1405                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1406                                     info_dict['id'], stretched_ratio))
1407                     else:
1408                         assert fixup_policy in ('ignore', 'never')
1409
1410                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1411                     if fixup_policy == 'warn':
1412                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1413                             info_dict['id']))
1414                     elif fixup_policy == 'detect_or_warn':
1415                         fixup_pp = FFmpegFixupM4aPP(self)
1416                         if fixup_pp.available:
1417                             info_dict.setdefault('__postprocessors', [])
1418                             info_dict['__postprocessors'].append(fixup_pp)
1419                         else:
1420                             self.report_warning(
1421                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1422                                     info_dict['id']))
1423                     else:
1424                         assert fixup_policy in ('ignore', 'never')
1425
1426                 try:
1427                     self.post_process(filename, info_dict)
1428                 except (PostProcessingError) as err:
1429                     self.report_error('postprocessing: %s' % str(err))
1430                     return
1431                 self.record_download_archive(info_dict)
1432
1433     def download(self, url_list):
1434         """Download a given list of URLs."""
1435         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1436         if (len(url_list) > 1 and
1437                 '%' not in outtmpl and
1438                 self.params.get('max_downloads') != 1):
1439             raise SameFileError(outtmpl)
1440
1441         for url in url_list:
1442             try:
1443                 # It also downloads the videos
1444                 res = self.extract_info(url)
1445             except UnavailableVideoError:
1446                 self.report_error('unable to download video')
1447             except MaxDownloadsReached:
1448                 self.to_screen('[info] Maximum number of downloaded files reached.')
1449                 raise
1450             else:
1451                 if self.params.get('dump_single_json', False):
1452                     self.to_stdout(json.dumps(res))
1453
1454         return self._download_retcode
1455
1456     def download_with_info_file(self, info_filename):
1457         with contextlib.closing(fileinput.FileInput(
1458                 [info_filename], mode='r',
1459                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1460             # FileInput doesn't have a read method, we can't call json.load
1461             info = json.loads('\n'.join(f))
1462         try:
1463             self.process_ie_result(info, download=True)
1464         except DownloadError:
1465             webpage_url = info.get('webpage_url')
1466             if webpage_url is not None:
1467                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1468                 return self.download([webpage_url])
1469             else:
1470                 raise
1471         return self._download_retcode
1472
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # Work on a copy so postprocessors cannot mutate the caller's dict.
        info = dict(ie_info)
        info['filepath'] = filename
        pps_chain = []
        # Format-attached postprocessors (merge/fixup, under '__postprocessors')
        # run before the user-configured ones registered on this instance.
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            # NOTE(review): keep_video is reset for every postprocessor, so the
            # delete-original decision is taken per-pp, not once for the chain.
            keep_video = None
            # Each pp may replace info['filepath']; remember the incoming name
            # so we delete the right file afterwards.
            old_filename = info['filepath']
            try:
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                # Report but keep going; later postprocessors still run.
                self.report_error(e.msg)
            if keep_video is False and not self.params.get('keepvideo', False):
                try:
                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded video file')
1500
1501     def _make_archive_id(self, info_dict):
1502         # Future-proof against any change in case
1503         # and backwards compatibility with prior versions
1504         extractor = info_dict.get('extractor_key')
1505         if extractor is None:
1506             if 'id' in info_dict:
1507                 extractor = info_dict.get('ie_key')  # key in a playlist
1508         if extractor is None:
1509             return None  # Incomplete video information
1510         return extractor.lower() + ' ' + info_dict['id']
1511
1512     def in_download_archive(self, info_dict):
1513         fn = self.params.get('download_archive')
1514         if fn is None:
1515             return False
1516
1517         vid_id = self._make_archive_id(info_dict)
1518         if vid_id is None:
1519             return False  # Incomplete video information
1520
1521         try:
1522             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1523                 for line in archive_file:
1524                     if line.strip() == vid_id:
1525                         return True
1526         except IOError as ioe:
1527             if ioe.errno != errno.ENOENT:
1528                 raise
1529         return False
1530
1531     def record_download_archive(self, info_dict):
1532         fn = self.params.get('download_archive')
1533         if fn is None:
1534             return
1535         vid_id = self._make_archive_id(info_dict)
1536         assert vid_id
1537         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1538             archive_file.write(vid_id + '\n')
1539
1540     @staticmethod
1541     def format_resolution(format, default='unknown'):
1542         if format.get('vcodec') == 'none':
1543             return 'audio only'
1544         if format.get('resolution') is not None:
1545             return format['resolution']
1546         if format.get('height') is not None:
1547             if format.get('width') is not None:
1548                 res = '%sx%s' % (format['width'], format['height'])
1549             else:
1550                 res = '%sp' % format['height']
1551         elif format.get('width') is not None:
1552             res = '?x%d' % format['width']
1553         else:
1554             res = default
1555         return res
1556
1557     def _format_note(self, fdict):
1558         res = ''
1559         if fdict.get('ext') in ['f4f', 'f4m']:
1560             res += '(unsupported) '
1561         if fdict.get('format_note') is not None:
1562             res += fdict['format_note'] + ' '
1563         if fdict.get('tbr') is not None:
1564             res += '%4dk ' % fdict['tbr']
1565         if fdict.get('container') is not None:
1566             if res:
1567                 res += ', '
1568             res += '%s container' % fdict['container']
1569         if (fdict.get('vcodec') is not None and
1570                 fdict.get('vcodec') != 'none'):
1571             if res:
1572                 res += ', '
1573             res += fdict['vcodec']
1574             if fdict.get('vbr') is not None:
1575                 res += '@'
1576         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1577             res += 'video@'
1578         if fdict.get('vbr') is not None:
1579             res += '%4dk' % fdict['vbr']
1580         if fdict.get('fps') is not None:
1581             res += ', %sfps' % fdict['fps']
1582         if fdict.get('acodec') is not None:
1583             if res:
1584                 res += ', '
1585             if fdict['acodec'] == 'none':
1586                 res += 'video only'
1587             else:
1588                 res += '%-5s' % fdict['acodec']
1589         elif fdict.get('abr') is not None:
1590             if res:
1591                 res += ', '
1592             res += 'audio'
1593         if fdict.get('abr') is not None:
1594             res += '@%3dk' % fdict['abr']
1595         if fdict.get('asr') is not None:
1596             res += ' (%5dHz)' % fdict['asr']
1597         if fdict.get('filesize') is not None:
1598             if res:
1599                 res += ', '
1600             res += format_bytes(fdict['filesize'])
1601         elif fdict.get('filesize_approx') is not None:
1602             if res:
1603                 res += ', '
1604             res += '~' + format_bytes(fdict['filesize_approx'])
1605         return res
1606
1607     def list_formats(self, info_dict):
1608         formats = info_dict.get('formats', [info_dict])
1609         table = [
1610             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1611             for f in formats
1612             if f.get('preference') is None or f['preference'] >= -1000]
1613         if len(formats) > 1:
1614             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1615
1616         header_line = ['format code', 'extension', 'resolution', 'note']
1617         self.to_screen(
1618             '[info] Available formats for %s:\n%s' %
1619             (info_dict['id'], render_table(header_line, table)))
1620
1621     def list_thumbnails(self, info_dict):
1622         thumbnails = info_dict.get('thumbnails')
1623         if not thumbnails:
1624             tn_url = info_dict.get('thumbnail')
1625             if tn_url:
1626                 thumbnails = [{'id': '0', 'url': tn_url}]
1627             else:
1628                 self.to_screen(
1629                     '[info] No thumbnails present for %s' % info_dict['id'])
1630                 return
1631
1632         self.to_screen(
1633             '[info] Thumbnails for %s:' % info_dict['id'])
1634         self.to_screen(render_table(
1635             ['ID', 'width', 'height', 'URL'],
1636             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1637
1638     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1639         if not subtitles:
1640             self.to_screen('%s has no %s' % (video_id, name))
1641             return
1642         self.to_screen(
1643             'Available %s for %s:' % (name, video_id))
1644         self.to_screen(render_table(
1645             ['Language', 'formats'],
1646             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1647                 for lang, formats in subtitles.items()]))
1648
1649     def urlopen(self, req):
1650         """ Start an HTTP download """
1651
1652         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1653         # always respected by websites, some tend to give out URLs with non percent-encoded
1654         # non-ASCII characters (see telemb.py, ard.py [#3412])
1655         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1656         # To work around aforementioned issue we will replace request's original URL with
1657         # percent-encoded one
1658         req_is_string = isinstance(req, compat_basestring)
1659         url = req if req_is_string else req.get_full_url()
1660         url_escaped = escape_url(url)
1661
1662         # Substitute URL if any change after escaping
1663         if url != url_escaped:
1664             if req_is_string:
1665                 req = url_escaped
1666             else:
1667                 req = compat_urllib_request.Request(
1668                     url_escaped, data=req.data, headers=req.headers,
1669                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1670
1671         return self._opener.open(req, timeout=self._socket_timeout)
1672
1673     def print_debug_header(self):
1674         if not self.params.get('verbose'):
1675             return
1676
1677         if type('') is not compat_str:
1678             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1679             self.report_warning(
1680                 'Your Python is broken! Update to a newer and supported version')
1681
1682         stdout_encoding = getattr(
1683             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1684         encoding_str = (
1685             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1686                 locale.getpreferredencoding(),
1687                 sys.getfilesystemencoding(),
1688                 stdout_encoding,
1689                 self.get_encoding()))
1690         write_string(encoding_str, encoding=None)
1691
1692         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1693         try:
1694             sp = subprocess.Popen(
1695                 ['git', 'rev-parse', '--short', 'HEAD'],
1696                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1697                 cwd=os.path.dirname(os.path.abspath(__file__)))
1698             out, err = sp.communicate()
1699             out = out.decode().strip()
1700             if re.match('[0-9a-f]+', out):
1701                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1702         except:
1703             try:
1704                 sys.exc_clear()
1705             except:
1706                 pass
1707         self._write_string('[debug] Python version %s - %s\n' % (
1708             platform.python_version(), platform_name()))
1709
1710         exe_versions = FFmpegPostProcessor.get_versions(self)
1711         exe_versions['rtmpdump'] = rtmpdump_version()
1712         exe_str = ', '.join(
1713             '%s %s' % (exe, v)
1714             for exe, v in sorted(exe_versions.items())
1715             if v
1716         )
1717         if not exe_str:
1718             exe_str = 'none'
1719         self._write_string('[debug] exe versions: %s\n' % exe_str)
1720
1721         proxy_map = {}
1722         for handler in self._opener.handlers:
1723             if hasattr(handler, 'proxies'):
1724                 proxy_map.update(handler.proxies)
1725         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1726
1727         if self.params.get('call_home', False):
1728             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1729             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1730             latest_version = self.urlopen(
1731                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1732             if version_tuple(latest_version) > version_tuple(__version__):
1733                 self.report_warning(
1734                     'You are using an outdated version (newest version: %s)! '
1735                     'See https://yt-dl.org/update if you need help updating.' %
1736                     latest_version)
1737
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS) used by urlopen()."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a 10-minute socket timeout when none was requested.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory only; cookies are discarded when the process exits.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load when the file exists and is readable; a missing file
            # is simply created when cookies are saved later.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables proxying entirely,
            # overriding any environment settings.
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No --proxy given: fall back to the environment's proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1777
1778     def encode(self, s):
1779         if isinstance(s, bytes):
1780             return s  # Already encoded
1781
1782         try:
1783             return s.encode(self.get_encoding())
1784         except UnicodeEncodeError as err:
1785             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1786             raise
1787
1788     def get_encoding(self):
1789         encoding = self.params.get('encoding')
1790         if encoding is None:
1791             encoding = preferredencoding()
1792         return encoding
1793
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail image(s) next to the video file, honoring the
        writethumbnail / write_all_thumbnails / nooverwrites options."""
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Take only the last entry — presumably the best one;
                # TODO confirm the ordering contract of 'thumbnails'.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames and log messages only when writing
            # more than one thumbnail.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnail failures are non-fatal; warn and continue.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))