[letv] Add --cn-verification-proxy (Closes #5077)
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     std_headers,
65     subtitles_filename,
66     takewhile_inclusive,
67     UnavailableVideoError,
68     url_basename,
69     version_tuple,
70     write_json_file,
71     write_string,
72     YoutubeDLHandler,
73     prepend_extension,
74     args_to_str,
75     age_restricted,
76 )
77 from .cache import Cache
78 from .extractor import get_info_extractor, gen_extractors
79 from .downloader import get_suitable_downloader
80 from .downloader.rtmp import rtmpdump_version
81 from .postprocessor import (
82     FFmpegFixupM4aPP,
83     FFmpegFixupStretchedPP,
84     FFmpegMergerPP,
85     FFmpegPostProcessor,
86     get_postprocessor,
87 )
88 from .version import __version__
89
90
91 class YoutubeDL(object):
92     """YoutubeDL class.
93
94     YoutubeDL objects are the ones responsible for downloading the
95     actual video file and writing it to disk if the user has requested
96     it, among some other tasks. In most cases there should be one per
97     program. As, given a video URL, the downloader doesn't know how to
98     extract all the needed information, task that InfoExtractors do, it
99     has to pass the URL to one of them.
100
101     For this, YoutubeDL objects have a method that allows
102     InfoExtractors to be registered in a given order. When it is passed
103     a URL, the YoutubeDL object hands it to the first InfoExtractor it
104     finds that reports being able to handle it. The InfoExtractor extracts
105     all the information about the video or videos the URL refers to, and
106     YoutubeDL process the extracted information, possibly using a File
107     Downloader to download the video.
108
109     YoutubeDL objects accept a lot of parameters. In order not to saturate
110     the object constructor with arguments, it receives a dictionary of
111     options instead. These options are available through the params
112     attribute for the InfoExtractors to use. The YoutubeDL also
113     registers itself as the downloader in charge for the InfoExtractors
114     that are added to it, so this is a "mutual registration".
115
116     Available options:
117
118     username:          Username for authentication purposes.
119     password:          Password for authentication purposes.
120     videopassword:     Password for accessing a video.
121     usenetrc:          Use netrc for authentication instead.
122     verbose:           Print additional info to stdout.
123     quiet:             Do not print messages to stdout.
124     no_warnings:       Do not print out anything for warnings.
125     forceurl:          Force printing final URL.
126     forcetitle:        Force printing title.
127     forceid:           Force printing ID.
128     forcethumbnail:    Force printing thumbnail URL.
129     forcedescription:  Force printing description.
130     forcefilename:     Force printing final filename.
131     forceduration:     Force printing duration.
132     forcejson:         Force printing info_dict as JSON.
133     dump_single_json:  Force printing the info_dict of the whole playlist
134                        (or video) as a single JSON line.
135     simulate:          Do not download the video files.
136     format:            Video format code. See options.py for more information.
137     format_limit:      Highest quality format to try.
138     outtmpl:           Template for output names.
139     restrictfilenames: Do not allow "&" and spaces in file names
140     ignoreerrors:      Do not stop on download errors.
141     nooverwrites:      Prevent overwriting files.
142     playliststart:     Playlist item to start at.
143     playlistend:       Playlist item to end at.
144     playlist_items:    Specific indices of playlist to download.
145     playlistreverse:   Download playlist items in reverse order.
146     matchtitle:        Download only matching titles.
147     rejecttitle:       Reject downloads for matching titles.
148     logger:            Log messages to a logging.Logger instance.
149     logtostderr:       Log messages to stderr instead of stdout.
150     writedescription:  Write the video description to a .description file
151     writeinfojson:     Write the video description to a .info.json file
152     writeannotations:  Write the video annotations to a .annotations.xml file
153     writethumbnail:    Write the thumbnail image to a file
154     write_all_thumbnails:  Write all thumbnail formats to files
155     writesubtitles:    Write the video subtitles to a file
156     writeautomaticsub: Write the automatic subtitles to a file
157     allsubtitles:      Downloads all the subtitles of the video
158                        (requires writesubtitles or writeautomaticsub)
159     listsubtitles:     Lists all available subtitles for the video
160     subtitlesformat:   The format code for subtitles
161     subtitleslangs:    List of languages of the subtitles to download
162     keepvideo:         Keep the video file after post-processing
163     daterange:         A DateRange object, download only if the upload_date is in the range.
164     skip_download:     Skip the actual download of the video file
165     cachedir:          Location of the cache files in the filesystem.
166                        False to disable filesystem cache.
167     noplaylist:        Download single video instead of a playlist if in doubt.
168     age_limit:         An integer representing the user's age in years.
169                        Unsuitable videos for the given age are skipped.
170     min_views:         An integer representing the minimum view count the video
171                        must have in order to not be skipped.
172                        Videos without view count information are always
173                        downloaded. None for no limit.
174     max_views:         An integer representing the maximum view count.
175                        Videos that are more popular than that are not
176                        downloaded.
177                        Videos without view count information are always
178                        downloaded. None for no limit.
179     download_archive:  File name of a file where all downloads are recorded.
180                        Videos already present in the file are not downloaded
181                        again.
182     cookiefile:        File name where cookies should be read from and dumped to.
183     nocheckcertificate:Do not verify SSL certificates
184     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
185                        At the moment, this is only supported by YouTube.
186     proxy:             URL of the proxy server to use
187     cn_verification_proxy:  URL of the proxy to use for IP address verification
188                        on Chinese sites. (Experimental)
189     socket_timeout:    Time to wait for unresponsive hosts, in seconds
190     bidi_workaround:   Work around buggy terminals without bidirectional text
191                        support, using fribidi
192     debug_printtraffic:Print out sent and received HTTP traffic
193     include_ads:       Download ads as well
194     default_search:    Prepend this string if an input url is not valid.
195                        'auto' for elaborate guessing
196     encoding:          Use this encoding instead of the system-specified.
197     extract_flat:      Do not resolve URLs, return the immediate result.
198                        Pass in 'in_playlist' to only show this behavior for
199                        playlist items.
200     postprocessors:    A list of dictionaries, each with an entry
201                        * key:  The name of the postprocessor. See
202                                youtube_dl/postprocessor/__init__.py for a list.
203                        as well as any further keyword arguments for the
204                        postprocessor.
205     progress_hooks:    A list of functions that get called on download
206                        progress, with a dictionary with the entries
207                        * status: One of "downloading", "error", or "finished".
208                                  Check this first and ignore unknown values.
209
210                        If status is one of "downloading", or "finished", the
211                        following properties may also be present:
212                        * filename: The final filename (always present)
213                        * tmpfilename: The filename we're currently writing to
214                        * downloaded_bytes: Bytes on disk
215                        * total_bytes: Size of the whole file, None if unknown
216                        * total_bytes_estimate: Guess of the eventual file size,
217                                                None if unavailable.
218                        * elapsed: The number of seconds since download started.
219                        * eta: The estimated time in seconds, None if unknown
220                        * speed: The download speed in bytes/second, None if
221                                 unknown
222                        * fragment_index: The counter of the currently
223                                          downloaded video fragment.
224                        * fragment_count: The number of fragments (= individual
225                                          files that will be merged)
226
227                        Progress hooks are guaranteed to be called at least once
228                        (with status "finished") if the download is successful.
229     merge_output_format: Extension to use when merging formats.
230     fixup:             Automatically correct known faults of the file.
231                        One of:
232                        - "never": do nothing
233                        - "warn": only emit a warning
234                        - "detect_or_warn": check whether we can do anything
235                                            about it, warn otherwise (default)
236     source_address:    (Experimental) Client-side IP address to bind to.
237     call_home:         Boolean, true iff we are allowed to contact the
238                        youtube-dl servers for debugging.
239     sleep_interval:    Number of seconds to sleep before each download.
240     listformats:       Print an overview of available video formats and exit.
241     list_thumbnails:   Print a table of all thumbnails and exit.
242     match_filter:      A function that gets called with the info_dict of
243                        every video.
244                        If it returns a message, the video is ignored.
245                        If it returns None, the video is downloaded.
246                        match_filter_func in utils.py is one example for this.
247     no_color:          Do not emit color codes in output.
248
249     The following options determine which downloader is picked:
250     external_downloader: Executable of the external downloader to call.
251                        None or unset for standard (built-in) downloader.
252     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
253
254     The following parameters are not used by YoutubeDL itself, they are used by
255     the downloader (see youtube_dl/downloader/common.py):
256     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
257     noresizebuffer, retries, continuedl, noprogress, consoletitle,
258     xattr_set_filesize, external_downloader_args.
259
260     The following options are used by the post processors:
261     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
262                        otherwise prefer avconv.
263     exec_cmd:          Arbitrary command to run after downloading
264     """
265
266     params = None
267     _ies = []
268     _pps = []
269     _download_retcode = None
270     _num_downloads = None
271     _screen_file = None
272
273     def __init__(self, params=None, auto_init=True):
274         """Create a FileDownloader object with the given options."""
275         if params is None:
276             params = {}
277         self._ies = []
278         self._ies_instances = {}
279         self._pps = []
280         self._progress_hooks = []
281         self._download_retcode = 0
282         self._num_downloads = 0
283         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
284         self._err_file = sys.stderr
285         self.params = params
286         self.cache = Cache(self)
287
288         if params.get('bidi_workaround', False):
289             try:
290                 import pty
291                 master, slave = pty.openpty()
292                 width = compat_get_terminal_size().columns
293                 if width is None:
294                     width_args = []
295                 else:
296                     width_args = ['-w', str(width)]
297                 sp_kwargs = dict(
298                     stdin=subprocess.PIPE,
299                     stdout=slave,
300                     stderr=self._err_file)
301                 try:
302                     self._output_process = subprocess.Popen(
303                         ['bidiv'] + width_args, **sp_kwargs
304                     )
305                 except OSError:
306                     self._output_process = subprocess.Popen(
307                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
308                 self._output_channel = os.fdopen(master, 'rb')
309             except OSError as ose:
310                 if ose.errno == 2:
311                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
312                 else:
313                     raise
314
315         if (sys.version_info >= (3,) and sys.platform != 'win32' and
316                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
317                 not params.get('restrictfilenames', False)):
318             # On Python 3, the Unicode filesystem API will throw errors (#1474)
319             self.report_warning(
320                 'Assuming --restrict-filenames since file system encoding '
321                 'cannot encode all characters. '
322                 'Set the LC_ALL environment variable to fix this.')
323             self.params['restrictfilenames'] = True
324
325         if '%(stitle)s' in self.params.get('outtmpl', ''):
326             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
327
328         self._setup_opener()
329
330         if auto_init:
331             self.print_debug_header()
332             self.add_default_info_extractors()
333
334         for pp_def_raw in self.params.get('postprocessors', []):
335             pp_class = get_postprocessor(pp_def_raw['key'])
336             pp_def = dict(pp_def_raw)
337             del pp_def['key']
338             pp = pp_class(self, **compat_kwargs(pp_def))
339             self.add_post_processor(pp)
340
341         for ph in self.params.get('progress_hooks', []):
342             self.add_progress_hook(ph)
343
344     def warn_if_short_id(self, argv):
345         # short YouTube ID starting with dash?
346         idxs = [
347             i for i, a in enumerate(argv)
348             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
349         if idxs:
350             correct_argv = (
351                 ['youtube-dl'] +
352                 [a for i, a in enumerate(argv) if i not in idxs] +
353                 ['--'] + [argv[i] for i in idxs]
354             )
355             self.report_warning(
356                 'Long argument string detected. '
357                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
358                 args_to_str(correct_argv))
359
360     def add_info_extractor(self, ie):
361         """Add an InfoExtractor object to the end of the list."""
362         self._ies.append(ie)
363         self._ies_instances[ie.ie_key()] = ie
364         ie.set_downloader(self)
365
366     def get_info_extractor(self, ie_key):
367         """
368         Get an instance of an IE with name ie_key, it will try to get one from
369         the _ies list, if there's no instance it will create a new one and add
370         it to the extractor list.
371         """
372         ie = self._ies_instances.get(ie_key)
373         if ie is None:
374             ie = get_info_extractor(ie_key)()
375             self.add_info_extractor(ie)
376         return ie
377
378     def add_default_info_extractors(self):
379         """
380         Add the InfoExtractors returned by gen_extractors to the end of the list
381         """
382         for ie in gen_extractors():
383             self.add_info_extractor(ie)
384
385     def add_post_processor(self, pp):
386         """Add a PostProcessor object to the end of the chain."""
387         self._pps.append(pp)
388         pp.set_downloader(self)
389
390     def add_progress_hook(self, ph):
391         """Add the progress hook (currently only for the file downloader)"""
392         self._progress_hooks.append(ph)
393
394     def _bidi_workaround(self, message):
395         if not hasattr(self, '_output_channel'):
396             return message
397
398         assert hasattr(self, '_output_process')
399         assert isinstance(message, compat_str)
400         line_count = message.count('\n') + 1
401         self._output_process.stdin.write((message + '\n').encode('utf-8'))
402         self._output_process.stdin.flush()
403         res = ''.join(self._output_channel.readline().decode('utf-8')
404                       for _ in range(line_count))
405         return res[:-len('\n')]
406
407     def to_screen(self, message, skip_eol=False):
408         """Print message to stdout if not in quiet mode."""
409         return self.to_stdout(message, skip_eol, check_quiet=True)
410
411     def _write_string(self, s, out=None):
412         write_string(s, out=out, encoding=self.params.get('encoding'))
413
414     def to_stdout(self, message, skip_eol=False, check_quiet=False):
415         """Print message to stdout if not in quiet mode."""
416         if self.params.get('logger'):
417             self.params['logger'].debug(message)
418         elif not check_quiet or not self.params.get('quiet', False):
419             message = self._bidi_workaround(message)
420             terminator = ['\n', ''][skip_eol]
421             output = message + terminator
422
423             self._write_string(output, self._screen_file)
424
425     def to_stderr(self, message):
426         """Print message to stderr."""
427         assert isinstance(message, compat_str)
428         if self.params.get('logger'):
429             self.params['logger'].error(message)
430         else:
431             message = self._bidi_workaround(message)
432             output = message + '\n'
433             self._write_string(output, self._err_file)
434
435     def to_console_title(self, message):
436         if not self.params.get('consoletitle', False):
437             return
438         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
439             # c_wchar_p() might not be necessary if `message` is
440             # already of type unicode()
441             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
442         elif 'TERM' in os.environ:
443             self._write_string('\033]0;%s\007' % message, self._screen_file)
444
445     def save_console_title(self):
446         if not self.params.get('consoletitle', False):
447             return
448         if 'TERM' in os.environ:
449             # Save the title on stack
450             self._write_string('\033[22;0t', self._screen_file)
451
452     def restore_console_title(self):
453         if not self.params.get('consoletitle', False):
454             return
455         if 'TERM' in os.environ:
456             # Restore the title from stack
457             self._write_string('\033[23;0t', self._screen_file)
458
459     def __enter__(self):
460         self.save_console_title()
461         return self
462
463     def __exit__(self, *args):
464         self.restore_console_title()
465
466         if self.params.get('cookiefile') is not None:
467             self.cookiejar.save()
468
469     def trouble(self, message=None, tb=None):
470         """Determine action to take when a download problem appears.
471
472         Depending on if the downloader has been configured to ignore
473         download errors or not, this method may throw an exception or
474         not when errors are found, after printing the message.
475
476         tb, if given, is additional traceback information.
477         """
478         if message is not None:
479             self.to_stderr(message)
480         if self.params.get('verbose'):
481             if tb is None:
482                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
483                     tb = ''
484                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
485                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
486                     tb += compat_str(traceback.format_exc())
487                 else:
488                     tb_data = traceback.format_list(traceback.extract_stack())
489                     tb = ''.join(tb_data)
490             self.to_stderr(tb)
491         if not self.params.get('ignoreerrors', False):
492             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
493                 exc_info = sys.exc_info()[1].exc_info
494             else:
495                 exc_info = sys.exc_info()
496             raise DownloadError(message, exc_info)
497         self._download_retcode = 1
498
499     def report_warning(self, message):
500         '''
501         Print the message to stderr, it will be prefixed with 'WARNING:'
502         If stderr is a tty file the 'WARNING:' will be colored
503         '''
504         if self.params.get('logger') is not None:
505             self.params['logger'].warning(message)
506         else:
507             if self.params.get('no_warnings'):
508                 return
509             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
510                 _msg_header = '\033[0;33mWARNING:\033[0m'
511             else:
512                 _msg_header = 'WARNING:'
513             warning_message = '%s %s' % (_msg_header, message)
514             self.to_stderr(warning_message)
515
516     def report_error(self, message, tb=None):
517         '''
518         Do the same as trouble, but prefixes the message with 'ERROR:', colored
519         in red if stderr is a tty file.
520         '''
521         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
522             _msg_header = '\033[0;31mERROR:\033[0m'
523         else:
524             _msg_header = 'ERROR:'
525         error_message = '%s %s' % (_msg_header, message)
526         self.trouble(error_message, tb)
527
528     def report_file_already_downloaded(self, file_name):
529         """Report file has already been fully downloaded."""
530         try:
531             self.to_screen('[download] %s has already been downloaded' % file_name)
532         except UnicodeEncodeError:
533             self.to_screen('[download] The file has already been downloaded')
534
535     def prepare_filename(self, info_dict):
536         """Generate the output filename."""
537         try:
538             template_dict = dict(info_dict)
539
540             template_dict['epoch'] = int(time.time())
541             autonumber_size = self.params.get('autonumber_size')
542             if autonumber_size is None:
543                 autonumber_size = 5
544             autonumber_templ = '%0' + str(autonumber_size) + 'd'
545             template_dict['autonumber'] = autonumber_templ % self._num_downloads
546             if template_dict.get('playlist_index') is not None:
547                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
548             if template_dict.get('resolution') is None:
549                 if template_dict.get('width') and template_dict.get('height'):
550                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
551                 elif template_dict.get('height'):
552                     template_dict['resolution'] = '%sp' % template_dict['height']
553                 elif template_dict.get('width'):
554                     template_dict['resolution'] = '?x%d' % template_dict['width']
555
556             sanitize = lambda k, v: sanitize_filename(
557                 compat_str(v),
558                 restricted=self.params.get('restrictfilenames'),
559                 is_id=(k == 'id'))
560             template_dict = dict((k, sanitize(k, v))
561                                  for k, v in template_dict.items()
562                                  if v is not None)
563             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
564
565             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
566             tmpl = compat_expanduser(outtmpl)
567             filename = tmpl % template_dict
568             # Temporary fix for #4787
569             # 'Treat' all problem characters by passing filename through preferredencoding
570             # to workaround encoding issues with subprocess on python2 @ Windows
571             if sys.version_info < (3, 0) and sys.platform == 'win32':
572                 filename = encodeFilename(filename, True).decode(preferredencoding())
573             return filename
574         except ValueError as err:
575             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
576             return None
577
578     def _match_entry(self, info_dict, incomplete):
579         """ Returns None iff the file should be downloaded """
580
581         video_title = info_dict.get('title', info_dict.get('id', 'video'))
582         if 'title' in info_dict:
583             # This can happen when we're just evaluating the playlist
584             title = info_dict['title']
585             matchtitle = self.params.get('matchtitle', False)
586             if matchtitle:
587                 if not re.search(matchtitle, title, re.IGNORECASE):
588                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
589             rejecttitle = self.params.get('rejecttitle', False)
590             if rejecttitle:
591                 if re.search(rejecttitle, title, re.IGNORECASE):
592                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
593         date = info_dict.get('upload_date', None)
594         if date is not None:
595             dateRange = self.params.get('daterange', DateRange())
596             if date not in dateRange:
597                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
598         view_count = info_dict.get('view_count', None)
599         if view_count is not None:
600             min_views = self.params.get('min_views')
601             if min_views is not None and view_count < min_views:
602                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
603             max_views = self.params.get('max_views')
604             if max_views is not None and view_count > max_views:
605                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
606         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
607             return 'Skipping "%s" because it is age restricted' % video_title
608         if self.in_download_archive(info_dict):
609             return '%s has already been recorded in archive' % video_title
610
611         if not incomplete:
612             match_filter = self.params.get('match_filter')
613             if match_filter is not None:
614                 ret = match_filter(info_dict)
615                 if ret is not None:
616                     return ret
617
618         return None
619
620     @staticmethod
621     def add_extra_info(info_dict, extra_info):
622         '''Set the keys from extra_info in info dict if they are missing'''
623         for key, value in extra_info.items():
624             info_dict.setdefault(key, value)
625
626     def extract_info(self, url, download=True, ie_key=None, extra_info={},
627                      process=True):
628         '''
629         Returns a list with a dictionary for each video we find.
630         If 'download', also downloads the videos.
631         extra_info is a dict containing the extra values to add to each result
632          '''
633
634         if ie_key:
635             ies = [self.get_info_extractor(ie_key)]
636         else:
637             ies = self._ies
638
639         for ie in ies:
640             if not ie.suitable(url):
641                 continue
642
643             if not ie.working():
644                 self.report_warning('The program functionality for this site has been marked as broken, '
645                                     'and will probably not work.')
646
647             try:
648                 ie_result = ie.extract(url)
649                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
650                     break
651                 if isinstance(ie_result, list):
652                     # Backwards compatibility: old IE result format
653                     ie_result = {
654                         '_type': 'compat_list',
655                         'entries': ie_result,
656                     }
657                 self.add_default_extra_info(ie_result, ie, url)
658                 if process:
659                     return self.process_ie_result(ie_result, download, extra_info)
660                 else:
661                     return ie_result
662             except ExtractorError as de:  # An error we somewhat expected
663                 self.report_error(compat_str(de), de.format_traceback())
664                 break
665             except MaxDownloadsReached:
666                 raise
667             except Exception as e:
668                 if self.params.get('ignoreerrors', False):
669                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
670                     break
671                 else:
672                     raise
673         else:
674             self.report_error('no suitable InfoExtractor for URL %s' % url)
675
676     def add_default_extra_info(self, ie_result, ie, url):
677         self.add_extra_info(ie_result, {
678             'extractor': ie.IE_NAME,
679             'webpage_url': url,
680             'webpage_url_basename': url_basename(url),
681             'extractor_key': ie.ie_key(),
682         })
683
684     def process_ie_result(self, ie_result, download=True, extra_info={}):
685         """
686         Take the result of the ie(may be modified) and resolve all unresolved
687         references (URLs, playlist items).
688
689         It will also download the videos if 'download'.
690         Returns the resolved ie_result.
691         """
692
693         result_type = ie_result.get('_type', 'video')
694
695         if result_type in ('url', 'url_transparent'):
696             extract_flat = self.params.get('extract_flat', False)
697             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
698                     extract_flat is True):
699                 if self.params.get('forcejson', False):
700                     self.to_stdout(json.dumps(ie_result))
701                 return ie_result
702
703         if result_type == 'video':
704             self.add_extra_info(ie_result, extra_info)
705             return self.process_video_result(ie_result, download=download)
706         elif result_type == 'url':
707             # We have to add extra_info to the results because it may be
708             # contained in a playlist
709             return self.extract_info(ie_result['url'],
710                                      download,
711                                      ie_key=ie_result.get('ie_key'),
712                                      extra_info=extra_info)
713         elif result_type == 'url_transparent':
714             # Use the information from the embedding page
715             info = self.extract_info(
716                 ie_result['url'], ie_key=ie_result.get('ie_key'),
717                 extra_info=extra_info, download=False, process=False)
718
719             force_properties = dict(
720                 (k, v) for k, v in ie_result.items() if v is not None)
721             for f in ('_type', 'url'):
722                 if f in force_properties:
723                     del force_properties[f]
724             new_result = info.copy()
725             new_result.update(force_properties)
726
727             assert new_result.get('_type') != 'url_transparent'
728
729             return self.process_ie_result(
730                 new_result, download=download, extra_info=extra_info)
731         elif result_type == 'playlist' or result_type == 'multi_video':
732             # We process each entry in the playlist
733             playlist = ie_result.get('title', None) or ie_result.get('id', None)
734             self.to_screen('[download] Downloading playlist: %s' % playlist)
735
736             playlist_results = []
737
738             playliststart = self.params.get('playliststart', 1) - 1
739             playlistend = self.params.get('playlistend', None)
740             # For backwards compatibility, interpret -1 as whole list
741             if playlistend == -1:
742                 playlistend = None
743
744             playlistitems_str = self.params.get('playlist_items', None)
745             playlistitems = None
746             if playlistitems_str is not None:
747                 def iter_playlistitems(format):
748                     for string_segment in format.split(','):
749                         if '-' in string_segment:
750                             start, end = string_segment.split('-')
751                             for item in range(int(start), int(end) + 1):
752                                 yield int(item)
753                         else:
754                             yield int(string_segment)
755                 playlistitems = iter_playlistitems(playlistitems_str)
756
757             ie_entries = ie_result['entries']
758             if isinstance(ie_entries, list):
759                 n_all_entries = len(ie_entries)
760                 if playlistitems:
761                     entries = [ie_entries[i - 1] for i in playlistitems]
762                 else:
763                     entries = ie_entries[playliststart:playlistend]
764                 n_entries = len(entries)
765                 self.to_screen(
766                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
767                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
768             elif isinstance(ie_entries, PagedList):
769                 if playlistitems:
770                     entries = []
771                     for item in playlistitems:
772                         entries.extend(ie_entries.getslice(
773                             item - 1, item
774                         ))
775                 else:
776                     entries = ie_entries.getslice(
777                         playliststart, playlistend)
778                 n_entries = len(entries)
779                 self.to_screen(
780                     "[%s] playlist %s: Downloading %d videos" %
781                     (ie_result['extractor'], playlist, n_entries))
782             else:  # iterable
783                 if playlistitems:
784                     entry_list = list(ie_entries)
785                     entries = [entry_list[i - 1] for i in playlistitems]
786                 else:
787                     entries = list(itertools.islice(
788                         ie_entries, playliststart, playlistend))
789                 n_entries = len(entries)
790                 self.to_screen(
791                     "[%s] playlist %s: Downloading %d videos" %
792                     (ie_result['extractor'], playlist, n_entries))
793
794             if self.params.get('playlistreverse', False):
795                 entries = entries[::-1]
796
797             for i, entry in enumerate(entries, 1):
798                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
799                 extra = {
800                     'n_entries': n_entries,
801                     'playlist': playlist,
802                     'playlist_id': ie_result.get('id'),
803                     'playlist_title': ie_result.get('title'),
804                     'playlist_index': i + playliststart,
805                     'extractor': ie_result['extractor'],
806                     'webpage_url': ie_result['webpage_url'],
807                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
808                     'extractor_key': ie_result['extractor_key'],
809                 }
810
811                 reason = self._match_entry(entry, incomplete=True)
812                 if reason is not None:
813                     self.to_screen('[download] ' + reason)
814                     continue
815
816                 entry_result = self.process_ie_result(entry,
817                                                       download=download,
818                                                       extra_info=extra)
819                 playlist_results.append(entry_result)
820             ie_result['entries'] = playlist_results
821             return ie_result
822         elif result_type == 'compat_list':
823             self.report_warning(
824                 'Extractor %s returned a compat_list result. '
825                 'It needs to be updated.' % ie_result.get('extractor'))
826
827             def _fixup(r):
828                 self.add_extra_info(
829                     r,
830                     {
831                         'extractor': ie_result['extractor'],
832                         'webpage_url': ie_result['webpage_url'],
833                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
834                         'extractor_key': ie_result['extractor_key'],
835                     }
836                 )
837                 return r
838             ie_result['entries'] = [
839                 self.process_ie_result(_fixup(r), download, extra_info)
840                 for r in ie_result['entries']
841             ]
842             return ie_result
843         else:
844             raise Exception('Invalid result type: %s' % result_type)
845
846     def _apply_format_filter(self, format_spec, available_formats):
847         " Returns a tuple of the remaining format_spec and filtered formats "
848
849         OPERATORS = {
850             '<': operator.lt,
851             '<=': operator.le,
852             '>': operator.gt,
853             '>=': operator.ge,
854             '=': operator.eq,
855             '!=': operator.ne,
856         }
857         operator_rex = re.compile(r'''(?x)\s*\[
858             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
859             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
860             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
861             \]$
862             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
863         m = operator_rex.search(format_spec)
864         if m:
865             try:
866                 comparison_value = int(m.group('value'))
867             except ValueError:
868                 comparison_value = parse_filesize(m.group('value'))
869                 if comparison_value is None:
870                     comparison_value = parse_filesize(m.group('value') + 'B')
871                 if comparison_value is None:
872                     raise ValueError(
873                         'Invalid value %r in format specification %r' % (
874                             m.group('value'), format_spec))
875             op = OPERATORS[m.group('op')]
876
877         if not m:
878             STR_OPERATORS = {
879                 '=': operator.eq,
880                 '!=': operator.ne,
881             }
882             str_operator_rex = re.compile(r'''(?x)\s*\[
883                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
884                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
885                 \s*(?P<value>[a-zA-Z0-9_-]+)
886                 \s*\]$
887                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
888             m = str_operator_rex.search(format_spec)
889             if m:
890                 comparison_value = m.group('value')
891                 op = STR_OPERATORS[m.group('op')]
892
893         if not m:
894             raise ValueError('Invalid format specification %r' % format_spec)
895
896         def _filter(f):
897             actual_value = f.get(m.group('key'))
898             if actual_value is None:
899                 return m.group('none_inclusive')
900             return op(actual_value, comparison_value)
901         new_formats = [f for f in available_formats if _filter(f)]
902
903         new_format_spec = format_spec[:-len(m.group(0))]
904         if not new_format_spec:
905             new_format_spec = 'best'
906
907         return (new_format_spec, new_formats)
908
909     def select_format(self, format_spec, available_formats):
910         while format_spec.endswith(']'):
911             format_spec, available_formats = self._apply_format_filter(
912                 format_spec, available_formats)
913         if not available_formats:
914             return None
915
916         if format_spec == 'best' or format_spec is None:
917             return available_formats[-1]
918         elif format_spec == 'worst':
919             return available_formats[0]
920         elif format_spec == 'bestaudio':
921             audio_formats = [
922                 f for f in available_formats
923                 if f.get('vcodec') == 'none']
924             if audio_formats:
925                 return audio_formats[-1]
926         elif format_spec == 'worstaudio':
927             audio_formats = [
928                 f for f in available_formats
929                 if f.get('vcodec') == 'none']
930             if audio_formats:
931                 return audio_formats[0]
932         elif format_spec == 'bestvideo':
933             video_formats = [
934                 f for f in available_formats
935                 if f.get('acodec') == 'none']
936             if video_formats:
937                 return video_formats[-1]
938         elif format_spec == 'worstvideo':
939             video_formats = [
940                 f for f in available_formats
941                 if f.get('acodec') == 'none']
942             if video_formats:
943                 return video_formats[0]
944         else:
945             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
946             if format_spec in extensions:
947                 filter_f = lambda f: f['ext'] == format_spec
948             else:
949                 filter_f = lambda f: f['format_id'] == format_spec
950             matches = list(filter(filter_f, available_formats))
951             if matches:
952                 return matches[-1]
953         return None
954
955     def _calc_headers(self, info_dict):
956         res = std_headers.copy()
957
958         add_headers = info_dict.get('http_headers')
959         if add_headers:
960             res.update(add_headers)
961
962         cookies = self._calc_cookies(info_dict)
963         if cookies:
964             res['Cookie'] = cookies
965
966         return res
967
968     def _calc_cookies(self, info_dict):
969         pr = compat_urllib_request.Request(info_dict['url'])
970         self.cookiejar.add_cookie_header(pr)
971         return pr.get_header('Cookie')
972
973     def process_video_result(self, info_dict, download=True):
974         assert info_dict.get('_type', 'video') == 'video'
975
976         if 'id' not in info_dict:
977             raise ExtractorError('Missing "id" field in extractor result')
978         if 'title' not in info_dict:
979             raise ExtractorError('Missing "title" field in extractor result')
980
981         if 'playlist' not in info_dict:
982             # It isn't part of a playlist
983             info_dict['playlist'] = None
984             info_dict['playlist_index'] = None
985
986         thumbnails = info_dict.get('thumbnails')
987         if thumbnails is None:
988             thumbnail = info_dict.get('thumbnail')
989             if thumbnail:
990                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
991         if thumbnails:
992             thumbnails.sort(key=lambda t: (
993                 t.get('preference'), t.get('width'), t.get('height'),
994                 t.get('id'), t.get('url')))
995             for i, t in enumerate(thumbnails):
996                 if 'width' in t and 'height' in t:
997                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
998                 if t.get('id') is None:
999                     t['id'] = '%d' % i
1000
1001         if thumbnails and 'thumbnail' not in info_dict:
1002             info_dict['thumbnail'] = thumbnails[-1]['url']
1003
1004         if 'display_id' not in info_dict and 'id' in info_dict:
1005             info_dict['display_id'] = info_dict['id']
1006
1007         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1008             # Working around negative timestamps in Windows
1009             # (see http://bugs.python.org/issue1646728)
1010             if info_dict['timestamp'] < 0 and os.name == 'nt':
1011                 info_dict['timestamp'] = 0
1012             upload_date = datetime.datetime.utcfromtimestamp(
1013                 info_dict['timestamp'])
1014             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1015
1016         if self.params.get('listsubtitles', False):
1017             if 'automatic_captions' in info_dict:
1018                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1019             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1020             return
1021         info_dict['requested_subtitles'] = self.process_subtitles(
1022             info_dict['id'], info_dict.get('subtitles'),
1023             info_dict.get('automatic_captions'))
1024
1025         # This extractors handle format selection themselves
1026         if info_dict['extractor'] in ['Youku']:
1027             if download:
1028                 self.process_info(info_dict)
1029             return info_dict
1030
1031         # We now pick which formats have to be downloaded
1032         if info_dict.get('formats') is None:
1033             # There's only one format available
1034             formats = [info_dict]
1035         else:
1036             formats = info_dict['formats']
1037
1038         if not formats:
1039             raise ExtractorError('No video formats found!')
1040
1041         # We check that all the formats have the format and format_id fields
1042         for i, format in enumerate(formats):
1043             if 'url' not in format:
1044                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1045
1046             if format.get('format_id') is None:
1047                 format['format_id'] = compat_str(i)
1048             if format.get('format') is None:
1049                 format['format'] = '{id} - {res}{note}'.format(
1050                     id=format['format_id'],
1051                     res=self.format_resolution(format),
1052                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1053                 )
1054             # Automatically determine file extension if missing
1055             if 'ext' not in format:
1056                 format['ext'] = determine_ext(format['url']).lower()
1057             # Add HTTP headers, so that external programs can use them from the
1058             # json output
1059             full_format_info = info_dict.copy()
1060             full_format_info.update(format)
1061             format['http_headers'] = self._calc_headers(full_format_info)
1062
1063         format_limit = self.params.get('format_limit', None)
1064         if format_limit:
1065             formats = list(takewhile_inclusive(
1066                 lambda f: f['format_id'] != format_limit, formats
1067             ))
1068
1069         # TODO Central sorting goes here
1070
1071         if formats[0] is not info_dict:
1072             # only set the 'formats' fields if the original info_dict list them
1073             # otherwise we end up with a circular reference, the first (and unique)
1074             # element in the 'formats' field in info_dict is info_dict itself,
1075             # wich can't be exported to json
1076             info_dict['formats'] = formats
1077         if self.params.get('listformats'):
1078             self.list_formats(info_dict)
1079             return
1080         if self.params.get('list_thumbnails'):
1081             self.list_thumbnails(info_dict)
1082             return
1083
1084         req_format = self.params.get('format')
1085         if req_format is None:
1086             req_format = 'best'
1087         formats_to_download = []
1088         # The -1 is for supporting YoutubeIE
1089         if req_format in ('-1', 'all'):
1090             formats_to_download = formats
1091         else:
1092             for rfstr in req_format.split(','):
1093                 # We can accept formats requested in the format: 34/5/best, we pick
1094                 # the first that is available, starting from left
1095                 req_formats = rfstr.split('/')
1096                 for rf in req_formats:
1097                     if re.match(r'.+?\+.+?', rf) is not None:
1098                         # Two formats have been requested like '137+139'
1099                         format_1, format_2 = rf.split('+')
1100                         formats_info = (self.select_format(format_1, formats),
1101                                         self.select_format(format_2, formats))
1102                         if all(formats_info):
1103                             # The first format must contain the video and the
1104                             # second the audio
1105                             if formats_info[0].get('vcodec') == 'none':
1106                                 self.report_error('The first format must '
1107                                                   'contain the video, try using '
1108                                                   '"-f %s+%s"' % (format_2, format_1))
1109                                 return
1110                             output_ext = (
1111                                 formats_info[0]['ext']
1112                                 if self.params.get('merge_output_format') is None
1113                                 else self.params['merge_output_format'])
1114                             selected_format = {
1115                                 'requested_formats': formats_info,
1116                                 'format': '%s+%s' % (formats_info[0].get('format'),
1117                                                      formats_info[1].get('format')),
1118                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1119                                                         formats_info[1].get('format_id')),
1120                                 'width': formats_info[0].get('width'),
1121                                 'height': formats_info[0].get('height'),
1122                                 'resolution': formats_info[0].get('resolution'),
1123                                 'fps': formats_info[0].get('fps'),
1124                                 'vcodec': formats_info[0].get('vcodec'),
1125                                 'vbr': formats_info[0].get('vbr'),
1126                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1127                                 'acodec': formats_info[1].get('acodec'),
1128                                 'abr': formats_info[1].get('abr'),
1129                                 'ext': output_ext,
1130                             }
1131                         else:
1132                             selected_format = None
1133                     else:
1134                         selected_format = self.select_format(rf, formats)
1135                     if selected_format is not None:
1136                         formats_to_download.append(selected_format)
1137                         break
1138         if not formats_to_download:
1139             raise ExtractorError('requested format not available',
1140                                  expected=True)
1141
1142         if download:
1143             if len(formats_to_download) > 1:
1144                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1145             for format in formats_to_download:
1146                 new_info = dict(info_dict)
1147                 new_info.update(format)
1148                 self.process_info(new_info)
1149         # We update the info dict with the best quality format (backwards compatibility)
1150         info_dict.update(formats_to_download[-1])
1151         return info_dict
1152
1153     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1154         """Select the requested subtitles and their format"""
1155         available_subs = {}
1156         if normal_subtitles and self.params.get('writesubtitles'):
1157             available_subs.update(normal_subtitles)
1158         if automatic_captions and self.params.get('writeautomaticsub'):
1159             for lang, cap_info in automatic_captions.items():
1160                 if lang not in available_subs:
1161                     available_subs[lang] = cap_info
1162
1163         if (not self.params.get('writesubtitles') and not
1164                 self.params.get('writeautomaticsub') or not
1165                 available_subs):
1166             return None
1167
1168         if self.params.get('allsubtitles', False):
1169             requested_langs = available_subs.keys()
1170         else:
1171             if self.params.get('subtitleslangs', False):
1172                 requested_langs = self.params.get('subtitleslangs')
1173             elif 'en' in available_subs:
1174                 requested_langs = ['en']
1175             else:
1176                 requested_langs = [list(available_subs.keys())[0]]
1177
1178         formats_query = self.params.get('subtitlesformat', 'best')
1179         formats_preference = formats_query.split('/') if formats_query else []
1180         subs = {}
1181         for lang in requested_langs:
1182             formats = available_subs.get(lang)
1183             if formats is None:
1184                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1185                 continue
1186             for ext in formats_preference:
1187                 if ext == 'best':
1188                     f = formats[-1]
1189                     break
1190                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1191                 if matches:
1192                     f = matches[-1]
1193                     break
1194             else:
1195                 f = formats[-1]
1196                 self.report_warning(
1197                     'No subtitle format found matching "%s" for language %s, '
1198                     'using %s' % (formats_query, lang, f['ext']))
1199             subs[lang] = f
1200         return subs
1201
1202     def process_info(self, info_dict):
1203         """Process a single resolved IE result."""
1204
1205         assert info_dict.get('_type', 'video') == 'video'
1206
1207         max_downloads = self.params.get('max_downloads')
1208         if max_downloads is not None:
1209             if self._num_downloads >= int(max_downloads):
1210                 raise MaxDownloadsReached()
1211
1212         info_dict['fulltitle'] = info_dict['title']
1213         if len(info_dict['title']) > 200:
1214             info_dict['title'] = info_dict['title'][:197] + '...'
1215
1216         # Keep for backwards compatibility
1217         info_dict['stitle'] = info_dict['title']
1218
1219         if 'format' not in info_dict:
1220             info_dict['format'] = info_dict['ext']
1221
1222         reason = self._match_entry(info_dict, incomplete=False)
1223         if reason is not None:
1224             self.to_screen('[download] ' + reason)
1225             return
1226
1227         self._num_downloads += 1
1228
1229         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1230
1231         # Forced printings
1232         if self.params.get('forcetitle', False):
1233             self.to_stdout(info_dict['fulltitle'])
1234         if self.params.get('forceid', False):
1235             self.to_stdout(info_dict['id'])
1236         if self.params.get('forceurl', False):
1237             if info_dict.get('requested_formats') is not None:
1238                 for f in info_dict['requested_formats']:
1239                     self.to_stdout(f['url'] + f.get('play_path', ''))
1240             else:
1241                 # For RTMP URLs, also include the playpath
1242                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1243         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1244             self.to_stdout(info_dict['thumbnail'])
1245         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1246             self.to_stdout(info_dict['description'])
1247         if self.params.get('forcefilename', False) and filename is not None:
1248             self.to_stdout(filename)
1249         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1250             self.to_stdout(formatSeconds(info_dict['duration']))
1251         if self.params.get('forceformat', False):
1252             self.to_stdout(info_dict['format'])
1253         if self.params.get('forcejson', False):
1254             self.to_stdout(json.dumps(info_dict))
1255
1256         # Do nothing else if in simulate mode
1257         if self.params.get('simulate', False):
1258             return
1259
1260         if filename is None:
1261             return
1262
1263         try:
1264             dn = os.path.dirname(encodeFilename(filename))
1265             if dn and not os.path.exists(dn):
1266                 os.makedirs(dn)
1267         except (OSError, IOError) as err:
1268             self.report_error('unable to create directory ' + compat_str(err))
1269             return
1270
1271         if self.params.get('writedescription', False):
1272             descfn = filename + '.description'
1273             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1274                 self.to_screen('[info] Video description is already present')
1275             elif info_dict.get('description') is None:
1276                 self.report_warning('There\'s no description to write.')
1277             else:
1278                 try:
1279                     self.to_screen('[info] Writing video description to: ' + descfn)
1280                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1281                         descfile.write(info_dict['description'])
1282                 except (OSError, IOError):
1283                     self.report_error('Cannot write description file ' + descfn)
1284                     return
1285
1286         if self.params.get('writeannotations', False):
1287             annofn = filename + '.annotations.xml'
1288             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1289                 self.to_screen('[info] Video annotations are already present')
1290             else:
1291                 try:
1292                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1293                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1294                         annofile.write(info_dict['annotations'])
1295                 except (KeyError, TypeError):
1296                     self.report_warning('There are no annotations to write.')
1297                 except (OSError, IOError):
1298                     self.report_error('Cannot write annotations file: ' + annofn)
1299                     return
1300
1301         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1302                                        self.params.get('writeautomaticsub')])
1303
1304         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1305             # subtitles download errors are already managed as troubles in relevant IE
1306             # that way it will silently go on when used with unsupporting IE
1307             subtitles = info_dict['requested_subtitles']
1308             ie = self.get_info_extractor(info_dict['extractor_key'])
1309             for sub_lang, sub_info in subtitles.items():
1310                 sub_format = sub_info['ext']
1311                 if sub_info.get('data') is not None:
1312                     sub_data = sub_info['data']
1313                 else:
1314                     try:
1315                         sub_data = ie._download_webpage(
1316                             sub_info['url'], info_dict['id'], note=False)
1317                     except ExtractorError as err:
1318                         self.report_warning('Unable to download subtitle for "%s": %s' %
1319                                             (sub_lang, compat_str(err.cause)))
1320                         continue
1321                 try:
1322                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1323                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1324                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1325                     else:
1326                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1327                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1328                             subfile.write(sub_data)
1329                 except (OSError, IOError):
1330                     self.report_error('Cannot write subtitles file ' + sub_filename)
1331                     return
1332
1333         if self.params.get('writeinfojson', False):
1334             infofn = os.path.splitext(filename)[0] + '.info.json'
1335             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1336                 self.to_screen('[info] Video description metadata is already present')
1337             else:
1338                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1339                 try:
1340                     write_json_file(info_dict, infofn)
1341                 except (OSError, IOError):
1342                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1343                     return
1344
1345         self._write_thumbnails(info_dict, filename)
1346
1347         if not self.params.get('skip_download', False):
1348             try:
1349                 def dl(name, info):
1350                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1351                     for ph in self._progress_hooks:
1352                         fd.add_progress_hook(ph)
1353                     if self.params.get('verbose'):
1354                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1355                     return fd.download(name, info)
1356
1357                 if info_dict.get('requested_formats') is not None:
1358                     downloaded = []
1359                     success = True
1360                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1361                     if not merger.available:
1362                         postprocessors = []
1363                         self.report_warning('You have requested multiple '
1364                                             'formats but ffmpeg or avconv are not installed.'
1365                                             ' The formats won\'t be merged')
1366                     else:
1367                         postprocessors = [merger]
1368                     for f in info_dict['requested_formats']:
1369                         new_info = dict(info_dict)
1370                         new_info.update(f)
1371                         fname = self.prepare_filename(new_info)
1372                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1373                         downloaded.append(fname)
1374                         partial_success = dl(fname, new_info)
1375                         success = success and partial_success
1376                     info_dict['__postprocessors'] = postprocessors
1377                     info_dict['__files_to_merge'] = downloaded
1378                 else:
1379                     # Just a single file
1380                     success = dl(filename, info_dict)
1381             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1382                 self.report_error('unable to download video data: %s' % str(err))
1383                 return
1384             except (OSError, IOError) as err:
1385                 raise UnavailableVideoError(err)
1386             except (ContentTooShortError, ) as err:
1387                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1388                 return
1389
1390             if success:
1391                 # Fixup content
1392                 fixup_policy = self.params.get('fixup')
1393                 if fixup_policy is None:
1394                     fixup_policy = 'detect_or_warn'
1395
1396                 stretched_ratio = info_dict.get('stretched_ratio')
1397                 if stretched_ratio is not None and stretched_ratio != 1:
1398                     if fixup_policy == 'warn':
1399                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1400                             info_dict['id'], stretched_ratio))
1401                     elif fixup_policy == 'detect_or_warn':
1402                         stretched_pp = FFmpegFixupStretchedPP(self)
1403                         if stretched_pp.available:
1404                             info_dict.setdefault('__postprocessors', [])
1405                             info_dict['__postprocessors'].append(stretched_pp)
1406                         else:
1407                             self.report_warning(
1408                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1409                                     info_dict['id'], stretched_ratio))
1410                     else:
1411                         assert fixup_policy in ('ignore', 'never')
1412
1413                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1414                     if fixup_policy == 'warn':
1415                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1416                             info_dict['id']))
1417                     elif fixup_policy == 'detect_or_warn':
1418                         fixup_pp = FFmpegFixupM4aPP(self)
1419                         if fixup_pp.available:
1420                             info_dict.setdefault('__postprocessors', [])
1421                             info_dict['__postprocessors'].append(fixup_pp)
1422                         else:
1423                             self.report_warning(
1424                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1425                                     info_dict['id']))
1426                     else:
1427                         assert fixup_policy in ('ignore', 'never')
1428
1429                 try:
1430                     self.post_process(filename, info_dict)
1431                 except (PostProcessingError) as err:
1432                     self.report_error('postprocessing: %s' % str(err))
1433                     return
1434                 self.record_download_archive(info_dict)
1435
1436     def download(self, url_list):
1437         """Download a given list of URLs."""
1438         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1439         if (len(url_list) > 1 and
1440                 '%' not in outtmpl and
1441                 self.params.get('max_downloads') != 1):
1442             raise SameFileError(outtmpl)
1443
1444         for url in url_list:
1445             try:
1446                 # It also downloads the videos
1447                 res = self.extract_info(url)
1448             except UnavailableVideoError:
1449                 self.report_error('unable to download video')
1450             except MaxDownloadsReached:
1451                 self.to_screen('[info] Maximum number of downloaded files reached.')
1452                 raise
1453             else:
1454                 if self.params.get('dump_single_json', False):
1455                     self.to_stdout(json.dumps(res))
1456
1457         return self._download_retcode
1458
1459     def download_with_info_file(self, info_filename):
1460         with contextlib.closing(fileinput.FileInput(
1461                 [info_filename], mode='r',
1462                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1463             # FileInput doesn't have a read method, we can't call json.load
1464             info = json.loads('\n'.join(f))
1465         try:
1466             self.process_ie_result(info, download=True)
1467         except DownloadError:
1468             webpage_url = info.get('webpage_url')
1469             if webpage_url is not None:
1470                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1471                 return self.download([webpage_url])
1472             else:
1473                 raise
1474         return self._download_retcode
1475
1476     def post_process(self, filename, ie_info):
1477         """Run all the postprocessors on the given file."""
1478         info = dict(ie_info)
1479         info['filepath'] = filename
1480         pps_chain = []
1481         if ie_info.get('__postprocessors') is not None:
1482             pps_chain.extend(ie_info['__postprocessors'])
1483         pps_chain.extend(self._pps)
1484         for pp in pps_chain:
1485             keep_video = None
1486             old_filename = info['filepath']
1487             try:
1488                 keep_video_wish, info = pp.run(info)
1489                 if keep_video_wish is not None:
1490                     if keep_video_wish:
1491                         keep_video = keep_video_wish
1492                     elif keep_video is None:
1493                         # No clear decision yet, let IE decide
1494                         keep_video = keep_video_wish
1495             except PostProcessingError as e:
1496                 self.report_error(e.msg)
1497             if keep_video is False and not self.params.get('keepvideo', False):
1498                 try:
1499                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1500                     os.remove(encodeFilename(old_filename))
1501                 except (IOError, OSError):
1502                     self.report_warning('Unable to remove downloaded video file')
1503
1504     def _make_archive_id(self, info_dict):
1505         # Future-proof against any change in case
1506         # and backwards compatibility with prior versions
1507         extractor = info_dict.get('extractor_key')
1508         if extractor is None:
1509             if 'id' in info_dict:
1510                 extractor = info_dict.get('ie_key')  # key in a playlist
1511         if extractor is None:
1512             return None  # Incomplete video information
1513         return extractor.lower() + ' ' + info_dict['id']
1514
1515     def in_download_archive(self, info_dict):
1516         fn = self.params.get('download_archive')
1517         if fn is None:
1518             return False
1519
1520         vid_id = self._make_archive_id(info_dict)
1521         if vid_id is None:
1522             return False  # Incomplete video information
1523
1524         try:
1525             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1526                 for line in archive_file:
1527                     if line.strip() == vid_id:
1528                         return True
1529         except IOError as ioe:
1530             if ioe.errno != errno.ENOENT:
1531                 raise
1532         return False
1533
1534     def record_download_archive(self, info_dict):
1535         fn = self.params.get('download_archive')
1536         if fn is None:
1537             return
1538         vid_id = self._make_archive_id(info_dict)
1539         assert vid_id
1540         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1541             archive_file.write(vid_id + '\n')
1542
1543     @staticmethod
1544     def format_resolution(format, default='unknown'):
1545         if format.get('vcodec') == 'none':
1546             return 'audio only'
1547         if format.get('resolution') is not None:
1548             return format['resolution']
1549         if format.get('height') is not None:
1550             if format.get('width') is not None:
1551                 res = '%sx%s' % (format['width'], format['height'])
1552             else:
1553                 res = '%sp' % format['height']
1554         elif format.get('width') is not None:
1555             res = '?x%d' % format['width']
1556         else:
1557             res = default
1558         return res
1559
1560     def _format_note(self, fdict):
1561         res = ''
1562         if fdict.get('ext') in ['f4f', 'f4m']:
1563             res += '(unsupported) '
1564         if fdict.get('format_note') is not None:
1565             res += fdict['format_note'] + ' '
1566         if fdict.get('tbr') is not None:
1567             res += '%4dk ' % fdict['tbr']
1568         if fdict.get('container') is not None:
1569             if res:
1570                 res += ', '
1571             res += '%s container' % fdict['container']
1572         if (fdict.get('vcodec') is not None and
1573                 fdict.get('vcodec') != 'none'):
1574             if res:
1575                 res += ', '
1576             res += fdict['vcodec']
1577             if fdict.get('vbr') is not None:
1578                 res += '@'
1579         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1580             res += 'video@'
1581         if fdict.get('vbr') is not None:
1582             res += '%4dk' % fdict['vbr']
1583         if fdict.get('fps') is not None:
1584             res += ', %sfps' % fdict['fps']
1585         if fdict.get('acodec') is not None:
1586             if res:
1587                 res += ', '
1588             if fdict['acodec'] == 'none':
1589                 res += 'video only'
1590             else:
1591                 res += '%-5s' % fdict['acodec']
1592         elif fdict.get('abr') is not None:
1593             if res:
1594                 res += ', '
1595             res += 'audio'
1596         if fdict.get('abr') is not None:
1597             res += '@%3dk' % fdict['abr']
1598         if fdict.get('asr') is not None:
1599             res += ' (%5dHz)' % fdict['asr']
1600         if fdict.get('filesize') is not None:
1601             if res:
1602                 res += ', '
1603             res += format_bytes(fdict['filesize'])
1604         elif fdict.get('filesize_approx') is not None:
1605             if res:
1606                 res += ', '
1607             res += '~' + format_bytes(fdict['filesize_approx'])
1608         return res
1609
1610     def list_formats(self, info_dict):
1611         formats = info_dict.get('formats', [info_dict])
1612         table = [
1613             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1614             for f in formats
1615             if f.get('preference') is None or f['preference'] >= -1000]
1616         if len(formats) > 1:
1617             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1618
1619         header_line = ['format code', 'extension', 'resolution', 'note']
1620         self.to_screen(
1621             '[info] Available formats for %s:\n%s' %
1622             (info_dict['id'], render_table(header_line, table)))
1623
1624     def list_thumbnails(self, info_dict):
1625         thumbnails = info_dict.get('thumbnails')
1626         if not thumbnails:
1627             tn_url = info_dict.get('thumbnail')
1628             if tn_url:
1629                 thumbnails = [{'id': '0', 'url': tn_url}]
1630             else:
1631                 self.to_screen(
1632                     '[info] No thumbnails present for %s' % info_dict['id'])
1633                 return
1634
1635         self.to_screen(
1636             '[info] Thumbnails for %s:' % info_dict['id'])
1637         self.to_screen(render_table(
1638             ['ID', 'width', 'height', 'URL'],
1639             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1640
1641     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1642         if not subtitles:
1643             self.to_screen('%s has no %s' % (video_id, name))
1644             return
1645         self.to_screen(
1646             'Available %s for %s:' % (name, video_id))
1647         self.to_screen(render_table(
1648             ['Language', 'formats'],
1649             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1650                 for lang, formats in subtitles.items()]))
1651
1652     def urlopen(self, req):
1653         """ Start an HTTP download """
1654
1655         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1656         # always respected by websites, some tend to give out URLs with non percent-encoded
1657         # non-ASCII characters (see telemb.py, ard.py [#3412])
1658         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1659         # To work around aforementioned issue we will replace request's original URL with
1660         # percent-encoded one
1661         req_is_string = isinstance(req, compat_basestring)
1662         url = req if req_is_string else req.get_full_url()
1663         url_escaped = escape_url(url)
1664
1665         # Substitute URL if any change after escaping
1666         if url != url_escaped:
1667             if req_is_string:
1668                 req = url_escaped
1669             else:
1670                 req = compat_urllib_request.Request(
1671                     url_escaped, data=req.data, headers=req.headers,
1672                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1673
1674         return self._opener.open(req, timeout=self._socket_timeout)
1675
1676     def print_debug_header(self):
1677         if not self.params.get('verbose'):
1678             return
1679
1680         if type('') is not compat_str:
1681             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1682             self.report_warning(
1683                 'Your Python is broken! Update to a newer and supported version')
1684
1685         stdout_encoding = getattr(
1686             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1687         encoding_str = (
1688             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1689                 locale.getpreferredencoding(),
1690                 sys.getfilesystemencoding(),
1691                 stdout_encoding,
1692                 self.get_encoding()))
1693         write_string(encoding_str, encoding=None)
1694
1695         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1696         try:
1697             sp = subprocess.Popen(
1698                 ['git', 'rev-parse', '--short', 'HEAD'],
1699                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1700                 cwd=os.path.dirname(os.path.abspath(__file__)))
1701             out, err = sp.communicate()
1702             out = out.decode().strip()
1703             if re.match('[0-9a-f]+', out):
1704                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1705         except:
1706             try:
1707                 sys.exc_clear()
1708             except:
1709                 pass
1710         self._write_string('[debug] Python version %s - %s\n' % (
1711             platform.python_version(), platform_name()))
1712
1713         exe_versions = FFmpegPostProcessor.get_versions(self)
1714         exe_versions['rtmpdump'] = rtmpdump_version()
1715         exe_str = ', '.join(
1716             '%s %s' % (exe, v)
1717             for exe, v in sorted(exe_versions.items())
1718             if v
1719         )
1720         if not exe_str:
1721             exe_str = 'none'
1722         self._write_string('[debug] exe versions: %s\n' % exe_str)
1723
1724         proxy_map = {}
1725         for handler in self._opener.handlers:
1726             if hasattr(handler, 'proxies'):
1727                 proxy_map.update(handler.proxies)
1728         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1729
1730         if self.params.get('call_home', False):
1731             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1732             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1733             latest_version = self.urlopen(
1734                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1735             if version_tuple(latest_version) > version_tuple(__version__):
1736                 self.report_warning(
1737                     'You are using an outdated version (newest version: %s)! '
1738                     'See https://yt-dl.org/update if you need help updating.' %
1739                     latest_version)
1740
1741     def _setup_opener(self):
1742         timeout_val = self.params.get('socket_timeout')
1743         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1744
1745         opts_cookiefile = self.params.get('cookiefile')
1746         opts_proxy = self.params.get('proxy')
1747
1748         if opts_cookiefile is None:
1749             self.cookiejar = compat_cookiejar.CookieJar()
1750         else:
1751             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1752                 opts_cookiefile)
1753             if os.access(opts_cookiefile, os.R_OK):
1754                 self.cookiejar.load()
1755
1756         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1757             self.cookiejar)
1758         if opts_proxy is not None:
1759             if opts_proxy == '':
1760                 proxies = {}
1761             else:
1762                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1763         else:
1764             proxies = compat_urllib_request.getproxies()
1765             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1766             if 'http' in proxies and 'https' not in proxies:
1767                 proxies['https'] = proxies['http']
1768         proxy_handler = PerRequestProxyHandler(proxies)
1769
1770         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1771         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1772         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1773         opener = compat_urllib_request.build_opener(
1774             https_handler, proxy_handler, cookie_processor, ydlh)
1775         # Delete the default user-agent header, which would otherwise apply in
1776         # cases where our custom HTTP handler doesn't come into play
1777         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1778         opener.addheaders = []
1779         self._opener = opener
1780
1781     def encode(self, s):
1782         if isinstance(s, bytes):
1783             return s  # Already encoded
1784
1785         try:
1786             return s.encode(self.get_encoding())
1787         except UnicodeEncodeError as err:
1788             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1789             raise
1790
1791     def get_encoding(self):
1792         encoding = self.params.get('encoding')
1793         if encoding is None:
1794             encoding = preferredencoding()
1795         return encoding
1796
1797     def _write_thumbnails(self, info_dict, filename):
1798         if self.params.get('writethumbnail', False):
1799             thumbnails = info_dict.get('thumbnails')
1800             if thumbnails:
1801                 thumbnails = [thumbnails[-1]]
1802         elif self.params.get('write_all_thumbnails', False):
1803             thumbnails = info_dict.get('thumbnails')
1804         else:
1805             return
1806
1807         if not thumbnails:
1808             # No thumbnails present, so return immediately
1809             return
1810
1811         for t in thumbnails:
1812             thumb_ext = determine_ext(t['url'], 'jpg')
1813             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1814             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1815             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1816
1817             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1818                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1819                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1820             else:
1821                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1822                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1823                 try:
1824                     uf = self.urlopen(t['url'])
1825                     with open(thumb_filename, 'wb') as thumbf:
1826                         shutil.copyfileobj(uf, thumbf)
1827                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1828                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1829                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1830                     self.report_warning('Unable to download thumbnail "%s": %s' %
1831                                         (t['url'], compat_str(err)))