]> git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py
[srgssr] handle all play urls only in SRGSSRIE and keep RTSIE for articles
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_tokenize_tokenize,
38     compat_urllib_error,
39     compat_urllib_request,
40 )
41 from .utils import (
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     sanitize_path,
65     std_headers,
66     subtitles_filename,
67     UnavailableVideoError,
68     url_basename,
69     version_tuple,
70     write_json_file,
71     write_string,
72     YoutubeDLCookieProcessor,
73     YoutubeDLHandler,
74     prepend_extension,
75     replace_extension,
76     args_to_str,
77     age_restricted,
78 )
79 from .cache import Cache
80 from .extractor import get_info_extractor, gen_extractors
81 from .downloader import get_suitable_downloader
82 from .downloader.rtmp import rtmpdump_version
83 from .postprocessor import (
84     FFmpegFixupM4aPP,
85     FFmpegFixupStretchedPP,
86     FFmpegMergerPP,
87     FFmpegPostProcessor,
88     get_postprocessor,
89 )
90 from .version import __version__
91
92
93 class YoutubeDL(object):
94     """YoutubeDL class.
95
96     YoutubeDL objects are the ones responsible for downloading the
97     actual video file and writing it to disk if the user has requested
98     it, among some other tasks. In most cases there should be one per
99     program. As, given a video URL, the downloader doesn't know how to
100     extract all the needed information, task that InfoExtractors do, it
101     has to pass the URL to one of them.
102
103     For this, YoutubeDL objects have a method that allows
104     InfoExtractors to be registered in a given order. When it is passed
105     a URL, the YoutubeDL object hands it to the first InfoExtractor it
106     finds that reports being able to handle it. The InfoExtractor extracts
107     all the information about the video or videos the URL refers to, and
108     YoutubeDL processes the extracted information, possibly using a File
109     Downloader to download the video.
110
111     YoutubeDL objects accept a lot of parameters. In order not to saturate
112     the object constructor with arguments, it receives a dictionary of
113     options instead. These options are available through the params
114     attribute for the InfoExtractors to use. The YoutubeDL also
115     registers itself as the downloader in charge for the InfoExtractors
116     that are added to it, so this is a "mutual registration".
117
118     Available options:
119
120     username:          Username for authentication purposes.
121     password:          Password for authentication purposes.
122     videopassword:     Password for accessing a video.
123     usenetrc:          Use netrc for authentication instead.
124     verbose:           Print additional info to stdout.
125     quiet:             Do not print messages to stdout.
126     no_warnings:       Do not print out anything for warnings.
127     forceurl:          Force printing final URL.
128     forcetitle:        Force printing title.
129     forceid:           Force printing ID.
130     forcethumbnail:    Force printing thumbnail URL.
131     forcedescription:  Force printing description.
132     forcefilename:     Force printing final filename.
133     forceduration:     Force printing duration.
134     forcejson:         Force printing info_dict as JSON.
135     dump_single_json:  Force printing the info_dict of the whole playlist
136                        (or video) as a single JSON line.
137     simulate:          Do not download the video files.
138     format:            Video format code. See options.py for more information.
139     outtmpl:           Template for output names.
140     restrictfilenames: Do not allow "&" and spaces in file names
141     ignoreerrors:      Do not stop on download errors.
142     force_generic_extractor: Force downloader to use the generic extractor
143     nooverwrites:      Prevent overwriting files.
144     playliststart:     Playlist item to start at.
145     playlistend:       Playlist item to end at.
146     playlist_items:    Specific indices of playlist to download.
147     playlistreverse:   Download playlist items in reverse order.
148     matchtitle:        Download only matching titles.
149     rejecttitle:       Reject downloads for matching titles.
150     logger:            Log messages to a logging.Logger instance.
151     logtostderr:       Log messages to stderr instead of stdout.
152     writedescription:  Write the video description to a .description file
153     writeinfojson:     Write the video metadata to a .info.json file
154     writeannotations:  Write the video annotations to a .annotations.xml file
155     writethumbnail:    Write the thumbnail image to a file
156     write_all_thumbnails:  Write all thumbnail formats to files
157     writesubtitles:    Write the video subtitles to a file
158     writeautomaticsub: Write the automatic subtitles to a file
159     allsubtitles:      Downloads all the subtitles of the video
160                        (requires writesubtitles or writeautomaticsub)
161     listsubtitles:     Lists all available subtitles for the video
162     subtitlesformat:   The format code for subtitles
163     subtitleslangs:    List of languages of the subtitles to download
164     keepvideo:         Keep the video file after post-processing
165     daterange:         A DateRange object, download only if the upload_date is in the range.
166     skip_download:     Skip the actual download of the video file
167     cachedir:          Location of the cache files in the filesystem.
168                        False to disable filesystem cache.
169     noplaylist:        Download single video instead of a playlist if in doubt.
170     age_limit:         An integer representing the user's age in years.
171                        Unsuitable videos for the given age are skipped.
172     min_views:         An integer representing the minimum view count the video
173                        must have in order to not be skipped.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     max_views:         An integer representing the maximum view count.
177                        Videos that are more popular than that are not
178                        downloaded.
179                        Videos without view count information are always
180                        downloaded. None for no limit.
181     download_archive:  File name of a file where all downloads are recorded.
182                        Videos already present in the file are not downloaded
183                        again.
184     cookiefile:        File name where cookies should be read from and dumped to.
185     nocheckcertificate:Do not verify SSL certificates
186     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
187                        At the moment, this is only supported by YouTube.
188     proxy:             URL of the proxy server to use
189     cn_verification_proxy:  URL of the proxy to use for IP address verification
190                        on Chinese sites. (Experimental)
191     socket_timeout:    Time to wait for unresponsive hosts, in seconds
192     bidi_workaround:   Work around buggy terminals without bidirectional text
193                        support, using fribidi
194     debug_printtraffic:Print out sent and received HTTP traffic
195     include_ads:       Download ads as well
196     default_search:    Prepend this string if an input url is not valid.
197                        'auto' for elaborate guessing
198     encoding:          Use this encoding instead of the system-specified.
199     extract_flat:      Do not resolve URLs, return the immediate result.
200                        Pass in 'in_playlist' to only show this behavior for
201                        playlist items.
202     postprocessors:    A list of dictionaries, each with an entry
203                        * key:  The name of the postprocessor. See
204                                youtube_dl/postprocessor/__init__.py for a list.
205                        as well as any further keyword arguments for the
206                        postprocessor.
207     progress_hooks:    A list of functions that get called on download
208                        progress, with a dictionary with the entries
209                        * status: One of "downloading", "error", or "finished".
210                                  Check this first and ignore unknown values.
211
212                        If status is one of "downloading", or "finished", the
213                        following properties may also be present:
214                        * filename: The final filename (always present)
215                        * tmpfilename: The filename we're currently writing to
216                        * downloaded_bytes: Bytes on disk
217                        * total_bytes: Size of the whole file, None if unknown
218                        * total_bytes_estimate: Guess of the eventual file size,
219                                                None if unavailable.
220                        * elapsed: The number of seconds since download started.
221                        * eta: The estimated time in seconds, None if unknown
222                        * speed: The download speed in bytes/second, None if
223                                 unknown
224                        * fragment_index: The counter of the currently
225                                          downloaded video fragment.
226                        * fragment_count: The number of fragments (= individual
227                                          files that will be merged)
228
229                        Progress hooks are guaranteed to be called at least once
230                        (with status "finished") if the download is successful.
231     merge_output_format: Extension to use when merging formats.
232     fixup:             Automatically correct known faults of the file.
233                        One of:
234                        - "never": do nothing
235                        - "warn": only emit a warning
236                        - "detect_or_warn": check whether we can do anything
237                                            about it, warn otherwise (default)
238     source_address:    (Experimental) Client-side IP address to bind to.
239     call_home:         Boolean, true iff we are allowed to contact the
240                        youtube-dl servers for debugging.
241     sleep_interval:    Number of seconds to sleep before each download.
242     listformats:       Print an overview of available video formats and exit.
243     list_thumbnails:   Print a table of all thumbnails and exit.
244     match_filter:      A function that gets called with the info_dict of
245                        every video.
246                        If it returns a message, the video is ignored.
247                        If it returns None, the video is downloaded.
248                        match_filter_func in utils.py is one example for this.
249     no_color:          Do not emit color codes in output.
250
251     The following options determine which downloader is picked:
252     external_downloader: Executable of the external downloader to call.
253                        None or unset for standard (built-in) downloader.
254     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
255
256     The following parameters are not used by YoutubeDL itself, they are used by
257     the downloader (see youtube_dl/downloader/common.py):
258     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
259     noresizebuffer, retries, continuedl, noprogress, consoletitle,
260     xattr_set_filesize, external_downloader_args.
261
262     The following options are used by the post processors:
263     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
264                        otherwise prefer avconv.
265     postprocessor_args: A list of additional command-line arguments for the
266                         postprocessor.
267     """
268
269     params = None
270     _ies = []
271     _pps = []
272     _download_retcode = None
273     _num_downloads = None
274     _screen_file = None
275
276     def __init__(self, params=None, auto_init=True):
277         """Create a FileDownloader object with the given options."""
278         if params is None:
279             params = {}
280         self._ies = []
281         self._ies_instances = {}
282         self._pps = []
283         self._progress_hooks = []
284         self._download_retcode = 0
285         self._num_downloads = 0
286         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
287         self._err_file = sys.stderr
288         self.params = {
289             # Default parameters
290             'nocheckcertificate': False,
291         }
292         self.params.update(params)
293         self.cache = Cache(self)
294
295         if params.get('bidi_workaround', False):
296             try:
297                 import pty
298                 master, slave = pty.openpty()
299                 width = compat_get_terminal_size().columns
300                 if width is None:
301                     width_args = []
302                 else:
303                     width_args = ['-w', str(width)]
304                 sp_kwargs = dict(
305                     stdin=subprocess.PIPE,
306                     stdout=slave,
307                     stderr=self._err_file)
308                 try:
309                     self._output_process = subprocess.Popen(
310                         ['bidiv'] + width_args, **sp_kwargs
311                     )
312                 except OSError:
313                     self._output_process = subprocess.Popen(
314                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
315                 self._output_channel = os.fdopen(master, 'rb')
316             except OSError as ose:
317                 if ose.errno == 2:
318                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
319                 else:
320                     raise
321
322         if (sys.version_info >= (3,) and sys.platform != 'win32' and
323                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
324                 not params.get('restrictfilenames', False)):
325             # On Python 3, the Unicode filesystem API will throw errors (#1474)
326             self.report_warning(
327                 'Assuming --restrict-filenames since file system encoding '
328                 'cannot encode all characters. '
329                 'Set the LC_ALL environment variable to fix this.')
330             self.params['restrictfilenames'] = True
331
332         if isinstance(params.get('outtmpl'), bytes):
333             self.report_warning(
334                 'Parameter outtmpl is bytes, but should be a unicode string. '
335                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
336
337         self._setup_opener()
338
339         if auto_init:
340             self.print_debug_header()
341             self.add_default_info_extractors()
342
343         for pp_def_raw in self.params.get('postprocessors', []):
344             pp_class = get_postprocessor(pp_def_raw['key'])
345             pp_def = dict(pp_def_raw)
346             del pp_def['key']
347             pp = pp_class(self, **compat_kwargs(pp_def))
348             self.add_post_processor(pp)
349
350         for ph in self.params.get('progress_hooks', []):
351             self.add_progress_hook(ph)
352
353     def warn_if_short_id(self, argv):
354         # short YouTube ID starting with dash?
355         idxs = [
356             i for i, a in enumerate(argv)
357             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
358         if idxs:
359             correct_argv = (
360                 ['youtube-dl'] +
361                 [a for i, a in enumerate(argv) if i not in idxs] +
362                 ['--'] + [argv[i] for i in idxs]
363             )
364             self.report_warning(
365                 'Long argument string detected. '
366                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
367                 args_to_str(correct_argv))
368
369     def add_info_extractor(self, ie):
370         """Add an InfoExtractor object to the end of the list."""
371         self._ies.append(ie)
372         self._ies_instances[ie.ie_key()] = ie
373         ie.set_downloader(self)
374
375     def get_info_extractor(self, ie_key):
376         """
377         Get an instance of an IE with name ie_key, it will try to get one from
378         the _ies list, if there's no instance it will create a new one and add
379         it to the extractor list.
380         """
381         ie = self._ies_instances.get(ie_key)
382         if ie is None:
383             ie = get_info_extractor(ie_key)()
384             self.add_info_extractor(ie)
385         return ie
386
387     def add_default_info_extractors(self):
388         """
389         Add the InfoExtractors returned by gen_extractors to the end of the list
390         """
391         for ie in gen_extractors():
392             self.add_info_extractor(ie)
393
394     def add_post_processor(self, pp):
395         """Add a PostProcessor object to the end of the chain."""
396         self._pps.append(pp)
397         pp.set_downloader(self)
398
399     def add_progress_hook(self, ph):
400         """Add the progress hook (currently only for the file downloader)"""
401         self._progress_hooks.append(ph)
402
403     def _bidi_workaround(self, message):
404         if not hasattr(self, '_output_channel'):
405             return message
406
407         assert hasattr(self, '_output_process')
408         assert isinstance(message, compat_str)
409         line_count = message.count('\n') + 1
410         self._output_process.stdin.write((message + '\n').encode('utf-8'))
411         self._output_process.stdin.flush()
412         res = ''.join(self._output_channel.readline().decode('utf-8')
413                       for _ in range(line_count))
414         return res[:-len('\n')]
415
416     def to_screen(self, message, skip_eol=False):
417         """Print message to stdout if not in quiet mode."""
418         return self.to_stdout(message, skip_eol, check_quiet=True)
419
420     def _write_string(self, s, out=None):
421         write_string(s, out=out, encoding=self.params.get('encoding'))
422
423     def to_stdout(self, message, skip_eol=False, check_quiet=False):
424         """Print message to stdout if not in quiet mode."""
425         if self.params.get('logger'):
426             self.params['logger'].debug(message)
427         elif not check_quiet or not self.params.get('quiet', False):
428             message = self._bidi_workaround(message)
429             terminator = ['\n', ''][skip_eol]
430             output = message + terminator
431
432             self._write_string(output, self._screen_file)
433
434     def to_stderr(self, message):
435         """Print message to stderr."""
436         assert isinstance(message, compat_str)
437         if self.params.get('logger'):
438             self.params['logger'].error(message)
439         else:
440             message = self._bidi_workaround(message)
441             output = message + '\n'
442             self._write_string(output, self._err_file)
443
444     def to_console_title(self, message):
445         if not self.params.get('consoletitle', False):
446             return
447         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
448             # c_wchar_p() might not be necessary if `message` is
449             # already of type unicode()
450             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
451         elif 'TERM' in os.environ:
452             self._write_string('\033]0;%s\007' % message, self._screen_file)
453
454     def save_console_title(self):
455         if not self.params.get('consoletitle', False):
456             return
457         if 'TERM' in os.environ:
458             # Save the title on stack
459             self._write_string('\033[22;0t', self._screen_file)
460
461     def restore_console_title(self):
462         if not self.params.get('consoletitle', False):
463             return
464         if 'TERM' in os.environ:
465             # Restore the title from stack
466             self._write_string('\033[23;0t', self._screen_file)
467
468     def __enter__(self):
469         self.save_console_title()
470         return self
471
472     def __exit__(self, *args):
473         self.restore_console_title()
474
475         if self.params.get('cookiefile') is not None:
476             self.cookiejar.save()
477
478     def trouble(self, message=None, tb=None):
479         """Determine action to take when a download problem appears.
480
481         Depending on if the downloader has been configured to ignore
482         download errors or not, this method may throw an exception or
483         not when errors are found, after printing the message.
484
485         tb, if given, is additional traceback information.
486         """
487         if message is not None:
488             self.to_stderr(message)
489         if self.params.get('verbose'):
490             if tb is None:
491                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
492                     tb = ''
493                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
494                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
495                     tb += compat_str(traceback.format_exc())
496                 else:
497                     tb_data = traceback.format_list(traceback.extract_stack())
498                     tb = ''.join(tb_data)
499             self.to_stderr(tb)
500         if not self.params.get('ignoreerrors', False):
501             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
502                 exc_info = sys.exc_info()[1].exc_info
503             else:
504                 exc_info = sys.exc_info()
505             raise DownloadError(message, exc_info)
506         self._download_retcode = 1
507
508     def report_warning(self, message):
509         '''
510         Print the message to stderr, it will be prefixed with 'WARNING:'
511         If stderr is a tty file the 'WARNING:' will be colored
512         '''
513         if self.params.get('logger') is not None:
514             self.params['logger'].warning(message)
515         else:
516             if self.params.get('no_warnings'):
517                 return
518             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
519                 _msg_header = '\033[0;33mWARNING:\033[0m'
520             else:
521                 _msg_header = 'WARNING:'
522             warning_message = '%s %s' % (_msg_header, message)
523             self.to_stderr(warning_message)
524
525     def report_error(self, message, tb=None):
526         '''
527         Do the same as trouble, but prefixes the message with 'ERROR:', colored
528         in red if stderr is a tty file.
529         '''
530         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
531             _msg_header = '\033[0;31mERROR:\033[0m'
532         else:
533             _msg_header = 'ERROR:'
534         error_message = '%s %s' % (_msg_header, message)
535         self.trouble(error_message, tb)
536
537     def report_file_already_downloaded(self, file_name):
538         """Report file has already been fully downloaded."""
539         try:
540             self.to_screen('[download] %s has already been downloaded' % file_name)
541         except UnicodeEncodeError:
542             self.to_screen('[download] The file has already been downloaded')
543
544     def prepare_filename(self, info_dict):
545         """Generate the output filename."""
546         try:
547             template_dict = dict(info_dict)
548
549             template_dict['epoch'] = int(time.time())
550             autonumber_size = self.params.get('autonumber_size')
551             if autonumber_size is None:
552                 autonumber_size = 5
553             autonumber_templ = '%0' + str(autonumber_size) + 'd'
554             template_dict['autonumber'] = autonumber_templ % self._num_downloads
555             if template_dict.get('playlist_index') is not None:
556                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
557             if template_dict.get('resolution') is None:
558                 if template_dict.get('width') and template_dict.get('height'):
559                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
560                 elif template_dict.get('height'):
561                     template_dict['resolution'] = '%sp' % template_dict['height']
562                 elif template_dict.get('width'):
563                     template_dict['resolution'] = '?x%d' % template_dict['width']
564
565             sanitize = lambda k, v: sanitize_filename(
566                 compat_str(v),
567                 restricted=self.params.get('restrictfilenames'),
568                 is_id=(k == 'id'))
569             template_dict = dict((k, sanitize(k, v))
570                                  for k, v in template_dict.items()
571                                  if v is not None)
572             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
573
574             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
575             tmpl = compat_expanduser(outtmpl)
576             filename = tmpl % template_dict
577             # Temporary fix for #4787
578             # 'Treat' all problem characters by passing filename through preferredencoding
579             # to workaround encoding issues with subprocess on python2 @ Windows
580             if sys.version_info < (3, 0) and sys.platform == 'win32':
581                 filename = encodeFilename(filename, True).decode(preferredencoding())
582             return filename
583         except ValueError as err:
584             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
585             return None
586
587     def _match_entry(self, info_dict, incomplete):
588         """ Returns None iff the file should be downloaded """
589
590         video_title = info_dict.get('title', info_dict.get('id', 'video'))
591         if 'title' in info_dict:
592             # This can happen when we're just evaluating the playlist
593             title = info_dict['title']
594             matchtitle = self.params.get('matchtitle', False)
595             if matchtitle:
596                 if not re.search(matchtitle, title, re.IGNORECASE):
597                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
598             rejecttitle = self.params.get('rejecttitle', False)
599             if rejecttitle:
600                 if re.search(rejecttitle, title, re.IGNORECASE):
601                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
602         date = info_dict.get('upload_date', None)
603         if date is not None:
604             dateRange = self.params.get('daterange', DateRange())
605             if date not in dateRange:
606                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
607         view_count = info_dict.get('view_count', None)
608         if view_count is not None:
609             min_views = self.params.get('min_views')
610             if min_views is not None and view_count < min_views:
611                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
612             max_views = self.params.get('max_views')
613             if max_views is not None and view_count > max_views:
614                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
615         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
616             return 'Skipping "%s" because it is age restricted' % video_title
617         if self.in_download_archive(info_dict):
618             return '%s has already been recorded in archive' % video_title
619
620         if not incomplete:
621             match_filter = self.params.get('match_filter')
622             if match_filter is not None:
623                 ret = match_filter(info_dict)
624                 if ret is not None:
625                     return ret
626
627         return None
628
629     @staticmethod
630     def add_extra_info(info_dict, extra_info):
631         '''Set the keys from extra_info in info dict if they are missing'''
632         for key, value in extra_info.items():
633             info_dict.setdefault(key, value)
634
635     def extract_info(self, url, download=True, ie_key=None, extra_info={},
636                      process=True, force_generic_extractor=False):
637         '''
638         Returns a list with a dictionary for each video we find.
639         If 'download', also downloads the videos.
640         extra_info is a dict containing the extra values to add to each result
641         '''
642
643         if not ie_key and force_generic_extractor:
644             ie_key = 'Generic'
645
646         if ie_key:
647             ies = [self.get_info_extractor(ie_key)]
648         else:
649             ies = self._ies
650
651         for ie in ies:
652             if not ie.suitable(url):
653                 continue
654
655             if not ie.working():
656                 self.report_warning('The program functionality for this site has been marked as broken, '
657                                     'and will probably not work.')
658
659             try:
660                 ie_result = ie.extract(url)
661                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
662                     break
663                 if isinstance(ie_result, list):
664                     # Backwards compatibility: old IE result format
665                     ie_result = {
666                         '_type': 'compat_list',
667                         'entries': ie_result,
668                     }
669                 self.add_default_extra_info(ie_result, ie, url)
670                 if process:
671                     return self.process_ie_result(ie_result, download, extra_info)
672                 else:
673                     return ie_result
674             except ExtractorError as de:  # An error we somewhat expected
675                 self.report_error(compat_str(de), de.format_traceback())
676                 break
677             except MaxDownloadsReached:
678                 raise
679             except Exception as e:
680                 if self.params.get('ignoreerrors', False):
681                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
682                     break
683                 else:
684                     raise
685         else:
686             self.report_error('no suitable InfoExtractor for URL %s' % url)
687
688     def add_default_extra_info(self, ie_result, ie, url):
689         self.add_extra_info(ie_result, {
690             'extractor': ie.IE_NAME,
691             'webpage_url': url,
692             'webpage_url_basename': url_basename(url),
693             'extractor_key': ie.ie_key(),
694         })
695
696     def process_ie_result(self, ie_result, download=True, extra_info={}):
697         """
698         Take the result of the ie(may be modified) and resolve all unresolved
699         references (URLs, playlist items).
700
701         It will also download the videos if 'download'.
702         Returns the resolved ie_result.
703         """
704
705         result_type = ie_result.get('_type', 'video')
706
707         if result_type in ('url', 'url_transparent'):
708             extract_flat = self.params.get('extract_flat', False)
709             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
710                     extract_flat is True):
711                 if self.params.get('forcejson', False):
712                     self.to_stdout(json.dumps(ie_result))
713                 return ie_result
714
715         if result_type == 'video':
716             self.add_extra_info(ie_result, extra_info)
717             return self.process_video_result(ie_result, download=download)
718         elif result_type == 'url':
719             # We have to add extra_info to the results because it may be
720             # contained in a playlist
721             return self.extract_info(ie_result['url'],
722                                      download,
723                                      ie_key=ie_result.get('ie_key'),
724                                      extra_info=extra_info)
725         elif result_type == 'url_transparent':
726             # Use the information from the embedding page
727             info = self.extract_info(
728                 ie_result['url'], ie_key=ie_result.get('ie_key'),
729                 extra_info=extra_info, download=False, process=False)
730
731             force_properties = dict(
732                 (k, v) for k, v in ie_result.items() if v is not None)
733             for f in ('_type', 'url'):
734                 if f in force_properties:
735                     del force_properties[f]
736             new_result = info.copy()
737             new_result.update(force_properties)
738
739             assert new_result.get('_type') != 'url_transparent'
740
741             return self.process_ie_result(
742                 new_result, download=download, extra_info=extra_info)
743         elif result_type == 'playlist' or result_type == 'multi_video':
744             # We process each entry in the playlist
745             playlist = ie_result.get('title', None) or ie_result.get('id', None)
746             self.to_screen('[download] Downloading playlist: %s' % playlist)
747
748             playlist_results = []
749
750             playliststart = self.params.get('playliststart', 1) - 1
751             playlistend = self.params.get('playlistend', None)
752             # For backwards compatibility, interpret -1 as whole list
753             if playlistend == -1:
754                 playlistend = None
755
756             playlistitems_str = self.params.get('playlist_items', None)
757             playlistitems = None
758             if playlistitems_str is not None:
759                 def iter_playlistitems(format):
760                     for string_segment in format.split(','):
761                         if '-' in string_segment:
762                             start, end = string_segment.split('-')
763                             for item in range(int(start), int(end) + 1):
764                                 yield int(item)
765                         else:
766                             yield int(string_segment)
767                 playlistitems = iter_playlistitems(playlistitems_str)
768
769             ie_entries = ie_result['entries']
770             if isinstance(ie_entries, list):
771                 n_all_entries = len(ie_entries)
772                 if playlistitems:
773                     entries = [
774                         ie_entries[i - 1] for i in playlistitems
775                         if -n_all_entries <= i - 1 < n_all_entries]
776                 else:
777                     entries = ie_entries[playliststart:playlistend]
778                 n_entries = len(entries)
779                 self.to_screen(
780                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
781                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
782             elif isinstance(ie_entries, PagedList):
783                 if playlistitems:
784                     entries = []
785                     for item in playlistitems:
786                         entries.extend(ie_entries.getslice(
787                             item - 1, item
788                         ))
789                 else:
790                     entries = ie_entries.getslice(
791                         playliststart, playlistend)
792                 n_entries = len(entries)
793                 self.to_screen(
794                     "[%s] playlist %s: Downloading %d videos" %
795                     (ie_result['extractor'], playlist, n_entries))
796             else:  # iterable
797                 if playlistitems:
798                     entry_list = list(ie_entries)
799                     entries = [entry_list[i - 1] for i in playlistitems]
800                 else:
801                     entries = list(itertools.islice(
802                         ie_entries, playliststart, playlistend))
803                 n_entries = len(entries)
804                 self.to_screen(
805                     "[%s] playlist %s: Downloading %d videos" %
806                     (ie_result['extractor'], playlist, n_entries))
807
808             if self.params.get('playlistreverse', False):
809                 entries = entries[::-1]
810
811             for i, entry in enumerate(entries, 1):
812                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
813                 extra = {
814                     'n_entries': n_entries,
815                     'playlist': playlist,
816                     'playlist_id': ie_result.get('id'),
817                     'playlist_title': ie_result.get('title'),
818                     'playlist_index': i + playliststart,
819                     'extractor': ie_result['extractor'],
820                     'webpage_url': ie_result['webpage_url'],
821                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
822                     'extractor_key': ie_result['extractor_key'],
823                 }
824
825                 reason = self._match_entry(entry, incomplete=True)
826                 if reason is not None:
827                     self.to_screen('[download] ' + reason)
828                     continue
829
830                 entry_result = self.process_ie_result(entry,
831                                                       download=download,
832                                                       extra_info=extra)
833                 playlist_results.append(entry_result)
834             ie_result['entries'] = playlist_results
835             return ie_result
836         elif result_type == 'compat_list':
837             self.report_warning(
838                 'Extractor %s returned a compat_list result. '
839                 'It needs to be updated.' % ie_result.get('extractor'))
840
841             def _fixup(r):
842                 self.add_extra_info(
843                     r,
844                     {
845                         'extractor': ie_result['extractor'],
846                         'webpage_url': ie_result['webpage_url'],
847                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
848                         'extractor_key': ie_result['extractor_key'],
849                     }
850                 )
851                 return r
852             ie_result['entries'] = [
853                 self.process_ie_result(_fixup(r), download, extra_info)
854                 for r in ie_result['entries']
855             ]
856             return ie_result
857         else:
858             raise Exception('Invalid result type: %s' % result_type)
859
860     def _build_format_filter(self, filter_spec):
861         " Returns a function to filter the formats according to the filter_spec "
862
863         OPERATORS = {
864             '<': operator.lt,
865             '<=': operator.le,
866             '>': operator.gt,
867             '>=': operator.ge,
868             '=': operator.eq,
869             '!=': operator.ne,
870         }
871         operator_rex = re.compile(r'''(?x)\s*
872             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
873             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
874             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
875             $
876             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
877         m = operator_rex.search(filter_spec)
878         if m:
879             try:
880                 comparison_value = int(m.group('value'))
881             except ValueError:
882                 comparison_value = parse_filesize(m.group('value'))
883                 if comparison_value is None:
884                     comparison_value = parse_filesize(m.group('value') + 'B')
885                 if comparison_value is None:
886                     raise ValueError(
887                         'Invalid value %r in format specification %r' % (
888                             m.group('value'), filter_spec))
889             op = OPERATORS[m.group('op')]
890
891         if not m:
892             STR_OPERATORS = {
893                 '=': operator.eq,
894                 '!=': operator.ne,
895             }
896             str_operator_rex = re.compile(r'''(?x)
897                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
898                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
899                 \s*(?P<value>[a-zA-Z0-9_-]+)
900                 \s*$
901                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
902             m = str_operator_rex.search(filter_spec)
903             if m:
904                 comparison_value = m.group('value')
905                 op = STR_OPERATORS[m.group('op')]
906
907         if not m:
908             raise ValueError('Invalid filter specification %r' % filter_spec)
909
910         def _filter(f):
911             actual_value = f.get(m.group('key'))
912             if actual_value is None:
913                 return m.group('none_inclusive')
914             return op(actual_value, comparison_value)
915         return _filter
916
917     def build_format_selector(self, format_spec):
918         def syntax_error(note, start):
919             message = (
920                 'Invalid format specification: '
921                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
922             return SyntaxError(message)
923
924         PICKFIRST = 'PICKFIRST'
925         MERGE = 'MERGE'
926         SINGLE = 'SINGLE'
927         GROUP = 'GROUP'
928         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
929
930         def _parse_filter(tokens):
931             filter_parts = []
932             for type, string, start, _, _ in tokens:
933                 if type == tokenize.OP and string == ']':
934                     return ''.join(filter_parts)
935                 else:
936                     filter_parts.append(string)
937
938         def _remove_unused_ops(tokens):
939             # Remove operators that we don't use and join them with the sourrounding strings
940             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
941             ALLOWED_OPS = ('/', '+', ',', '(', ')')
942             last_string, last_start, last_end, last_line = None, None, None, None
943             for type, string, start, end, line in tokens:
944                 if type == tokenize.OP and string == '[':
945                     if last_string:
946                         yield tokenize.NAME, last_string, last_start, last_end, last_line
947                         last_string = None
948                     yield type, string, start, end, line
949                     # everything inside brackets will be handled by _parse_filter
950                     for type, string, start, end, line in tokens:
951                         yield type, string, start, end, line
952                         if type == tokenize.OP and string == ']':
953                             break
954                 elif type == tokenize.OP and string in ALLOWED_OPS:
955                     if last_string:
956                         yield tokenize.NAME, last_string, last_start, last_end, last_line
957                         last_string = None
958                     yield type, string, start, end, line
959                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
960                     if not last_string:
961                         last_string = string
962                         last_start = start
963                         last_end = end
964                     else:
965                         last_string += string
966             if last_string:
967                 yield tokenize.NAME, last_string, last_start, last_end, last_line
968
969         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
970             selectors = []
971             current_selector = None
972             for type, string, start, _, _ in tokens:
973                 # ENCODING is only defined in python 3.x
974                 if type == getattr(tokenize, 'ENCODING', None):
975                     continue
976                 elif type in [tokenize.NAME, tokenize.NUMBER]:
977                     current_selector = FormatSelector(SINGLE, string, [])
978                 elif type == tokenize.OP:
979                     if string == ')':
980                         if not inside_group:
981                             # ')' will be handled by the parentheses group
982                             tokens.restore_last_token()
983                         break
984                     elif inside_merge and string in ['/', ',']:
985                         tokens.restore_last_token()
986                         break
987                     elif inside_choice and string == ',':
988                         tokens.restore_last_token()
989                         break
990                     elif string == ',':
991                         if not current_selector:
992                             raise syntax_error('"," must follow a format selector', start)
993                         selectors.append(current_selector)
994                         current_selector = None
995                     elif string == '/':
996                         if not current_selector:
997                             raise syntax_error('"/" must follow a format selector', start)
998                         first_choice = current_selector
999                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1000                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1001                     elif string == '[':
1002                         if not current_selector:
1003                             current_selector = FormatSelector(SINGLE, 'best', [])
1004                         format_filter = _parse_filter(tokens)
1005                         current_selector.filters.append(format_filter)
1006                     elif string == '(':
1007                         if current_selector:
1008                             raise syntax_error('Unexpected "("', start)
1009                         group = _parse_format_selection(tokens, inside_group=True)
1010                         current_selector = FormatSelector(GROUP, group, [])
1011                     elif string == '+':
1012                         video_selector = current_selector
1013                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
1014                         if not video_selector or not audio_selector:
1015                             raise syntax_error('"+" must be between two format selectors', start)
1016                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1017                     else:
1018                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1019                 elif type == tokenize.ENDMARKER:
1020                     break
1021             if current_selector:
1022                 selectors.append(current_selector)
1023             return selectors
1024
1025         def _build_selector_function(selector):
1026             if isinstance(selector, list):
1027                 fs = [_build_selector_function(s) for s in selector]
1028
1029                 def selector_function(formats):
1030                     for f in fs:
1031                         for format in f(formats):
1032                             yield format
1033                 return selector_function
1034             elif selector.type == GROUP:
1035                 selector_function = _build_selector_function(selector.selector)
1036             elif selector.type == PICKFIRST:
1037                 fs = [_build_selector_function(s) for s in selector.selector]
1038
1039                 def selector_function(formats):
1040                     for f in fs:
1041                         picked_formats = list(f(formats))
1042                         if picked_formats:
1043                             return picked_formats
1044                     return []
1045             elif selector.type == SINGLE:
1046                 format_spec = selector.selector
1047
1048                 def selector_function(formats):
1049                     formats = list(formats)
1050                     if not formats:
1051                         return
1052                     if format_spec == 'all':
1053                         for f in formats:
1054                             yield f
1055                     elif format_spec in ['best', 'worst', None]:
1056                         format_idx = 0 if format_spec == 'worst' else -1
1057                         audiovideo_formats = [
1058                             f for f in formats
1059                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1060                         if audiovideo_formats:
1061                             yield audiovideo_formats[format_idx]
1062                         # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1063                         elif (all(f.get('acodec') != 'none' for f in formats) or
1064                               all(f.get('vcodec') != 'none' for f in formats)):
1065                             yield formats[format_idx]
1066                     elif format_spec == 'bestaudio':
1067                         audio_formats = [
1068                             f for f in formats
1069                             if f.get('vcodec') == 'none']
1070                         if audio_formats:
1071                             yield audio_formats[-1]
1072                     elif format_spec == 'worstaudio':
1073                         audio_formats = [
1074                             f for f in formats
1075                             if f.get('vcodec') == 'none']
1076                         if audio_formats:
1077                             yield audio_formats[0]
1078                     elif format_spec == 'bestvideo':
1079                         video_formats = [
1080                             f for f in formats
1081                             if f.get('acodec') == 'none']
1082                         if video_formats:
1083                             yield video_formats[-1]
1084                     elif format_spec == 'worstvideo':
1085                         video_formats = [
1086                             f for f in formats
1087                             if f.get('acodec') == 'none']
1088                         if video_formats:
1089                             yield video_formats[0]
1090                     else:
1091                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1092                         if format_spec in extensions:
1093                             filter_f = lambda f: f['ext'] == format_spec
1094                         else:
1095                             filter_f = lambda f: f['format_id'] == format_spec
1096                         matches = list(filter(filter_f, formats))
1097                         if matches:
1098                             yield matches[-1]
1099             elif selector.type == MERGE:
1100                 def _merge(formats_info):
1101                     format_1, format_2 = [f['format_id'] for f in formats_info]
1102                     # The first format must contain the video and the
1103                     # second the audio
1104                     if formats_info[0].get('vcodec') == 'none':
1105                         self.report_error('The first format must '
1106                                           'contain the video, try using '
1107                                           '"-f %s+%s"' % (format_2, format_1))
1108                         return
1109                     output_ext = (
1110                         formats_info[0]['ext']
1111                         if self.params.get('merge_output_format') is None
1112                         else self.params['merge_output_format'])
1113                     return {
1114                         'requested_formats': formats_info,
1115                         'format': '%s+%s' % (formats_info[0].get('format'),
1116                                              formats_info[1].get('format')),
1117                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1118                                                 formats_info[1].get('format_id')),
1119                         'width': formats_info[0].get('width'),
1120                         'height': formats_info[0].get('height'),
1121                         'resolution': formats_info[0].get('resolution'),
1122                         'fps': formats_info[0].get('fps'),
1123                         'vcodec': formats_info[0].get('vcodec'),
1124                         'vbr': formats_info[0].get('vbr'),
1125                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1126                         'acodec': formats_info[1].get('acodec'),
1127                         'abr': formats_info[1].get('abr'),
1128                         'ext': output_ext,
1129                     }
1130                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1131
1132                 def selector_function(formats):
1133                     formats = list(formats)
1134                     for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1135                         yield _merge(pair)
1136
1137             filters = [self._build_format_filter(f) for f in selector.filters]
1138
1139             def final_selector(formats):
1140                 for _filter in filters:
1141                     formats = list(filter(_filter, formats))
1142                 return selector_function(formats)
1143             return final_selector
1144
1145         stream = io.BytesIO(format_spec.encode('utf-8'))
1146         try:
1147             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1148         except tokenize.TokenError:
1149             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1150
1151         class TokenIterator(object):
1152             def __init__(self, tokens):
1153                 self.tokens = tokens
1154                 self.counter = 0
1155
1156             def __iter__(self):
1157                 return self
1158
1159             def __next__(self):
1160                 if self.counter >= len(self.tokens):
1161                     raise StopIteration()
1162                 value = self.tokens[self.counter]
1163                 self.counter += 1
1164                 return value
1165
1166             next = __next__
1167
1168             def restore_last_token(self):
1169                 self.counter -= 1
1170
1171         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1172         return _build_selector_function(parsed_selector)
1173
1174     def _calc_headers(self, info_dict):
1175         res = std_headers.copy()
1176
1177         add_headers = info_dict.get('http_headers')
1178         if add_headers:
1179             res.update(add_headers)
1180
1181         cookies = self._calc_cookies(info_dict)
1182         if cookies:
1183             res['Cookie'] = cookies
1184
1185         return res
1186
1187     def _calc_cookies(self, info_dict):
1188         pr = compat_urllib_request.Request(info_dict['url'])
1189         self.cookiejar.add_cookie_header(pr)
1190         return pr.get_header('Cookie')
1191
1192     def process_video_result(self, info_dict, download=True):
1193         assert info_dict.get('_type', 'video') == 'video'
1194
1195         if 'id' not in info_dict:
1196             raise ExtractorError('Missing "id" field in extractor result')
1197         if 'title' not in info_dict:
1198             raise ExtractorError('Missing "title" field in extractor result')
1199
1200         if 'playlist' not in info_dict:
1201             # It isn't part of a playlist
1202             info_dict['playlist'] = None
1203             info_dict['playlist_index'] = None
1204
1205         thumbnails = info_dict.get('thumbnails')
1206         if thumbnails is None:
1207             thumbnail = info_dict.get('thumbnail')
1208             if thumbnail:
1209                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1210         if thumbnails:
1211             thumbnails.sort(key=lambda t: (
1212                 t.get('preference'), t.get('width'), t.get('height'),
1213                 t.get('id'), t.get('url')))
1214             for i, t in enumerate(thumbnails):
1215                 if t.get('width') and t.get('height'):
1216                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1217                 if t.get('id') is None:
1218                     t['id'] = '%d' % i
1219
1220         if thumbnails and 'thumbnail' not in info_dict:
1221             info_dict['thumbnail'] = thumbnails[-1]['url']
1222
1223         if 'display_id' not in info_dict and 'id' in info_dict:
1224             info_dict['display_id'] = info_dict['id']
1225
1226         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1227             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1228             # see http://bugs.python.org/issue1646728)
1229             try:
1230                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1231                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1232             except (ValueError, OverflowError, OSError):
1233                 pass
1234
1235         subtitles = info_dict.get('subtitles')
1236         if subtitles:
1237             for _, subtitle in subtitles.items():
1238                 for subtitle_format in subtitle:
1239                     if 'ext' not in subtitle_format:
1240                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1241
1242         if self.params.get('listsubtitles', False):
1243             if 'automatic_captions' in info_dict:
1244                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1245             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1246             return
1247         info_dict['requested_subtitles'] = self.process_subtitles(
1248             info_dict['id'], subtitles,
1249             info_dict.get('automatic_captions'))
1250
1251         # We now pick which formats have to be downloaded
1252         if info_dict.get('formats') is None:
1253             # There's only one format available
1254             formats = [info_dict]
1255         else:
1256             formats = info_dict['formats']
1257
1258         if not formats:
1259             raise ExtractorError('No video formats found!')
1260
1261         formats_dict = {}
1262
1263         # We check that all the formats have the format and format_id fields
1264         for i, format in enumerate(formats):
1265             if 'url' not in format:
1266                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1267
1268             if format.get('format_id') is None:
1269                 format['format_id'] = compat_str(i)
1270             format_id = format['format_id']
1271             if format_id not in formats_dict:
1272                 formats_dict[format_id] = []
1273             formats_dict[format_id].append(format)
1274
1275         # Make sure all formats have unique format_id
1276         for format_id, ambiguous_formats in formats_dict.items():
1277             if len(ambiguous_formats) > 1:
1278                 for i, format in enumerate(ambiguous_formats):
1279                     format['format_id'] = '%s-%d' % (format_id, i)
1280
1281         for i, format in enumerate(formats):
1282             if format.get('format') is None:
1283                 format['format'] = '{id} - {res}{note}'.format(
1284                     id=format['format_id'],
1285                     res=self.format_resolution(format),
1286                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1287                 )
1288             # Automatically determine file extension if missing
1289             if 'ext' not in format:
1290                 format['ext'] = determine_ext(format['url']).lower()
1291             # Add HTTP headers, so that external programs can use them from the
1292             # json output
1293             full_format_info = info_dict.copy()
1294             full_format_info.update(format)
1295             format['http_headers'] = self._calc_headers(full_format_info)
1296
1297         # TODO Central sorting goes here
1298
1299         if formats[0] is not info_dict:
1300             # only set the 'formats' fields if the original info_dict list them
1301             # otherwise we end up with a circular reference, the first (and unique)
1302             # element in the 'formats' field in info_dict is info_dict itself,
1303             # which can't be exported to json
1304             info_dict['formats'] = formats
1305         if self.params.get('listformats'):
1306             self.list_formats(info_dict)
1307             return
1308         if self.params.get('list_thumbnails'):
1309             self.list_thumbnails(info_dict)
1310             return
1311
1312         req_format = self.params.get('format')
1313         if req_format is None:
1314             req_format_list = []
1315             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1316                     info_dict['extractor'] in ['youtube', 'ted'] and
1317                     not info_dict.get('is_live')):
1318                 merger = FFmpegMergerPP(self)
1319                 if merger.available and merger.can_merge():
1320                     req_format_list.append('bestvideo+bestaudio')
1321             req_format_list.append('best')
1322             req_format = '/'.join(req_format_list)
1323         format_selector = self.build_format_selector(req_format)
1324         formats_to_download = list(format_selector(formats))
1325         if not formats_to_download:
1326             raise ExtractorError('requested format not available',
1327                                  expected=True)
1328
1329         if download:
1330             if len(formats_to_download) > 1:
1331                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1332             for format in formats_to_download:
1333                 new_info = dict(info_dict)
1334                 new_info.update(format)
1335                 self.process_info(new_info)
1336         # We update the info dict with the best quality format (backwards compatibility)
1337         info_dict.update(formats_to_download[-1])
1338         return info_dict
1339
1340     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1341         """Select the requested subtitles and their format"""
1342         available_subs = {}
1343         if normal_subtitles and self.params.get('writesubtitles'):
1344             available_subs.update(normal_subtitles)
1345         if automatic_captions and self.params.get('writeautomaticsub'):
1346             for lang, cap_info in automatic_captions.items():
1347                 if lang not in available_subs:
1348                     available_subs[lang] = cap_info
1349
1350         if (not self.params.get('writesubtitles') and not
1351                 self.params.get('writeautomaticsub') or not
1352                 available_subs):
1353             return None
1354
1355         if self.params.get('allsubtitles', False):
1356             requested_langs = available_subs.keys()
1357         else:
1358             if self.params.get('subtitleslangs', False):
1359                 requested_langs = self.params.get('subtitleslangs')
1360             elif 'en' in available_subs:
1361                 requested_langs = ['en']
1362             else:
1363                 requested_langs = [list(available_subs.keys())[0]]
1364
1365         formats_query = self.params.get('subtitlesformat', 'best')
1366         formats_preference = formats_query.split('/') if formats_query else []
1367         subs = {}
1368         for lang in requested_langs:
1369             formats = available_subs.get(lang)
1370             if formats is None:
1371                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1372                 continue
1373             for ext in formats_preference:
1374                 if ext == 'best':
1375                     f = formats[-1]
1376                     break
1377                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1378                 if matches:
1379                     f = matches[-1]
1380                     break
1381             else:
1382                 f = formats[-1]
1383                 self.report_warning(
1384                     'No subtitle format found matching "%s" for language %s, '
1385                     'using %s' % (formats_query, lang, f['ext']))
1386             subs[lang] = f
1387         return subs
1388
1389     def process_info(self, info_dict):
1390         """Process a single resolved IE result."""
1391
1392         assert info_dict.get('_type', 'video') == 'video'
1393
1394         max_downloads = self.params.get('max_downloads')
1395         if max_downloads is not None:
1396             if self._num_downloads >= int(max_downloads):
1397                 raise MaxDownloadsReached()
1398
1399         info_dict['fulltitle'] = info_dict['title']
1400         if len(info_dict['title']) > 200:
1401             info_dict['title'] = info_dict['title'][:197] + '...'
1402
1403         if 'format' not in info_dict:
1404             info_dict['format'] = info_dict['ext']
1405
1406         reason = self._match_entry(info_dict, incomplete=False)
1407         if reason is not None:
1408             self.to_screen('[download] ' + reason)
1409             return
1410
1411         self._num_downloads += 1
1412
1413         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1414
1415         # Forced printings
1416         if self.params.get('forcetitle', False):
1417             self.to_stdout(info_dict['fulltitle'])
1418         if self.params.get('forceid', False):
1419             self.to_stdout(info_dict['id'])
1420         if self.params.get('forceurl', False):
1421             if info_dict.get('requested_formats') is not None:
1422                 for f in info_dict['requested_formats']:
1423                     self.to_stdout(f['url'] + f.get('play_path', ''))
1424             else:
1425                 # For RTMP URLs, also include the playpath
1426                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1427         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1428             self.to_stdout(info_dict['thumbnail'])
1429         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1430             self.to_stdout(info_dict['description'])
1431         if self.params.get('forcefilename', False) and filename is not None:
1432             self.to_stdout(filename)
1433         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1434             self.to_stdout(formatSeconds(info_dict['duration']))
1435         if self.params.get('forceformat', False):
1436             self.to_stdout(info_dict['format'])
1437         if self.params.get('forcejson', False):
1438             self.to_stdout(json.dumps(info_dict))
1439
1440         # Do nothing else if in simulate mode
1441         if self.params.get('simulate', False):
1442             return
1443
1444         if filename is None:
1445             return
1446
1447         try:
1448             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1449             if dn and not os.path.exists(dn):
1450                 os.makedirs(dn)
1451         except (OSError, IOError) as err:
1452             self.report_error('unable to create directory ' + compat_str(err))
1453             return
1454
1455         if self.params.get('writedescription', False):
1456             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1457             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1458                 self.to_screen('[info] Video description is already present')
1459             elif info_dict.get('description') is None:
1460                 self.report_warning('There\'s no description to write.')
1461             else:
1462                 try:
1463                     self.to_screen('[info] Writing video description to: ' + descfn)
1464                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1465                         descfile.write(info_dict['description'])
1466                 except (OSError, IOError):
1467                     self.report_error('Cannot write description file ' + descfn)
1468                     return
1469
1470         if self.params.get('writeannotations', False):
1471             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1472             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1473                 self.to_screen('[info] Video annotations are already present')
1474             else:
1475                 try:
1476                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1477                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1478                         annofile.write(info_dict['annotations'])
1479                 except (KeyError, TypeError):
1480                     self.report_warning('There are no annotations to write.')
1481                 except (OSError, IOError):
1482                     self.report_error('Cannot write annotations file: ' + annofn)
1483                     return
1484
1485         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1486                                        self.params.get('writeautomaticsub')])
1487
1488         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1489             # subtitles download errors are already managed as troubles in relevant IE
1490             # that way it will silently go on when used with unsupporting IE
1491             subtitles = info_dict['requested_subtitles']
1492             ie = self.get_info_extractor(info_dict['extractor_key'])
1493             for sub_lang, sub_info in subtitles.items():
1494                 sub_format = sub_info['ext']
1495                 if sub_info.get('data') is not None:
1496                     sub_data = sub_info['data']
1497                 else:
1498                     try:
1499                         sub_data = ie._download_webpage(
1500                             sub_info['url'], info_dict['id'], note=False)
1501                     except ExtractorError as err:
1502                         self.report_warning('Unable to download subtitle for "%s": %s' %
1503                                             (sub_lang, compat_str(err.cause)))
1504                         continue
1505                 try:
1506                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1507                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1508                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1509                     else:
1510                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1511                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1512                             subfile.write(sub_data)
1513                 except (OSError, IOError):
1514                     self.report_error('Cannot write subtitles file ' + sub_filename)
1515                     return
1516
1517         if self.params.get('writeinfojson', False):
1518             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1519             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1520                 self.to_screen('[info] Video description metadata is already present')
1521             else:
1522                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1523                 try:
1524                     write_json_file(self.filter_requested_info(info_dict), infofn)
1525                 except (OSError, IOError):
1526                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1527                     return
1528
1529         self._write_thumbnails(info_dict, filename)
1530
1531         if not self.params.get('skip_download', False):
1532             try:
1533                 def dl(name, info):
1534                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1535                     for ph in self._progress_hooks:
1536                         fd.add_progress_hook(ph)
1537                     if self.params.get('verbose'):
1538                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1539                     return fd.download(name, info)
1540
1541                 if info_dict.get('requested_formats') is not None:
1542                     downloaded = []
1543                     success = True
1544                     merger = FFmpegMergerPP(self)
1545                     if not merger.available:
1546                         postprocessors = []
1547                         self.report_warning('You have requested multiple '
1548                                             'formats but ffmpeg or avconv are not installed.'
1549                                             ' The formats won\'t be merged.')
1550                     else:
1551                         postprocessors = [merger]
1552
1553                     def compatible_formats(formats):
1554                         video, audio = formats
1555                         # Check extension
1556                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1557                         if video_ext and audio_ext:
1558                             COMPATIBLE_EXTS = (
1559                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1560                                 ('webm')
1561                             )
1562                             for exts in COMPATIBLE_EXTS:
1563                                 if video_ext in exts and audio_ext in exts:
1564                                     return True
1565                         # TODO: Check acodec/vcodec
1566                         return False
1567
1568                     filename_real_ext = os.path.splitext(filename)[1][1:]
1569                     filename_wo_ext = (
1570                         os.path.splitext(filename)[0]
1571                         if filename_real_ext == info_dict['ext']
1572                         else filename)
1573                     requested_formats = info_dict['requested_formats']
1574                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1575                         info_dict['ext'] = 'mkv'
1576                         self.report_warning(
1577                             'Requested formats are incompatible for merge and will be merged into mkv.')
1578                     # Ensure filename always has a correct extension for successful merge
1579                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1580                     if os.path.exists(encodeFilename(filename)):
1581                         self.to_screen(
1582                             '[download] %s has already been downloaded and '
1583                             'merged' % filename)
1584                     else:
1585                         for f in requested_formats:
1586                             new_info = dict(info_dict)
1587                             new_info.update(f)
1588                             fname = self.prepare_filename(new_info)
1589                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1590                             downloaded.append(fname)
1591                             partial_success = dl(fname, new_info)
1592                             success = success and partial_success
1593                         info_dict['__postprocessors'] = postprocessors
1594                         info_dict['__files_to_merge'] = downloaded
1595                 else:
1596                     # Just a single file
1597                     success = dl(filename, info_dict)
1598             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1599                 self.report_error('unable to download video data: %s' % str(err))
1600                 return
1601             except (OSError, IOError) as err:
1602                 raise UnavailableVideoError(err)
1603             except (ContentTooShortError, ) as err:
1604                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1605                 return
1606
1607             if success:
1608                 # Fixup content
1609                 fixup_policy = self.params.get('fixup')
1610                 if fixup_policy is None:
1611                     fixup_policy = 'detect_or_warn'
1612
1613                 stretched_ratio = info_dict.get('stretched_ratio')
1614                 if stretched_ratio is not None and stretched_ratio != 1:
1615                     if fixup_policy == 'warn':
1616                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1617                             info_dict['id'], stretched_ratio))
1618                     elif fixup_policy == 'detect_or_warn':
1619                         stretched_pp = FFmpegFixupStretchedPP(self)
1620                         if stretched_pp.available:
1621                             info_dict.setdefault('__postprocessors', [])
1622                             info_dict['__postprocessors'].append(stretched_pp)
1623                         else:
1624                             self.report_warning(
1625                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1626                                     info_dict['id'], stretched_ratio))
1627                     else:
1628                         assert fixup_policy in ('ignore', 'never')
1629
1630                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1631                     if fixup_policy == 'warn':
1632                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1633                             info_dict['id']))
1634                     elif fixup_policy == 'detect_or_warn':
1635                         fixup_pp = FFmpegFixupM4aPP(self)
1636                         if fixup_pp.available:
1637                             info_dict.setdefault('__postprocessors', [])
1638                             info_dict['__postprocessors'].append(fixup_pp)
1639                         else:
1640                             self.report_warning(
1641                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1642                                     info_dict['id']))
1643                     else:
1644                         assert fixup_policy in ('ignore', 'never')
1645
1646                 try:
1647                     self.post_process(filename, info_dict)
1648                 except (PostProcessingError) as err:
1649                     self.report_error('postprocessing: %s' % str(err))
1650                     return
1651                 self.record_download_archive(info_dict)
1652
1653     def download(self, url_list):
1654         """Download a given list of URLs."""
1655         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1656         if (len(url_list) > 1 and
1657                 '%' not in outtmpl and
1658                 self.params.get('max_downloads') != 1):
1659             raise SameFileError(outtmpl)
1660
1661         for url in url_list:
1662             try:
1663                 # It also downloads the videos
1664                 res = self.extract_info(
1665                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1666             except UnavailableVideoError:
1667                 self.report_error('unable to download video')
1668             except MaxDownloadsReached:
1669                 self.to_screen('[info] Maximum number of downloaded files reached.')
1670                 raise
1671             else:
1672                 if self.params.get('dump_single_json', False):
1673                     self.to_stdout(json.dumps(res))
1674
1675         return self._download_retcode
1676
1677     def download_with_info_file(self, info_filename):
1678         with contextlib.closing(fileinput.FileInput(
1679                 [info_filename], mode='r',
1680                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1681             # FileInput doesn't have a read method, we can't call json.load
1682             info = self.filter_requested_info(json.loads('\n'.join(f)))
1683         try:
1684             self.process_ie_result(info, download=True)
1685         except DownloadError:
1686             webpage_url = info.get('webpage_url')
1687             if webpage_url is not None:
1688                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1689                 return self.download([webpage_url])
1690             else:
1691                 raise
1692         return self._download_retcode
1693
1694     @staticmethod
1695     def filter_requested_info(info_dict):
1696         return dict(
1697             (k, v) for k, v in info_dict.items()
1698             if k not in ['requested_formats', 'requested_subtitles'])
1699
1700     def post_process(self, filename, ie_info):
1701         """Run all the postprocessors on the given file."""
1702         info = dict(ie_info)
1703         info['filepath'] = filename
1704         pps_chain = []
1705         if ie_info.get('__postprocessors') is not None:
1706             pps_chain.extend(ie_info['__postprocessors'])
1707         pps_chain.extend(self._pps)
1708         for pp in pps_chain:
1709             files_to_delete = []
1710             try:
1711                 files_to_delete, info = pp.run(info)
1712             except PostProcessingError as e:
1713                 self.report_error(e.msg)
1714             if files_to_delete and not self.params.get('keepvideo', False):
1715                 for old_filename in files_to_delete:
1716                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1717                     try:
1718                         os.remove(encodeFilename(old_filename))
1719                     except (IOError, OSError):
1720                         self.report_warning('Unable to remove downloaded original file')
1721
1722     def _make_archive_id(self, info_dict):
1723         # Future-proof against any change in case
1724         # and backwards compatibility with prior versions
1725         extractor = info_dict.get('extractor_key')
1726         if extractor is None:
1727             if 'id' in info_dict:
1728                 extractor = info_dict.get('ie_key')  # key in a playlist
1729         if extractor is None:
1730             return None  # Incomplete video information
1731         return extractor.lower() + ' ' + info_dict['id']
1732
1733     def in_download_archive(self, info_dict):
1734         fn = self.params.get('download_archive')
1735         if fn is None:
1736             return False
1737
1738         vid_id = self._make_archive_id(info_dict)
1739         if vid_id is None:
1740             return False  # Incomplete video information
1741
1742         try:
1743             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1744                 for line in archive_file:
1745                     if line.strip() == vid_id:
1746                         return True
1747         except IOError as ioe:
1748             if ioe.errno != errno.ENOENT:
1749                 raise
1750         return False
1751
1752     def record_download_archive(self, info_dict):
1753         fn = self.params.get('download_archive')
1754         if fn is None:
1755             return
1756         vid_id = self._make_archive_id(info_dict)
1757         assert vid_id
1758         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1759             archive_file.write(vid_id + '\n')
1760
1761     @staticmethod
1762     def format_resolution(format, default='unknown'):
1763         if format.get('vcodec') == 'none':
1764             return 'audio only'
1765         if format.get('resolution') is not None:
1766             return format['resolution']
1767         if format.get('height') is not None:
1768             if format.get('width') is not None:
1769                 res = '%sx%s' % (format['width'], format['height'])
1770             else:
1771                 res = '%sp' % format['height']
1772         elif format.get('width') is not None:
1773             res = '?x%d' % format['width']
1774         else:
1775             res = default
1776         return res
1777
1778     def _format_note(self, fdict):
1779         res = ''
1780         if fdict.get('ext') in ['f4f', 'f4m']:
1781             res += '(unsupported) '
1782         if fdict.get('format_note') is not None:
1783             res += fdict['format_note'] + ' '
1784         if fdict.get('tbr') is not None:
1785             res += '%4dk ' % fdict['tbr']
1786         if fdict.get('container') is not None:
1787             if res:
1788                 res += ', '
1789             res += '%s container' % fdict['container']
1790         if (fdict.get('vcodec') is not None and
1791                 fdict.get('vcodec') != 'none'):
1792             if res:
1793                 res += ', '
1794             res += fdict['vcodec']
1795             if fdict.get('vbr') is not None:
1796                 res += '@'
1797         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1798             res += 'video@'
1799         if fdict.get('vbr') is not None:
1800             res += '%4dk' % fdict['vbr']
1801         if fdict.get('fps') is not None:
1802             res += ', %sfps' % fdict['fps']
1803         if fdict.get('acodec') is not None:
1804             if res:
1805                 res += ', '
1806             if fdict['acodec'] == 'none':
1807                 res += 'video only'
1808             else:
1809                 res += '%-5s' % fdict['acodec']
1810         elif fdict.get('abr') is not None:
1811             if res:
1812                 res += ', '
1813             res += 'audio'
1814         if fdict.get('abr') is not None:
1815             res += '@%3dk' % fdict['abr']
1816         if fdict.get('asr') is not None:
1817             res += ' (%5dHz)' % fdict['asr']
1818         if fdict.get('filesize') is not None:
1819             if res:
1820                 res += ', '
1821             res += format_bytes(fdict['filesize'])
1822         elif fdict.get('filesize_approx') is not None:
1823             if res:
1824                 res += ', '
1825             res += '~' + format_bytes(fdict['filesize_approx'])
1826         return res
1827
1828     def list_formats(self, info_dict):
1829         formats = info_dict.get('formats', [info_dict])
1830         table = [
1831             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1832             for f in formats
1833             if f.get('preference') is None or f['preference'] >= -1000]
1834         if len(formats) > 1:
1835             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1836
1837         header_line = ['format code', 'extension', 'resolution', 'note']
1838         self.to_screen(
1839             '[info] Available formats for %s:\n%s' %
1840             (info_dict['id'], render_table(header_line, table)))
1841
1842     def list_thumbnails(self, info_dict):
1843         thumbnails = info_dict.get('thumbnails')
1844         if not thumbnails:
1845             tn_url = info_dict.get('thumbnail')
1846             if tn_url:
1847                 thumbnails = [{'id': '0', 'url': tn_url}]
1848             else:
1849                 self.to_screen(
1850                     '[info] No thumbnails present for %s' % info_dict['id'])
1851                 return
1852
1853         self.to_screen(
1854             '[info] Thumbnails for %s:' % info_dict['id'])
1855         self.to_screen(render_table(
1856             ['ID', 'width', 'height', 'URL'],
1857             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1858
1859     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1860         if not subtitles:
1861             self.to_screen('%s has no %s' % (video_id, name))
1862             return
1863         self.to_screen(
1864             'Available %s for %s:' % (name, video_id))
1865         self.to_screen(render_table(
1866             ['Language', 'formats'],
1867             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1868                 for lang, formats in subtitles.items()]))
1869
1870     def urlopen(self, req):
1871         """ Start an HTTP download """
1872         return self._opener.open(req, timeout=self._socket_timeout)
1873
1874     def print_debug_header(self):
1875         if not self.params.get('verbose'):
1876             return
1877
1878         if type('') is not compat_str:
1879             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1880             self.report_warning(
1881                 'Your Python is broken! Update to a newer and supported version')
1882
1883         stdout_encoding = getattr(
1884             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1885         encoding_str = (
1886             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1887                 locale.getpreferredencoding(),
1888                 sys.getfilesystemencoding(),
1889                 stdout_encoding,
1890                 self.get_encoding()))
1891         write_string(encoding_str, encoding=None)
1892
1893         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1894         try:
1895             sp = subprocess.Popen(
1896                 ['git', 'rev-parse', '--short', 'HEAD'],
1897                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1898                 cwd=os.path.dirname(os.path.abspath(__file__)))
1899             out, err = sp.communicate()
1900             out = out.decode().strip()
1901             if re.match('[0-9a-f]+', out):
1902                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1903         except Exception:
1904             try:
1905                 sys.exc_clear()
1906             except Exception:
1907                 pass
1908         self._write_string('[debug] Python version %s - %s\n' % (
1909             platform.python_version(), platform_name()))
1910
1911         exe_versions = FFmpegPostProcessor.get_versions(self)
1912         exe_versions['rtmpdump'] = rtmpdump_version()
1913         exe_str = ', '.join(
1914             '%s %s' % (exe, v)
1915             for exe, v in sorted(exe_versions.items())
1916             if v
1917         )
1918         if not exe_str:
1919             exe_str = 'none'
1920         self._write_string('[debug] exe versions: %s\n' % exe_str)
1921
1922         proxy_map = {}
1923         for handler in self._opener.handlers:
1924             if hasattr(handler, 'proxies'):
1925                 proxy_map.update(handler.proxies)
1926         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1927
1928         if self.params.get('call_home', False):
1929             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1930             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1931             latest_version = self.urlopen(
1932                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1933             if version_tuple(latest_version) > version_tuple(__version__):
1934                 self.report_warning(
1935                     'You are using an outdated version (newest version: %s)! '
1936                     'See https://yt-dl.org/update if you need help updating.' %
1937                     latest_version)
1938
1939     def _setup_opener(self):
1940         timeout_val = self.params.get('socket_timeout')
1941         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1942
1943         opts_cookiefile = self.params.get('cookiefile')
1944         opts_proxy = self.params.get('proxy')
1945
1946         if opts_cookiefile is None:
1947             self.cookiejar = compat_cookiejar.CookieJar()
1948         else:
1949             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1950                 opts_cookiefile)
1951             if os.access(opts_cookiefile, os.R_OK):
1952                 self.cookiejar.load()
1953
1954         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1955         if opts_proxy is not None:
1956             if opts_proxy == '':
1957                 proxies = {}
1958             else:
1959                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1960         else:
1961             proxies = compat_urllib_request.getproxies()
1962             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1963             if 'http' in proxies and 'https' not in proxies:
1964                 proxies['https'] = proxies['http']
1965         proxy_handler = PerRequestProxyHandler(proxies)
1966
1967         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1968         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1969         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1970         opener = compat_urllib_request.build_opener(
1971             proxy_handler, https_handler, cookie_processor, ydlh)
1972
1973         # Delete the default user-agent header, which would otherwise apply in
1974         # cases where our custom HTTP handler doesn't come into play
1975         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1976         opener.addheaders = []
1977         self._opener = opener
1978
1979     def encode(self, s):
1980         if isinstance(s, bytes):
1981             return s  # Already encoded
1982
1983         try:
1984             return s.encode(self.get_encoding())
1985         except UnicodeEncodeError as err:
1986             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1987             raise
1988
1989     def get_encoding(self):
1990         encoding = self.params.get('encoding')
1991         if encoding is None:
1992             encoding = preferredencoding()
1993         return encoding
1994
1995     def _write_thumbnails(self, info_dict, filename):
1996         if self.params.get('writethumbnail', False):
1997             thumbnails = info_dict.get('thumbnails')
1998             if thumbnails:
1999                 thumbnails = [thumbnails[-1]]
2000         elif self.params.get('write_all_thumbnails', False):
2001             thumbnails = info_dict.get('thumbnails')
2002         else:
2003             return
2004
2005         if not thumbnails:
2006             # No thumbnails present, so return immediately
2007             return
2008
2009         for t in thumbnails:
2010             thumb_ext = determine_ext(t['url'], 'jpg')
2011             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2012             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2013             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2014
2015             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2016                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2017                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2018             else:
2019                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2020                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2021                 try:
2022                     uf = self.urlopen(t['url'])
2023                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2024                         shutil.copyfileobj(uf, thumbf)
2025                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2026                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2027                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2028                     self.report_warning('Unable to download thumbnail "%s": %s' %
2029                                         (t['url'], compat_str(err)))