4 from __future__ import absolute_import, unicode_literals
32 compat_get_terminal_size,
38 compat_tokenize_tokenize,
40 compat_urllib_request,
41 compat_urllib_request_DataHandler,
67 PerRequestProxyHandler,
72 register_socks_protocols,
82 UnavailableVideoError,
87 YoutubeDLCookieProcessor,
90 from .cache import Cache
91 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
92 from .downloader import get_suitable_downloader
93 from .downloader.rtmp import rtmpdump_version
94 from .postprocessor import (
97 FFmpegFixupStretchedPP,
102 from .version import __version__
104 if compat_os_name == 'nt':
108 class YoutubeDL(object):
111 YoutubeDL objects are the ones responsible of downloading the
112 actual video file and writing it to disk if the user has requested
113 it, among some other tasks. In most cases there should be one per
114 program. As, given a video URL, the downloader doesn't know how to
115 extract all the needed information, task that InfoExtractors do, it
116 has to pass the URL to one of them.
118 For this, YoutubeDL objects have a method that allows
119 InfoExtractors to be registered in a given order. When it is passed
120 a URL, the YoutubeDL object handles it to the first InfoExtractor it
121 finds that reports being able to handle it. The InfoExtractor extracts
122 all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
124 Downloader to download the video.
126 YoutubeDL objects accept a lot of parameters. In order not to saturate
127 the object constructor with arguments, it receives a dictionary of
128 options instead. These options are available through the params
129 attribute for the InfoExtractors to use. The YoutubeDL also
130 registers itself as the downloader in charge for the InfoExtractors
131 that are added to it, so this is a "mutual registration".
135 username: Username for authentication purposes.
136 password: Password for authentication purposes.
137 videopassword: Password for accessing a video.
138 ap_mso: Adobe Pass multiple-system operator identifier.
139 ap_username: Multiple-system operator account username.
140 ap_password: Multiple-system operator account password.
141 usenetrc: Use netrc for authentication instead.
142 verbose: Print additional info to stdout.
143 quiet: Do not print messages to stdout.
144 no_warnings: Do not print out anything for warnings.
145 forceurl: Force printing final URL.
146 forcetitle: Force printing title.
147 forceid: Force printing ID.
148 forcethumbnail: Force printing thumbnail URL.
149 forcedescription: Force printing description.
150 forcefilename: Force printing final filename.
151 forceduration: Force printing duration.
152 forcejson: Force printing info_dict as JSON.
153 dump_single_json: Force printing the info_dict of the whole playlist
154 (or video) as a single JSON line.
155 simulate: Do not download the video files.
156 format: Video format code. See options.py for more information.
157 outtmpl: Template for output names.
158 restrictfilenames: Do not allow "&" and spaces in file names
159 ignoreerrors: Do not stop on download errors.
160 force_generic_extractor: Force downloader to use the generic extractor
161 nooverwrites: Prevent overwriting files.
162 playliststart: Playlist item to start at.
163 playlistend: Playlist item to end at.
164 playlist_items: Specific indices of playlist to download.
165 playlistreverse: Download playlist items in reverse order.
166 playlistrandom: Download playlist items in random order.
167 matchtitle: Download only matching titles.
168 rejecttitle: Reject downloads for matching titles.
169 logger: Log messages to a logging.Logger instance.
170 logtostderr: Log messages to stderr instead of stdout.
171 writedescription: Write the video description to a .description file
172 writeinfojson: Write the video description to a .info.json file
173 writeannotations: Write the video annotations to a .annotations.xml file
174 writethumbnail: Write the thumbnail image to a file
175 write_all_thumbnails: Write all thumbnail formats to files
176 writesubtitles: Write the video subtitles to a file
177 writeautomaticsub: Write the automatically generated subtitles to a file
178 allsubtitles: Downloads all the subtitles of the video
179 (requires writesubtitles or writeautomaticsub)
180 listsubtitles: Lists all available subtitles for the video
181 subtitlesformat: The format code for subtitles
182 subtitleslangs: List of languages of the subtitles to download
183 keepvideo: Keep the video file after post-processing
184 daterange: A DateRange object, download only if the upload_date is in the range.
185 skip_download: Skip the actual download of the video file
186 cachedir: Location of the cache files in the filesystem.
187 False to disable filesystem cache.
188 noplaylist: Download single video instead of a playlist if in doubt.
189 age_limit: An integer representing the user's age in years.
190 Unsuitable videos for the given age are skipped.
191 min_views: An integer representing the minimum view count the video
192 must have in order to not be skipped.
193 Videos without view count information are always
194 downloaded. None for no limit.
195 max_views: An integer representing the maximum view count.
196 Videos that are more popular than that are not
198 Videos without view count information are always
199 downloaded. None for no limit.
200 download_archive: File name of a file where all downloads are recorded.
201 Videos already present in the file are not downloaded
203 cookiefile: File name where cookies should be read from and dumped to.
204 nocheckcertificate:Do not verify SSL certificates
205 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
206 At the moment, this is only supported by YouTube.
207 proxy: URL of the proxy server to use
208 geo_verification_proxy: URL of the proxy to use for IP address verification
209 on geo-restricted sites. (Experimental)
210 socket_timeout: Time to wait for unresponsive hosts, in seconds
211 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
213 debug_printtraffic:Print out sent and received HTTP traffic
214 include_ads: Download ads as well
215 default_search: Prepend this string if an input url is not valid.
216 'auto' for elaborate guessing
217 encoding: Use this encoding instead of the system-specified.
218 extract_flat: Do not resolve URLs, return the immediate result.
219 Pass in 'in_playlist' to only show this behavior for
221 postprocessors: A list of dictionaries, each with an entry
222 * key: The name of the postprocessor. See
223 youtube_dl/postprocessor/__init__.py for a list.
224 as well as any further keyword arguments for the
226 progress_hooks: A list of functions that get called on download
227 progress, with a dictionary with the entries
228 * status: One of "downloading", "error", or "finished".
229 Check this first and ignore unknown values.
231 If status is one of "downloading", or "finished", the
232 following properties may also be present:
233 * filename: The final filename (always present)
234 * tmpfilename: The filename we're currently writing to
235 * downloaded_bytes: Bytes on disk
236 * total_bytes: Size of the whole file, None if unknown
237 * total_bytes_estimate: Guess of the eventual file size,
239 * elapsed: The number of seconds since download started.
240 * eta: The estimated time in seconds, None if unknown
241 * speed: The download speed in bytes/second, None if
243 * fragment_index: The counter of the currently
244 downloaded video fragment.
245 * fragment_count: The number of fragments (= individual
246 files that will be merged)
248 Progress hooks are guaranteed to be called at least once
249 (with status "finished") if the download is successful.
250 merge_output_format: Extension to use when merging formats.
251 fixup: Automatically correct known faults of the file.
253 - "never": do nothing
254 - "warn": only emit a warning
255 - "detect_or_warn": check whether we can do anything
256 about it, warn otherwise (default)
257 source_address: (Experimental) Client-side IP address to bind to.
258 call_home: Boolean, true iff we are allowed to contact the
259 youtube-dl servers for debugging.
260 sleep_interval: Number of seconds to sleep before each download when
261 used alone or a lower bound of a range for randomized
262 sleep before each download (minimum possible number
263 of seconds to sleep) when used along with
265 max_sleep_interval:Upper bound of a range for randomized sleep before each
266 download (maximum possible number of seconds to sleep).
267 Must only be used along with sleep_interval.
268 Actual sleep time will be a random float from range
269 [sleep_interval; max_sleep_interval].
270 listformats: Print an overview of available video formats and exit.
271 list_thumbnails: Print a table of all thumbnails and exit.
272 match_filter: A function that gets called with the info_dict of
274 If it returns a message, the video is ignored.
275 If it returns None, the video is downloaded.
276 match_filter_func in utils.py is one example for this.
277 no_color: Do not emit color codes in output.
278 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
279 HTTP header (experimental)
281 Two-letter ISO 3166-2 country code that will be used for
282 explicit geographic restriction bypassing via faking
283 X-Forwarded-For HTTP header (experimental)
285 The following options determine which downloader is picked:
286 external_downloader: Executable of the external downloader to call.
287 None or unset for standard (built-in) downloader.
288 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
289 if True, otherwise use ffmpeg/avconv if False, otherwise
290 use downloader suggested by extractor if None.
292 The following parameters are not used by YoutubeDL itself, they are used by
293 the downloader (see youtube_dl/downloader/common.py):
294 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
295 noresizebuffer, retries, continuedl, noprogress, consoletitle,
296 xattr_set_filesize, external_downloader_args, hls_use_mpegts.
298 The following options are used by the post processors:
299 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
300 otherwise prefer avconv.
301 postprocessor_args: A list of additional command-line arguments for the
308 _download_retcode = None
309 _num_downloads = None
312 def __init__(self, params=None, auto_init=True):
313 """Create a FileDownloader object with the given options."""
317 self._ies_instances = {}
319 self._progress_hooks = []
320 self._download_retcode = 0
321 self._num_downloads = 0
322 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
323 self._err_file = sys.stderr
326 'nocheckcertificate': False,
328 self.params.update(params)
329 self.cache = Cache(self)
331 def check_deprecated(param, option, suggestion):
332 if self.params.get(param) is not None:
334 '%s is deprecated. Use %s instead.' % (option, suggestion))
338 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
339 if self.params.get('geo_verification_proxy') is None:
340 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
342 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
343 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
344 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
346 if params.get('bidi_workaround', False):
349 master, slave = pty.openpty()
350 width = compat_get_terminal_size().columns
354 width_args = ['-w', str(width)]
356 stdin=subprocess.PIPE,
358 stderr=self._err_file)
360 self._output_process = subprocess.Popen(
361 ['bidiv'] + width_args, **sp_kwargs
364 self._output_process = subprocess.Popen(
365 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
366 self._output_channel = os.fdopen(master, 'rb')
367 except OSError as ose:
368 if ose.errno == errno.ENOENT:
369 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
373 if (sys.version_info >= (3,) and sys.platform != 'win32' and
374 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
375 not params.get('restrictfilenames', False)):
376 # On Python 3, the Unicode filesystem API will throw errors (#1474)
378 'Assuming --restrict-filenames since file system encoding '
379 'cannot encode all characters. '
380 'Set the LC_ALL environment variable to fix this.')
381 self.params['restrictfilenames'] = True
383 if isinstance(params.get('outtmpl'), bytes):
385 'Parameter outtmpl is bytes, but should be a unicode string. '
386 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
391 self.print_debug_header()
392 self.add_default_info_extractors()
394 for pp_def_raw in self.params.get('postprocessors', []):
395 pp_class = get_postprocessor(pp_def_raw['key'])
396 pp_def = dict(pp_def_raw)
398 pp = pp_class(self, **compat_kwargs(pp_def))
399 self.add_post_processor(pp)
401 for ph in self.params.get('progress_hooks', []):
402 self.add_progress_hook(ph)
404 register_socks_protocols()
406 def warn_if_short_id(self, argv):
407 # short YouTube ID starting with dash?
409 i for i, a in enumerate(argv)
410 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
414 [a for i, a in enumerate(argv) if i not in idxs] +
415 ['--'] + [argv[i] for i in idxs]
418 'Long argument string detected. '
419 'Use -- to separate parameters and URLs, like this:\n%s\n' %
420 args_to_str(correct_argv))
422 def add_info_extractor(self, ie):
423 """Add an InfoExtractor object to the end of the list."""
425 if not isinstance(ie, type):
426 self._ies_instances[ie.ie_key()] = ie
427 ie.set_downloader(self)
429 def get_info_extractor(self, ie_key):
431 Get an instance of an IE with name ie_key, it will try to get one from
432 the _ies list, if there's no instance it will create a new one and add
433 it to the extractor list.
435 ie = self._ies_instances.get(ie_key)
437 ie = get_info_extractor(ie_key)()
438 self.add_info_extractor(ie)
441 def add_default_info_extractors(self):
443 Add the InfoExtractors returned by gen_extractors to the end of the list
445 for ie in gen_extractor_classes():
446 self.add_info_extractor(ie)
448 def add_post_processor(self, pp):
449 """Add a PostProcessor object to the end of the chain."""
451 pp.set_downloader(self)
453 def add_progress_hook(self, ph):
454 """Add the progress hook (currently only for the file downloader)"""
455 self._progress_hooks.append(ph)
457 def _bidi_workaround(self, message):
458 if not hasattr(self, '_output_channel'):
461 assert hasattr(self, '_output_process')
462 assert isinstance(message, compat_str)
463 line_count = message.count('\n') + 1
464 self._output_process.stdin.write((message + '\n').encode('utf-8'))
465 self._output_process.stdin.flush()
466 res = ''.join(self._output_channel.readline().decode('utf-8')
467 for _ in range(line_count))
468 return res[:-len('\n')]
470 def to_screen(self, message, skip_eol=False):
471 """Print message to stdout if not in quiet mode."""
472 return self.to_stdout(message, skip_eol, check_quiet=True)
474 def _write_string(self, s, out=None):
475 write_string(s, out=out, encoding=self.params.get('encoding'))
477 def to_stdout(self, message, skip_eol=False, check_quiet=False):
478 """Print message to stdout if not in quiet mode."""
479 if self.params.get('logger'):
480 self.params['logger'].debug(message)
481 elif not check_quiet or not self.params.get('quiet', False):
482 message = self._bidi_workaround(message)
483 terminator = ['\n', ''][skip_eol]
484 output = message + terminator
486 self._write_string(output, self._screen_file)
488 def to_stderr(self, message):
489 """Print message to stderr."""
490 assert isinstance(message, compat_str)
491 if self.params.get('logger'):
492 self.params['logger'].error(message)
494 message = self._bidi_workaround(message)
495 output = message + '\n'
496 self._write_string(output, self._err_file)
498 def to_console_title(self, message):
499 if not self.params.get('consoletitle', False):
501 if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
502 # c_wchar_p() might not be necessary if `message` is
503 # already of type unicode()
504 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
505 elif 'TERM' in os.environ:
506 self._write_string('\033]0;%s\007' % message, self._screen_file)
508 def save_console_title(self):
509 if not self.params.get('consoletitle', False):
511 if 'TERM' in os.environ:
512 # Save the title on stack
513 self._write_string('\033[22;0t', self._screen_file)
515 def restore_console_title(self):
516 if not self.params.get('consoletitle', False):
518 if 'TERM' in os.environ:
519 # Restore the title from stack
520 self._write_string('\033[23;0t', self._screen_file)
523 self.save_console_title()
526 def __exit__(self, *args):
527 self.restore_console_title()
529 if self.params.get('cookiefile') is not None:
530 self.cookiejar.save()
532 def trouble(self, message=None, tb=None):
533 """Determine action to take when a download problem appears.
535 Depending on if the downloader has been configured to ignore
536 download errors or not, this method may throw an exception or
537 not when errors are found, after printing the message.
539 tb, if given, is additional traceback information.
541 if message is not None:
542 self.to_stderr(message)
543 if self.params.get('verbose'):
545 if sys.exc_info()[0]: # if .trouble has been called from an except block
547 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
548 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
549 tb += encode_compat_str(traceback.format_exc())
551 tb_data = traceback.format_list(traceback.extract_stack())
552 tb = ''.join(tb_data)
554 if not self.params.get('ignoreerrors', False):
555 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
556 exc_info = sys.exc_info()[1].exc_info
558 exc_info = sys.exc_info()
559 raise DownloadError(message, exc_info)
560 self._download_retcode = 1
562 def report_warning(self, message):
564 Print the message to stderr, it will be prefixed with 'WARNING:'
565 If stderr is a tty file the 'WARNING:' will be colored
567 if self.params.get('logger') is not None:
568 self.params['logger'].warning(message)
570 if self.params.get('no_warnings'):
572 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
573 _msg_header = '\033[0;33mWARNING:\033[0m'
575 _msg_header = 'WARNING:'
576 warning_message = '%s %s' % (_msg_header, message)
577 self.to_stderr(warning_message)
579 def report_error(self, message, tb=None):
581 Do the same as trouble, but prefixes the message with 'ERROR:', colored
582 in red if stderr is a tty file.
584 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
585 _msg_header = '\033[0;31mERROR:\033[0m'
587 _msg_header = 'ERROR:'
588 error_message = '%s %s' % (_msg_header, message)
589 self.trouble(error_message, tb)
591 def report_file_already_downloaded(self, file_name):
592 """Report file has already been fully downloaded."""
594 self.to_screen('[download] %s has already been downloaded' % file_name)
595 except UnicodeEncodeError:
596 self.to_screen('[download] The file has already been downloaded')
598 def prepare_filename(self, info_dict):
599 """Generate the output filename."""
601 template_dict = dict(info_dict)
603 template_dict['epoch'] = int(time.time())
604 autonumber_size = self.params.get('autonumber_size')
605 if autonumber_size is None:
607 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
608 if template_dict.get('resolution') is None:
609 if template_dict.get('width') and template_dict.get('height'):
610 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
611 elif template_dict.get('height'):
612 template_dict['resolution'] = '%sp' % template_dict['height']
613 elif template_dict.get('width'):
614 template_dict['resolution'] = '%dx?' % template_dict['width']
616 sanitize = lambda k, v: sanitize_filename(
618 restricted=self.params.get('restrictfilenames'),
619 is_id=(k == 'id' or k.endswith('_id')))
620 template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
621 for k, v in template_dict.items()
622 if v is not None and not isinstance(v, (list, tuple, dict)))
623 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
625 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
627 # For fields playlist_index and autonumber convert all occurrences
628 # of %(field)s to %(field)0Nd for backward compatibility
629 field_size_compat_map = {
630 'playlist_index': len(str(template_dict['n_entries'])),
631 'autonumber': autonumber_size,
633 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
634 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
637 FIELD_SIZE_COMPAT_RE,
638 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
641 NUMERIC_FIELDS = set((
642 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
643 'upload_year', 'upload_month', 'upload_day',
644 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
645 'average_rating', 'comment_count', 'age_limit',
646 'start_time', 'end_time',
647 'chapter_number', 'season_number', 'episode_number',
648 'track_number', 'disc_number', 'release_year',
652 # Missing numeric fields used together with integer presentation types
653 # in format specification will break the argument substitution since
654 # string 'NA' is returned for missing fields. We will patch output
655 # template for missing fields to meet string presentation type.
656 for numeric_field in NUMERIC_FIELDS:
657 if numeric_field not in template_dict:
658 # As of [1] format syntax is:
659 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
660 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
664 \({0}\) # mapping key
665 (?:[#0\-+ ]+)? # conversion flags (optional)
666 (?:\d+)? # minimum field width (optional)
667 (?:\.\d+)? # precision (optional)
668 [hlL]? # length modifier (optional)
669 [diouxXeEfFgGcrs%] # conversion type
672 FORMAT_RE.format(numeric_field),
673 r'%({0})s'.format(numeric_field), outtmpl)
675 tmpl = expand_path(outtmpl)
676 filename = tmpl % template_dict
677 # Temporary fix for #4787
678 # 'Treat' all problem characters by passing filename through preferredencoding
679 # to workaround encoding issues with subprocess on python2 @ Windows
680 if sys.version_info < (3, 0) and sys.platform == 'win32':
681 filename = encodeFilename(filename, True).decode(preferredencoding())
682 return sanitize_path(filename)
683 except ValueError as err:
684 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
687 def _match_entry(self, info_dict, incomplete):
688 """ Returns None iff the file should be downloaded """
690 video_title = info_dict.get('title', info_dict.get('id', 'video'))
691 if 'title' in info_dict:
692 # This can happen when we're just evaluating the playlist
693 title = info_dict['title']
694 matchtitle = self.params.get('matchtitle', False)
696 if not re.search(matchtitle, title, re.IGNORECASE):
697 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
698 rejecttitle = self.params.get('rejecttitle', False)
700 if re.search(rejecttitle, title, re.IGNORECASE):
701 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
702 date = info_dict.get('upload_date')
704 dateRange = self.params.get('daterange', DateRange())
705 if date not in dateRange:
706 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
707 view_count = info_dict.get('view_count')
708 if view_count is not None:
709 min_views = self.params.get('min_views')
710 if min_views is not None and view_count < min_views:
711 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
712 max_views = self.params.get('max_views')
713 if max_views is not None and view_count > max_views:
714 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
715 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
716 return 'Skipping "%s" because it is age restricted' % video_title
717 if self.in_download_archive(info_dict):
718 return '%s has already been recorded in archive' % video_title
721 match_filter = self.params.get('match_filter')
722 if match_filter is not None:
723 ret = match_filter(info_dict)
730 def add_extra_info(info_dict, extra_info):
731 '''Set the keys from extra_info in info dict if they are missing'''
732 for key, value in extra_info.items():
733 info_dict.setdefault(key, value)
735 def extract_info(self, url, download=True, ie_key=None, extra_info={},
736 process=True, force_generic_extractor=False):
738 Returns a list with a dictionary for each video we find.
739 If 'download', also downloads the videos.
740 extra_info is a dict containing the extra values to add to each result
743 if not ie_key and force_generic_extractor:
747 ies = [self.get_info_extractor(ie_key)]
752 if not ie.suitable(url):
755 ie = self.get_info_extractor(ie.ie_key())
757 self.report_warning('The program functionality for this site has been marked as broken, '
758 'and will probably not work.')
761 ie_result = ie.extract(url)
762 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
764 if isinstance(ie_result, list):
765 # Backwards compatibility: old IE result format
767 '_type': 'compat_list',
768 'entries': ie_result,
770 self.add_default_extra_info(ie_result, ie, url)
772 return self.process_ie_result(ie_result, download, extra_info)
775 except GeoRestrictedError as e:
778 msg += '\nThis video is available in %s.' % ', '.join(
779 map(ISO3166Utils.short2full, e.countries))
780 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
781 self.report_error(msg)
783 except ExtractorError as e: # An error we somewhat expected
784 self.report_error(compat_str(e), e.format_traceback())
786 except MaxDownloadsReached:
788 except Exception as e:
789 if self.params.get('ignoreerrors', False):
790 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
795 self.report_error('no suitable InfoExtractor for URL %s' % url)
797 def add_default_extra_info(self, ie_result, ie, url):
798 self.add_extra_info(ie_result, {
799 'extractor': ie.IE_NAME,
801 'webpage_url_basename': url_basename(url),
802 'extractor_key': ie.ie_key(),
805 def process_ie_result(self, ie_result, download=True, extra_info={}):
807 Take the result of the ie(may be modified) and resolve all unresolved
808 references (URLs, playlist items).
810 It will also download the videos if 'download'.
811 Returns the resolved ie_result.
813 result_type = ie_result.get('_type', 'video')
815 if result_type in ('url', 'url_transparent'):
816 ie_result['url'] = sanitize_url(ie_result['url'])
817 extract_flat = self.params.get('extract_flat', False)
818 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
819 extract_flat is True):
820 if self.params.get('forcejson', False):
821 self.to_stdout(json.dumps(ie_result))
824 if result_type == 'video':
825 self.add_extra_info(ie_result, extra_info)
826 return self.process_video_result(ie_result, download=download)
827 elif result_type == 'url':
828 # We have to add extra_info to the results because it may be
829 # contained in a playlist
830 return self.extract_info(ie_result['url'],
832 ie_key=ie_result.get('ie_key'),
833 extra_info=extra_info)
834 elif result_type == 'url_transparent':
835 # Use the information from the embedding page
836 info = self.extract_info(
837 ie_result['url'], ie_key=ie_result.get('ie_key'),
838 extra_info=extra_info, download=False, process=False)
840 # extract_info may return None when ignoreerrors is enabled and
841 # extraction failed with an error, don't crash and return early
846 force_properties = dict(
847 (k, v) for k, v in ie_result.items() if v is not None)
848 for f in ('_type', 'url', 'ie_key'):
849 if f in force_properties:
850 del force_properties[f]
851 new_result = info.copy()
852 new_result.update(force_properties)
854 assert new_result.get('_type') != 'url_transparent'
856 return self.process_ie_result(
857 new_result, download=download, extra_info=extra_info)
858 elif result_type == 'playlist' or result_type == 'multi_video':
859 # We process each entry in the playlist
860 playlist = ie_result.get('title') or ie_result.get('id')
861 self.to_screen('[download] Downloading playlist: %s' % playlist)
863 playlist_results = []
865 playliststart = self.params.get('playliststart', 1) - 1
866 playlistend = self.params.get('playlistend')
867 # For backwards compatibility, interpret -1 as whole list
868 if playlistend == -1:
871 playlistitems_str = self.params.get('playlist_items')
873 if playlistitems_str is not None:
874 def iter_playlistitems(format):
875 for string_segment in format.split(','):
876 if '-' in string_segment:
877 start, end = string_segment.split('-')
878 for item in range(int(start), int(end) + 1):
881 yield int(string_segment)
882 playlistitems = iter_playlistitems(playlistitems_str)
884 ie_entries = ie_result['entries']
885 if isinstance(ie_entries, list):
886 n_all_entries = len(ie_entries)
889 ie_entries[i - 1] for i in playlistitems
890 if -n_all_entries <= i - 1 < n_all_entries]
892 entries = ie_entries[playliststart:playlistend]
893 n_entries = len(entries)
895 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
896 (ie_result['extractor'], playlist, n_all_entries, n_entries))
897 elif isinstance(ie_entries, PagedList):
900 for item in playlistitems:
901 entries.extend(ie_entries.getslice(
905 entries = ie_entries.getslice(
906 playliststart, playlistend)
907 n_entries = len(entries)
909 '[%s] playlist %s: Downloading %d videos' %
910 (ie_result['extractor'], playlist, n_entries))
913 entry_list = list(ie_entries)
914 entries = [entry_list[i - 1] for i in playlistitems]
916 entries = list(itertools.islice(
917 ie_entries, playliststart, playlistend))
918 n_entries = len(entries)
920 '[%s] playlist %s: Downloading %d videos' %
921 (ie_result['extractor'], playlist, n_entries))
923 if self.params.get('playlistreverse', False):
924 entries = entries[::-1]
926 if self.params.get('playlistrandom', False):
927 random.shuffle(entries)
929 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
931 for i, entry in enumerate(entries, 1):
932 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
933 # This __x_forwarded_for_ip thing is a bit ugly but requires
936 entry['__x_forwarded_for_ip'] = x_forwarded_for
938 'n_entries': n_entries,
939 'playlist': playlist,
940 'playlist_id': ie_result.get('id'),
941 'playlist_title': ie_result.get('title'),
942 'playlist_index': i + playliststart,
943 'extractor': ie_result['extractor'],
944 'webpage_url': ie_result['webpage_url'],
945 'webpage_url_basename': url_basename(ie_result['webpage_url']),
946 'extractor_key': ie_result['extractor_key'],
949 reason = self._match_entry(entry, incomplete=True)
950 if reason is not None:
951 self.to_screen('[download] ' + reason)
954 entry_result = self.process_ie_result(entry,
957 playlist_results.append(entry_result)
958 ie_result['entries'] = playlist_results
959 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
961 elif result_type == 'compat_list':
963 'Extractor %s returned a compat_list result. '
964 'It needs to be updated.' % ie_result.get('extractor'))
970 'extractor': ie_result['extractor'],
971 'webpage_url': ie_result['webpage_url'],
972 'webpage_url_basename': url_basename(ie_result['webpage_url']),
973 'extractor_key': ie_result['extractor_key'],
977 ie_result['entries'] = [
978 self.process_ie_result(_fixup(r), download, extra_info)
979 for r in ie_result['entries']
983 raise Exception('Invalid result type: %s' % result_type)
    def _build_format_filter(self, filter_spec):
        """Return a predicate keeping only formats that match *filter_spec*.

        The spec is first matched against the numeric grammar
        (e.g. 'height<=720', 'filesize>10M') and then against the string
        grammar (e.g. 'ext=mp4'); a ValueError is raised when neither fits.
        """
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
            # Plain integer first; fall back to human-readable sizes ('10M').
            comparison_value = int(m.group('value'))
            comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    # Retry with an explicit bytes suffix ('10M' -> '10MB').
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]
            # String operators: prefix, suffix and substring matches.
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
                comparison_value = m.group('value')
                op = STR_OPERATORS[m.group('op')]
            raise ValueError('Invalid filter specification %r' % filter_spec)
            # A trailing '?' in the spec lets formats lacking the key pass.
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
    def build_format_selector(self, format_spec):
        """Compile a format-selection expression (e.g.
        'bestvideo[height<=720]+bestaudio/best') into a function that maps a
        context dict (formats, incomplete_formats, ...) to the formats to
        download."""
        def syntax_error(note, start):
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)
        PICKFIRST = 'PICKFIRST'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
        def _parse_filter(tokens):
            # Collect raw token strings until the matching ']' ends the filter.
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                    filter_parts.append(string)
        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                elif type == tokenize.OP and string in ALLOWED_OPS:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                        last_string = string
                        last_string += string
                yield tokenize.NAME, last_string, last_start, last_end, last_line
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser; returns a list of FormatSelector nodes.
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
            if current_selector:
                selectors.append(current_selector)
        def _build_selector_function(selector):
            # Turn the parsed selector tree into a generator-producing function.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]
                def selector_function(ctx):
                        for format in f(ctx):
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]
                def selector_function(ctx):
                        # First alternative that yields anything wins.
                        picked_formats = list(f(ctx))
                            return picked_formats
            elif selector.type == SINGLE:
                format_spec = selector.selector
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if format_spec == 'all':
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                            if f.get('vcodec') == 'none']
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                            if f.get('vcodec') == 'none']
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                            if f.get('acodec') == 'none']
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                            if f.get('acodec') == 'none']
                            yield video_formats[0]
                        # Bare extension selects by ext, anything else by format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                video_selector, audio_selector = map(_build_selector_function, selector.selector)
                def selector_function(ctx):
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
            filters = [self._build_format_filter(f) for f in selector.filters]
            def final_selector(ctx):
                # Apply attached [filters] on a deep copy so siblings are unaffected.
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
        # The spec is lexed with the stock Python tokenizer.
        stream = io.BytesIO(format_spec.encode('utf-8'))
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
        class TokenIterator(object):
            # Minimal token stream supporting one-token lookback (restore_last_token).
            def __init__(self, tokens):
                self.tokens = tokens
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
            def restore_last_token(self):
        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
    def _calc_headers(self, info_dict):
        """Assemble the HTTP headers for a download: global std_headers
        overlaid with the format's own http_headers, matching cookies and
        the per-request X-Forwarded-For IP, if any."""
        res = std_headers.copy()
        add_headers = info_dict.get('http_headers')
            res.update(add_headers)
        cookies = self._calc_cookies(info_dict)
            res['Cookie'] = cookies
        # __x_forwarded_for_ip is the private key propagated through
        # extraction results (see the playlist handling above).
        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip
1328 def _calc_cookies(self, info_dict):
1329 pr = sanitized_Request(info_dict['url'])
1330 self.cookiejar.add_cookie_header(pr)
1331 return pr.get_header('Cookie')
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result, select the formats to
        fetch according to the 'format' option and (when *download* is true)
        hand each selected format to process_info()."""
        assert info_dict.get('_type', 'video') == 'video'
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')
        if not isinstance(info_dict['id'], compat_str):
            self.report_warning('"id" field is not a string - forcing string conversion')
            info_dict['id'] = compat_str(info_dict['id'])
        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
            # Sort worst-to-best so thumbnails[-1] is the preferred one.
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
            if self.params.get('list_thumbnails'):
                self.list_thumbnails(info_dict)
        thumbnail = info_dict.get('thumbnail')
            info_dict['thumbnail'] = sanitize_url(thumbnail)
            info_dict['thumbnail'] = thumbnails[-1]['url']
        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
        subtitles = info_dict.get('subtitles')
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))
        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']
            raise ExtractorError('No video formats found!')
        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)
            format['url'] = sanitize_url(format['url'])
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)
        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']
        # TODO Central sorting goes here
        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
        req_format = self.params.get('format')
        if req_format is None:
            # Default: merge bestvideo+bestaudio when a merger is usable
            # and we are not streaming to stdout, otherwise plain 'best'.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)
        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
            'incomplete_formats': incomplete_formats,
        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format.

        Merges normal subtitles and automatic captions (normal ones win),
        picks the requested languages and the best matching format per the
        'subtitleslangs'/'subtitlesformat' options.
        """
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                # Automatic captions never override real subtitles.
                if lang not in available_subs:
                    available_subs[lang] = cap_info
        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
        if self.params.get('allsubtitles', False):
            requested_langs = available_subs.keys()
            if self.params.get('subtitleslangs', False):
                requested_langs = self.params.get('subtitleslangs')
            elif 'en' in available_subs:
                requested_langs = ['en']
                requested_langs = [list(available_subs.keys())[0]]
        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        for lang in requested_langs:
            formats = available_subs.get(lang)
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            # Walk the preference list and take the first extension that matches.
            for ext in formats_preference:
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
    def process_info(self, info_dict):
        """Process a single resolved IE result: honour the force* print
        options, write description/annotations/subtitles/info-json side
        files, download (and possibly merge) the media, then run fixup
        postprocessors and record the download archive entry."""
        assert info_dict.get('_type', 'video') == 'video'
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()
        # Keep the untruncated title around before capping it for filenames.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'
        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
        self._num_downloads += 1
        info_dict['_filename'] = filename = self.prepare_filename(info_dict)
        # forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
        if filename is None:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])
        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    sub_data = sub_info['data']
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err.cause)))
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/rg3/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_data)
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
        self._write_thumbnails(info_dict, filename)
        if not self.params.get('skip_download', False):
                    # dl() is the shared download helper for both the merged
                    # multi-format path and the single-file path below.
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                        postprocessors = [merger]
                    def compatible_formats(formats):
                        video, audio = formats
                        # NOTE(review): video_ext/audio_ext look swapped here
                        # (video_ext gets audio's ext); presumably harmless since
                        # the containment check below is symmetric — confirm.
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                        # TODO: Check acodec/vcodec
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            if success and filename != '-':
                # Fixup content: each detector either warns or queues an
                # ffmpeg-based fixup postprocessor per the 'fixup' policy.
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'
                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                        assert fixup_policy in ('ignore', 'never')
                if (info_dict.get('requested_formats') is None and
                        info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                        assert fixup_policy in ('ignore', 'never')
                if (info_dict.get('protocol') == 'm3u8_native' or
                        info_dict.get('protocol') == 'm3u8' and
                        self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformated aac bitstream.' % (
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                            self.report_warning(
                                '%s: malformated aac bitstream. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                        assert fixup_policy in ('ignore', 'never')
                    self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Raises SameFileError when several URLs would be written to one
        fixed output file; returns the accumulated retcode.
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        if (len(url_list) > 1 and
                '%' not in outtmpl and
                self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)
        for url in url_list:
                # It also downloads the videos
                res = self.extract_info(
                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))
        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download from a previously dumped .info.json file, retrying via
        its webpage_url when the stored info fails."""
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            info = self.filter_requested_info(json.loads('\n'.join(f)))
            self.process_ie_result(info, download=True)
        except DownloadError:
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
1920 def filter_requested_info(info_dict):
1922 (k, v) for k, v in info_dict.items()
1923 if k not in ['requested_formats', 'requested_subtitles'])
1925 def post_process(self, filename, ie_info):
1926 """Run all the postprocessors on the given file."""
1927 info = dict(ie_info)
1928 info['filepath'] = filename
1930 if ie_info.get('__postprocessors') is not None:
1931 pps_chain.extend(ie_info['__postprocessors'])
1932 pps_chain.extend(self._pps)
1933 for pp in pps_chain:
1934 files_to_delete = []
1936 files_to_delete, info = pp.run(info)
1937 except PostProcessingError as e:
1938 self.report_error(e.msg)
1939 if files_to_delete and not self.params.get('keepvideo', False):
1940 for old_filename in files_to_delete:
1941 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1943 os.remove(encodeFilename(old_filename))
1944 except (IOError, OSError):
1945 self.report_warning('Unable to remove downloaded original file')
1947 def _make_archive_id(self, info_dict):
1948 # Future-proof against any change in case
1949 # and backwards compatibility with prior versions
1950 extractor = info_dict.get('extractor_key')
1951 if extractor is None:
1952 if 'id' in info_dict:
1953 extractor = info_dict.get('ie_key') # key in a playlist
1954 if extractor is None:
1955 return None # Incomplete video information
1956 return extractor.lower() + ' ' + info_dict['id']
1958 def in_download_archive(self, info_dict):
1959 fn = self.params.get('download_archive')
1963 vid_id = self._make_archive_id(info_dict)
1965 return False # Incomplete video information
1968 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1969 for line in archive_file:
1970 if line.strip() == vid_id:
1972 except IOError as ioe:
1973 if ioe.errno != errno.ENOENT:
1977 def record_download_archive(self, info_dict):
1978 fn = self.params.get('download_archive')
1981 vid_id = self._make_archive_id(info_dict)
1983 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1984 archive_file.write(vid_id + '\n')
1987 def format_resolution(format, default='unknown'):
1988 if format.get('vcodec') == 'none':
1990 if format.get('resolution') is not None:
1991 return format['resolution']
1992 if format.get('height') is not None:
1993 if format.get('width') is not None:
1994 res = '%sx%s' % (format['width'], format['height'])
1996 res = '%sp' % format['height']
1997 elif format.get('width') is not None:
1998 res = '%dx?' % format['width']
2003 def _format_note(self, fdict):
2005 if fdict.get('ext') in ['f4f', 'f4m']:
2006 res += '(unsupported) '
2007 if fdict.get('language'):
2010 res += '[%s] ' % fdict['language']
2011 if fdict.get('format_note') is not None:
2012 res += fdict['format_note'] + ' '
2013 if fdict.get('tbr') is not None:
2014 res += '%4dk ' % fdict['tbr']
2015 if fdict.get('container') is not None:
2018 res += '%s container' % fdict['container']
2019 if (fdict.get('vcodec') is not None and
2020 fdict.get('vcodec') != 'none'):
2023 res += fdict['vcodec']
2024 if fdict.get('vbr') is not None:
2026 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2028 if fdict.get('vbr') is not None:
2029 res += '%4dk' % fdict['vbr']
2030 if fdict.get('fps') is not None:
2033 res += '%sfps' % fdict['fps']
2034 if fdict.get('acodec') is not None:
2037 if fdict['acodec'] == 'none':
2040 res += '%-5s' % fdict['acodec']
2041 elif fdict.get('abr') is not None:
2045 if fdict.get('abr') is not None:
2046 res += '@%3dk' % fdict['abr']
2047 if fdict.get('asr') is not None:
2048 res += ' (%5dHz)' % fdict['asr']
2049 if fdict.get('filesize') is not None:
2052 res += format_bytes(fdict['filesize'])
2053 elif fdict.get('filesize_approx') is not None:
2056 res += '~' + format_bytes(fdict['filesize_approx'])
2059 def list_formats(self, info_dict):
2060 formats = info_dict.get('formats', [info_dict])
2062 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2064 if f.get('preference') is None or f['preference'] >= -1000]
2065 if len(formats) > 1:
2066 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2068 header_line = ['format code', 'extension', 'resolution', 'note']
2070 '[info] Available formats for %s:\n%s' %
2071 (info_dict['id'], render_table(header_line, table)))
2073 def list_thumbnails(self, info_dict):
2074 thumbnails = info_dict.get('thumbnails')
2076 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2080 '[info] Thumbnails for %s:' % info_dict['id'])
2081 self.to_screen(render_table(
2082 ['ID', 'width', 'height', 'URL'],
2083 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2085 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2087 self.to_screen('%s has no %s' % (video_id, name))
2090 'Available %s for %s:' % (name, video_id))
2091 self.to_screen(render_table(
2092 ['Language', 'formats'],
2093 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2094 for lang, formats in subtitles.items()]))
2096 def urlopen(self, req):
2097 """ Start an HTTP download """
2098 if isinstance(req, compat_basestring):
2099 req = sanitized_Request(req)
2100 return self._opener.open(req, timeout=self._socket_timeout)
2102 def print_debug_header(self):
2103 if not self.params.get('verbose'):
2106 if type('') is not compat_str:
2107 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
2108 self.report_warning(
2109 'Your Python is broken! Update to a newer and supported version')
2111 stdout_encoding = getattr(
2112 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2114 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2115 locale.getpreferredencoding(),
2116 sys.getfilesystemencoding(),
2118 self.get_encoding()))
2119 write_string(encoding_str, encoding=None)
2121 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2123 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2125 sp = subprocess.Popen(
2126 ['git', 'rev-parse', '--short', 'HEAD'],
2127 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2128 cwd=os.path.dirname(os.path.abspath(__file__)))
2129 out, err = sp.communicate()
2130 out = out.decode().strip()
2131 if re.match('[0-9a-f]+', out):
2132 self._write_string('[debug] Git HEAD: ' + out + '\n')
2138 self._write_string('[debug] Python version %s - %s\n' % (
2139 platform.python_version(), platform_name()))
2141 exe_versions = FFmpegPostProcessor.get_versions(self)
2142 exe_versions['rtmpdump'] = rtmpdump_version()
2143 exe_str = ', '.join(
2145 for exe, v in sorted(exe_versions.items())
2150 self._write_string('[debug] exe versions: %s\n' % exe_str)
2153 for handler in self._opener.handlers:
2154 if hasattr(handler, 'proxies'):
2155 proxy_map.update(handler.proxies)
2156 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2158 if self.params.get('call_home', False):
2159 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2160 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2161 latest_version = self.urlopen(
2162 'https://yt-dl.org/latest/version').read().decode('utf-8')
2163 if version_tuple(latest_version) > version_tuple(__version__):
2164 self.report_warning(
2165 'You are using an outdated version (newest version: %s)! '
2166 'See https://yt-dl.org/update if you need help updating.' %
2169 def _setup_opener(self):
2170 timeout_val = self.params.get('socket_timeout')
2171 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2173 opts_cookiefile = self.params.get('cookiefile')
2174 opts_proxy = self.params.get('proxy')
2176 if opts_cookiefile is None:
2177 self.cookiejar = compat_cookiejar.CookieJar()
2179 opts_cookiefile = expand_path(opts_cookiefile)
2180 self.cookiejar = compat_cookiejar.MozillaCookieJar(
2182 if os.access(opts_cookiefile, os.R_OK):
2183 self.cookiejar.load()
2185 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2186 if opts_proxy is not None:
2187 if opts_proxy == '':
2190 proxies = {'http': opts_proxy, 'https': opts_proxy}
2192 proxies = compat_urllib_request.getproxies()
2193 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2194 if 'http' in proxies and 'https' not in proxies:
2195 proxies['https'] = proxies['http']
2196 proxy_handler = PerRequestProxyHandler(proxies)
2198 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2199 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2200 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2201 data_handler = compat_urllib_request_DataHandler()
2203 # When passing our own FileHandler instance, build_opener won't add the
2204 # default FileHandler and allows us to disable the file protocol, which
2205 # can be used for malicious purposes (see
2206 # https://github.com/rg3/youtube-dl/issues/8227)
2207 file_handler = compat_urllib_request.FileHandler()
2209 def file_open(*args, **kwargs):
2210 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2211 file_handler.file_open = file_open
2213 opener = compat_urllib_request.build_opener(
2214 proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2216 # Delete the default user-agent header, which would otherwise apply in
2217 # cases where our custom HTTP handler doesn't come into play
2218 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2219 opener.addheaders = []
2220 self._opener = opener
2222 def encode(self, s):
2223 if isinstance(s, bytes):
2224 return s # Already encoded
2227 return s.encode(self.get_encoding())
2228 except UnicodeEncodeError as err:
2229 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2232 def get_encoding(self):
2233 encoding = self.params.get('encoding')
2234 if encoding is None:
2235 encoding = preferredencoding()
2238 def _write_thumbnails(self, info_dict, filename):
2239 if self.params.get('writethumbnail', False):
2240 thumbnails = info_dict.get('thumbnails')
2242 thumbnails = [thumbnails[-1]]
2243 elif self.params.get('write_all_thumbnails', False):
2244 thumbnails = info_dict.get('thumbnails')
2249 # No thumbnails present, so return immediately
2252 for t in thumbnails:
2253 thumb_ext = determine_ext(t['url'], 'jpg')
2254 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2255 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2256 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2258 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2259 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2260 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2262 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2263 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2265 uf = self.urlopen(t['url'])
2266 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2267 shutil.copyfileobj(uf, thumbf)
2268 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2269 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2270 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2271 self.report_warning('Unable to download thumbnail "%s": %s' %
2272 (t['url'], error_to_compat_str(err)))