2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
32 compat_get_terminal_size,
37 compat_tokenize_tokenize,
39 compat_urllib_request,
40 compat_urllib_request_DataHandler,
63 PerRequestProxyHandler,
68 register_socks_protocols,
78 UnavailableVideoError,
83 YoutubeDLCookieProcessor,
86 from .cache import Cache
87 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
88 from .downloader import get_suitable_downloader
89 from .downloader.rtmp import rtmpdump_version
90 from .postprocessor import (
93 FFmpegFixupStretchedPP,
98 from .version import __version__
100 if compat_os_name == 'nt':
class YoutubeDL(object):
    """
    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    force_generic_extractor: Force downloader to use the generic extractor
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails: Write all thumbnail formats to files
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy:  URL of the proxy to use for IP address verification
                       on geo-restricted sites. (Experimental)
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see youtube_dl/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts.

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    postprocessor_args: A list of additional command-line arguments for the
    """

    # Process exit code of the last download batch; set to 0 in __init__.
    _download_retcode = None
    # Running count of downloads performed by this instance; set to 0 in __init__.
    _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): several original lines are elided in this chunk;
        # the surviving statements are kept verbatim below.
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when logtostderr is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        # NOTE(review): fragment of the default-parameters dict literal;
        # its surrounding lines are elided here.
            'nocheckcertificate': False,
        # User-supplied params override the defaults.
        self.params.update(params)
        self.cache = Cache(self)

        # Map the deprecated cn_verification_proxy option onto
        # geo_verification_proxy when the latter is unset.
        if self.params.get('cn_verification_proxy') is not None:
            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        # Spawn an external bidi filter ("bidiv" or "fribidi") and route
        # screen output through it for terminals lacking bidi support.
        if params.get('bidi_workaround', False):
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                width_args = ['-w', str(width)]
                    stdin=subprocess.PIPE,
                    stderr=self._err_file)
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                # Fall back to fribidi when bidiv is unavailable.
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        # Force restrictfilenames when the filesystem encoding cannot
        # represent arbitrary characters (Python 3 raises on write, #1474).
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured post-processors;
        # 'key' selects the class, the remaining entries are its kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
    def warn_if_short_id(self, argv):
        # Warn when an argument looks like a short YouTube video ID that
        # starts with a dash and would be parsed as an option; suggest
        # separating URLs from options with '--'.
        # NOTE(review): some original lines are elided in this chunk.
        # short YouTube ID starting with dash?
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
            # Rebuild the command line with the suspicious IDs after '--'.
            [a for i, a in enumerate(argv) if i not in idxs] +
            ['--'] + [argv[i] for i in idxs]
            'Long argument string detected. '
            'Use -- to separate parameters and URLs, like this:\n%s\n' %
            args_to_str(correct_argv))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # Only instantiated extractors (not classes) are cached by key and
        # bound back to this downloader ("mutual registration").
        if not isinstance(ie, type):
            self._ies_instances[ie.ie_key()] = ie
            ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
            # Not instantiated yet: create and register one lazily.
            # NOTE(review): the guard and return lines are elided in this chunk.
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # NOTE(review): the line appending pp to the chain is elided here.
        pp.set_downloader(self)
420 def add_progress_hook(self, ph):
421 """Add the progress hook (currently only for the file downloader)"""
422 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Pass *message* through the external bidi process started in
        # __init__ (only when bidi_workaround is active) and return the
        # transformed text without the trailing newline.
        if not hasattr(self, '_output_channel'):
            # NOTE(review): early `return message` appears elided here.

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as were written.
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]
437 def to_screen(self, message, skip_eol=False):
438 """Print message to stdout if not in quiet mode."""
439 return self.to_stdout(message, skip_eol, check_quiet=True)
441 def _write_string(self, s, out=None):
442 write_string(s, out=out, encoding=self.params.get('encoding'))
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        # A configured logger takes precedence over direct screen output.
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            # skip_eol suppresses the trailing newline.
            terminator = ['\n', ''][skip_eol]
            output = message + terminator
            # NOTE(review): a line between these two appears elided here.
            self._write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
            # NOTE(review): the `else:` line is elided here; the statements
            # below belong to the non-logger branch.
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)
    def to_console_title(self, message):
        # Set the terminal/console window title to *message*.
        if not self.params.get('consoletitle', False):
            # NOTE(review): early return appears elided here.
        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 escape: set icon name and window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        # Push the current terminal title onto the title stack (xterm CSI 22;0t).
        if not self.params.get('consoletitle', False):
            # NOTE(review): early return appears elided here.
        if 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)
    def restore_console_title(self):
        # Pop the previously saved terminal title (xterm CSI 23;0t).
        if not self.params.get('consoletitle', False):
            # NOTE(review): early return appears elided here.
        if 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
        # NOTE(review): this line belongs to __enter__; its `def` line is
        # elided in this chunk.
        self.save_console_title()

    def __exit__(self, *args):
        # Undo __enter__: restore the console title and persist cookies
        # when a cookie file was configured.
        self.restore_console_title()
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the wrapped exception's own traceback when present
                # (e.g. DownloadError carrying an exc_info attribute).
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
                # No active exception: fall back to the current call stack.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                # NOTE(review): the `else:` line is elided here.
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure in the process return code instead.
        self._download_retcode = 1
    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
            # NOTE(review): `else:` and the early returns are elided here.
        if self.params.get('no_warnings'):
        # Color only when stderr is a tty, colors are enabled and not on Windows.
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;33mWARNING:\033[0m'
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
            # NOTE(review): the `else:` line is elided here.
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # NOTE(review): the `try:` line is elided here.
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Filename cannot be encoded for output; report without it.
            self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): the enclosing `try:` line is elided in this chunk.
            # Work on a copy so the caller's info_dict is untouched.
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            # Zero-pad playlist_index to the width of the playlist length.
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            # Derive a 'resolution' field from width/height when missing.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize every scalar value for use inside a filename;
            # lists/tuples/dicts and None values are dropped.
            sanitize = lambda k, v: sanitize_filename(
                restricted=self.params.get('restrictfilenames'),
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Unknown template fields render as 'NA' instead of raising.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """
        # A non-None return value is the human-readable reason for skipping.

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        # View-count limits only apply when the count is known.
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        # User-supplied match_filter gets the last word.
        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            ret = match_filter(info_dict)
        # NOTE(review): the tail of this method is elided in this chunk.
651 def add_extra_info(info_dict, extra_info):
652 '''Set the keys from extra_info in info dict if they are missing'''
653 for key, value in extra_info.items():
654 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        # NOTE(review): extra_info has a mutable default shared across calls;
        # confirm no caller mutates it.
        # NOTE(review): several original lines are elided in this chunk.
        if not ie_key and force_generic_extractor:

        ies = [self.get_info_extractor(ie_key)]

        if not ie.suitable(url):

            ie = self.get_info_extractor(ie.ie_key())
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                    '_type': 'compat_list',
                    'entries': ie_result,
                self.add_default_extra_info(ie_result, ie, url)
                return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except MaxDownloadsReached:
        except Exception as e:
            # Unexpected errors only abort the run unless ignoreerrors is set.
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        # Attach extractor provenance fields without overwriting any values
        # the extractor already set.
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        # NOTE(review): the closing line(s) of this call are elided here.
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): several original lines are elided in this chunk.
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With extract_flat, unresolved URLs are returned as-is
            # ('in_playlist' limits this to entries inside a playlist).
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding result override the target's,
            # except the routing fields.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based on the command line; 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            playlistitems_str = self.params.get('playlist_items')

            if playlistitems_str is not None:
                # Expand "1-3,7" style specs into individual 1-based indices.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # Entries may be a plain list, a PagedList or a lazy iterable;
            # each case is sliced differently.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                    ie_entries[i - 1] for i in playlistitems
                    if -n_all_entries <= i - 1 < n_all_entries]
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))
                # Lazy iterable: materialize only the requested slice.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Per-entry extra info: playlist provenance and position.
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        elif result_type == 'compat_list':
            # Deprecated extractor output format; warn and adapt each entry.
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            # NOTE(review): the `else:` line is elided here.
            raise Exception('Invalid result type: %s' % result_type)
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "
        # NOTE(review): several original lines are elided in this chunk.
        # Numeric comparisons, e.g. "height<=480" or "filesize>10M";
        # a trailing '?' makes the filter accept formats lacking the field.
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
            comparison_value = int(m.group('value'))
            # Non-integer values are parsed as file sizes ("10M", "1.2GiB"),
            # retrying with an assumed 'B' suffix.
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                'Invalid value %r in format specification %r' % (
                    m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]
        # String comparisons, e.g. "ext=mp4"; ^= $= *= mean
        # prefix / suffix / substring match respectively.
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        str_operator_rex = re.compile(r'''(?x)
            \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.search(filter_spec)
            comparison_value = m.group('value')
            op = STR_OPERATORS[m.group('op')]
            raise ValueError('Invalid filter specification %r' % filter_spec)

            # The returned predicate: missing fields pass only when the
            # '?' (none_inclusive) marker was given.
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
def build_format_selector(self, format_spec):
    """Compile a format selection expression (e.g. 'bestvideo+bestaudio/best',
    'mp4[height<=720]') into a selector function.

    The returned callable receives a context dict with 'formats' and
    'incomplete_formats' keys and yields the format dicts to download.

    NOTE(review): this excerpt is truncated -- a number of original source
    lines are elided (the code below is not contiguous); confirm against the
    full upstream file before editing.
    """
    def syntax_error(note, start):
        # Builds (does not raise) a SyntaxError pointing at the offending
        # column of format_spec; callers `raise` the returned value.
        'Invalid format specification: '
        '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'

    # One node of the parse tree: (type, selector payload, [filter strings]).
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Concatenate raw token strings until the closing ']' of a [filter].
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
            elif type == tokenize.OP and string in ALLOWED_OPS:
                # Flush any accumulated joined string before the operator.
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Glue adjacent name/number/op tokens into one NAME token.
                last_string += string
        yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parse of the token stream into FormatSelector
        # nodes; the inside_* flags tell which delimiters end this level.
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if not inside_group:
                    # ')' will be handled by the parentheses group
                    tokens.restore_last_token()
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                if not current_selector:
                    raise syntax_error('"," must follow a format selector', start)
                selectors.append(current_selector)
                current_selector = None
                if not current_selector:
                    raise syntax_error('"/" must follow a format selector', start)
                # '/' builds a PICKFIRST pair: try left side, fall back to right.
                first_choice = current_selector
                second_choice = _parse_format_selection(tokens, inside_choice=True)
                current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                if not current_selector:
                    # A bare '[filter]' implicitly filters 'best'.
                    current_selector = FormatSelector(SINGLE, 'best', [])
                format_filter = _parse_filter(tokens)
                current_selector.filters.append(format_filter)
                if current_selector:
                    raise syntax_error('Unexpected "("', start)
                group = _parse_format_selection(tokens, inside_group=True)
                current_selector = FormatSelector(GROUP, group, [])
                # '+' builds a MERGE pair: video selector + audio selector.
                video_selector = current_selector
                audio_selector = _parse_format_selection(tokens, inside_merge=True)
                if not video_selector or not audio_selector:
                    raise syntax_error('"+" must be between two format selectors', start)
                current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
        if current_selector:
            selectors.append(current_selector)

    def _build_selector_function(selector):
        # Turn a parse tree node (or a list of top-level nodes) into a
        # generator function over the context dict.
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for format in f(ctx):
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # Return the first sub-selector that yields anything.
                picked_formats = list(f(ctx))
                return picked_formats
        elif selector.type == SINGLE:
            format_spec = selector.selector

            def selector_function(ctx):
                formats = list(ctx['formats'])
                if format_spec == 'all':
                elif format_spec in ['best', 'worst', None]:
                    # Formats are sorted worst-to-best, so 'worst' is index 0
                    # and 'best' is index -1.
                    format_idx = 0 if format_spec == 'worst' else -1
                    audiovideo_formats = [
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for extractors with incomplete formats (audio only (soundcloud)
                    # or video only (imgur)) we will fallback to best/worst
                    # {video,audio}-only format
                    elif ctx['incomplete_formats']:
                        yield formats[format_idx]
                elif format_spec == 'bestaudio':
                        if f.get('vcodec') == 'none']
                        yield audio_formats[-1]
                elif format_spec == 'worstaudio':
                        if f.get('vcodec') == 'none']
                        yield audio_formats[0]
                elif format_spec == 'bestvideo':
                        if f.get('acodec') == 'none']
                        yield video_formats[-1]
                elif format_spec == 'worstvideo':
                        if f.get('acodec') == 'none']
                        yield video_formats[0]
                    # Fallback: treat the spec as a file extension or as an
                    # explicit format_id.
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if format_spec in extensions:
                        filter_f = lambda f: f['ext'] == format_spec
                        filter_f = lambda f: f['format_id'] == format_spec
                    matches = list(filter(filter_f, formats))
        elif selector.type == MERGE:
            def _merge(formats_info):
                # Combine a (video, audio) pair into one synthetic format dict
                # that the merger postprocessor understands.
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                # Formats must be opposite (video+audio)
                if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                        % (format_1, format_2))
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                    # Video-side attributes come from formats_info[0],
                    # audio-side attributes from formats_info[1].
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),

            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                # Cross-product of both sides; deep-copy the context so one
                # side's filtering cannot affect the other.
                for pair in itertools.product(
                        video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the node's [..] filters on a copy of the context, then
            # delegate to the node's selector function.
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    # Tokenize the spec with Python's own tokenizer, then parse.
    stream = io.BytesIO(format_spec.encode('utf-8'))
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list with one-token push-back support,
        # needed by the recursive parser (restore_last_token).
        def __init__(self, tokens):
            self.tokens = tokens

            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]

        def restore_last_token(self):

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Compute the HTTP headers for downloading this format: the global
    std_headers overlaid with the format's own 'http_headers', plus a
    Cookie header derived from the cookiejar.

    NOTE(review): this excerpt is truncated -- the guarding ifs and the
    final `return res` are elided here; confirm against upstream.
    """
    res = std_headers.copy()

    add_headers = info_dict.get('http_headers')
    res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    res['Cookie'] = cookies
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar would attach to a
    request for this video's URL (None if no cookies apply)."""
    # Build a throwaway request, let the jar populate its Cookie header,
    # then read that header back.
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
def process_video_result(self, info_dict, download=True):
    """Validate and normalize a single extracted video result, run format
    selection on it and hand each chosen format to process_info().

    NOTE(review): this excerpt is truncated -- several original lines are
    elided (the code below is not contiguous); confirm against upstream.
    """
    assert info_dict.get('_type', 'video') == 'video'

    # Hard requirements of the extractor contract.
    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if not isinstance(info_dict['id'], compat_str):
        self.report_warning('"id" field is not a string - forcing string conversion')
        info_dict['id'] = compat_str(info_dict['id'])

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        # Promote a single 'thumbnail' value to a one-element thumbnails list.
        thumbnail = info_dict.get('thumbnail')
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        # Sort worst-first by preference, then size, with id/url tie-breakers.
        thumbnails.sort(key=lambda t: (
            t.get('preference'), t.get('width'), t.get('height'),
            t.get('id'), t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
        info_dict['thumbnail'] = sanitize_url(thumbnail)
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    subtitles = info_dict.get('subtitles')
        for _, subtitle in subtitles.items():
            for subtitle_format in subtitle:
                if subtitle_format.get('url'):
                    subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                if 'ext' not in subtitle_format:
                    subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles,
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
        formats = info_dict['formats']

        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        if 'url' not in format:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        format['url'] = sanitize_url(format['url'])

        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            # Disambiguate duplicates by appending an ordinal suffix.
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if 'protocol' not in format:
            format['protocol'] = determine_protocol(format)
        # Add HTTP headers, so that external programs can use them from the
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)

    req_format = self.params.get('format')
    if req_format is None:
        req_format_list = []
        # Default format: prefer bestvideo+bestaudio only when output is a
        # seekable file (not stdout, not a live stream) and merging works.
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                not info_dict.get('is_live')):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/rg3/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/rg3/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
        # all formats are audio-only
        all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        'incomplete_formats': incomplete_formats,

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        raise ExtractorError('requested format not available',

    if len(formats_to_download) > 1:
        self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
    for format in formats_to_download:
        new_info = dict(info_dict)
        new_info.update(format)
        self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    Returns a {lang: subtitle_format_dict} mapping, or None when no
    subtitles were requested/available.

    NOTE(review): this excerpt is truncated -- several original lines are
    elided; confirm against upstream.
    """
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        # Automatic captions only fill languages not covered by real subs.
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if (not self.params.get('writesubtitles') and not
            self.params.get('writeautomaticsub') or not

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
        if self.params.get('subtitleslangs', False):
            requested_langs = self.params.get('subtitleslangs')
        elif 'en' in available_subs:
            requested_langs = ['en']
            requested_langs = [list(available_subs.keys())[0]]

    # 'subtitlesformat' is a '/'-separated preference list, e.g. 'srt/best'.
    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    for lang in requested_langs:
        formats = available_subs.get(lang)
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
        for ext in formats_preference:
            # Pick the last (best) entry matching this preferred extension.
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Handles the --force* printing options, simulate mode, writing of the
    description/annotations/subtitles/info-json side files, the actual
    download (including multi-format merge) and post-download fixups.

    NOTE(review): this excerpt is truncated -- several original lines are
    elided (the code below is not contiguous); confirm against upstream.
    """
    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    # Cap overly long titles to keep filenames manageable.
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # --force* printing options.
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):

    if filename is None:

        # Create the destination directory if needed.
        dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
        if dn and not os.path.exists(dn):
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + error_to_compat_str(err))

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
            self.to_screen('[info] Writing video description to: ' + descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
            self.to_screen('[info] Writing video annotations to: ' + annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            if sub_info.get('data') is not None:
                sub_data = sub_info['data']
                    sub_data = ie._download_webpage(
                        sub_info['url'], info_dict['id'], note=False)
                except ExtractorError as err:
                    self.report_warning('Unable to download subtitle for "%s": %s' %
                                        (sub_lang, error_to_compat_str(err.cause)))
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub_data)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
            # NOTE(review): the enclosing `def dl(name, info):` helper header
            # is elided in this excerpt.
            fd = get_suitable_downloader(info, self.params)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            if self.params.get('verbose'):
                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
            return fd.download(name, info)

        if info_dict.get('requested_formats') is not None:
            # Multi-format request: download each part, then merge.
            merger = FFmpegMergerPP(self)
            if not merger.available:
                self.report_warning('You have requested multiple '
                                    'formats but ffmpeg or avconv are not installed.'
                                    ' The formats won\'t be merged.')
                postprocessors = [merger]

            def compatible_formats(formats):
                video, audio = formats
                # NOTE(review): the two .get('ext') calls look swapped
                # (video_ext reads audio, and vice versa); the containment
                # check below is symmetric, so behavior is unaffected --
                # confirm against upstream before "fixing".
                video_ext, audio_ext = audio.get('ext'), video.get('ext')
                if video_ext and audio_ext:
                        ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                    for exts in COMPATIBLE_EXTS:
                        if video_ext in exts and audio_ext in exts:
                # TODO: Check acodec/vcodec

            filename_real_ext = os.path.splitext(filename)[1][1:]
                os.path.splitext(filename)[0]
                if filename_real_ext == info_dict['ext']
            requested_formats = info_dict['requested_formats']
            if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                info_dict['ext'] = 'mkv'
                self.report_warning(
                    'Requested formats are incompatible for merge and will be merged into mkv.')
            # Ensure filename always has a correct extension for successful merge
            filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
            if os.path.exists(encodeFilename(filename)):
                    '[download] %s has already been downloaded and '
                    'merged' % filename)
                for f in requested_formats:
                    new_info = dict(info_dict)
                    fname = self.prepare_filename(new_info)
                    # Each part gets an 'f<format_id>' infix before the ext.
                    fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                    downloaded.append(fname)
                    partial_success = dl(fname, new_info)
                    success = success and partial_success
                info_dict['__postprocessors'] = postprocessors
                info_dict['__files_to_merge'] = downloaded
            # Just a single file
            success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

        if success and filename != '-':
            # Post-download fixups, controlled by the --fixup policy.
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None and
                    info_dict.get('container') == 'm4a_dash'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('protocol') == 'm3u8_native' or
                    info_dict.get('protocol') == 'm3u8' and
                    self.params.get('hls_prefer_native')):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformated aac bitstream.' % (
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                        self.report_warning(
                            '%s: malformated aac bitstream. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    assert fixup_policy in ('ignore', 'never')

                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))

        self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    NOTE(review): this excerpt is truncated -- the `try:`/`raise`/`else:`
    lines around extract_info are elided; confirm against upstream.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # Refuse to write several videos over one fixed output filename.
    if (len(url_list) > 1 and
            '%' not in outtmpl and
            self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        # It also downloads the videos
        res = self.extract_info(
            url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Re-run processing/downloading from a previously dumped .info.json
    file instead of extracting fresh metadata.

    NOTE(review): this excerpt is truncated -- the `try:` and `else:`
    lines around process_ie_result are elided; confirm against upstream.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(json.loads('\n'.join(f)))
    self.process_ie_result(info, download=True)
    except DownloadError:
        # Dumped media URLs may have expired; retry from the page URL.
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
    return self._download_retcode
def filter_requested_info(info_dict):
    """Return a copy of info_dict without the per-download bookkeeping keys
    ('requested_formats', 'requested_subtitles') that should not be
    persisted, e.g. when dumping the info dict to JSON.
    """
    # The excerpt was missing the `return dict(` line, leaving a bare
    # generator expression with no effect and an implicit None return;
    # restore the intended filtered-copy behavior.
    return dict(
        (k, v) for k, v in info_dict.items()
        if k not in ['requested_formats', 'requested_subtitles'])
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    NOTE(review): this excerpt is truncated -- the pps_chain initialization
    and two `try:` lines are elided; confirm against upstream.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    # Per-download postprocessors (e.g. the merger) run before the
    # globally-registered ones.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        files_to_delete = []
        files_to_delete, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if files_to_delete and not self.params.get('keepvideo', False):
            for old_filename in files_to_delete:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded original file')
1832 def _make_archive_id(self, info_dict):
1833 # Future-proof against any change in case
1834 # and backwards compatibility with prior versions
1835 extractor = info_dict.get('extractor_key')
1836 if extractor is None:
1837 if 'id' in info_dict:
1838 extractor = info_dict.get('ie_key') # key in a playlist
1839 if extractor is None:
1840 return None # Incomplete video information
1841 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """True when this video's archive id is already recorded in the
    --download-archive file.

    NOTE(review): this excerpt is truncated -- the early-return guards,
    `try:`, `return True` and final `raise`/`return False` lines are
    elided; confirm against upstream.
    """
    fn = self.params.get('download_archive')

    vid_id = self._make_archive_id(info_dict)
        return False  # Incomplete video information

        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
    except IOError as ioe:
        # A missing archive file only means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
def record_download_archive(self, info_dict):
    """Append this video's archive id to the --download-archive file.

    NOTE(review): this excerpt is truncated -- the `if fn is None` guard
    is elided; confirm against upstream.
    """
    fn = self.params.get('download_archive')
    vid_id = self._make_archive_id(info_dict)
    # locked_file serializes concurrent writers to the archive.
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution label for a format dict:
    'audio only', an explicit 'resolution' value, 'WxH', 'Hp', 'Wx?',
    or *default* when nothing is known.
    """
    # The excerpt was missing the 'audio only' return and the trailing
    # default/`return res` lines, making the function return None for
    # audio-only and empty inputs; restore the complete branch structure.
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '%dx?' % format['width']
    else:
        res = default
    return res
def _format_note(self, fdict):
    """Build the human-readable 'note' column text for one format dict
    (codecs, bitrates, fps, sample rate, file size, ...).

    NOTE(review): this excerpt is truncated -- the `res = ''` initializer
    and several branch bodies/`return res` are elided; confirm against
    upstream.
    """
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported) '
    if fdict.get('language'):
        res += '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None and
            fdict.get('vcodec') != 'none'):
        res += fdict['vcodec']
        if fdict.get('vbr') is not None:
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        res += '%sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        if fdict['acodec'] == 'none':
        res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of all selectable formats for this video
    (--list-formats).

    NOTE(review): this excerpt is truncated -- the `table = [` list
    comprehension header and the `self.to_screen(` call line are elided;
    confirm against upstream.
    """
    formats = info_dict.get('formats', [info_dict])
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        # Hide formats deprioritized below the -1000 preference threshold.
        if f.get('preference') is None or f['preference'] >= -1000]
    if len(formats) > 1:
        # Formats are sorted worst-to-best; tag the last row as best.
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
1958 def list_thumbnails(self, info_dict):
1959 thumbnails = info_dict.get('thumbnails')
1961 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
1965 '[info] Thumbnails for %s:' % info_dict['id'])
1966 self.to_screen(render_table(
1967 ['ID', 'width', 'height', 'URL'],
1968 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1970 def list_subtitles(self, video_id, subtitles, name='subtitles'):
1972 self.to_screen('%s has no %s' % (video_id, name))
1975 'Available %s for %s:' % (name, video_id))
1976 self.to_screen(render_table(
1977 ['Language', 'formats'],
1978 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1979 for lang, formats in subtitles.items()]))
1981 def urlopen(self, req):
1982 """ Start an HTTP download """
1983 if isinstance(req, compat_basestring):
1984 req = sanitized_Request(req)
1985 return self._opener.open(req, timeout=self._socket_timeout)
1987 def print_debug_header(self):
1988 if not self.params.get('verbose'):
1991 if type('') is not compat_str:
1992 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1993 self.report_warning(
1994 'Your Python is broken! Update to a newer and supported version')
1996 stdout_encoding = getattr(
1997 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1999 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2000 locale.getpreferredencoding(),
2001 sys.getfilesystemencoding(),
2003 self.get_encoding()))
2004 write_string(encoding_str, encoding=None)
2006 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2008 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2010 sp = subprocess.Popen(
2011 ['git', 'rev-parse', '--short', 'HEAD'],
2012 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2013 cwd=os.path.dirname(os.path.abspath(__file__)))
2014 out, err = sp.communicate()
2015 out = out.decode().strip()
2016 if re.match('[0-9a-f]+', out):
2017 self._write_string('[debug] Git HEAD: ' + out + '\n')
2023 self._write_string('[debug] Python version %s - %s\n' % (
2024 platform.python_version(), platform_name()))
2026 exe_versions = FFmpegPostProcessor.get_versions(self)
2027 exe_versions['rtmpdump'] = rtmpdump_version()
2028 exe_str = ', '.join(
2030 for exe, v in sorted(exe_versions.items())
2035 self._write_string('[debug] exe versions: %s\n' % exe_str)
2038 for handler in self._opener.handlers:
2039 if hasattr(handler, 'proxies'):
2040 proxy_map.update(handler.proxies)
2041 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2043 if self.params.get('call_home', False):
2044 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2045 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2046 latest_version = self.urlopen(
2047 'https://yt-dl.org/latest/version').read().decode('utf-8')
2048 if version_tuple(latest_version) > version_tuple(__version__):
2049 self.report_warning(
2050 'You are using an outdated version (newest version: %s)! '
2051 'See https://yt-dl.org/update if you need help updating.' %
2054 def _setup_opener(self):
2055 timeout_val = self.params.get('socket_timeout')
2056 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2058 opts_cookiefile = self.params.get('cookiefile')
2059 opts_proxy = self.params.get('proxy')
2061 if opts_cookiefile is None:
2062 self.cookiejar = compat_cookiejar.CookieJar()
2064 opts_cookiefile = compat_expanduser(opts_cookiefile)
2065 self.cookiejar = compat_cookiejar.MozillaCookieJar(
2067 if os.access(opts_cookiefile, os.R_OK):
2068 self.cookiejar.load()
2070 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2071 if opts_proxy is not None:
2072 if opts_proxy == '':
2075 proxies = {'http': opts_proxy, 'https': opts_proxy}
2077 proxies = compat_urllib_request.getproxies()
2078 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2079 if 'http' in proxies and 'https' not in proxies:
2080 proxies['https'] = proxies['http']
2081 proxy_handler = PerRequestProxyHandler(proxies)
2083 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2084 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2085 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2086 data_handler = compat_urllib_request_DataHandler()
2088 # When passing our own FileHandler instance, build_opener won't add the
2089 # default FileHandler and allows us to disable the file protocol, which
2090 # can be used for malicious purposes (see
2091 # https://github.com/rg3/youtube-dl/issues/8227)
2092 file_handler = compat_urllib_request.FileHandler()
2094 def file_open(*args, **kwargs):
2095 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2096 file_handler.file_open = file_open
2098 opener = compat_urllib_request.build_opener(
2099 proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2101 # Delete the default user-agent header, which would otherwise apply in
2102 # cases where our custom HTTP handler doesn't come into play
2103 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2104 opener.addheaders = []
2105 self._opener = opener
2107 def encode(self, s):
2108 if isinstance(s, bytes):
2109 return s # Already encoded
2112 return s.encode(self.get_encoding())
2113 except UnicodeEncodeError as err:
2114 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2117 def get_encoding(self):
2118 encoding = self.params.get('encoding')
2119 if encoding is None:
2120 encoding = preferredencoding()
2123 def _write_thumbnails(self, info_dict, filename):
2124 if self.params.get('writethumbnail', False):
2125 thumbnails = info_dict.get('thumbnails')
2127 thumbnails = [thumbnails[-1]]
2128 elif self.params.get('write_all_thumbnails', False):
2129 thumbnails = info_dict.get('thumbnails')
2134 # No thumbnails present, so return immediately
2137 for t in thumbnails:
2138 thumb_ext = determine_ext(t['url'], 'jpg')
2139 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2140 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2141 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2143 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2144 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2145 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2147 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2148 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2150 uf = self.urlopen(t['url'])
2151 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2152 shutil.copyfileobj(uf, thumbf)
2153 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2154 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2155 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2156 self.report_warning('Unable to download thumbnail "%s": %s' %
2157 (t['url'], error_to_compat_str(err)))