2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_get_terminal_size,
36 compat_tokenize_tokenize,
38 compat_urllib_request,
39 compat_urllib_request_DataHandler,
62 PerRequestProxyHandler,
67 register_socks_protocols,
77 UnavailableVideoError,
82 YoutubeDLCookieProcessor,
85 from .cache import Cache
86 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
87 from .downloader import get_suitable_downloader
88 from .downloader.rtmp import rtmpdump_version
89 from .postprocessor import (
92 FFmpegFixupStretchedPP,
97 from .version import __version__
# NOTE(review): the body of this Windows-only guard is elided in this
# excerpt — presumably platform-specific imports (ctypes is used by
# to_console_title below); confirm against the full file.
if compat_os_name == 'nt':
103 class YoutubeDL(object):
106 YoutubeDL objects are the ones responsible of downloading the
107 actual video file and writing it to disk if the user has requested
108 it, among some other tasks. In most cases there should be one per
program. Since, given a video URL, the downloader doesn't know how to
extract all the needed information (a task that InfoExtractors perform),
it has to pass the URL to one of them.
113 For this, YoutubeDL objects have a method that allows
114 InfoExtractors to be registered in a given order. When it is passed
a URL, the YoutubeDL object hands it to the first InfoExtractor it
116 finds that reports being able to handle it. The InfoExtractor extracts
117 all the information about the video or videos the URL refers to, and
YoutubeDL processes the extracted information, possibly using a File
119 Downloader to download the video.
121 YoutubeDL objects accept a lot of parameters. In order not to saturate
122 the object constructor with arguments, it receives a dictionary of
123 options instead. These options are available through the params
124 attribute for the InfoExtractors to use. The YoutubeDL also
125 registers itself as the downloader in charge for the InfoExtractors
126 that are added to it, so this is a "mutual registration".
130 username: Username for authentication purposes.
131 password: Password for authentication purposes.
132 videopassword: Password for accessing a video.
133 usenetrc: Use netrc for authentication instead.
134 verbose: Print additional info to stdout.
135 quiet: Do not print messages to stdout.
136 no_warnings: Do not print out anything for warnings.
137 forceurl: Force printing final URL.
138 forcetitle: Force printing title.
139 forceid: Force printing ID.
140 forcethumbnail: Force printing thumbnail URL.
141 forcedescription: Force printing description.
142 forcefilename: Force printing final filename.
143 forceduration: Force printing duration.
144 forcejson: Force printing info_dict as JSON.
145 dump_single_json: Force printing the info_dict of the whole playlist
146 (or video) as a single JSON line.
147 simulate: Do not download the video files.
148 format: Video format code. See options.py for more information.
149 outtmpl: Template for output names.
150 restrictfilenames: Do not allow "&" and spaces in file names
151 ignoreerrors: Do not stop on download errors.
152 force_generic_extractor: Force downloader to use the generic extractor
153 nooverwrites: Prevent overwriting files.
154 playliststart: Playlist item to start at.
155 playlistend: Playlist item to end at.
156 playlist_items: Specific indices of playlist to download.
157 playlistreverse: Download playlist items in reverse order.
158 matchtitle: Download only matching titles.
159 rejecttitle: Reject downloads for matching titles.
160 logger: Log messages to a logging.Logger instance.
161 logtostderr: Log messages to stderr instead of stdout.
162 writedescription: Write the video description to a .description file
163 writeinfojson: Write the video description to a .info.json file
164 writeannotations: Write the video annotations to a .annotations.xml file
165 writethumbnail: Write the thumbnail image to a file
166 write_all_thumbnails: Write all thumbnail formats to files
167 writesubtitles: Write the video subtitles to a file
168 writeautomaticsub: Write the automatically generated subtitles to a file
169 allsubtitles: Downloads all the subtitles of the video
170 (requires writesubtitles or writeautomaticsub)
171 listsubtitles: Lists all available subtitles for the video
172 subtitlesformat: The format code for subtitles
173 subtitleslangs: List of languages of the subtitles to download
174 keepvideo: Keep the video file after post-processing
175 daterange: A DateRange object, download only if the upload_date is in the range.
176 skip_download: Skip the actual download of the video file
177 cachedir: Location of the cache files in the filesystem.
178 False to disable filesystem cache.
179 noplaylist: Download single video instead of a playlist if in doubt.
180 age_limit: An integer representing the user's age in years.
181 Unsuitable videos for the given age are skipped.
182 min_views: An integer representing the minimum view count the video
183 must have in order to not be skipped.
184 Videos without view count information are always
185 downloaded. None for no limit.
186 max_views: An integer representing the maximum view count.
187 Videos that are more popular than that are not
189 Videos without view count information are always
190 downloaded. None for no limit.
191 download_archive: File name of a file where all downloads are recorded.
192 Videos already present in the file are not downloaded
194 cookiefile: File name where cookies should be read from and dumped to.
195 nocheckcertificate:Do not verify SSL certificates
196 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
197 At the moment, this is only supported by YouTube.
198 proxy: URL of the proxy server to use
199 geo_verification_proxy: URL of the proxy to use for IP address verification
200 on geo-restricted sites. (Experimental)
201 socket_timeout: Time to wait for unresponsive hosts, in seconds
202 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
204 debug_printtraffic:Print out sent and received HTTP traffic
205 include_ads: Download ads as well
206 default_search: Prepend this string if an input url is not valid.
207 'auto' for elaborate guessing
208 encoding: Use this encoding instead of the system-specified.
209 extract_flat: Do not resolve URLs, return the immediate result.
210 Pass in 'in_playlist' to only show this behavior for
212 postprocessors: A list of dictionaries, each with an entry
213 * key: The name of the postprocessor. See
214 youtube_dl/postprocessor/__init__.py for a list.
215 as well as any further keyword arguments for the
217 progress_hooks: A list of functions that get called on download
218 progress, with a dictionary with the entries
219 * status: One of "downloading", "error", or "finished".
220 Check this first and ignore unknown values.
222 If status is one of "downloading", or "finished", the
223 following properties may also be present:
224 * filename: The final filename (always present)
225 * tmpfilename: The filename we're currently writing to
226 * downloaded_bytes: Bytes on disk
227 * total_bytes: Size of the whole file, None if unknown
228 * total_bytes_estimate: Guess of the eventual file size,
230 * elapsed: The number of seconds since download started.
231 * eta: The estimated time in seconds, None if unknown
232 * speed: The download speed in bytes/second, None if
234 * fragment_index: The counter of the currently
235 downloaded video fragment.
236 * fragment_count: The number of fragments (= individual
237 files that will be merged)
239 Progress hooks are guaranteed to be called at least once
240 (with status "finished") if the download is successful.
241 merge_output_format: Extension to use when merging formats.
242 fixup: Automatically correct known faults of the file.
244 - "never": do nothing
245 - "warn": only emit a warning
246 - "detect_or_warn": check whether we can do anything
247 about it, warn otherwise (default)
248 source_address: (Experimental) Client-side IP address to bind to.
249 call_home: Boolean, true iff we are allowed to contact the
250 youtube-dl servers for debugging.
251 sleep_interval: Number of seconds to sleep before each download.
252 listformats: Print an overview of available video formats and exit.
253 list_thumbnails: Print a table of all thumbnails and exit.
254 match_filter: A function that gets called with the info_dict of
256 If it returns a message, the video is ignored.
257 If it returns None, the video is downloaded.
258 match_filter_func in utils.py is one example for this.
259 no_color: Do not emit color codes in output.
261 The following options determine which downloader is picked:
262 external_downloader: Executable of the external downloader to call.
263 None or unset for standard (built-in) downloader.
264 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
265 if True, otherwise use ffmpeg/avconv if False, otherwise
266 use downloader suggested by extractor if None.
268 The following parameters are not used by YoutubeDL itself, they are used by
269 the downloader (see youtube_dl/downloader/common.py):
270 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
271 noresizebuffer, retries, continuedl, noprogress, consoletitle,
272 xattr_set_filesize, external_downloader_args, hls_use_mpegts.
274 The following options are used by the post processors:
275 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
276 otherwise prefer avconv.
277 postprocessor_args: A list of additional command-line arguments for the
# Process-level return code of the last download batch; reset to 0 in __init__
# and set to 1 by trouble() when an error is swallowed under ignoreerrors.
_download_retcode = None
# Running count of completed downloads; used by prepare_filename() to expand
# the %(autonumber)s output-template field.
_num_downloads = None
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options."""
    # NOTE(review): several lines of this method are missing from this
    # excerpt; the code below is reproduced verbatim and gaps are marked.
    self._ies_instances = {}
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Screen output goes to stderr (index 1) when 'logtostderr' is set,
    # otherwise to stdout (index 0).
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    # NOTE(review): fragment of the default-parameters dict whose
    # surrounding lines are elided here.
    'nocheckcertificate': False,
    self.params.update(params)
    self.cache = Cache(self)

    # Deprecated --cn-verification-proxy maps onto geo_verification_proxy
    # when the latter is unset.
    if self.params.get('cn_verification_proxy') is not None:
        self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    # Bidirectional-text workaround: pipe screen output through an external
    # program ('bidiv', falling back to 'fribidi') over a pty.
    if params.get('bidi_workaround', False):
        master, slave = pty.openpty()
        width = compat_get_terminal_size().columns
        width_args = ['-w', str(width)]
        # NOTE(review): fragments of the subprocess keyword dict follow;
        # the statement that opens it is elided.
        stdin=subprocess.PIPE,
        stderr=self._err_file)
        self._output_process = subprocess.Popen(
            ['bidiv'] + width_args, **sp_kwargs
        self._output_process = subprocess.Popen(
            ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
        self._output_channel = os.fdopen(master, 'rb')
        # NOTE(review): the matching 'try:' for this handler is elided.
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    # With an ASCII filesystem encoding on Python 3, force restricted
    # filenames so the Unicode filesystem API does not throw (#1474).
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        'Assuming --restrict-filenames since file system encoding '
        'cannot encode all characters. '
        'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        # NOTE(review): the warning call wrapping these strings is elided.
        'Parameter outtmpl is bytes, but should be a unicode string. '
        'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

    self.print_debug_header()
    self.add_default_info_extractors()

    # Instantiate and register the configured postprocessors, in order.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        pp_def = dict(pp_def_raw)
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
def warn_if_short_id(self, argv):
    # Warn when a bare 11-character YouTube ID starting with '-' would be
    # parsed as an option, and suggest a '--'-separated command line.
    # short YouTube ID starting with dash?
    # NOTE(review): the assignment that opens this comprehension
    # ('idxs = [') is elided from the excerpt.
    i for i, a in enumerate(argv)
    if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
    # Rebuild argv with the suspicious IDs moved after a '--' separator.
    # NOTE(review): the 'correct_argv = ... +' opening is elided.
    [a for i, a in enumerate(argv) if i not in idxs] +
    ['--'] + [argv[i] for i in idxs]
    # NOTE(review): the report_warning( call wrapping these strings is elided.
    'Long argument string detected. '
    'Use -- to separate parameters and URLs, like this:\n%s\n' %
    args_to_str(correct_argv))
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    # NOTE(review): the excerpt never appended to the ordered extractor
    # list although the docstring promises it; restored here — confirm
    # that self._ies is initialized in __init__ in the full file.
    self._ies.append(ie)
    # 'ie' may be either a class (lazy registration) or an instance; only
    # instances are indexed by key and wired back to this downloader.
    if not isinstance(ie, type):
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    ie = self._ies_instances.get(ie_key)
    # As excerpted, the cached lookup was immediately clobbered and nothing
    # was returned; only instantiate (and register) on a cache miss.
    if ie is None:
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """Register every extractor from gen_extractor_classes(), in order."""
    for extractor in gen_extractor_classes():
        self.add_info_extractor(extractor)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # NOTE(review): the append promised by the docstring was missing from
    # the excerpt; restored — confirm self._pps exists in __init__.
    self._pps.append(pp)
    pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
def _bidi_workaround(self, message):
    """Pipe *message* through the bidi helper process, if one is running.

    When the 'bidi_workaround' option is off no '_output_channel' was set
    up in __init__, so the message passes through unchanged.
    """
    # As excerpted this guard had no body (syntax error); callers assign
    # the return value back, so the no-workaround path returns unchanged.
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    line_count = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    # Read back exactly as many lines as were sent, then strip the
    # trailing newline that was appended above.
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print *message* to the screen, honouring quiet mode."""
    # Screen output is routed through to_stdout with quiet-checking enabled.
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Low-level write of *s* to *out* using the user-configured encoding."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode."""
    logger = self.params.get('logger')
    if logger:
        # A configured logger takes over all screen output.
        logger.debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        terminator = '' if skip_eol else '\n'
        self._write_string(message + terminator, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
    else:
        # As excerpted the 'else:' was missing, so a configured logger
        # would ALSO cause a direct stderr write (duplicate output);
        # the two paths are alternatives.
        message = self._bidi_workaround(message)
        output = message + '\n'
        self._write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the terminal/console window title to *message* (best effort)."""
    # As excerpted this guard had no body (syntax error); do nothing
    # unless the user enabled console-title updates.
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm OSC 0: set icon name and window title.
        self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title onto the xterm title stack."""
    # As excerpted this guard had no body (syntax error); restore the
    # early return when console-title handling is disabled.
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the previously saved terminal title from the xterm title stack."""
    # As excerpted this guard had no body (syntax error); restore the
    # early return when console-title handling is disabled.
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
# NOTE(review): this call appears to be the body of __enter__, whose
# 'def' line (and 'return self') are elided from this excerpt.
self.save_console_title()

def __exit__(self, *args):
    # Context-manager exit: undo the console-title change and, when a
    # cookie file was configured, persist the cookie jar to disk.
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        # NOTE(review): an 'if tb is None:' guard and some branch lines
        # appear to be elided here; indentation below is best-effort.
        if sys.exc_info()[0]:  # if .trouble has been called from an except block
            if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
            tb += encode_compat_str(traceback.format_exc())
        # Not called from an except block: dump the current stack instead.
        tb_data = traceback.format_list(traceback.extract_stack())
        tb = ''.join(tb_data)
    if not self.params.get('ignoreerrors', False):
        # Prefer the wrapped exception's exc_info when available so the
        # DownloadError points at the original failure.
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        # NOTE(review): an 'else:' appears to be elided here.
        exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # ignoreerrors: swallow the error but record a failing return code.
    self._download_retcode = 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
    else:
        # As excerpted, the no_warnings guard had no body and the two
        # output paths were not alternatives; restored structure below.
        if self.params.get('no_warnings'):
            return
        # Color the prefix only on a color-capable tty (never on Windows).
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    # As excerpted the 'else:' was missing, which would leave _msg_header
    # undefined (NameError) whenever the tty/color condition is false.
    if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
        _msg_header = '\033[0;31mERROR:\033[0m'
    else:
        _msg_header = 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    # As excerpted there was an 'except' with no matching 'try' (syntax
    # error); restored. file_name may not be representable in the console
    # encoding, in which case fall back to a message without the name.
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
def prepare_filename(self, info_dict):
    """Generate the output filename."""
    # NOTE(review): the 'try:' matching the ValueError handler at the
    # bottom, and a few other lines, are elided from this excerpt.
    template_dict = dict(info_dict)

    template_dict['epoch'] = int(time.time())
    autonumber_size = self.params.get('autonumber_size')
    # NOTE(review): the default assignment for autonumber_size is elided
    # from the body of this guard.
    if autonumber_size is None:
    autonumber_templ = '%0' + str(autonumber_size) + 'd'
    template_dict['autonumber'] = autonumber_templ % self._num_downloads
    # Zero-pad playlist_index to the width of the total entry count.
    if template_dict.get('playlist_index') is not None:
        template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
    # Derive a human-readable 'resolution' field when the extractor did
    # not provide one.
    if template_dict.get('resolution') is None:
        if template_dict.get('width') and template_dict.get('height'):
            template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
        elif template_dict.get('height'):
            template_dict['resolution'] = '%sp' % template_dict['height']
        elif template_dict.get('width'):
            template_dict['resolution'] = '%dx?' % template_dict['width']

    # NOTE(review): part of this lambda's argument list is elided.
    sanitize = lambda k, v: sanitize_filename(
        restricted=self.params.get('restrictfilenames'),
    # Sanitize all scalar fields; None and container values are dropped.
    template_dict = dict((k, sanitize(k, v))
                         for k, v in template_dict.items()
                         if v is not None and not isinstance(v, (list, tuple, dict)))
    # Unknown template fields expand to the literal string 'NA'.
    template_dict = collections.defaultdict(lambda: 'NA', template_dict)

    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    tmpl = compat_expanduser(outtmpl)
    filename = tmpl % template_dict
    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        filename = encodeFilename(filename, True).decode(preferredencoding())
    return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
def _match_entry(self, info_dict, incomplete):
    """ Returns None iff the file should be downloaded """
    # Prefer the title for messages; fall back to id, then 'video'.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        # NOTE(review): an 'if matchtitle:' guard appears to be elided here.
        if not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        # NOTE(review): an 'if rejecttitle:' guard appears to be elided here.
        if re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date')
    # NOTE(review): an 'if date is not None:' guard appears to be elided.
    dateRange = self.params.get('daterange', DateRange())
    if date not in dateRange:
        return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    # View-count limits: videos without a view count always pass.
    view_count = info_dict.get('view_count')
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    # Custom --match-filter callback: a non-None return is the skip reason.
    match_filter = self.params.get('match_filter')
    if match_filter is not None:
        ret = match_filter(info_dict)
        # NOTE(review): the trailing 'if ret is not None: return ret' and
        # the final 'return None' appear to be elided from this excerpt.
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    # setdefault leaves existing entries untouched, so extractor-provided
    # values always win over inherited extras.
    for extra_key, extra_value in extra_info.items():
        info_dict.setdefault(extra_key, extra_value)
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    """
    # NOTE(review): a considerable amount of scaffolding (the extractor
    # loop, 'try:' statements, and several returns) is elided from this
    # excerpt; the lines below are reproduced verbatim.
    if not ie_key and force_generic_extractor:
    ies = [self.get_info_extractor(ie_key)]

    if not ie.suitable(url):
    ie = self.get_info_extractor(ie.ie_key())
    # Extractors flagged not-working still get a chance, with a warning.
    self.report_warning('The program functionality for this site has been marked as broken, '
                        'and will probably not work.')

    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        '_type': 'compat_list',
        'entries': ie_result,
    self.add_default_extra_info(ie_result, ie, url)
    return self.process_ie_result(ie_result, download, extra_info)
    except ExtractorError as e:  # An error we somewhat expected
        self.report_error(compat_str(e), e.format_traceback())
    except MaxDownloadsReached:
    except Exception as e:
        # Unexpected errors only become reports under ignoreerrors;
        # otherwise (elided here) they propagate.
        if self.params.get('ignoreerrors', False):
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
    self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
    """Attach standard provenance fields to *ie_result* (missing keys only)."""
    # NOTE(review): the dict literal was truncated in the excerpt; the
    # closing '})' and the 'webpage_url' entry were restored — the value
    # is inferred from the url_basename(url) line; confirm upstream.
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    # NOTE(review): many lines of this long method are elided from this
    # excerpt; gaps are marked where they matter for reading.
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        # With extract_flat, nested URLs are not resolved further.
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            # NOTE(review): the 'return ie_result' ending this branch is elided.

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        # NOTE(review): the 'download=download,' argument line is elided.
        return self.extract_info(ie_result['url'],
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # Non-None fields of the outer result override the embedded info,
        # except for the identity fields removed below.
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # playliststart is 1-based on the command line; 0-based here.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        # NOTE(review): the body of this guard (playlistend = None) is elided.
        if playlistend == -1:

        playlistitems_str = self.params.get('playlist_items')
        if playlistitems_str is not None:
            # Expand '1-3,7' style specs into individual indices, lazily.
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        # NOTE(review): 'yield item' and the 'else:' for the
                        # single-index case are elided here.
                        for item in range(int(start), int(end) + 1):
                    yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            # NOTE(review): the 'if playlistitems:' branch opening and the
            # 'entries = [' line are elided around this comprehension.
            ie_entries[i - 1] for i in playlistitems
            if -n_all_entries <= i - 1 < n_all_entries]
            entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            # NOTE(review): the to_screen( call wrapping these lines is elided.
            '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            # Paged playlists are sliced page by page.
            for item in playlistitems:
                entries.extend(ie_entries.getslice(
            entries = ie_entries.getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            '[%s] playlist %s: Downloading %d videos' %
            (ie_result['extractor'], playlist, n_entries))
        # NOTE(review): the 'else:' header for the generic-iterable case is
        # elided; these lines belong to that branch.
        entry_list = list(ie_entries)
        entries = [entry_list[i - 1] for i in playlistitems]
        entries = list(itertools.islice(
            ie_entries, playliststart, playlistend))
        n_entries = len(entries)
        '[%s] playlist %s: Downloading %d videos' %
        (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # NOTE(review): the 'extra = {' opening of this per-entry
            # inherited-info dict is elided.
            'n_entries': n_entries,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_index': i + playliststart,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],

            # Skip entries rejected by filters/archive before processing.
            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)

            # NOTE(review): the remaining arguments of this call are elided.
            entry_result = self.process_ie_result(entry,
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    elif result_type == 'compat_list':
        # NOTE(review): the report_warning( call and the '_fixup' helper
        # definition are partially elided in this branch.
        'Extractor %s returned a compat_list result. '
        'It needs to be updated.' % ie_result.get('extractor'))
        'extractor': ie_result['extractor'],
        'webpage_url': ie_result['webpage_url'],
        'webpage_url_basename': url_basename(ie_result['webpage_url']),
        'extractor_key': ie_result['extractor_key'],
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
    # NOTE(review): an 'else:' appears to be elided before this raise.
    raise Exception('Invalid result type: %s' % result_type)
def _build_format_filter(self, filter_spec):
    " Returns a function to filter the formats according to the filter_spec "
    # NOTE(review): the OPERATORS table and several control-flow lines
    # ('if m:', 'try:'/'except', 'else:', 'def _filter(f):', 'return
    # _filter') are elided from this excerpt.
    # Numeric comparisons over width/height/bitrate/filesize/fps; the
    # value may carry a binary/decimal size suffix.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.search(filter_spec)
    comparison_value = int(m.group('value'))
    # Non-integer values are retried as file sizes ('500K'), then with an
    # explicit 'B' suffix.
    comparison_value = parse_filesize(m.group('value'))
    if comparison_value is None:
        comparison_value = parse_filesize(m.group('value') + 'B')
    if comparison_value is None:
        # NOTE(review): the raise ValueError( opening is elided.
        'Invalid value %r in format specification %r' % (
            m.group('value'), filter_spec))
    op = OPERATORS[m.group('op')]

    # String comparisons (exact/prefix/suffix/substring) over codec and
    # container-style fields.
    '^=': lambda attr, value: attr.startswith(value),
    '$=': lambda attr, value: attr.endswith(value),
    '*=': lambda attr, value: value in attr,
    str_operator_rex = re.compile(r'''(?x)
        \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
        \s*(?P<value>[a-zA-Z0-9._-]+)
        ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
    m = str_operator_rex.search(filter_spec)
    comparison_value = m.group('value')
    op = STR_OPERATORS[m.group('op')]

    raise ValueError('Invalid filter specification %r' % filter_spec)

    # The returned predicate: formats missing the attribute pass only when
    # the '?' (none_inclusive) suffix was used.
    actual_value = f.get(m.group('key'))
    if actual_value is None:
        return m.group('none_inclusive')
    return op(actual_value, comparison_value)
def build_format_selector(self, format_spec):
    """Compile *format_spec* (e.g. 'bestvideo+bestaudio/best') into a
    selector function that maps a list of format dicts to the format(s)
    to download.

    NOTE(review): this excerpt elides a number of original lines
    ('else:'/'break'/assignment lines, some constants); the comments
    below describe only the code that is visible here.
    """
    def syntax_error(note, start):
        # Builds (does not raise) a SyntaxError whose message points at
        # the offending column of the format spec.
        'Invalid format specification: '
        '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Selector node kind tags (MERGE/SINGLE/GROUP definitions elided here).
    PICKFIRST = 'PICKFIRST'

    # AST node: type is one of the tags above, selector is kind-specific
    # payload, filters is a list of raw '[...]' filter strings.
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Consume tokens up to the matching ']' and return the raw
        # filter expression text.
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
                # (elided 'else:') other tokens accumulate into the filter text
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                # (elided guard) flush the pending joined string, if any
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
            elif type == tokenize.OP and string in ALLOWED_OPS:
                # (elided guard) flush pending string before a meaningful operator
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # (elided branch) start or extend the pending joined string
                last_string += string
        # flush the final pending string (guard elided)
        yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser for one comma-separated selector list.
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                # (elided: dispatch on the operator character; the
                # branches below belong to different operators)
                if not inside_group:
                    # ')' will be handled by the parentheses group
                    tokens.restore_last_token()
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                # ',': separates alternatives in the selector list
                if not current_selector:
                    raise syntax_error('"," must follow a format selector', start)
                selectors.append(current_selector)
                current_selector = None
                # '/': fallback — pick first side that yields formats
                if not current_selector:
                    raise syntax_error('"/" must follow a format selector', start)
                first_choice = current_selector
                second_choice = _parse_format_selection(tokens, inside_choice=True)
                current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                # '[': filter on the current selector ('best' if none yet)
                if not current_selector:
                    current_selector = FormatSelector(SINGLE, 'best', [])
                format_filter = _parse_filter(tokens)
                current_selector.filters.append(format_filter)
                # '(': parenthesised group
                if current_selector:
                    raise syntax_error('Unexpected "("', start)
                group = _parse_format_selection(tokens, inside_group=True)
                current_selector = FormatSelector(GROUP, group, [])
                # '+': merge a video selector with an audio selector
                video_selector = current_selector
                audio_selector = _parse_format_selection(tokens, inside_merge=True)
                if not video_selector or not audio_selector:
                    raise syntax_error('"+" must be between two format selectors', start)
                current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
        if current_selector:
            selectors.append(current_selector)

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of them) into a callable
        # formats -> iterable-of-formats.
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(formats):
                # concatenate the results of each sub-selector in order
                for format in f(formats):
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(formats):
                # return the first alternative that picks anything
                picked_formats = list(f(formats))
                return picked_formats
        elif selector.type == SINGLE:
            format_spec = selector.selector

            def selector_function(formats):
                formats = list(formats)
                if format_spec == 'all':
                elif format_spec in ['best', 'worst', None]:
                    # formats are assumed sorted worst-to-best, so 'worst'
                    # takes index 0 and 'best'/None the last element
                    format_idx = 0 if format_spec == 'worst' else -1
                    audiovideo_formats = [
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                    elif (all(f.get('acodec') != 'none' for f in formats) or
                          all(f.get('vcodec') != 'none' for f in formats)):
                        yield formats[format_idx]
                elif format_spec == 'bestaudio':
                    if f.get('vcodec') == 'none']
                    yield audio_formats[-1]
                elif format_spec == 'worstaudio':
                    if f.get('vcodec') == 'none']
                    yield audio_formats[0]
                elif format_spec == 'bestvideo':
                    if f.get('acodec') == 'none']
                    yield video_formats[-1]
                elif format_spec == 'worstvideo':
                    if f.get('acodec') == 'none']
                    yield video_formats[0]
                    # (elided 'else:') literal spec: match extension first,
                    # otherwise match on format_id
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if format_spec in extensions:
                        filter_f = lambda f: f['ext'] == format_spec
                        filter_f = lambda f: f['format_id'] == format_spec
                    matches = list(filter(filter_f, formats))
        elif selector.type == MERGE:
            def _merge(formats_info):
                # Combine a (video, audio) pair into one synthetic format dict.
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                # Formats must be opposite (video+audio)
                if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                    'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                    % (format_1, format_2))
                # container ext: an explicit merge_output_format param wins
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                # video-side fields come from formats_info[0], audio-side from [1]
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),

            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(formats):
                formats = list(formats)
                # every video/audio combination is a merge candidate
                for pair in itertools.product(video_selector(formats), audio_selector(formats)):

        # Bracket filters narrow the candidate list before selection.
        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(formats):
            for _filter in filters:
                formats = list(filter(_filter, formats))
            return selector_function(formats)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    # (elided 'try:') tokenize the spec text with the compat tokenizer
    tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Token stream with one-token pushback (iteration plumbing elided).
        def __init__(self, tokens):
            self.tokens = tokens

            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]

        def restore_last_token(self):

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Build the effective HTTP headers for *info_dict*: the global
    std_headers overlaid with the entry's own 'http_headers' and the
    Cookie header our cookiejar would send for its URL.

    NOTE(review): the 'if' guards around the two updates and the final
    'return res' are elided from this excerpt.
    """
    res = std_headers.copy()

    add_headers = info_dict.get('http_headers')
    res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    res['Cookie'] = cookies
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar would attach to a
    request for this entry's URL (None when no cookie applies)."""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
def process_video_result(self, info_dict, download=True):
    """Sanitize a single extracted video result, pick the formats to
    download per the 'format' param, and hand each off to process_info.

    NOTE(review): many original lines (guards, 'else:' branches, some
    'return' statements) are elided from this excerpt; comments below
    describe only the visible code.
    """
    assert info_dict.get('_type', 'video') == 'video'

    # Hard requirements on the extractor result.
    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if not isinstance(info_dict['id'], compat_str):
        self.report_warning('"id" field is not a string - forcing string conversion')
        info_dict['id'] = compat_str(info_dict['id'])

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # Normalise thumbnails: single 'thumbnail' becomes a one-element list.
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    # Sort worst-to-best so thumbnails[-1] is the preferred one.
    thumbnails.sort(key=lambda t: (
        t.get('preference'), t.get('width'), t.get('height'),
        t.get('id'), t.get('url')))
    for i, t in enumerate(thumbnails):
        t['url'] = sanitize_url(t['url'])
        if t.get('width') and t.get('height'):
            t['resolution'] = '%dx%d' % (t['width'], t['height'])
        if t.get('id') is None:

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)

    # (elided branches) keep an explicit 'thumbnail', falling back to the
    # best entry of the sorted list.
    thumbnail = info_dict.get('thumbnail')
    info_dict['thumbnail'] = sanitize_url(thumbnail)
    info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    # Sanitize subtitle URLs and default their 'ext' from the URL.
    subtitles = info_dict.get('subtitles')
    for _, subtitle in subtitles.items():
        for subtitle_format in subtitle:
            if subtitle_format.get('url'):
                subtitle_format['url'] = sanitize_url(subtitle_format['url'])
            if 'ext' not in subtitle_format:
                subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles,
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
        formats = info_dict['formats']

    raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        if 'url' not in format:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        format['url'] = sanitize_url(format['url'])

        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if 'protocol' not in format:
            format['protocol'] = determine_protocol(format)
        # Add HTTP headers, so that external programs can use them from the
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)

    # Default spec: merge best video+audio when merging is possible and
    # we are not streaming to stdout or handling a live stream.
    req_format = self.params.get('format')
    if req_format is None:
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                not info_dict.get('is_live')):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)
    formats_to_download = list(format_selector(formats))
    if not formats_to_download:
        raise ExtractorError('requested format not available',

    if len(formats_to_download) > 1:
        self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
    for format in formats_to_download:
        new_info = dict(info_dict)
        new_info.update(format)
        self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Merges normal subtitles and automatic captions (normal ones win per
    language), picks the requested languages, then for each language the
    preferred format per the 'subtitlesformat' spec.

    NOTE(review): several original lines (the available_subs init, some
    'else:'/'continue'/'break' lines and the final return) are elided
    from this excerpt.
    """
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # do not let captions shadow real subtitles
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if (not self.params.get('writesubtitles') and not
            self.params.get('writeautomaticsub') or not

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
        if self.params.get('subtitleslangs', False):
            requested_langs = self.params.get('subtitleslangs')
        elif 'en' in available_subs:
            requested_langs = ['en']
            # (elided 'else:') fall back to an arbitrary available language
            requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    for lang in requested_langs:
        formats = available_subs.get(lang)
        self.report_warning('%s subtitles not available for %s' % (lang, video_id))
        for ext in formats_preference:
            # exact extension match against this language's format list
            matches = list(filter(lambda f: f['ext'] == ext, formats))
        self.report_warning(
            'No subtitle format found matching "%s" for language %s, '
            'using %s' % (formats_query, lang, f['ext']))
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Handles forced printing, writing side files (description,
    annotations, subtitles, info JSON, thumbnails), the actual download
    (including multi-format merge), post-download fixups and recording
    in the download archive.

    NOTE(review): many original lines ('try:'/'else:'/'return' lines,
    the nested 'def dl' header, etc.) are elided from this excerpt;
    comments describe only the visible code.
    """
    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the full title around, but cap the working title at 200 chars.
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # Match filters (age limit, title regex, archive, ...) may skip it.
    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings (--get-title, --get-url, --dump-json, ...).
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):

    if filename is None:

    # Create the target directory if needed ('try:' elided).
    dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
    if dn and not os.path.exists(dn):
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + error_to_compat_str(err))

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
            self.to_screen('[info] Writing video description to: ' + descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (OSError, IOError):
            self.report_error('Cannot write description file ' + descfn)

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
            self.to_screen('[info] Writing video annotations to: ' + annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        # missing/None 'annotations' key surfaces here as KeyError/TypeError
        except (KeyError, TypeError):
            self.report_warning('There are no annotations to write.')
        except (OSError, IOError):
            self.report_error('Cannot write annotations file: ' + annofn)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            # inline subtitle data wins over fetching from 'url'
            if sub_info.get('data') is not None:
                sub_data = sub_info['data']
                sub_data = ie._download_webpage(
                    sub_info['url'], info_dict['id'], note=False)
            except ExtractorError as err:
                self.report_warning('Unable to download subtitle for "%s": %s' %
                                    (sub_lang, error_to_compat_str(err.cause)))
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                # NOTE(review): 'already_present' looks like a typo for
                # 'already present' — runtime string left unchanged here.
                self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub_data)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            # requested_formats/requested_subtitles are stripped first
            write_json_file(self.filter_requested_info(info_dict), infofn)
        except (OSError, IOError):
            self.report_error('Cannot write metadata to JSON file ' + infofn)

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        # (elided 'try:' and 'def dl(name, info):' header) — the lines
        # below are the body of a nested download helper.
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                # Multiple formats were selected: download each and merge.
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                    postprocessors = [merger]

                def compatible_formats(formats):
                    # True when both streams can share one container.
                    video, audio = formats
                    # NOTE(review): the assignments look swapped —
                    # video_ext receives audio's ext and vice versa.
                    # Harmless for the symmetric membership test below,
                    # but worth confirming against upstream.
                    video_ext, audio_ext = audio.get('ext'), video.get('ext')
                    if video_ext and audio_ext:
                        ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                        for exts in COMPATIBLE_EXTS:
                            if video_ext in exts and audio_ext in exts:
                    # TODO: Check acodec/vcodec

                filename_real_ext = os.path.splitext(filename)[1][1:]
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                    # download each component into 'f<id>'-prefixed files
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

        if success and filename != '-':
            # Post-download fixups (aspect ratio, DASH m4a, HLS aac).
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None and
                    info_dict.get('container') == 'm4a_dash'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('protocol') == 'm3u8_native' or
                    info_dict.get('protocol') == 'm3u8' and
                    self.params.get('hls_prefer_native')):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformated aac bitstream.' % (
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                        self.report_warning(
                            '%s: malformated aac bitstream. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    assert fixup_policy in ('ignore', 'never')

                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))

    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    NOTE(review): the 'try:'/'raise'/'else:' lines of the loop body are
    elided from this excerpt.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # A fixed output template with several URLs would overwrite itself.
    if (len(url_list) > 1 and
            '%' not in outtmpl and
            self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        # It also downloads the videos
        res = self.extract_info(
            url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Re-run a download from a previously dumped info JSON file,
    falling back to the recorded webpage URL on failure.

    NOTE(review): the 'try:' before process_ie_result and the trailing
    'else: raise' are elided from this excerpt.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(json.loads('\n'.join(f)))
    self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
    return self._download_retcode
def filter_requested_info(info_dict):
    """Return a copy of *info_dict* without the non-serialisable
    'requested_formats'/'requested_subtitles' keys.

    NOTE(review): the 'return dict(' line (and, presumably, a
    @staticmethod decorator) are elided from this excerpt.
    """
    (k, v) for k, v in info_dict.items()
    if k not in ['requested_formats', 'requested_subtitles'])
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    NOTE(review): the pps_chain initialisation and two 'try:' lines are
    elided from this excerpt.
    """
    # Work on a copy so postprocessors cannot mutate the caller's dict.
    info = dict(ie_info)
    info['filepath'] = filename
    # Per-download postprocessors (e.g. merger/fixups) run before the
    # globally registered ones.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        files_to_delete = []
        files_to_delete, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if files_to_delete and not self.params.get('keepvideo', False):
            for old_filename in files_to_delete:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded original file')
1802 def _make_archive_id(self, info_dict):
1803 # Future-proof against any change in case
1804 # and backwards compatibility with prior versions
1805 extractor = info_dict.get('extractor_key')
1806 if extractor is None:
1807 if 'id' in info_dict:
1808 extractor = info_dict.get('ie_key') # key in a playlist
1809 if extractor is None:
1810 return None # Incomplete video information
1811 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return whether this video's archive id is already recorded in the
    download archive file (False when no archive is configured).

    NOTE(review): the early-return guards, 'try:', 'return True',
    'raise' and the final 'return False' are elided from this excerpt.
    """
    fn = self.params.get('download_archive')

    vid_id = self._make_archive_id(info_dict)
    return False  # Incomplete video information

    with locked_file(fn, 'r', encoding='utf-8') as archive_file:
        for line in archive_file:
            if line.strip() == vid_id:
    except IOError as ioe:
        # a missing archive file is fine; anything else propagates
        if ioe.errno != errno.ENOENT:
def record_download_archive(self, info_dict):
    """Append this video's archive id to the download archive file.

    NOTE(review): the no-archive guard and an assertion on vid_id are
    elided from this excerpt.
    """
    fn = self.params.get('download_archive')
    vid_id = self._make_archive_id(info_dict)
    # append one id per line; locked_file serialises concurrent writers
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format
    dict ('WxH', 'Hp', 'Wx?', an explicit 'resolution', ...).

    NOTE(review): the 'audio only' early return, two 'else:' branches
    and the final 'return res' are elided from this excerpt; judging by
    the signature this is presumably a @staticmethod.
    """
    if format.get('vcodec') == 'none':
    # an extractor-provided 'resolution' string wins
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '%dx?' % format['width']
def _format_note(self, fdict):
    """Build the free-form 'note' column for --list-formats from a
    format dict: bitrates, codecs, fps, sample rate, filesize, etc.

    NOTE(review): this excerpt elides the accumulator initialisation,
    the separator-inserting 'if res:' lines, several 'else:' branches
    and the final return; comments describe only the visible code.
    """
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported) '
    if fdict.get('language'):
        res += '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None and
            fdict.get('vcodec') != 'none'):
        res += fdict['vcodec']
        if fdict.get('vbr') is not None:
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        res += '%sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        # 'none' audio codec means a video-only format
        if fdict['acodec'] == 'none':
            res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        # '~' marks an approximate size
        res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print the available-formats table for --list-formats.

    NOTE(review): the 'table = [' opener and the 'for f in formats'
    comprehension line are elided from this excerpt.
    """
    formats = info_dict.get('formats', [info_dict])
    # one row per format, hiding strongly de-preferred entries
    [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
    if f.get('preference') is None or f['preference'] >= -1000]
    if len(formats) > 1:
        # formats are listed worst-to-best; tag the last row as best
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    '[info] Available formats for %s:\n%s' %
    (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
    """Print the thumbnails table for --list-thumbnails.

    NOTE(review): the empty-list guard ('if not thumbnails:' / 'return')
    is elided from this excerpt.
    """
    thumbnails = info_dict.get('thumbnails')
    self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])

    '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a language/formats table for --list-subs; *name* labels
    whether these are subtitles or automatic captions.

    NOTE(review): the empty-dict guard and one 'self.to_screen(' opener
    are elided from this excerpt.
    """
    self.to_screen('%s has no %s' % (video_id, name))
    'Available %s for %s:' % (name, video_id))
    self.to_screen(render_table(
        ['Language', 'formats'],
        # extractor order is worst-to-best; reversed() shows best first
        [[lang, ', '.join(f['ext'] for f in reversed(formats))]
            for lang, formats in subtitles.items()]))
def urlopen(self, req):
    """ Start an HTTP download """
    # Accept either a prepared Request object or a plain URL string.
    if isinstance(req, compat_basestring):
        req = sanitized_Request(req)
    opener = self._opener
    return opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write diagnostic info (versions, encodings, proxies) when the
        'verbose' option is set.

        NOTE(review): several lines of this method (early return, try/except
        around the git probe, accumulator initializations) appear elided in
        this chunk — confirm against upstream before relying on control flow.
        """
        if not self.params.get('verbose'):
        # Sanity check for broken Python builds where str is not compat_str.
        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')
        # sys.stdout may lack an .encoding attribute (e.g. when redirected).
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                self.get_encoding()))
        write_string(encoding_str, encoding=None)
        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best-effort: report the git commit when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))
        # Versions of external helper programs (ffmpeg/avconv, rtmpdump, ...).
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)
        # Collect the effective proxy configuration from all opener handlers.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
        if self.params.get('call_home', False):
            # Opt-in: query yt-dl.org for our public IP and the newest version.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS, data: URIs) used
        for every request made through self.urlopen().

        NOTE(review): some "else:" branches and call continuations appear
        elided in this chunk — confirm against upstream.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds when the option is unset.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')
        if opts_cookiefile is None:
            # No cookie file configured: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            opts_cookiefile = compat_expanduser(opts_cookiefile)
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # Only load the cookie file when it is actually readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()
        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # NOTE(review): the empty-string branch and its "else:" appear
                # elided here; this line presumably belongs to the else branch.
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # Fall back to the environment (http_proxy etc.).
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)
        # debug_printtraffic turns on urllib's wire-level tracing.
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()
        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2077 def encode(self, s):
2078 if isinstance(s, bytes):
2079 return s # Already encoded
2082 return s.encode(self.get_encoding())
2083 except UnicodeEncodeError as err:
2084 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2087 def get_encoding(self):
2088 encoding = self.params.get('encoding')
2089 if encoding is None:
2090 encoding = preferredencoding()
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail image(s) next to the video file.

        Honours 'writethumbnail' (a single thumbnail) and
        'write_all_thumbnails' (every available thumbnail).

        NOTE(review): the "else: return" branches and the "try:" line appear
        elided in this chunk — confirm against upstream.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            # Keep only the last entry — presumably the best quality; confirm.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
            # No thumbnails present, so return immediately
        for t in thumbnails:
            # Derive the image extension from the URL, defaulting to jpg.
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate names/messages when several thumbnails are written.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            # Thumbnail path: video filename with suffix + image extension.
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
            # Respect --no-overwrites for already-downloaded thumbnails.
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                # Network errors are reported as a warning, not fatal.
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))