2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_get_terminal_size,
38 compat_tokenize_tokenize,
40 compat_urllib_request,
60 PerRequestProxyHandler,
70 UnavailableVideoError,
81 from .cache import Cache
82 from .extractor import get_info_extractor, gen_extractors
83 from .downloader import get_suitable_downloader
84 from .downloader.rtmp import rtmpdump_version
85 from .postprocessor import (
87 FFmpegFixupStretchedPP,
92 from .version import __version__
95 class YoutubeDL(object):
98 YoutubeDL objects are the ones responsible for downloading the
99 actual video file and writing it to disk if the user has requested
100 it, among some other tasks. In most cases there should be one per
101 program. As, given a video URL, the downloader doesn't know how to
102 extract all the needed information, task that InfoExtractors do, it
103 has to pass the URL to one of them.
105 For this, YoutubeDL objects have a method that allows
106 InfoExtractors to be registered in a given order. When it is passed
107 a URL, the YoutubeDL object hands it to the first InfoExtractor it
108 finds that reports being able to handle it. The InfoExtractor extracts
109 all the information about the video or videos the URL refers to, and
110 YoutubeDL processes the extracted information, possibly using a File
111 Downloader to download the video.
113 YoutubeDL objects accept a lot of parameters. In order not to saturate
114 the object constructor with arguments, it receives a dictionary of
115 options instead. These options are available through the params
116 attribute for the InfoExtractors to use. The YoutubeDL also
117 registers itself as the downloader in charge for the InfoExtractors
118 that are added to it, so this is a "mutual registration".
122 username: Username for authentication purposes.
123 password: Password for authentication purposes.
124 videopassword: Password for accessing a video.
125 usenetrc: Use netrc for authentication instead.
126 verbose: Print additional info to stdout.
127 quiet: Do not print messages to stdout.
128 no_warnings: Do not print out anything for warnings.
129 forceurl: Force printing final URL.
130 forcetitle: Force printing title.
131 forceid: Force printing ID.
132 forcethumbnail: Force printing thumbnail URL.
133 forcedescription: Force printing description.
134 forcefilename: Force printing final filename.
135 forceduration: Force printing duration.
136 forcejson: Force printing info_dict as JSON.
137 dump_single_json: Force printing the info_dict of the whole playlist
138 (or video) as a single JSON line.
139 simulate: Do not download the video files.
140 format: Video format code. See options.py for more information.
141 outtmpl: Template for output names.
142 restrictfilenames: Do not allow "&" and spaces in file names
143 ignoreerrors: Do not stop on download errors.
144 force_generic_extractor: Force downloader to use the generic extractor
145 nooverwrites: Prevent overwriting files.
146 playliststart: Playlist item to start at.
147 playlistend: Playlist item to end at.
148 playlist_items: Specific indices of playlist to download.
149 playlistreverse: Download playlist items in reverse order.
150 matchtitle: Download only matching titles.
151 rejecttitle: Reject downloads for matching titles.
152 logger: Log messages to a logging.Logger instance.
153 logtostderr: Log messages to stderr instead of stdout.
154 writedescription: Write the video description to a .description file
155 writeinfojson: Write the video metadata to a .info.json file
156 writeannotations: Write the video annotations to a .annotations.xml file
157 writethumbnail: Write the thumbnail image to a file
158 write_all_thumbnails: Write all thumbnail formats to files
159 writesubtitles: Write the video subtitles to a file
160 writeautomaticsub: Write the automatic subtitles to a file
161 allsubtitles: Downloads all the subtitles of the video
162 (requires writesubtitles or writeautomaticsub)
163 listsubtitles: Lists all available subtitles for the video
164 subtitlesformat: The format code for subtitles
165 subtitleslangs: List of languages of the subtitles to download
166 keepvideo: Keep the video file after post-processing
167 daterange: A DateRange object, download only if the upload_date is in the range.
168 skip_download: Skip the actual download of the video file
169 cachedir: Location of the cache files in the filesystem.
170 False to disable filesystem cache.
171 noplaylist: Download single video instead of a playlist if in doubt.
172 age_limit: An integer representing the user's age in years.
173 Unsuitable videos for the given age are skipped.
174 min_views: An integer representing the minimum view count the video
175 must have in order to not be skipped.
176 Videos without view count information are always
177 downloaded. None for no limit.
178 max_views: An integer representing the maximum view count.
179 Videos that are more popular than that are not
181 Videos without view count information are always
182 downloaded. None for no limit.
183 download_archive: File name of a file where all downloads are recorded.
184 Videos already present in the file are not downloaded
186 cookiefile: File name where cookies should be read from and dumped to.
187 nocheckcertificate:Do not verify SSL certificates
188 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
189 At the moment, this is only supported by YouTube.
190 proxy: URL of the proxy server to use
191 cn_verification_proxy: URL of the proxy to use for IP address verification
192 on Chinese sites. (Experimental)
193 socket_timeout: Time to wait for unresponsive hosts, in seconds
194 bidi_workaround: Work around buggy terminals without bidirectional text
195 support, using fribidi
196 debug_printtraffic:Print out sent and received HTTP traffic
197 include_ads: Download ads as well
198 default_search: Prepend this string if an input url is not valid.
199 'auto' for elaborate guessing
200 encoding: Use this encoding instead of the system-specified.
201 extract_flat: Do not resolve URLs, return the immediate result.
202 Pass in 'in_playlist' to only show this behavior for
204 postprocessors: A list of dictionaries, each with an entry
205 * key: The name of the postprocessor. See
206 youtube_dl/postprocessor/__init__.py for a list.
207 as well as any further keyword arguments for the
209 progress_hooks: A list of functions that get called on download
210 progress, with a dictionary with the entries
211 * status: One of "downloading", "error", or "finished".
212 Check this first and ignore unknown values.
214 If status is one of "downloading", or "finished", the
215 following properties may also be present:
216 * filename: The final filename (always present)
217 * tmpfilename: The filename we're currently writing to
218 * downloaded_bytes: Bytes on disk
219 * total_bytes: Size of the whole file, None if unknown
220 * total_bytes_estimate: Guess of the eventual file size,
222 * elapsed: The number of seconds since download started.
223 * eta: The estimated time in seconds, None if unknown
224 * speed: The download speed in bytes/second, None if
226 * fragment_index: The counter of the currently
227 downloaded video fragment.
228 * fragment_count: The number of fragments (= individual
229 files that will be merged)
231 Progress hooks are guaranteed to be called at least once
232 (with status "finished") if the download is successful.
233 merge_output_format: Extension to use when merging formats.
234 fixup: Automatically correct known faults of the file.
236 - "never": do nothing
237 - "warn": only emit a warning
238 - "detect_or_warn": check whether we can do anything
239 about it, warn otherwise (default)
240 source_address: (Experimental) Client-side IP address to bind to.
241 call_home: Boolean, true iff we are allowed to contact the
242 youtube-dl servers for debugging.
243 sleep_interval: Number of seconds to sleep before each download.
244 listformats: Print an overview of available video formats and exit.
245 list_thumbnails: Print a table of all thumbnails and exit.
246 match_filter: A function that gets called with the info_dict of
248 If it returns a message, the video is ignored.
249 If it returns None, the video is downloaded.
250 match_filter_func in utils.py is one example for this.
251 no_color: Do not emit color codes in output.
253 The following options determine which downloader is picked:
254 external_downloader: Executable of the external downloader to call.
255 None or unset for standard (built-in) downloader.
256 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
258 The following parameters are not used by YoutubeDL itself, they are used by
259 the downloader (see youtube_dl/downloader/common.py):
260 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
261 noresizebuffer, retries, continuedl, noprogress, consoletitle,
262 xattr_set_filesize, external_downloader_args.
264 The following options are used by the post processors:
265 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
266 otherwise prefer avconv.
272 _download_retcode = None
273 _num_downloads = None
276 def __init__(self, params=None, auto_init=True):
277 """Create a YoutubeDL object with the given options."""
281 self._ies_instances = {}
283 self._progress_hooks = []
284 self._download_retcode = 0
285 self._num_downloads = 0
286 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
287 self._err_file = sys.stderr
289 self.cache = Cache(self)
291 if params.get('bidi_workaround', False):
294 master, slave = pty.openpty()
295 width = compat_get_terminal_size().columns
299 width_args = ['-w', str(width)]
301 stdin=subprocess.PIPE,
303 stderr=self._err_file)
305 self._output_process = subprocess.Popen(
306 ['bidiv'] + width_args, **sp_kwargs
309 self._output_process = subprocess.Popen(
310 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
311 self._output_channel = os.fdopen(master, 'rb')
312 except OSError as ose:
314 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
318 if (sys.version_info >= (3,) and sys.platform != 'win32' and
319 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
320 not params.get('restrictfilenames', False)):
321 # On Python 3, the Unicode filesystem API will throw errors (#1474)
323 'Assuming --restrict-filenames since file system encoding '
324 'cannot encode all characters. '
325 'Set the LC_ALL environment variable to fix this.')
326 self.params['restrictfilenames'] = True
328 if isinstance(params.get('outtmpl'), bytes):
330 'Parameter outtmpl is bytes, but should be a unicode string. '
331 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
336 self.print_debug_header()
337 self.add_default_info_extractors()
339 for pp_def_raw in self.params.get('postprocessors', []):
340 pp_class = get_postprocessor(pp_def_raw['key'])
341 pp_def = dict(pp_def_raw)
343 pp = pp_class(self, **compat_kwargs(pp_def))
344 self.add_post_processor(pp)
346 for ph in self.params.get('progress_hooks', []):
347 self.add_progress_hook(ph)
349 def warn_if_short_id(self, argv):
350 # short YouTube ID starting with dash?
352 i for i, a in enumerate(argv)
353 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
357 [a for i, a in enumerate(argv) if i not in idxs] +
358 ['--'] + [argv[i] for i in idxs]
361 'Long argument string detected. '
362 'Use -- to separate parameters and URLs, like this:\n%s\n' %
363 args_to_str(correct_argv))
365 def add_info_extractor(self, ie):
366 """Add an InfoExtractor object to the end of the list."""
368 self._ies_instances[ie.ie_key()] = ie
369 ie.set_downloader(self)
371 def get_info_extractor(self, ie_key):
373 Get an instance of an IE with name ie_key, it will try to get one from
374 the _ies list, if there's no instance it will create a new one and add
375 it to the extractor list.
377 ie = self._ies_instances.get(ie_key)
379 ie = get_info_extractor(ie_key)()
380 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    # Each extractor goes through add_info_extractor, in the order
    # gen_extractors() yields them.
    for ie in gen_extractors():
        self.add_info_extractor(ie)
390 def add_post_processor(self, pp):
391 """Add a PostProcessor object to the end of the chain."""
393 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader).

    ph is appended to self._progress_hooks, after any hooks that were
    registered earlier.
    """
    self._progress_hooks.append(ph)
399 def _bidi_workaround(self, message):
400 if not hasattr(self, '_output_channel'):
403 assert hasattr(self, '_output_process')
404 assert isinstance(message, compat_str)
405 line_count = message.count('\n') + 1
406 self._output_process.stdin.write((message + '\n').encode('utf-8'))
407 self._output_process.stdin.flush()
408 res = ''.join(self._output_channel.readline().decode('utf-8')
409 for _ in range(line_count))
410 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode.

    Delegates to to_stdout() with check_quiet=True and returns whatever
    to_stdout() returns.
    """
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write s to out through write_string().

    The 'encoding' param is passed along as the encoding (None when the
    param is not set).
    """
    write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or pass it to the logger.

    If the 'logger' param is set, the message is sent to its debug()
    method and nothing is written.  Otherwise the message is written to
    self._screen_file, except when check_quiet is true and the 'quiet'
    param is set.  A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    # A conditional expression instead of indexing a two-item list with
    # the flag: any truthy/falsy skip_eol value now works, not just bools.
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
430 def to_stderr(self, message):
431 """Print message to stderr."""
432 assert isinstance(message, compat_str)
433 if self.params.get('logger'):
434 self.params['logger'].error(message)
436 message = self._bidi_workaround(message)
437 output = message + '\n'
438 self._write_string(output, self._err_file)
440 def to_console_title(self, message):
441 if not self.params.get('consoletitle', False):
443 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
444 # c_wchar_p() might not be necessary if `message` is
445 # already of type unicode()
446 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
447 elif 'TERM' in os.environ:
448 self._write_string('\033]0;%s\007' % message, self._screen_file)
450 def save_console_title(self):
451 if not self.params.get('consoletitle', False):
453 if 'TERM' in os.environ:
454 # Save the title on stack
455 self._write_string('\033[22;0t', self._screen_file)
457 def restore_console_title(self):
458 if not self.params.get('consoletitle', False):
460 if 'TERM' in os.environ:
461 # Restore the title from stack
462 self._write_string('\033[23;0t', self._screen_file)
465 self.save_console_title()
468 def __exit__(self, *args):
469 self.restore_console_title()
471 if self.params.get('cookiefile') is not None:
472 self.cookiejar.save()
474 def trouble(self, message=None, tb=None):
475 """Determine action to take when a download problem appears.
477 Depending on if the downloader has been configured to ignore
478 download errors or not, this method may throw an exception or
479 not when errors are found, after printing the message.
481 tb, if given, is additional traceback information.
483 if message is not None:
484 self.to_stderr(message)
485 if self.params.get('verbose'):
487 if sys.exc_info()[0]: # if .trouble has been called from an except block
489 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
490 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
491 tb += compat_str(traceback.format_exc())
493 tb_data = traceback.format_list(traceback.extract_stack())
494 tb = ''.join(tb_data)
496 if not self.params.get('ignoreerrors', False):
497 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
498 exc_info = sys.exc_info()[1].exc_info
500 exc_info = sys.exc_info()
501 raise DownloadError(message, exc_info)
502 self._download_retcode = 1
504 def report_warning(self, message):
506 Print the message to stderr, it will be prefixed with 'WARNING:'
507 If stderr is a tty file the 'WARNING:' will be colored
509 if self.params.get('logger') is not None:
510 self.params['logger'].warning(message)
512 if self.params.get('no_warnings'):
514 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
515 _msg_header = '\033[0;33mWARNING:\033[0m'
517 _msg_header = 'WARNING:'
518 warning_message = '%s %s' % (_msg_header, message)
519 self.to_stderr(warning_message)
521 def report_error(self, message, tb=None):
523 Do the same as trouble, but prefixes the message with 'ERROR:', colored
524 in red if stderr is a tty file.
526 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
527 _msg_header = '\033[0;31mERROR:\033[0m'
529 _msg_header = 'ERROR:'
530 error_message = '%s %s' % (_msg_header, message)
531 self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The message containing the file name could not be encoded for
        # output; report the same fact without the name instead.
        self.to_screen('[download] The file has already been downloaded')
540 def prepare_filename(self, info_dict):
541 """Generate the output filename."""
543 template_dict = dict(info_dict)
545 template_dict['epoch'] = int(time.time())
546 autonumber_size = self.params.get('autonumber_size')
547 if autonumber_size is None:
549 autonumber_templ = '%0' + str(autonumber_size) + 'd'
550 template_dict['autonumber'] = autonumber_templ % self._num_downloads
551 if template_dict.get('playlist_index') is not None:
552 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
553 if template_dict.get('resolution') is None:
554 if template_dict.get('width') and template_dict.get('height'):
555 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
556 elif template_dict.get('height'):
557 template_dict['resolution'] = '%sp' % template_dict['height']
558 elif template_dict.get('width'):
559 template_dict['resolution'] = '?x%d' % template_dict['width']
561 sanitize = lambda k, v: sanitize_filename(
563 restricted=self.params.get('restrictfilenames'),
565 template_dict = dict((k, sanitize(k, v))
566 for k, v in template_dict.items()
568 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
570 outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
571 tmpl = compat_expanduser(outtmpl)
572 filename = tmpl % template_dict
573 # Temporary fix for #4787
574 # 'Treat' all problem characters by passing filename through preferredencoding
575 # to workaround encoding issues with subprocess on python2 @ Windows
576 if sys.version_info < (3, 0) and sys.platform == 'win32':
577 filename = encodeFilename(filename, True).decode(preferredencoding())
579 except ValueError as err:
580 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
583 def _match_entry(self, info_dict, incomplete):
584 """ Returns None iff the file should be downloaded """
586 video_title = info_dict.get('title', info_dict.get('id', 'video'))
587 if 'title' in info_dict:
588 # This can happen when we're just evaluating the playlist
589 title = info_dict['title']
590 matchtitle = self.params.get('matchtitle', False)
592 if not re.search(matchtitle, title, re.IGNORECASE):
593 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
594 rejecttitle = self.params.get('rejecttitle', False)
596 if re.search(rejecttitle, title, re.IGNORECASE):
597 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
598 date = info_dict.get('upload_date', None)
600 dateRange = self.params.get('daterange', DateRange())
601 if date not in dateRange:
602 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
603 view_count = info_dict.get('view_count', None)
604 if view_count is not None:
605 min_views = self.params.get('min_views')
606 if min_views is not None and view_count < min_views:
607 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
608 max_views = self.params.get('max_views')
609 if max_views is not None and view_count > max_views:
610 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
611 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
612 return 'Skipping "%s" because it is age restricted' % video_title
613 if self.in_download_archive(info_dict):
614 return '%s has already been recorded in archive' % video_title
617 match_filter = self.params.get('match_filter')
618 if match_filter is not None:
619 ret = match_filter(info_dict)
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing

    info_dict is modified in place; keys it already contains keep their
    current value, even when that value is None.
    '''
    # NOTE(review): there is no self parameter, yet other methods call this
    # as self.add_extra_info(a, b) -- presumably a @staticmethod decorator
    # sits on the line above this def; confirm it is still in place.
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
631 def extract_info(self, url, download=True, ie_key=None, extra_info={},
632 process=True, force_generic_extractor=False):
634 Returns a list with a dictionary for each video we find.
635 If 'download', also downloads the videos.
636 extra_info is a dict containing the extra values to add to each result
639 if not ie_key and force_generic_extractor:
643 ies = [self.get_info_extractor(ie_key)]
648 if not ie.suitable(url):
652 self.report_warning('The program functionality for this site has been marked as broken, '
653 'and will probably not work.')
656 ie_result = ie.extract(url)
657 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
659 if isinstance(ie_result, list):
660 # Backwards compatibility: old IE result format
662 '_type': 'compat_list',
663 'entries': ie_result,
665 self.add_default_extra_info(ie_result, ie, url)
667 return self.process_ie_result(ie_result, download, extra_info)
670 except ExtractorError as de: # An error we somewhat expected
671 self.report_error(compat_str(de), de.format_traceback())
673 except MaxDownloadsReached:
675 except Exception as e:
676 if self.params.get('ignoreerrors', False):
677 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
682 self.report_error('no suitable InfoExtractor for URL %s' % url)
684 def add_default_extra_info(self, ie_result, ie, url):
685 self.add_extra_info(ie_result, {
686 'extractor': ie.IE_NAME,
688 'webpage_url_basename': url_basename(url),
689 'extractor_key': ie.ie_key(),
692 def process_ie_result(self, ie_result, download=True, extra_info={}):
694 Take the result of the ie(may be modified) and resolve all unresolved
695 references (URLs, playlist items).
697 It will also download the videos if 'download'.
698 Returns the resolved ie_result.
701 result_type = ie_result.get('_type', 'video')
703 if result_type in ('url', 'url_transparent'):
704 extract_flat = self.params.get('extract_flat', False)
705 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
706 extract_flat is True):
707 if self.params.get('forcejson', False):
708 self.to_stdout(json.dumps(ie_result))
711 if result_type == 'video':
712 self.add_extra_info(ie_result, extra_info)
713 return self.process_video_result(ie_result, download=download)
714 elif result_type == 'url':
715 # We have to add extra_info to the results because it may be
716 # contained in a playlist
717 return self.extract_info(ie_result['url'],
719 ie_key=ie_result.get('ie_key'),
720 extra_info=extra_info)
721 elif result_type == 'url_transparent':
722 # Use the information from the embedding page
723 info = self.extract_info(
724 ie_result['url'], ie_key=ie_result.get('ie_key'),
725 extra_info=extra_info, download=False, process=False)
727 force_properties = dict(
728 (k, v) for k, v in ie_result.items() if v is not None)
729 for f in ('_type', 'url'):
730 if f in force_properties:
731 del force_properties[f]
732 new_result = info.copy()
733 new_result.update(force_properties)
735 assert new_result.get('_type') != 'url_transparent'
737 return self.process_ie_result(
738 new_result, download=download, extra_info=extra_info)
739 elif result_type == 'playlist' or result_type == 'multi_video':
740 # We process each entry in the playlist
741 playlist = ie_result.get('title', None) or ie_result.get('id', None)
742 self.to_screen('[download] Downloading playlist: %s' % playlist)
744 playlist_results = []
746 playliststart = self.params.get('playliststart', 1) - 1
747 playlistend = self.params.get('playlistend', None)
748 # For backwards compatibility, interpret -1 as whole list
749 if playlistend == -1:
752 playlistitems_str = self.params.get('playlist_items', None)
754 if playlistitems_str is not None:
755 def iter_playlistitems(format):
756 for string_segment in format.split(','):
757 if '-' in string_segment:
758 start, end = string_segment.split('-')
759 for item in range(int(start), int(end) + 1):
762 yield int(string_segment)
763 playlistitems = iter_playlistitems(playlistitems_str)
765 ie_entries = ie_result['entries']
766 if isinstance(ie_entries, list):
767 n_all_entries = len(ie_entries)
770 ie_entries[i - 1] for i in playlistitems
771 if -n_all_entries <= i - 1 < n_all_entries]
773 entries = ie_entries[playliststart:playlistend]
774 n_entries = len(entries)
776 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
777 (ie_result['extractor'], playlist, n_all_entries, n_entries))
778 elif isinstance(ie_entries, PagedList):
781 for item in playlistitems:
782 entries.extend(ie_entries.getslice(
786 entries = ie_entries.getslice(
787 playliststart, playlistend)
788 n_entries = len(entries)
790 "[%s] playlist %s: Downloading %d videos" %
791 (ie_result['extractor'], playlist, n_entries))
794 entry_list = list(ie_entries)
795 entries = [entry_list[i - 1] for i in playlistitems]
797 entries = list(itertools.islice(
798 ie_entries, playliststart, playlistend))
799 n_entries = len(entries)
801 "[%s] playlist %s: Downloading %d videos" %
802 (ie_result['extractor'], playlist, n_entries))
804 if self.params.get('playlistreverse', False):
805 entries = entries[::-1]
807 for i, entry in enumerate(entries, 1):
808 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
810 'n_entries': n_entries,
811 'playlist': playlist,
812 'playlist_id': ie_result.get('id'),
813 'playlist_title': ie_result.get('title'),
814 'playlist_index': i + playliststart,
815 'extractor': ie_result['extractor'],
816 'webpage_url': ie_result['webpage_url'],
817 'webpage_url_basename': url_basename(ie_result['webpage_url']),
818 'extractor_key': ie_result['extractor_key'],
821 reason = self._match_entry(entry, incomplete=True)
822 if reason is not None:
823 self.to_screen('[download] ' + reason)
826 entry_result = self.process_ie_result(entry,
829 playlist_results.append(entry_result)
830 ie_result['entries'] = playlist_results
832 elif result_type == 'compat_list':
834 'Extractor %s returned a compat_list result. '
835 'It needs to be updated.' % ie_result.get('extractor'))
841 'extractor': ie_result['extractor'],
842 'webpage_url': ie_result['webpage_url'],
843 'webpage_url_basename': url_basename(ie_result['webpage_url']),
844 'extractor_key': ie_result['extractor_key'],
848 ie_result['entries'] = [
849 self.process_ie_result(_fixup(r), download, extra_info)
850 for r in ie_result['entries']
854 raise Exception('Invalid result type: %s' % result_type)
856 def _build_format_filter(self, filter_spec):
857 " Returns a function to filter the formats according to the filter_spec "
867 operator_rex = re.compile(r'''(?x)\s*
868 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
869 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
870 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
872 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
873 m = operator_rex.search(filter_spec)
876 comparison_value = int(m.group('value'))
878 comparison_value = parse_filesize(m.group('value'))
879 if comparison_value is None:
880 comparison_value = parse_filesize(m.group('value') + 'B')
881 if comparison_value is None:
883 'Invalid value %r in format specification %r' % (
884 m.group('value'), filter_spec))
885 op = OPERATORS[m.group('op')]
892 str_operator_rex = re.compile(r'''(?x)
893 \s*(?P<key>ext|acodec|vcodec|container|protocol)
894 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
895 \s*(?P<value>[a-zA-Z0-9_-]+)
897 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
898 m = str_operator_rex.search(filter_spec)
900 comparison_value = m.group('value')
901 op = STR_OPERATORS[m.group('op')]
904 raise ValueError('Invalid filter specification %r' % filter_spec)
907 actual_value = f.get(m.group('key'))
908 if actual_value is None:
909 return m.group('none_inclusive')
910 return op(actual_value, comparison_value)
913 def build_format_selector(self, format_spec):
914 def syntax_error(note, start):
916 'Invalid format specification: '
917 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
918 return SyntaxError(message)
920 PICKFIRST = 'PICKFIRST'
924 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
926 def _parse_filter(tokens):
928 for type, string, start, _, _ in tokens:
929 if type == tokenize.OP and string == ']':
930 return ''.join(filter_parts)
932 filter_parts.append(string)
934 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
936 current_selector = None
937 for type, string, start, _, _ in tokens:
938 # ENCODING is only defined in python 3.x
939 if type == getattr(tokenize, 'ENCODING', None):
941 elif type in [tokenize.NAME, tokenize.NUMBER]:
942 current_selector = FormatSelector(SINGLE, string, [])
943 elif type == tokenize.OP:
946 # ')' will be handled by the parentheses group
947 tokens.restore_last_token()
949 elif inside_merge and string in ['/', ',']:
950 tokens.restore_last_token()
952 elif inside_choice and string == ',':
953 tokens.restore_last_token()
956 selectors.append(current_selector)
957 current_selector = None
959 first_choice = current_selector
960 second_choice = _parse_format_selection(tokens, inside_choice=True)
961 current_selector = None
962 selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), []))
964 if not current_selector:
965 current_selector = FormatSelector(SINGLE, 'best', [])
966 format_filter = _parse_filter(tokens)
967 current_selector.filters.append(format_filter)
970 raise syntax_error('Unexpected "("', start)
971 group = _parse_format_selection(tokens, inside_group=True)
972 current_selector = FormatSelector(GROUP, group, [])
974 video_selector = current_selector
975 audio_selector = _parse_format_selection(tokens, inside_merge=True)
976 current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
978 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
979 elif type == tokenize.ENDMARKER:
982 selectors.append(current_selector)
985 def _build_selector_function(selector):
986 if isinstance(selector, list):
987 fs = [_build_selector_function(s) for s in selector]
989 def selector_function(formats):
991 for format in f(formats):
993 return selector_function
994 elif selector.type == GROUP:
995 selector_function = _build_selector_function(selector.selector)
996 elif selector.type == PICKFIRST:
997 fs = [_build_selector_function(s) for s in selector.selector]
999 def selector_function(formats):
1001 picked_formats = list(f(formats))
1003 return picked_formats
1005 elif selector.type == SINGLE:
1006 format_spec = selector.selector
1008 def selector_function(formats):
1009 if format_spec == 'all':
1012 elif format_spec in ['best', 'worst', None]:
1013 format_idx = 0 if format_spec == 'worst' else -1
1014 audiovideo_formats = [
1016 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1017 if audiovideo_formats:
1018 yield audiovideo_formats[format_idx]
1019 # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1020 elif (all(f.get('acodec') != 'none' for f in formats) or
1021 all(f.get('vcodec') != 'none' for f in formats)):
1022 yield formats[format_idx]
1023 elif format_spec == 'bestaudio':
1026 if f.get('vcodec') == 'none']
1028 yield audio_formats[-1]
1029 elif format_spec == 'worstaudio':
1032 if f.get('vcodec') == 'none']
1034 yield audio_formats[0]
1035 elif format_spec == 'bestvideo':
1038 if f.get('acodec') == 'none']
1040 yield video_formats[-1]
1041 elif format_spec == 'worstvideo':
1044 if f.get('acodec') == 'none']
1046 yield video_formats[0]
1048 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1049 if format_spec in extensions:
1050 filter_f = lambda f: f['ext'] == format_spec
1052 filter_f = lambda f: f['format_id'] == format_spec
1053 matches = list(filter(filter_f, formats))
1056 elif selector.type == MERGE:
1057 def _merge(formats_info):
1058 format_1, format_2 = [f['format_id'] for f in formats_info]
1059 # The first format must contain the video and the
1061 if formats_info[0].get('vcodec') == 'none':
1062 self.report_error('The first format must '
1063 'contain the video, try using '
1064 '"-f %s+%s"' % (format_2, format_1))
1067 formats_info[0]['ext']
1068 if self.params.get('merge_output_format') is None
1069 else self.params['merge_output_format'])
1071 'requested_formats': formats_info,
1072 'format': '%s+%s' % (formats_info[0].get('format'),
1073 formats_info[1].get('format')),
1074 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1075 formats_info[1].get('format_id')),
1076 'width': formats_info[0].get('width'),
1077 'height': formats_info[0].get('height'),
1078 'resolution': formats_info[0].get('resolution'),
1079 'fps': formats_info[0].get('fps'),
1080 'vcodec': formats_info[0].get('vcodec'),
1081 'vbr': formats_info[0].get('vbr'),
1082 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1083 'acodec': formats_info[1].get('acodec'),
1084 'abr': formats_info[1].get('abr'),
1087 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1089 def selector_function(formats):
1090 formats = list(formats)
1091 for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1094 filters = [self._build_format_filter(f) for f in selector.filters]
1096 def final_selector(formats):
1097 for _filter in filters:
1098 formats = list(filter(_filter, formats))
1099 return selector_function(formats)
1100 return final_selector
1102 stream = io.BytesIO(format_spec.encode('utf-8'))
1104 tokens = list(compat_tokenize_tokenize(stream.readline))
1105 except tokenize.TokenError:
1106 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1108 class TokenIterator(object):
1109 def __init__(self, tokens):
1110 self.tokens = tokens
1117 if self.counter >= len(self.tokens):
1118 raise StopIteration()
1119 value = self.tokens[self.counter]
1125 def restore_last_token(self):
1128 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1129 return _build_selector_function(parsed_selector)
# Build the HTTP headers for one format entry: a copy of std_headers,
# extended with the entry's own 'http_headers' and with a 'Cookie' value
# obtained from _calc_cookies().
# NOTE(review): the embedded line numbers jump (1132 -> 1134 -> 1136 ...), so
# this listing omits statements; the conditions guarding the two updates and
# the return are presumably among them -- confirm against the full file.
1131 def _calc_headers(self, info_dict):
# Work on a copy so the shared std_headers mapping is never mutated.
1132 res = std_headers.copy()
1134 add_headers = info_dict.get('http_headers')
1136 res.update(add_headers)
1138 cookies = self._calc_cookies(info_dict)
1140 res['Cookie'] = cookies
1144 def _calc_cookies(self, info_dict):
1145 pr = compat_urllib_request.Request(info_dict['url'])
1146 self.cookiejar.add_cookie_header(pr)
1147 return pr.get_header('Cookie')
# Complete the info dict of a single video and hand the selected format(s)
# to process_info().
# Visible steps: validate 'id'/'title', default the playlist fields,
# normalise thumbnails, derive 'display_id' and 'upload_date', resolve
# subtitles, complete every format entry (format_id, format, ext,
# http_headers) and finally run the format selector.
# Raises ExtractorError for a missing 'id', 'title' or format 'url', for
# 'No video formats found!' and for 'requested format not available'.
# NOTE(review): the embedded line numbers have gaps, so several statements
# (else/try/return lines, the initialisation of formats_dict, the condition
# in front of some raises, ...) are not shown in this listing.
1149 def process_video_result(self, info_dict, download=True):
1150 assert info_dict.get('_type', 'video') == 'video'
1152 if 'id' not in info_dict:
1153 raise ExtractorError('Missing "id" field in extractor result')
1154 if 'title' not in info_dict:
1155 raise ExtractorError('Missing "title" field in extractor result')
1157 if 'playlist' not in info_dict:
1158 # It isn't part of a playlist
1159 info_dict['playlist'] = None
1160 info_dict['playlist_index'] = None
# A lone 'thumbnail' URL is promoted to a one-element 'thumbnails' list.
1162 thumbnails = info_dict.get('thumbnails')
1163 if thumbnails is None:
1164 thumbnail = info_dict.get('thumbnail')
1166 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
# Ascending sort: the last element is the one picked as 'thumbnail' below.
1168 thumbnails.sort(key=lambda t: (
1169 t.get('preference'), t.get('width'), t.get('height'),
1170 t.get('id'), t.get('url')))
1171 for i, t in enumerate(thumbnails):
1172 if 'width' in t and 'height' in t:
1173 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1174 if t.get('id') is None:
1177 if thumbnails and 'thumbnail' not in info_dict:
1178 info_dict['thumbnail'] = thumbnails[-1]['url']
1180 if 'display_id' not in info_dict and 'id' in info_dict:
1181 info_dict['display_id'] = info_dict['id']
# Derive 'upload_date' (YYYYMMDD, UTC) from 'timestamp' when only the latter is set.
1183 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1184 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1185 # see http://bugs.python.org/issue1646728)
1187 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1188 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1189 except (ValueError, OverflowError, OSError):
1192 if self.params.get('listsubtitles', False):
1193 if 'automatic_captions' in info_dict:
1194 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1195 self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1197 info_dict['requested_subtitles'] = self.process_subtitles(
1198 info_dict['id'], info_dict.get('subtitles'),
1199 info_dict.get('automatic_captions'))
1201 # We now pick which formats have to be downloaded
1202 if info_dict.get('formats') is None:
1203 # There's only one format available
1204 formats = [info_dict]
1206 formats = info_dict['formats']
1209 raise ExtractorError('No video formats found!')
1213 # We check that all the formats have the format and format_id fields
1214 for i, format in enumerate(formats):
1215 if 'url' not in format:
1216 raise ExtractorError('Missing "url" key in result (index %d)' % i)
# Formats without an id get their list index (as a string) as format_id.
1218 if format.get('format_id') is None:
1219 format['format_id'] = compat_str(i)
1220 format_id = format['format_id']
1221 if format_id not in formats_dict:
1222 formats_dict[format_id] = []
1223 formats_dict[format_id].append(format)
1225 # Make sure all formats have unique format_id
1226 for format_id, ambiguous_formats in formats_dict.items():
1227 if len(ambiguous_formats) > 1:
1228 for i, format in enumerate(ambiguous_formats):
1229 format['format_id'] = '%s-%d' % (format_id, i)
1231 for i, format in enumerate(formats):
1232 if format.get('format') is None:
1233 format['format'] = '{id} - {res}{note}'.format(
1234 id=format['format_id'],
1235 res=self.format_resolution(format),
1236 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1238 # Automatically determine file extension if missing
1239 if 'ext' not in format:
1240 format['ext'] = determine_ext(format['url']).lower()
1241 # Add HTTP headers, so that external programs can use them from the
# Headers are computed from the video-level fields overlaid with the format's own.
1243 full_format_info = info_dict.copy()
1244 full_format_info.update(format)
1245 format['http_headers'] = self._calc_headers(full_format_info)
1247 # TODO Central sorting goes here
1249 if formats[0] is not info_dict:
1250 # only set the 'formats' fields if the original info_dict list them
1251 # otherwise we end up with a circular reference, the first (and unique)
1252 # element in the 'formats' field in info_dict is info_dict itself,
1253 # which can't be exported to json
1254 info_dict['formats'] = formats
1255 if self.params.get('listformats'):
1256 self.list_formats(info_dict)
1258 if self.params.get('list_thumbnails'):
1259 self.list_thumbnails(info_dict)
# No explicit 'format' parameter: build a default spec. For youtube/ted (and
# when not writing to '-') 'bestvideo+bestaudio' is tried first if a usable
# merger is available; 'best' is always the final alternative.
1262 req_format = self.params.get('format')
1263 if req_format is None:
1264 req_format_list = []
1265 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1266 info_dict['extractor'] in ['youtube', 'ted']):
1267 merger = FFmpegMergerPP(self)
1268 if merger.available and merger.can_merge():
1269 req_format_list.append('bestvideo+bestaudio')
1270 req_format_list.append('best')
1271 req_format = '/'.join(req_format_list)
1272 format_selector = self.build_format_selector(req_format)
1273 formats_to_download = list(format_selector(formats))
1274 if not formats_to_download:
1275 raise ExtractorError('requested format not available',
1279 if len(formats_to_download) > 1:
1280 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
# Each selected format is processed on its own copy of the video-level info.
1281 for format in formats_to_download:
1282 new_info = dict(info_dict)
1283 new_info.update(format)
1284 self.process_info(new_info)
1285 # We update the info dict with the best quality format (backwards compatibility)
1286 info_dict.update(formats_to_download[-1])
# Decide which subtitle languages to fetch and which format to use for each.
#   video_id:           only used in the warning messages.
#   normal_subtitles:   language -> list of format dicts (each with an 'ext').
#   automatic_captions: same shape; only used for languages that have no
#                       normal subtitles.
# NOTE(review): the embedded line numbers have gaps; the initialisation of
# available_subs, the early return and the construction of the result are
# not shown in this listing.
1289 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1290 """Select the requested subtitles and their format"""
1292 if normal_subtitles and self.params.get('writesubtitles'):
1293 available_subs.update(normal_subtitles)
1294 if automatic_captions and self.params.get('writeautomaticsub'):
1295 for lang, cap_info in automatic_captions.items():
# Normal subtitles win: captions never replace an existing language entry.
1296 if lang not in available_subs:
1297 available_subs[lang] = cap_info
1299 if (not self.params.get('writesubtitles') and not
1300 self.params.get('writeautomaticsub') or not
# Language choice: everything with 'allsubtitles', else 'subtitleslangs',
# else English if available, else the first available language.
1304 if self.params.get('allsubtitles', False):
1305 requested_langs = available_subs.keys()
1307 if self.params.get('subtitleslangs', False):
1308 requested_langs = self.params.get('subtitleslangs')
1309 elif 'en' in available_subs:
1310 requested_langs = ['en']
1312 requested_langs = [list(available_subs.keys())[0]]
# 'subtitlesformat' is a '/'-separated preference list, e.g. 'srt/best'.
1314 formats_query = self.params.get('subtitlesformat', 'best')
1315 formats_preference = formats_query.split('/') if formats_query else []
1317 for lang in requested_langs:
1318 formats = available_subs.get(lang)
1320 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1322 for ext in formats_preference:
1326 matches = list(filter(lambda f: f['ext'] == ext, formats))
1332 self.report_warning(
1333 'No subtitle format found matching "%s" for language %s, '
1334 'using %s' % (formats_query, lang, f['ext']))
# Handle one fully resolved video: print the "force*" fields, write the side
# files (description, annotations, subtitles, info JSON, thumbnails), download
# the media (one file, or several 'requested_formats' to be merged), queue the
# fix-up post-processors and finally run post-processing and record the
# download in the archive.
# Raises MaxDownloadsReached when the 'max_downloads' limit is already hit and
# UnavailableVideoError when the download fails with OSError/IOError.
# NOTE(review): the embedded line numbers have gaps; try/else/return lines
# and a few statements are not shown in this listing.
1338 def process_info(self, info_dict):
1339 """Process a single resolved IE result."""
1341 assert info_dict.get('_type', 'video') == 'video'
1343 max_downloads = self.params.get('max_downloads')
1344 if max_downloads is not None:
1345 if self._num_downloads >= int(max_downloads):
1346 raise MaxDownloadsReached()
# Keep the untruncated title in 'fulltitle'; 'title' is capped at 200 chars.
1348 info_dict['fulltitle'] = info_dict['title']
1349 if len(info_dict['title']) > 200:
1350 info_dict['title'] = info_dict['title'][:197] + '...'
1352 if 'format' not in info_dict:
1353 info_dict['format'] = info_dict['ext']
# _match_entry returns a human-readable reason when the entry is rejected.
1355 reason = self._match_entry(info_dict, incomplete=False)
1356 if reason is not None:
1357 self.to_screen('[download] ' + reason)
1360 self._num_downloads += 1
1362 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings: each enabled option writes one field to stdout.
1365 if self.params.get('forcetitle', False):
1366 self.to_stdout(info_dict['fulltitle'])
1367 if self.params.get('forceid', False):
1368 self.to_stdout(info_dict['id'])
1369 if self.params.get('forceurl', False):
1370 if info_dict.get('requested_formats') is not None:
1371 for f in info_dict['requested_formats']:
1372 self.to_stdout(f['url'] + f.get('play_path', ''))
1374 # For RTMP URLs, also include the playpath
1375 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1376 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1377 self.to_stdout(info_dict['thumbnail'])
1378 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1379 self.to_stdout(info_dict['description'])
1380 if self.params.get('forcefilename', False) and filename is not None:
1381 self.to_stdout(filename)
1382 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1383 self.to_stdout(formatSeconds(info_dict['duration']))
1384 if self.params.get('forceformat', False):
1385 self.to_stdout(info_dict['format'])
1386 if self.params.get('forcejson', False):
1387 self.to_stdout(json.dumps(info_dict))
1389 # Do nothing else if in simulate mode
1390 if self.params.get('simulate', False):
1393 if filename is None:
# Make sure the target directory exists before any file is written.
1397 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1398 if dn and not os.path.exists(dn):
1400 except (OSError, IOError) as err:
1401 self.report_error('unable to create directory ' + compat_str(err))
# --- side file: description ---
1404 if self.params.get('writedescription', False):
1405 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1406 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1407 self.to_screen('[info] Video description is already present')
1408 elif info_dict.get('description') is None:
1409 self.report_warning('There\'s no description to write.')
1412 self.to_screen('[info] Writing video description to: ' + descfn)
1413 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1414 descfile.write(info_dict['description'])
1415 except (OSError, IOError):
1416 self.report_error('Cannot write description file ' + descfn)
# --- side file: annotations ---
1419 if self.params.get('writeannotations', False):
1420 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1421 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1422 self.to_screen('[info] Video annotations are already present')
1425 self.to_screen('[info] Writing video annotations to: ' + annofn)
1426 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1427 annofile.write(info_dict['annotations'])
# A missing or non-text 'annotations' value only produces a warning.
1428 except (KeyError, TypeError):
1429 self.report_warning('There are no annotations to write.')
1430 except (OSError, IOError):
1431 self.report_error('Cannot write annotations file: ' + annofn)
# --- side files: subtitles ---
1434 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1435 self.params.get('writeautomaticsub')])
1437 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1438 # subtitles download errors are already managed as troubles in relevant IE
1439 # that way it will silently go on when used with unsupporting IE
1440 subtitles = info_dict['requested_subtitles']
1441 ie = self.get_info_extractor(info_dict['extractor_key'])
1442 for sub_lang, sub_info in subtitles.items():
1443 sub_format = sub_info['ext']
# Inline 'data' is used as is; otherwise the subtitle is fetched from its 'url'.
1444 if sub_info.get('data') is not None:
1445 sub_data = sub_info['data']
1448 sub_data = ie._download_webpage(
1449 sub_info['url'], info_dict['id'], note=False)
1450 except ExtractorError as err:
1451 self.report_warning('Unable to download subtitle for "%s": %s' %
1452 (sub_lang, compat_str(err.cause)))
1455 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1456 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
# NOTE(review): the message below reads 'already_present' (underscore); it looks
# like a typo for 'already present' -- left untouched because it is runtime text.
1457 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1459 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1460 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1461 subfile.write(sub_data)
1462 except (OSError, IOError):
1463 self.report_error('Cannot write subtitles file ' + sub_filename)
# --- side file: info JSON (without the 'requested_*' keys) ---
1466 if self.params.get('writeinfojson', False):
1467 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1468 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1469 self.to_screen('[info] Video description metadata is already present')
1471 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1473 write_json_file(self.filter_requested_info(info_dict), infofn)
1474 except (OSError, IOError):
1475 self.report_error('Cannot write metadata to JSON file ' + infofn)
1478 self._write_thumbnails(info_dict, filename)
# --- media download ---
1480 if not self.params.get('skip_download', False):
# The lines below belong to a local download helper (called as dl(name, info)
# further down): pick a downloader for the info dict, attach the registered
# progress hooks and run it.
1483 fd = get_suitable_downloader(info, self.params)(self, self.params)
1484 for ph in self._progress_hooks:
1485 fd.add_progress_hook(ph)
1486 if self.params.get('verbose'):
1487 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1488 return fd.download(name, info)
# Several formats were requested: download each one separately and let the
# merger post-processor combine them afterwards.
1490 if info_dict.get('requested_formats') is not None:
1493 merger = FFmpegMergerPP(self)
1494 if not merger.available:
1496 self.report_warning('You have requested multiple '
1497 'formats but ffmpeg or avconv are not installed.'
1498 ' The formats won\'t be merged.')
1500 postprocessors = [merger]
1502 def compatible_formats(formats):
1503 video, audio = formats
# NOTE(review): the names are swapped relative to the values (video_ext receives
# the audio ext and vice versa); harmless here because the membership test
# below treats both symmetrically.
1505 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1506 if video_ext and audio_ext:
1508 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1511 for exts in COMPATIBLE_EXTS:
1512 if video_ext in exts and audio_ext in exts:
1514 # TODO: Check acodec/vcodec
1517 filename_real_ext = os.path.splitext(filename)[1][1:]
1519 os.path.splitext(filename)[0]
1520 if filename_real_ext == info_dict['ext']
1522 requested_formats = info_dict['requested_formats']
# Without an explicit 'merge_output_format', incompatible inputs go into mkv.
1523 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1524 info_dict['ext'] = 'mkv'
1525 self.report_warning(
1526 'Requested formats are incompatible for merge and will be merged into mkv.')
1527 # Ensure filename always has a correct extension for successful merge
1528 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1529 if os.path.exists(encodeFilename(filename)):
1531 '[download] %s has already been downloaded and '
1532 'merged' % filename)
1534 for f in requested_formats:
1535 new_info = dict(info_dict)
# Each part is saved under the final name with an 'f<format_id>' infix.
1537 fname = self.prepare_filename(new_info)
1538 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1539 downloaded.append(fname)
1540 partial_success = dl(fname, new_info)
1541 success = success and partial_success
1542 info_dict['__postprocessors'] = postprocessors
1543 info_dict['__files_to_merge'] = downloaded
1545 # Just a single file
1546 success = dl(filename, info_dict)
1547 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1548 self.report_error('unable to download video data: %s' % str(err))
1550 except (OSError, IOError) as err:
1551 raise UnavailableVideoError(err)
1552 except (ContentTooShortError, ) as err:
1553 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# --- fix-ups: 'fixup' defaults to 'detect_or_warn' ---
1558 fixup_policy = self.params.get('fixup')
1559 if fixup_policy is None:
1560 fixup_policy = 'detect_or_warn'
1562 stretched_ratio = info_dict.get('stretched_ratio')
1563 if stretched_ratio is not None and stretched_ratio != 1:
1564 if fixup_policy == 'warn':
1565 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1566 info_dict['id'], stretched_ratio))
1567 elif fixup_policy == 'detect_or_warn':
1568 stretched_pp = FFmpegFixupStretchedPP(self)
1569 if stretched_pp.available:
1570 info_dict.setdefault('__postprocessors', [])
1571 info_dict['__postprocessors'].append(stretched_pp)
1573 self.report_warning(
1574 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1575 info_dict['id'], stretched_ratio))
1577 assert fixup_policy in ('ignore', 'never')
# DASH m4a fix-up only applies to single-file downloads.
1579 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1580 if fixup_policy == 'warn':
1581 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1583 elif fixup_policy == 'detect_or_warn':
1584 fixup_pp = FFmpegFixupM4aPP(self)
1585 if fixup_pp.available:
1586 info_dict.setdefault('__postprocessors', [])
1587 info_dict['__postprocessors'].append(fixup_pp)
1589 self.report_warning(
1590 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1593 assert fixup_policy in ('ignore', 'never')
1596 self.post_process(filename, info_dict)
1597 except (PostProcessingError) as err:
1598 self.report_error('postprocessing: %s' % str(err))
1600 self.record_download_archive(info_dict)
# Extract (and thereby download) every URL in url_list and return
# self._download_retcode.
# Raises SameFileError when more than one URL is given but the output
# template contains no '%' field, unless 'max_downloads' is exactly 1.
# NOTE(review): the embedded line numbers have gaps (e.g. the try: line and
# the else: branch around the JSON dump are not shown).
1602 def download(self, url_list):
1603 """Download a given list of URLs."""
1604 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# A template without any '%' would make all URLs write to the same file.
1605 if (len(url_list) > 1 and
1606 '%' not in outtmpl and
1607 self.params.get('max_downloads') != 1):
1608 raise SameFileError(outtmpl)
1610 for url in url_list:
1612 # It also downloads the videos
1613 res = self.extract_info(
1614 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1615 except UnavailableVideoError:
1616 self.report_error('unable to download video')
1617 except MaxDownloadsReached:
1618 self.to_screen('[info] Maximum number of downloaded files reached.')
1621 if self.params.get('dump_single_json', False):
1622 self.to_stdout(json.dumps(res))
1624 return self._download_retcode
# Resume processing from a previously written info JSON file.
# The file is read as UTF-8, the 'requested_*' keys are dropped through
# filter_requested_info() and the result is passed to process_ie_result().
# On DownloadError the method falls back to a regular download of the stored
# 'webpage_url' when there is one. Returns self._download_retcode.
# NOTE(review): the embedded line numbers have gaps (try: and the else branch
# of the fallback are not shown).
1626 def download_with_info_file(self, info_filename):
1627 with contextlib.closing(fileinput.FileInput(
1628 [info_filename], mode='r',
1629 openhook=fileinput.hook_encoded('utf-8'))) as f:
1630 # FileInput doesn't have a read method, we can't call json.load
1631 info = self.filter_requested_info(json.loads('\n'.join(f)))
1633 self.process_ie_result(info, download=True)
1634 except DownloadError:
1635 webpage_url = info.get('webpage_url')
1636 if webpage_url is not None:
1637 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1638 return self.download([webpage_url])
1641 return self._download_retcode
# Produce the key/value pairs of info_dict minus the 'requested_formats' and
# 'requested_subtitles' entries.
# Takes no self; callers invoke it as self.filter_requested_info(info_dict).
# NOTE(review): embedded lines 1643 and 1645 are not shown; presumably a
# decorator and the expression that wraps the generator below -- confirm.
1644 def filter_requested_info(info_dict):
1646 (k, v) for k, v in info_dict.items()
1647 if k not in ['requested_formats', 'requested_subtitles'])
# Run the post-processor chain on a downloaded file.
#   filename: path of the downloaded file, stored as info['filepath'].
#   ie_info:  info dict; it is copied, so the caller's dict is not modified
#             by the 'filepath' assignment.
# Chain order: the entry's own '__postprocessors' first, then self._pps.
# NOTE(review): the embedded line numbers have gaps (initialisation of
# pps_chain and the try: lines are not shown).
1649 def post_process(self, filename, ie_info):
1650 """Run all the postprocessors on the given file."""
1651 info = dict(ie_info)
1652 info['filepath'] = filename
1654 if ie_info.get('__postprocessors') is not None:
1655 pps_chain.extend(ie_info['__postprocessors'])
1656 pps_chain.extend(self._pps)
1657 for pp in pps_chain:
1658 files_to_delete = []
# Each post-processor returns the files it made obsolete and the updated info.
1660 files_to_delete, info = pp.run(info)
1661 except PostProcessingError as e:
1662 self.report_error(e.msg)
# Obsolete files are removed unless 'keepvideo' is set.
1663 if files_to_delete and not self.params.get('keepvideo', False):
1664 for old_filename in files_to_delete:
1665 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1667 os.remove(encodeFilename(old_filename))
1668 except (IOError, OSError):
1669 self.report_warning('Unable to remove downloaded original file')
1671 def _make_archive_id(self, info_dict):
1672 # Future-proof against any change in case
1673 # and backwards compatibility with prior versions
1674 extractor = info_dict.get('extractor_key')
1675 if extractor is None:
1676 if 'id' in info_dict:
1677 extractor = info_dict.get('ie_key') # key in a playlist
1678 if extractor is None:
1679 return None # Incomplete video information
1680 return extractor.lower() + ' ' + info_dict['id']
# Check whether the video described by info_dict is listed in the file named
# by the 'download_archive' parameter. The archive is scanned line by line
# for an entry equal to the id built by _make_archive_id().
# NOTE(review): the embedded line numbers have gaps; the early returns, the
# condition in front of 'return False' and the body of the errno check are
# not shown. The ENOENT test suggests a missing archive file is tolerated --
# confirm against the full file.
1682 def in_download_archive(self, info_dict):
1683 fn = self.params.get('download_archive')
1687 vid_id = self._make_archive_id(info_dict)
1689 return False # Incomplete video information
1692 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1693 for line in archive_file:
# Lines are compared after stripping surrounding whitespace/newlines.
1694 if line.strip() == vid_id:
1696 except IOError as ioe:
1697 if ioe.errno != errno.ENOENT:
# Append the archive id of info_dict (see _make_archive_id) as one line to
# the file named by the 'download_archive' parameter.
# NOTE(review): embedded lines 1703-1704 and 1706 are not shown; presumably
# they skip the write when no archive file is configured -- confirm.
1701 def record_download_archive(self, info_dict):
1702 fn = self.params.get('download_archive')
1705 vid_id = self._make_archive_id(info_dict)
# Opened in append mode: existing entries are kept.
1707 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1708 archive_file.write(vid_id + '\n')
# Produce a short resolution label for a format dict.
# An explicit 'resolution' value is returned unchanged; otherwise the label
# is built from 'width'/'height': 'WxH' when both are known, 'Hp' when only
# the height is known.
# Takes no self; callers invoke it as self.format_resolution(format).
# NOTE(review): the embedded line numbers have gaps; the result for
# vcodec == 'none', the else branches, the use of `default` and the final
# return are not shown in this listing.
1711 def format_resolution(format, default='unknown'):
1712 if format.get('vcodec') == 'none':
1714 if format.get('resolution') is not None:
1715 return format['resolution']
1716 if format.get('height') is not None:
1717 if format.get('width') is not None:
1718 res = '%sx%s' % (format['width'], format['height'])
1720 res = '%sp' % format['height']
1721 elif format.get('width') is not None:
# NOTE(review): with only the width known this renders '?x<width>', i.e. the
# width lands in the height position of 'WxH'; '<width>x?' looks intended.
1722 res = '?x%d' % format['width']
# Build the free-text "note" column for one format dict (used by
# list_formats). The pieces are appended to `res` in this order: an
# '(unsupported)' marker for f4f/f4m, 'format_note', total bitrate ('tbr'),
# container, video codec / video bitrate ('vbr'), fps, audio codec / audio
# bitrate ('abr'), sample rate ('asr') and finally the exact or approximate
# file size.
# NOTE(review): the embedded line numbers have gaps; the initialisation of
# `res`, the separator handling between pieces and the return are not shown.
1727 def _format_note(self, fdict):
1729 if fdict.get('ext') in ['f4f', 'f4m']:
1730 res += '(unsupported) '
1731 if fdict.get('format_note') is not None:
1732 res += fdict['format_note'] + ' '
1733 if fdict.get('tbr') is not None:
1734 res += '%4dk ' % fdict['tbr']
1735 if fdict.get('container') is not None:
1738 res += '%s container' % fdict['container']
# A vcodec of 'none' (audio-only format) is not printed as a video codec.
1739 if (fdict.get('vcodec') is not None and
1740 fdict.get('vcodec') != 'none'):
1743 res += fdict['vcodec']
1744 if fdict.get('vbr') is not None:
1746 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1748 if fdict.get('vbr') is not None:
1749 res += '%4dk' % fdict['vbr']
1750 if fdict.get('fps') is not None:
1751 res += ', %sfps' % fdict['fps']
1752 if fdict.get('acodec') is not None:
1755 if fdict['acodec'] == 'none':
1758 res += '%-5s' % fdict['acodec']
1759 elif fdict.get('abr') is not None:
1763 if fdict.get('abr') is not None:
1764 res += '@%3dk' % fdict['abr']
1765 if fdict.get('asr') is not None:
1766 res += ' (%5dHz)' % fdict['asr']
# The exact size is preferred; an approximate size is prefixed with '~'.
1767 if fdict.get('filesize') is not None:
1770 res += format_bytes(fdict['filesize'])
1771 elif fdict.get('filesize_approx') is not None:
1774 res += '~' + format_bytes(fdict['filesize_approx'])
# Print a table of the available formats of a video (format code, extension,
# resolution, note).
# When info_dict has no 'formats' key, the info dict itself is treated as the
# only format.
# NOTE(review): the embedded line numbers have gaps; the construction of
# `table` around the row expression and the output call are only partly shown.
1777 def list_formats(self, info_dict):
1778 formats = info_dict.get('formats', [info_dict])
1780 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
# Formats with a preference below -1000 are left out of the table.
1782 if f.get('preference') is None or f['preference'] >= -1000]
# With several formats the last row gets a '(best)' marker in its note.
1783 if len(formats) > 1:
1784 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1786 header_line = ['format code', 'extension', 'resolution', 'note']
1788 '[info] Available formats for %s:\n%s' %
1789 (info_dict['id'], render_table(header_line, table)))
# Print a table (ID, width, height, URL) of the thumbnails of a video, or a
# message when there are none.
# A lone 'thumbnail' URL is shown as a single entry with id '0'.
# NOTE(review): the embedded line numbers have gaps; the conditions choosing
# between the branches and the early return are not shown.
1791 def list_thumbnails(self, info_dict):
1792 thumbnails = info_dict.get('thumbnails')
1794 tn_url = info_dict.get('thumbnail')
1796 thumbnails = [{'id': '0', 'url': tn_url}]
1799 '[info] No thumbnails present for %s' % info_dict['id'])
1803 '[info] Thumbnails for %s:' % info_dict['id'])
# Missing dimensions are displayed as 'unknown'.
1804 self.to_screen(render_table(
1805 ['ID', 'width', 'height', 'URL'],
1806 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
# Print the available subtitle languages of a video with their formats.
#   video_id:  only used in the messages.
#   subtitles: mapping language -> list of format dicts (each with an 'ext').
#   name:      label used in the messages ('subtitles', 'automatic captions').
# NOTE(review): the embedded line numbers have gaps; the emptiness check in
# front of the "has no" message and the following return are not shown.
1808 def list_subtitles(self, video_id, subtitles, name='subtitles'):
1810 self.to_screen('%s has no %s' % (video_id, name))
1813 'Available %s for %s:' % (name, video_id))
1814 self.to_screen(render_table(
1815 ['Language', 'formats'],
# Formats are listed in reverse order of the stored list.
1816 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1817 for lang, formats in subtitles.items()]))
# Open `req` through self._opener using self._socket_timeout.
#   req: either a URL string or a request object (get_full_url() is used to
#        read its URL).
# If escaping changes the URL, the escaped URL is substituted before opening.
# NOTE(review): the embedded line numbers have gaps; the branch that handles
# a plain string and the start of the request re-creation call are not shown.
1819 def urlopen(self, req):
1820 """ Start an HTTP download """
1822 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1823 # always respected by websites, some tend to give out URLs with non percent-encoded
1824 # non-ASCII characters (see telemb.py, ard.py [#3412])
1825 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1826 # To work around aforementioned issue we will replace request's original URL with
1827 # percent-encoded one
1828 req_is_string = isinstance(req, compat_basestring)
1829 url = req if req_is_string else req.get_full_url()
1830 url_escaped = escape_url(url)
1832 # Substitute URL if any change after escaping
1833 if url != url_escaped:
# A HEAD request stays a HEAD request; data, headers and the other request
# attributes are carried over to the new request object.
1837 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1839 url_escaped, data=req.data, headers=req.headers,
1840 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1842 return self._opener.open(req, timeout=self._socket_timeout)
# Write the '[debug] ...' diagnostic header when the 'verbose' parameter is
# set: encodings, program version, git HEAD (if run from a checkout), Python
# version/platform, external tool versions and the proxy map. With
# 'call_home' enabled it also fetches the public IP address and the latest
# released version and warns when the running version is older.
# NOTE(review): the embedded line numbers have gaps; the early return, the
# try/except around the git call and parts of some expressions are not shown.
1844 def print_debug_header(self):
1845 if not self.params.get('verbose'):
1848 if type('') is not compat_str:
1849 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1850 self.report_warning(
1851 'Your Python is broken! Update to a newer and supported version')
# sys.stdout may be replaced by an object without an 'encoding' attribute.
1853 stdout_encoding = getattr(
1854 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1856 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1857 locale.getpreferredencoding(),
1858 sys.getfilesystemencoding(),
1860 self.get_encoding()))
1861 write_string(encoding_str, encoding=None)
1863 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Ask git for the short HEAD hash, running in this module's directory.
1865 sp = subprocess.Popen(
1866 ['git', 'rev-parse', '--short', 'HEAD'],
1867 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1868 cwd=os.path.dirname(os.path.abspath(__file__)))
1869 out, err = sp.communicate()
1870 out = out.decode().strip()
# NOTE(review): re.match only anchors at the start, so any output that merely
# begins with a hex digit passes this check.
1871 if re.match('[0-9a-f]+', out):
1872 self._write_string('[debug] Git HEAD: ' + out + '\n')
1878 self._write_string('[debug] Python version %s - %s\n' % (
1879 platform.python_version(), platform_name()))
1881 exe_versions = FFmpegPostProcessor.get_versions(self)
1882 exe_versions['rtmpdump'] = rtmpdump_version()
1883 exe_str = ', '.join(
1885 for exe, v in sorted(exe_versions.items())
1890 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the proxies of every opener handler that exposes a 'proxies' mapping.
1893 for handler in self._opener.handlers:
1894 if hasattr(handler, 'proxies'):
1895 proxy_map.update(handler.proxies)
1896 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# 'call_home' performs network requests to yt-dl.org.
1898 if self.params.get('call_home', False):
1899 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1900 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1901 latest_version = self.urlopen(
1902 'https://yt-dl.org/latest/version').read().decode('utf-8')
1903 if version_tuple(latest_version) > version_tuple(__version__):
1904 self.report_warning(
1905 'You are using an outdated version (newest version: %s)! '
1906 'See https://yt-dl.org/update if you need help updating.' %
# Create the URL opener used for all requests and store it in self._opener;
# also sets self._socket_timeout and self.cookiejar.
# Parameters read: 'socket_timeout', 'cookiefile', 'proxy',
# 'debug_printtraffic' (plus whatever make_HTTPS_handler/YoutubeDLHandler
# read from self.params).
# NOTE(review): the embedded line numbers have gaps; else: lines, the
# arguments of the cookie jar / cookie processor constructors and the value
# used for an empty 'proxy' string are not shown.
1909 def _setup_opener(self):
1910 timeout_val = self.params.get('socket_timeout')
# Default socket timeout: 600 seconds.
1911 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1913 opts_cookiefile = self.params.get('cookiefile')
1914 opts_proxy = self.params.get('proxy')
# Without a cookie file an in-memory jar is used; otherwise a Mozilla-format
# jar, loaded only when the file is readable.
1916 if opts_cookiefile is None:
1917 self.cookiejar = compat_cookiejar.CookieJar()
1919 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1921 if os.access(opts_cookiefile, os.R_OK):
1922 self.cookiejar.load()
1924 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# An explicit 'proxy' applies to both http and https; with no 'proxy'
# parameter the proxies reported by getproxies() are used.
1926 if opts_proxy is not None:
1927 if opts_proxy == '':
1930 proxies = {'http': opts_proxy, 'https': opts_proxy}
1932 proxies = compat_urllib_request.getproxies()
1933 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1934 if 'http' in proxies and 'https' not in proxies:
1935 proxies['https'] = proxies['http']
1936 proxy_handler = PerRequestProxyHandler(proxies)
1938 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1939 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1940 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1941 opener = compat_urllib_request.build_opener(
1942 proxy_handler, https_handler, cookie_processor, ydlh)
1944 # Delete the default user-agent header, which would otherwise apply in
1945 # cases where our custom HTTP handler doesn't come into play
1946 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1947 opener.addheaders = []
1948 self._opener = opener
# Encode text `s` with the encoding returned by get_encoding(); bytes input
# is returned unchanged.
# On UnicodeEncodeError the error's reason is extended with a hint about the
# system encoding / the --encoding option.
# NOTE(review): embedded lines 1953-1954 and 1958 are not shown; presumably
# the try: line and a re-raise of the amended error -- confirm.
1950 def encode(self, s):
1951 if isinstance(s, bytes):
1952 return s # Already encoded
1955 return s.encode(self.get_encoding())
1956 except UnicodeEncodeError as err:
1957 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
# Determine the text encoding to use: the 'encoding' parameter when set,
# otherwise the result of preferredencoding().
# NOTE(review): the return statement (embedded line 1964) is not shown in
# this listing.
1960 def get_encoding(self):
1961 encoding = self.params.get('encoding')
1962 if encoding is None:
1963 encoding = preferredencoding()
# Download thumbnail image(s) of a video next to the media file.
#   'writethumbnail':       only the last entry of info_dict['thumbnails'].
#   'write_all_thumbnails': every entry.
# Each thumbnail dict gets a 'filename' key with the path it is written to.
# Download failures are reported as warnings, not errors.
# NOTE(review): the embedded line numbers have gaps; the emptiness checks,
# early returns and else/try lines are not shown in this listing.
1966 def _write_thumbnails(self, info_dict, filename):
1967 if self.params.get('writethumbnail', False):
1968 thumbnails = info_dict.get('thumbnails')
1970 thumbnails = [thumbnails[-1]]
1971 elif self.params.get('write_all_thumbnails', False):
1972 thumbnails = info_dict.get('thumbnails')
1977 # No thumbnails present, so return immediately
1980 for t in thumbnails:
# The extension comes from the URL, defaulting to 'jpg'.
1981 thumb_ext = determine_ext(t['url'], 'jpg')
# The thumbnail id is only added to the file name and to the messages when
# several thumbnails are written.
1982 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1983 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1984 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1986 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1987 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1988 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1990 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1991 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1993 uf = self.urlopen(t['url'])
# NOTE(review): the existence check above goes through encodeFilename() but
# this open() call does not -- verify this is intended.
1994 with open(thumb_filename, 'wb') as thumbf:
1995 shutil.copyfileobj(uf, thumbf)
1996 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1997 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1998 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1999 self.report_warning('Unable to download thumbnail "%s": %s' %
2000 (t['url'], compat_str(err)))