2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_get_terminal_size,
38 compat_tokenize_tokenize,
40 compat_urllib_request,
60 PerRequestProxyHandler,
70 UnavailableVideoError,
81 from .cache import Cache
82 from .extractor import get_info_extractor, gen_extractors
83 from .downloader import get_suitable_downloader
84 from .downloader.rtmp import rtmpdump_version
85 from .postprocessor import (
87 FFmpegFixupStretchedPP,
92 from .version import __version__
95 class YoutubeDL(object):
YoutubeDL objects are the ones responsible for downloading the
99 actual video file and writing it to disk if the user has requested
100 it, among some other tasks. In most cases there should be one per
101 program. As, given a video URL, the downloader doesn't know how to
extract all the needed information, a task that InfoExtractors do, it
103 has to pass the URL to one of them.
105 For this, YoutubeDL objects have a method that allows
106 InfoExtractors to be registered in a given order. When it is passed
a URL, the YoutubeDL object hands it to the first InfoExtractor it
108 finds that reports being able to handle it. The InfoExtractor extracts
109 all the information about the video or videos the URL refers to, and
YoutubeDL processes the extracted information, possibly using a File
111 Downloader to download the video.
113 YoutubeDL objects accept a lot of parameters. In order not to saturate
114 the object constructor with arguments, it receives a dictionary of
115 options instead. These options are available through the params
116 attribute for the InfoExtractors to use. The YoutubeDL also
117 registers itself as the downloader in charge for the InfoExtractors
118 that are added to it, so this is a "mutual registration".
122 username: Username for authentication purposes.
123 password: Password for authentication purposes.
124 videopassword: Password for accessing a video.
125 usenetrc: Use netrc for authentication instead.
126 verbose: Print additional info to stdout.
127 quiet: Do not print messages to stdout.
128 no_warnings: Do not print out anything for warnings.
129 forceurl: Force printing final URL.
130 forcetitle: Force printing title.
131 forceid: Force printing ID.
132 forcethumbnail: Force printing thumbnail URL.
133 forcedescription: Force printing description.
134 forcefilename: Force printing final filename.
135 forceduration: Force printing duration.
136 forcejson: Force printing info_dict as JSON.
137 dump_single_json: Force printing the info_dict of the whole playlist
138 (or video) as a single JSON line.
139 simulate: Do not download the video files.
140 format: Video format code. See options.py for more information.
141 outtmpl: Template for output names.
142 restrictfilenames: Do not allow "&" and spaces in file names
143 ignoreerrors: Do not stop on download errors.
144 force_generic_extractor: Force downloader to use the generic extractor
145 nooverwrites: Prevent overwriting files.
146 playliststart: Playlist item to start at.
147 playlistend: Playlist item to end at.
148 playlist_items: Specific indices of playlist to download.
149 playlistreverse: Download playlist items in reverse order.
150 matchtitle: Download only matching titles.
151 rejecttitle: Reject downloads for matching titles.
152 logger: Log messages to a logging.Logger instance.
153 logtostderr: Log messages to stderr instead of stdout.
154 writedescription: Write the video description to a .description file
155 writeinfojson: Write the video description to a .info.json file
156 writeannotations: Write the video annotations to a .annotations.xml file
157 writethumbnail: Write the thumbnail image to a file
158 write_all_thumbnails: Write all thumbnail formats to files
159 writesubtitles: Write the video subtitles to a file
160 writeautomaticsub: Write the automatic subtitles to a file
161 allsubtitles: Downloads all the subtitles of the video
162 (requires writesubtitles or writeautomaticsub)
163 listsubtitles: Lists all available subtitles for the video
164 subtitlesformat: The format code for subtitles
165 subtitleslangs: List of languages of the subtitles to download
166 keepvideo: Keep the video file after post-processing
167 daterange: A DateRange object, download only if the upload_date is in the range.
168 skip_download: Skip the actual download of the video file
169 cachedir: Location of the cache files in the filesystem.
170 False to disable filesystem cache.
171 noplaylist: Download single video instead of a playlist if in doubt.
172 age_limit: An integer representing the user's age in years.
173 Unsuitable videos for the given age are skipped.
174 min_views: An integer representing the minimum view count the video
175 must have in order to not be skipped.
176 Videos without view count information are always
177 downloaded. None for no limit.
178 max_views: An integer representing the maximum view count.
179 Videos that are more popular than that are not
181 Videos without view count information are always
182 downloaded. None for no limit.
183 download_archive: File name of a file where all downloads are recorded.
184 Videos already present in the file are not downloaded
186 cookiefile: File name where cookies should be read from and dumped to.
187 nocheckcertificate:Do not verify SSL certificates
188 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
189 At the moment, this is only supported by YouTube.
190 proxy: URL of the proxy server to use
191 cn_verification_proxy: URL of the proxy to use for IP address verification
192 on Chinese sites. (Experimental)
193 socket_timeout: Time to wait for unresponsive hosts, in seconds
194 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
196 debug_printtraffic:Print out sent and received HTTP traffic
197 include_ads: Download ads as well
198 default_search: Prepend this string if an input url is not valid.
199 'auto' for elaborate guessing
200 encoding: Use this encoding instead of the system-specified.
201 extract_flat: Do not resolve URLs, return the immediate result.
202 Pass in 'in_playlist' to only show this behavior for
204 postprocessors: A list of dictionaries, each with an entry
205 * key: The name of the postprocessor. See
206 youtube_dl/postprocessor/__init__.py for a list.
207 as well as any further keyword arguments for the
209 progress_hooks: A list of functions that get called on download
210 progress, with a dictionary with the entries
211 * status: One of "downloading", "error", or "finished".
212 Check this first and ignore unknown values.
214 If status is one of "downloading", or "finished", the
215 following properties may also be present:
216 * filename: The final filename (always present)
217 * tmpfilename: The filename we're currently writing to
218 * downloaded_bytes: Bytes on disk
219 * total_bytes: Size of the whole file, None if unknown
220 * total_bytes_estimate: Guess of the eventual file size,
222 * elapsed: The number of seconds since download started.
223 * eta: The estimated time in seconds, None if unknown
224 * speed: The download speed in bytes/second, None if
226 * fragment_index: The counter of the currently
227 downloaded video fragment.
228 * fragment_count: The number of fragments (= individual
229 files that will be merged)
231 Progress hooks are guaranteed to be called at least once
232 (with status "finished") if the download is successful.
233 merge_output_format: Extension to use when merging formats.
234 fixup: Automatically correct known faults of the file.
236 - "never": do nothing
237 - "warn": only emit a warning
238 - "detect_or_warn": check whether we can do anything
239 about it, warn otherwise (default)
240 source_address: (Experimental) Client-side IP address to bind to.
241 call_home: Boolean, true iff we are allowed to contact the
242 youtube-dl servers for debugging.
243 sleep_interval: Number of seconds to sleep before each download.
244 listformats: Print an overview of available video formats and exit.
245 list_thumbnails: Print a table of all thumbnails and exit.
246 match_filter: A function that gets called with the info_dict of
248 If it returns a message, the video is ignored.
249 If it returns None, the video is downloaded.
250 match_filter_func in utils.py is one example for this.
251 no_color: Do not emit color codes in output.
253 The following options determine which downloader is picked:
254 external_downloader: Executable of the external downloader to call.
255 None or unset for standard (built-in) downloader.
256 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
258 The following parameters are not used by YoutubeDL itself, they are used by
259 the downloader (see youtube_dl/downloader/common.py):
260 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
261 noresizebuffer, retries, continuedl, noprogress, consoletitle,
262 xattr_set_filesize, external_downloader_args.
264 The following options are used by the post processors:
265 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
266 otherwise prefer avconv.
267 postprocessor_args: A list of additional command-line arguments for the
274 _download_retcode = None
275 _num_downloads = None
278 def __init__(self, params=None, auto_init=True):
279 """Create a FileDownloader object with the given options."""
283 self._ies_instances = {}
285 self._progress_hooks = []
286 self._download_retcode = 0
287 self._num_downloads = 0
288 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
289 self._err_file = sys.stderr
291 self.cache = Cache(self)
293 if params.get('bidi_workaround', False):
296 master, slave = pty.openpty()
297 width = compat_get_terminal_size().columns
301 width_args = ['-w', str(width)]
303 stdin=subprocess.PIPE,
305 stderr=self._err_file)
307 self._output_process = subprocess.Popen(
308 ['bidiv'] + width_args, **sp_kwargs
311 self._output_process = subprocess.Popen(
312 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
313 self._output_channel = os.fdopen(master, 'rb')
314 except OSError as ose:
316 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
320 if (sys.version_info >= (3,) and sys.platform != 'win32' and
321 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
322 not params.get('restrictfilenames', False)):
323 # On Python 3, the Unicode filesystem API will throw errors (#1474)
325 'Assuming --restrict-filenames since file system encoding '
326 'cannot encode all characters. '
327 'Set the LC_ALL environment variable to fix this.')
328 self.params['restrictfilenames'] = True
330 if isinstance(params.get('outtmpl'), bytes):
332 'Parameter outtmpl is bytes, but should be a unicode string. '
333 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
338 self.print_debug_header()
339 self.add_default_info_extractors()
341 for pp_def_raw in self.params.get('postprocessors', []):
342 pp_class = get_postprocessor(pp_def_raw['key'])
343 pp_def = dict(pp_def_raw)
345 pp = pp_class(self, **compat_kwargs(pp_def))
346 self.add_post_processor(pp)
348 for ph in self.params.get('progress_hooks', []):
349 self.add_progress_hook(ph)
351 def warn_if_short_id(self, argv):
352 # short YouTube ID starting with dash?
354 i for i, a in enumerate(argv)
355 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
359 [a for i, a in enumerate(argv) if i not in idxs] +
360 ['--'] + [argv[i] for i in idxs]
363 'Long argument string detected. '
364 'Use -- to separate parameters and URLs, like this:\n%s\n' %
365 args_to_str(correct_argv))
367 def add_info_extractor(self, ie):
368 """Add an InfoExtractor object to the end of the list."""
370 self._ies_instances[ie.ie_key()] = ie
371 ie.set_downloader(self)
373 def get_info_extractor(self, ie_key):
375 Get an instance of an IE with name ie_key, it will try to get one from
376 the _ies list, if there's no instance it will create a new one and add
377 it to the extractor list.
379 ie = self._ies_instances.get(ie_key)
381 ie = get_info_extractor(ie_key)()
382 self.add_info_extractor(ie)
385 def add_default_info_extractors(self):
387 Add the InfoExtractors returned by gen_extractors to the end of the list
389 for ie in gen_extractors():
390 self.add_info_extractor(ie)
392 def add_post_processor(self, pp):
393 """Add a PostProcessor object to the end of the chain."""
395 pp.set_downloader(self)
397 def add_progress_hook(self, ph):
398 """Add the progress hook (currently only for the file downloader)"""
399 self._progress_hooks.append(ph)
401 def _bidi_workaround(self, message):
402 if not hasattr(self, '_output_channel'):
405 assert hasattr(self, '_output_process')
406 assert isinstance(message, compat_str)
407 line_count = message.count('\n') + 1
408 self._output_process.stdin.write((message + '\n').encode('utf-8'))
409 self._output_process.stdin.flush()
410 res = ''.join(self._output_channel.readline().decode('utf-8')
411 for _ in range(line_count))
412 return res[:-len('\n')]
414 def to_screen(self, message, skip_eol=False):
415 """Print message to stdout if not in quiet mode."""
416 return self.to_stdout(message, skip_eol, check_quiet=True)
418 def _write_string(self, s, out=None):
419 write_string(s, out=out, encoding=self.params.get('encoding'))
421 def to_stdout(self, message, skip_eol=False, check_quiet=False):
422 """Print message to stdout if not in quiet mode."""
423 if self.params.get('logger'):
424 self.params['logger'].debug(message)
425 elif not check_quiet or not self.params.get('quiet', False):
426 message = self._bidi_workaround(message)
427 terminator = ['\n', ''][skip_eol]
428 output = message + terminator
430 self._write_string(output, self._screen_file)
432 def to_stderr(self, message):
433 """Print message to stderr."""
434 assert isinstance(message, compat_str)
435 if self.params.get('logger'):
436 self.params['logger'].error(message)
438 message = self._bidi_workaround(message)
439 output = message + '\n'
440 self._write_string(output, self._err_file)
442 def to_console_title(self, message):
443 if not self.params.get('consoletitle', False):
445 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
446 # c_wchar_p() might not be necessary if `message` is
447 # already of type unicode()
448 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
449 elif 'TERM' in os.environ:
450 self._write_string('\033]0;%s\007' % message, self._screen_file)
452 def save_console_title(self):
453 if not self.params.get('consoletitle', False):
455 if 'TERM' in os.environ:
456 # Save the title on stack
457 self._write_string('\033[22;0t', self._screen_file)
459 def restore_console_title(self):
460 if not self.params.get('consoletitle', False):
462 if 'TERM' in os.environ:
463 # Restore the title from stack
464 self._write_string('\033[23;0t', self._screen_file)
467 self.save_console_title()
470 def __exit__(self, *args):
471 self.restore_console_title()
473 if self.params.get('cookiefile') is not None:
474 self.cookiejar.save()
476 def trouble(self, message=None, tb=None):
477 """Determine action to take when a download problem appears.
479 Depending on if the downloader has been configured to ignore
480 download errors or not, this method may throw an exception or
481 not when errors are found, after printing the message.
483 tb, if given, is additional traceback information.
485 if message is not None:
486 self.to_stderr(message)
487 if self.params.get('verbose'):
489 if sys.exc_info()[0]: # if .trouble has been called from an except block
491 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
492 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
493 tb += compat_str(traceback.format_exc())
495 tb_data = traceback.format_list(traceback.extract_stack())
496 tb = ''.join(tb_data)
498 if not self.params.get('ignoreerrors', False):
499 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
500 exc_info = sys.exc_info()[1].exc_info
502 exc_info = sys.exc_info()
503 raise DownloadError(message, exc_info)
504 self._download_retcode = 1
506 def report_warning(self, message):
508 Print the message to stderr, it will be prefixed with 'WARNING:'
509 If stderr is a tty file the 'WARNING:' will be colored
511 if self.params.get('logger') is not None:
512 self.params['logger'].warning(message)
514 if self.params.get('no_warnings'):
516 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
517 _msg_header = '\033[0;33mWARNING:\033[0m'
519 _msg_header = 'WARNING:'
520 warning_message = '%s %s' % (_msg_header, message)
521 self.to_stderr(warning_message)
523 def report_error(self, message, tb=None):
525 Do the same as trouble, but prefixes the message with 'ERROR:', colored
526 in red if stderr is a tty file.
528 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
529 _msg_header = '\033[0;31mERROR:\033[0m'
531 _msg_header = 'ERROR:'
532 error_message = '%s %s' % (_msg_header, message)
533 self.trouble(error_message, tb)
535 def report_file_already_downloaded(self, file_name):
536 """Report file has already been fully downloaded."""
538 self.to_screen('[download] %s has already been downloaded' % file_name)
539 except UnicodeEncodeError:
540 self.to_screen('[download] The file has already been downloaded')
542 def prepare_filename(self, info_dict):
543 """Generate the output filename."""
545 template_dict = dict(info_dict)
547 template_dict['epoch'] = int(time.time())
548 autonumber_size = self.params.get('autonumber_size')
549 if autonumber_size is None:
551 autonumber_templ = '%0' + str(autonumber_size) + 'd'
552 template_dict['autonumber'] = autonumber_templ % self._num_downloads
553 if template_dict.get('playlist_index') is not None:
554 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
555 if template_dict.get('resolution') is None:
556 if template_dict.get('width') and template_dict.get('height'):
557 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
558 elif template_dict.get('height'):
559 template_dict['resolution'] = '%sp' % template_dict['height']
560 elif template_dict.get('width'):
561 template_dict['resolution'] = '?x%d' % template_dict['width']
563 sanitize = lambda k, v: sanitize_filename(
565 restricted=self.params.get('restrictfilenames'),
567 template_dict = dict((k, sanitize(k, v))
568 for k, v in template_dict.items()
570 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
572 outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
573 tmpl = compat_expanduser(outtmpl)
574 filename = tmpl % template_dict
575 # Temporary fix for #4787
576 # 'Treat' all problem characters by passing filename through preferredencoding
577 # to workaround encoding issues with subprocess on python2 @ Windows
578 if sys.version_info < (3, 0) and sys.platform == 'win32':
579 filename = encodeFilename(filename, True).decode(preferredencoding())
581 except ValueError as err:
582 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
585 def _match_entry(self, info_dict, incomplete):
586 """ Returns None iff the file should be downloaded """
588 video_title = info_dict.get('title', info_dict.get('id', 'video'))
589 if 'title' in info_dict:
590 # This can happen when we're just evaluating the playlist
591 title = info_dict['title']
592 matchtitle = self.params.get('matchtitle', False)
594 if not re.search(matchtitle, title, re.IGNORECASE):
595 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
596 rejecttitle = self.params.get('rejecttitle', False)
598 if re.search(rejecttitle, title, re.IGNORECASE):
599 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
600 date = info_dict.get('upload_date', None)
602 dateRange = self.params.get('daterange', DateRange())
603 if date not in dateRange:
604 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
605 view_count = info_dict.get('view_count', None)
606 if view_count is not None:
607 min_views = self.params.get('min_views')
608 if min_views is not None and view_count < min_views:
609 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
610 max_views = self.params.get('max_views')
611 if max_views is not None and view_count > max_views:
612 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
613 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
614 return 'Skipping "%s" because it is age restricted' % video_title
615 if self.in_download_archive(info_dict):
616 return '%s has already been recorded in archive' % video_title
619 match_filter = self.params.get('match_filter')
620 if match_filter is not None:
621 ret = match_filter(info_dict)
628 def add_extra_info(info_dict, extra_info):
629 '''Set the keys from extra_info in info dict if they are missing'''
630 for key, value in extra_info.items():
631 info_dict.setdefault(key, value)
633 def extract_info(self, url, download=True, ie_key=None, extra_info={},
634 process=True, force_generic_extractor=False):
636 Returns a list with a dictionary for each video we find.
637 If 'download', also downloads the videos.
638 extra_info is a dict containing the extra values to add to each result
641 if not ie_key and force_generic_extractor:
645 ies = [self.get_info_extractor(ie_key)]
650 if not ie.suitable(url):
654 self.report_warning('The program functionality for this site has been marked as broken, '
655 'and will probably not work.')
658 ie_result = ie.extract(url)
659 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
661 if isinstance(ie_result, list):
662 # Backwards compatibility: old IE result format
664 '_type': 'compat_list',
665 'entries': ie_result,
667 self.add_default_extra_info(ie_result, ie, url)
669 return self.process_ie_result(ie_result, download, extra_info)
672 except ExtractorError as de: # An error we somewhat expected
673 self.report_error(compat_str(de), de.format_traceback())
675 except MaxDownloadsReached:
677 except Exception as e:
678 if self.params.get('ignoreerrors', False):
679 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
684 self.report_error('no suitable InfoExtractor for URL %s' % url)
686 def add_default_extra_info(self, ie_result, ie, url):
687 self.add_extra_info(ie_result, {
688 'extractor': ie.IE_NAME,
690 'webpage_url_basename': url_basename(url),
691 'extractor_key': ie.ie_key(),
694 def process_ie_result(self, ie_result, download=True, extra_info={}):
696 Take the result of the ie(may be modified) and resolve all unresolved
697 references (URLs, playlist items).
699 It will also download the videos if 'download'.
700 Returns the resolved ie_result.
703 result_type = ie_result.get('_type', 'video')
705 if result_type in ('url', 'url_transparent'):
706 extract_flat = self.params.get('extract_flat', False)
707 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
708 extract_flat is True):
709 if self.params.get('forcejson', False):
710 self.to_stdout(json.dumps(ie_result))
713 if result_type == 'video':
714 self.add_extra_info(ie_result, extra_info)
715 return self.process_video_result(ie_result, download=download)
716 elif result_type == 'url':
717 # We have to add extra_info to the results because it may be
718 # contained in a playlist
719 return self.extract_info(ie_result['url'],
721 ie_key=ie_result.get('ie_key'),
722 extra_info=extra_info)
723 elif result_type == 'url_transparent':
724 # Use the information from the embedding page
725 info = self.extract_info(
726 ie_result['url'], ie_key=ie_result.get('ie_key'),
727 extra_info=extra_info, download=False, process=False)
729 force_properties = dict(
730 (k, v) for k, v in ie_result.items() if v is not None)
731 for f in ('_type', 'url'):
732 if f in force_properties:
733 del force_properties[f]
734 new_result = info.copy()
735 new_result.update(force_properties)
737 assert new_result.get('_type') != 'url_transparent'
739 return self.process_ie_result(
740 new_result, download=download, extra_info=extra_info)
741 elif result_type == 'playlist' or result_type == 'multi_video':
742 # We process each entry in the playlist
743 playlist = ie_result.get('title', None) or ie_result.get('id', None)
744 self.to_screen('[download] Downloading playlist: %s' % playlist)
746 playlist_results = []
748 playliststart = self.params.get('playliststart', 1) - 1
749 playlistend = self.params.get('playlistend', None)
750 # For backwards compatibility, interpret -1 as whole list
751 if playlistend == -1:
754 playlistitems_str = self.params.get('playlist_items', None)
756 if playlistitems_str is not None:
757 def iter_playlistitems(format):
758 for string_segment in format.split(','):
759 if '-' in string_segment:
760 start, end = string_segment.split('-')
761 for item in range(int(start), int(end) + 1):
764 yield int(string_segment)
765 playlistitems = iter_playlistitems(playlistitems_str)
767 ie_entries = ie_result['entries']
768 if isinstance(ie_entries, list):
769 n_all_entries = len(ie_entries)
772 ie_entries[i - 1] for i in playlistitems
773 if -n_all_entries <= i - 1 < n_all_entries]
775 entries = ie_entries[playliststart:playlistend]
776 n_entries = len(entries)
778 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
779 (ie_result['extractor'], playlist, n_all_entries, n_entries))
780 elif isinstance(ie_entries, PagedList):
783 for item in playlistitems:
784 entries.extend(ie_entries.getslice(
788 entries = ie_entries.getslice(
789 playliststart, playlistend)
790 n_entries = len(entries)
792 "[%s] playlist %s: Downloading %d videos" %
793 (ie_result['extractor'], playlist, n_entries))
796 entry_list = list(ie_entries)
797 entries = [entry_list[i - 1] for i in playlistitems]
799 entries = list(itertools.islice(
800 ie_entries, playliststart, playlistend))
801 n_entries = len(entries)
803 "[%s] playlist %s: Downloading %d videos" %
804 (ie_result['extractor'], playlist, n_entries))
806 if self.params.get('playlistreverse', False):
807 entries = entries[::-1]
809 for i, entry in enumerate(entries, 1):
810 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
812 'n_entries': n_entries,
813 'playlist': playlist,
814 'playlist_id': ie_result.get('id'),
815 'playlist_title': ie_result.get('title'),
816 'playlist_index': i + playliststart,
817 'extractor': ie_result['extractor'],
818 'webpage_url': ie_result['webpage_url'],
819 'webpage_url_basename': url_basename(ie_result['webpage_url']),
820 'extractor_key': ie_result['extractor_key'],
823 reason = self._match_entry(entry, incomplete=True)
824 if reason is not None:
825 self.to_screen('[download] ' + reason)
828 entry_result = self.process_ie_result(entry,
831 playlist_results.append(entry_result)
832 ie_result['entries'] = playlist_results
834 elif result_type == 'compat_list':
836 'Extractor %s returned a compat_list result. '
837 'It needs to be updated.' % ie_result.get('extractor'))
843 'extractor': ie_result['extractor'],
844 'webpage_url': ie_result['webpage_url'],
845 'webpage_url_basename': url_basename(ie_result['webpage_url']),
846 'extractor_key': ie_result['extractor_key'],
850 ie_result['entries'] = [
851 self.process_ie_result(_fixup(r), download, extra_info)
852 for r in ie_result['entries']
856 raise Exception('Invalid result type: %s' % result_type)
858 def _build_format_filter(self, filter_spec):
859 " Returns a function to filter the formats according to the filter_spec "
869 operator_rex = re.compile(r'''(?x)\s*
870 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
871 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
872 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
874 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
875 m = operator_rex.search(filter_spec)
878 comparison_value = int(m.group('value'))
880 comparison_value = parse_filesize(m.group('value'))
881 if comparison_value is None:
882 comparison_value = parse_filesize(m.group('value') + 'B')
883 if comparison_value is None:
885 'Invalid value %r in format specification %r' % (
886 m.group('value'), filter_spec))
887 op = OPERATORS[m.group('op')]
894 str_operator_rex = re.compile(r'''(?x)
895 \s*(?P<key>ext|acodec|vcodec|container|protocol)
896 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
897 \s*(?P<value>[a-zA-Z0-9_-]+)
899 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
900 m = str_operator_rex.search(filter_spec)
902 comparison_value = m.group('value')
903 op = STR_OPERATORS[m.group('op')]
906 raise ValueError('Invalid filter specification %r' % filter_spec)
909 actual_value = f.get(m.group('key'))
910 if actual_value is None:
911 return m.group('none_inclusive')
912 return op(actual_value, comparison_value)
915 def build_format_selector(self, format_spec):
916 def syntax_error(note, start):
918 'Invalid format specification: '
919 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
920 return SyntaxError(message)
922 PICKFIRST = 'PICKFIRST'
926 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
928 def _parse_filter(tokens):
930 for type, string, start, _, _ in tokens:
931 if type == tokenize.OP and string == ']':
932 return ''.join(filter_parts)
934 filter_parts.append(string)
936 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
938 current_selector = None
939 for type, string, start, _, _ in tokens:
940 # ENCODING is only defined in python 3.x
941 if type == getattr(tokenize, 'ENCODING', None):
943 elif type in [tokenize.NAME, tokenize.NUMBER]:
944 current_selector = FormatSelector(SINGLE, string, [])
945 elif type == tokenize.OP:
948 # ')' will be handled by the parentheses group
949 tokens.restore_last_token()
951 elif inside_merge and string in ['/', ',']:
952 tokens.restore_last_token()
954 elif inside_choice and string == ',':
955 tokens.restore_last_token()
958 if not current_selector:
959 raise syntax_error('"," must follow a format selector', start)
960 selectors.append(current_selector)
961 current_selector = None
963 if not current_selector:
964 raise syntax_error('"/" must follow a format selector', start)
965 first_choice = current_selector
966 second_choice = _parse_format_selection(tokens, inside_choice=True)
967 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
969 if not current_selector:
970 current_selector = FormatSelector(SINGLE, 'best', [])
971 format_filter = _parse_filter(tokens)
972 current_selector.filters.append(format_filter)
975 raise syntax_error('Unexpected "("', start)
976 group = _parse_format_selection(tokens, inside_group=True)
977 current_selector = FormatSelector(GROUP, group, [])
979 video_selector = current_selector
980 audio_selector = _parse_format_selection(tokens, inside_merge=True)
981 if not video_selector or not audio_selector:
982 raise syntax_error('"+" must be between two format selectors', start)
983 current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
985 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
986 elif type == tokenize.ENDMARKER:
989 selectors.append(current_selector)
# Recursively turns the parsed FormatSelector tree into a function
# formats -> iterable-of-formats.
# NOTE(review): extraction-garbled paste — original line numbers are fused
# onto each line and numbering gaps (995, 997, 999, 1005, ...) mark elided
# lines (mostly `else:` branches and yields). Kept byte-for-byte.
992 def _build_selector_function(selector):
# A list of selectors (comma-separated): chain the results of each.
993 if isinstance(selector, list):
994 fs = [_build_selector_function(s) for s in selector]
996 def selector_function(formats):
998 for format in f(formats):
1000 return selector_function
1001 elif selector.type == GROUP:
1002 selector_function = _build_selector_function(selector.selector)
# PICKFIRST ('/'): first sub-selector that yields any formats wins.
1003 elif selector.type == PICKFIRST:
1004 fs = [_build_selector_function(s) for s in selector.selector]
1006 def selector_function(formats):
1008 picked_formats = list(f(formats))
1010 return picked_formats
# SINGLE: a concrete spec like 'best', 'bestaudio', an extension or an id.
1012 elif selector.type == SINGLE:
1013 format_spec = selector.selector
1015 def selector_function(formats):
1016 formats = list(formats)
1019 if format_spec == 'all':
# 'best'/'worst' pick from combined audio+video formats; the list is
# ordered worst-to-best, hence index -1 for best.
1022 elif format_spec in ['best', 'worst', None]:
1023 format_idx = 0 if format_spec == 'worst' else -1
1024 audiovideo_formats = [
1026 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1027 if audiovideo_formats:
1028 yield audiovideo_formats[format_idx]
1029 # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1030 elif (all(f.get('acodec') != 'none' for f in formats) or
1031 all(f.get('vcodec') != 'none' for f in formats)):
1032 yield formats[format_idx]
1033 elif format_spec == 'bestaudio':
1036 if f.get('vcodec') == 'none']
1038 yield audio_formats[-1]
1039 elif format_spec == 'worstaudio':
1042 if f.get('vcodec') == 'none']
1044 yield audio_formats[0]
1045 elif format_spec == 'bestvideo':
1048 if f.get('acodec') == 'none']
1050 yield video_formats[-1]
1051 elif format_spec == 'worstvideo':
1054 if f.get('acodec') == 'none']
1056 yield video_formats[0]
# Otherwise treat the spec as an extension or an exact format_id.
1058 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1059 if format_spec in extensions:
1060 filter_f = lambda f: f['ext'] == format_spec
1062 filter_f = lambda f: f['format_id'] == format_spec
1063 matches = list(filter(filter_f, formats))
# MERGE ('+'): pair every selected video with every selected audio and
# synthesize a combined info dict for the merged download.
1066 elif selector.type == MERGE:
1067 def _merge(formats_info):
1068 format_1, format_2 = [f['format_id'] for f in formats_info]
1069 # The first format must contain the video and the
1071 if formats_info[0].get('vcodec') == 'none':
1072 self.report_error('The first format must '
1073 'contain the video, try using '
1074 '"-f %s+%s"' % (format_2, format_1))
1077 formats_info[0]['ext']
1078 if self.params.get('merge_output_format') is None
1079 else self.params['merge_output_format'])
1081 'requested_formats': formats_info,
1082 'format': '%s+%s' % (formats_info[0].get('format'),
1083 formats_info[1].get('format')),
1084 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1085 formats_info[1].get('format_id')),
# Video-related fields come from the first (video) format...
1086 'width': formats_info[0].get('width'),
1087 'height': formats_info[0].get('height'),
1088 'resolution': formats_info[0].get('resolution'),
1089 'fps': formats_info[0].get('fps'),
1090 'vcodec': formats_info[0].get('vcodec'),
1091 'vbr': formats_info[0].get('vbr'),
1092 'stretched_ratio': formats_info[0].get('stretched_ratio'),
# ...audio-related fields from the second (audio) format.
1093 'acodec': formats_info[1].get('acodec'),
1094 'abr': formats_info[1].get('abr'),
1097 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1099 def selector_function(formats):
1100 formats = list(formats)
1101 for pair in itertools.product(video_selector(formats), audio_selector(formats)):
# Any [..] filters attached to the selector are applied before selection.
1104 filters = [self._build_format_filter(f) for f in selector.filters]
1106 def final_selector(formats):
1107 for _filter in filters:
1108 formats = list(filter(_filter, formats))
1109 return selector_function(formats)
1110 return final_selector
# Tail of build_format_selector: tokenize the format spec string, wrap the
# token stream in a pushback iterator, parse, and build the selector function.
# NOTE(review): extraction-garbled paste — fused line numbers, and numbering
# gaps (1113, 1121-1126, 1130-1134, 1136-1137) mark elided lines (the `try:`,
# TokenIterator's __iter__/__next__ bodies, restore_last_token's body).
1112 stream = io.BytesIO(format_spec.encode('utf-8'))
1114 tokens = list(compat_tokenize_tokenize(stream.readline))
1115 except tokenize.TokenError:
1116 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
# Iterator over tokens with one-token pushback (restore_last_token), needed
# by the recursive-descent parser above.
1118 class TokenIterator(object):
1119 def __init__(self, tokens):
1120 self.tokens = tokens
1127 if self.counter >= len(self.tokens):
1128 raise StopIteration()
1129 value = self.tokens[self.counter]
1135 def restore_last_token(self):
1138 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1139 return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Compute the HTTP headers for this download: the global std_headers,
    overridden by the extractor-supplied 'http_headers', plus the Cookie
    header the configured cookie jar would send for the URL.

    Fix: the pasted span built `res` but the guard lines and `return res`
    were lost (fused line numbers, elided lines); restored here.
    """
    res = std_headers.copy()

    add_headers = info_dict.get('http_headers')
    if add_headers:
        res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    if cookies:
        res['Cookie'] = cookies

    return res
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookie jar would attach to a
    request for this video's URL (None when no cookies apply).

    Fix: stripped the fused source line numbers and restored indentation;
    the statements themselves are unchanged (original lines 1154-1157 were
    contiguous in the paste).
    """
    # Build a throwaway request solely so the jar can compute the header.
    pr = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(pr)
    return pr.get_header('Cookie')
# Takes a raw extractor result of type 'video', normalizes its fields
# (thumbnails, display_id, upload_date, subtitles, formats), then selects
# and downloads the requested format(s).
# NOTE(review): extraction-garbled paste — original line numbers are fused
# onto each line; numbering gaps (1161, 1166, 1175, 1185-1186, ...) mark
# elided lines (guards, `try:`/`else:` branches). Kept byte-for-byte.
1159 def process_video_result(self, info_dict, download=True):
1160 assert info_dict.get('_type', 'video') == 'video'
# 'id' and 'title' are the two mandatory extractor fields.
1162 if 'id' not in info_dict:
1163 raise ExtractorError('Missing "id" field in extractor result')
1164 if 'title' not in info_dict:
1165 raise ExtractorError('Missing "title" field in extractor result')
1167 if 'playlist' not in info_dict:
1168 # It isn't part of a playlist
1169 info_dict['playlist'] = None
1170 info_dict['playlist_index'] = None
# Normalize thumbnails: promote a single 'thumbnail' into the list, sort by
# preference/size, and derive 'resolution' strings.
1172 thumbnails = info_dict.get('thumbnails')
1173 if thumbnails is None:
1174 thumbnail = info_dict.get('thumbnail')
1176 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1178 thumbnails.sort(key=lambda t: (
1179 t.get('preference'), t.get('width'), t.get('height'),
1180 t.get('id'), t.get('url')))
1181 for i, t in enumerate(thumbnails):
1182 if t.get('width') and t.get('height'):
1183 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1184 if t.get('id') is None:
# After sorting, the last thumbnail is the preferred one.
1187 if thumbnails and 'thumbnail' not in info_dict:
1188 info_dict['thumbnail'] = thumbnails[-1]['url']
1190 if 'display_id' not in info_dict and 'id' in info_dict:
1191 info_dict['display_id'] = info_dict['id']
1193 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1194 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1195 # see http://bugs.python.org/issue1646728)
1197 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1198 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1199 except (ValueError, OverflowError, OSError):
# Subtitle listing / selection.
1202 if self.params.get('listsubtitles', False):
1203 if 'automatic_captions' in info_dict:
1204 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1205 self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1207 info_dict['requested_subtitles'] = self.process_subtitles(
1208 info_dict['id'], info_dict.get('subtitles'),
1209 info_dict.get('automatic_captions'))
1211 # We now pick which formats have to be downloaded
1212 if info_dict.get('formats') is None:
1213 # There's only one format available
1214 formats = [info_dict]
1216 formats = info_dict['formats']
1219 raise ExtractorError('No video formats found!')
1223 # We check that all the formats have the format and format_id fields
1224 for i, format in enumerate(formats):
1225 if 'url' not in format:
1226 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1228 if format.get('format_id') is None:
1229 format['format_id'] = compat_str(i)
1230 format_id = format['format_id']
1231 if format_id not in formats_dict:
1232 formats_dict[format_id] = []
1233 formats_dict[format_id].append(format)
1235 # Make sure all formats have unique format_id
1236 for format_id, ambiguous_formats in formats_dict.items():
1237 if len(ambiguous_formats) > 1:
1238 for i, format in enumerate(ambiguous_formats):
1239 format['format_id'] = '%s-%d' % (format_id, i)
1241 for i, format in enumerate(formats):
1242 if format.get('format') is None:
1243 format['format'] = '{id} - {res}{note}'.format(
1244 id=format['format_id'],
1245 res=self.format_resolution(format),
1246 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1248 # Automatically determine file extension if missing
1249 if 'ext' not in format:
1250 format['ext'] = determine_ext(format['url']).lower()
1251 # Add HTTP headers, so that external programs can use them from the
1253 full_format_info = info_dict.copy()
1254 full_format_info.update(format)
1255 format['http_headers'] = self._calc_headers(full_format_info)
1257 # TODO Central sorting goes here
1259 if formats[0] is not info_dict:
1260 # only set the 'formats' fields if the original info_dict list them
1261 # otherwise we end up with a circular reference, the first (and unique)
1262 # element in the 'formats' field in info_dict is info_dict itself,
1263 # wich can't be exported to json
1264 info_dict['formats'] = formats
1265 if self.params.get('listformats'):
1266 self.list_formats(info_dict)
1268 if self.params.get('list_thumbnails'):
1269 self.list_thumbnails(info_dict)
# Determine the requested format; defaults to bestvideo+bestaudio/best when
# merging is possible (not live, not streaming to stdout).
1272 req_format = self.params.get('format')
1273 if req_format is None:
1274 req_format_list = []
1275 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1276 info_dict['extractor'] in ['youtube', 'ted'] and
1277 not info_dict.get('is_live')):
1278 merger = FFmpegMergerPP(self)
1279 if merger.available and merger.can_merge():
1280 req_format_list.append('bestvideo+bestaudio')
1281 req_format_list.append('best')
1282 req_format = '/'.join(req_format_list)
1283 format_selector = self.build_format_selector(req_format)
1284 formats_to_download = list(format_selector(formats))
1285 if not formats_to_download:
1286 raise ExtractorError('requested format not available',
1290 if len(formats_to_download) > 1:
1291 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1292 for format in formats_to_download:
1293 new_info = dict(info_dict)
1294 new_info.update(format)
1295 self.process_info(new_info)
1296 # We update the info dict with the best quality format (backwards compatibility)
1297 info_dict.update(formats_to_download[-1])
# NOTE(review): extraction-garbled paste — fused line numbers; gaps (1302,
# 1312-1314, 1317, 1322, 1324, 1330, 1332, 1334-1336, 1338-1342, 1346-1347)
# mark elided lines, including the `available_subs = {}` initialization and
# the final return. Kept byte-for-byte; comments only added.
1300 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1301 """Select the requested subtitles and their format"""
# Merge normal subtitles and automatic captions; normal ones win per-language.
1303 if normal_subtitles and self.params.get('writesubtitles'):
1304 available_subs.update(normal_subtitles)
1305 if automatic_captions and self.params.get('writeautomaticsub'):
1306 for lang, cap_info in automatic_captions.items():
1307 if lang not in available_subs:
1308 available_subs[lang] = cap_info
1310 if (not self.params.get('writesubtitles') and not
1311 self.params.get('writeautomaticsub') or not
# Determine which languages were requested: all, explicit list, 'en', or
# the first available.
1315 if self.params.get('allsubtitles', False):
1316 requested_langs = available_subs.keys()
1318 if self.params.get('subtitleslangs', False):
1319 requested_langs = self.params.get('subtitleslangs')
1320 elif 'en' in available_subs:
1321 requested_langs = ['en']
1323 requested_langs = [list(available_subs.keys())[0]]
# 'subtitlesformat' is a '/'-separated preference list, e.g. 'srt/vtt/best'.
1325 formats_query = self.params.get('subtitlesformat', 'best')
1326 formats_preference = formats_query.split('/') if formats_query else []
1328 for lang in requested_langs:
1329 formats = available_subs.get(lang)
1331 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1333 for ext in formats_preference:
1337 matches = list(filter(lambda f: f['ext'] == ext, formats))
1343 self.report_warning(
1344 'No subtitle format found matching "%s" for language %s, '
1345 'using %s' % (formats_query, lang, f['ext']))
# NOTE(review): extraction-garbled paste — original line numbers are fused
# onto each line; numbering gaps (1351, 1369-1370, 1402-1407, 1410, ...)
# mark elided lines (`return`s, `try:`/`else:` branches, initializations
# such as `success = True` and `downloaded = []`). Kept byte-for-byte.
1349 def process_info(self, info_dict):
1350 """Process a single resolved IE result."""
1352 assert info_dict.get('_type', 'video') == 'video'
# Enforce --max-downloads.
1354 max_downloads = self.params.get('max_downloads')
1355 if max_downloads is not None:
1356 if self._num_downloads >= int(max_downloads):
1357 raise MaxDownloadsReached()
# Keep the full title around, but cap 'title' for filename purposes.
1359 info_dict['fulltitle'] = info_dict['title']
1360 if len(info_dict['title']) > 200:
1361 info_dict['title'] = info_dict['title'][:197] + '...'
1363 if 'format' not in info_dict:
1364 info_dict['format'] = info_dict['ext']
# Match filters (--match-title etc.) may veto the download.
1366 reason = self._match_entry(info_dict, incomplete=False)
1367 if reason is not None:
1368 self.to_screen('[download] ' + reason)
1371 self._num_downloads += 1
1373 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# --force* options: print the requested fields to stdout.
1376 if self.params.get('forcetitle', False):
1377 self.to_stdout(info_dict['fulltitle'])
1378 if self.params.get('forceid', False):
1379 self.to_stdout(info_dict['id'])
1380 if self.params.get('forceurl', False):
1381 if info_dict.get('requested_formats') is not None:
1382 for f in info_dict['requested_formats']:
1383 self.to_stdout(f['url'] + f.get('play_path', ''))
1385 # For RTMP URLs, also include the playpath
1386 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1387 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1388 self.to_stdout(info_dict['thumbnail'])
1389 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1390 self.to_stdout(info_dict['description'])
1391 if self.params.get('forcefilename', False) and filename is not None:
1392 self.to_stdout(filename)
1393 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1394 self.to_stdout(formatSeconds(info_dict['duration']))
1395 if self.params.get('forceformat', False):
1396 self.to_stdout(info_dict['format'])
1397 if self.params.get('forcejson', False):
1398 self.to_stdout(json.dumps(info_dict))
1400 # Do nothing else if in simulate mode
1401 if self.params.get('simulate', False):
1404 if filename is None:
# Create the target directory if needed.
1408 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1409 if dn and not os.path.exists(dn):
1411 except (OSError, IOError) as err:
1412 self.report_error('unable to create directory ' + compat_str(err))
# Side files: description, annotations, subtitles, info.json, thumbnails.
1415 if self.params.get('writedescription', False):
1416 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1417 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1418 self.to_screen('[info] Video description is already present')
1419 elif info_dict.get('description') is None:
1420 self.report_warning('There\'s no description to write.')
1423 self.to_screen('[info] Writing video description to: ' + descfn)
1424 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1425 descfile.write(info_dict['description'])
1426 except (OSError, IOError):
1427 self.report_error('Cannot write description file ' + descfn)
1430 if self.params.get('writeannotations', False):
1431 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1432 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1433 self.to_screen('[info] Video annotations are already present')
1436 self.to_screen('[info] Writing video annotations to: ' + annofn)
1437 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1438 annofile.write(info_dict['annotations'])
1439 except (KeyError, TypeError):
1440 self.report_warning('There are no annotations to write.')
1441 except (OSError, IOError):
1442 self.report_error('Cannot write annotations file: ' + annofn)
1445 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1446 self.params.get('writeautomaticsub')])
1448 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1449 # subtitles download errors are already managed as troubles in relevant IE
1450 # that way it will silently go on when used with unsupporting IE
1451 subtitles = info_dict['requested_subtitles']
1452 ie = self.get_info_extractor(info_dict['extractor_key'])
1453 for sub_lang, sub_info in subtitles.items():
1454 sub_format = sub_info['ext']
1455 if sub_info.get('data') is not None:
1456 sub_data = sub_info['data']
1459 sub_data = ie._download_webpage(
1460 sub_info['url'], info_dict['id'], note=False)
1461 except ExtractorError as err:
1462 self.report_warning('Unable to download subtitle for "%s": %s' %
1463 (sub_lang, compat_str(err.cause)))
1466 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1467 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1468 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1470 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1471 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1472 subfile.write(sub_data)
1473 except (OSError, IOError):
1474 self.report_error('Cannot write subtitles file ' + sub_filename)
1477 if self.params.get('writeinfojson', False):
1478 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1479 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1480 self.to_screen('[info] Video description metadata is already present')
1482 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1484 write_json_file(self.filter_requested_info(info_dict), infofn)
1485 except (OSError, IOError):
1486 self.report_error('Cannot write metadata to JSON file ' + infofn)
1489 self._write_thumbnails(info_dict, filename)
# The actual download; `dl` wraps the suitable FileDownloader.
1491 if not self.params.get('skip_download', False):
1494 fd = get_suitable_downloader(info, self.params)(self, self.params)
1495 for ph in self._progress_hooks:
1496 fd.add_progress_hook(ph)
1497 if self.params.get('verbose'):
1498 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1499 return fd.download(name, info)
# Multi-format (video+audio) download with post-merge.
1501 if info_dict.get('requested_formats') is not None:
1504 merger = FFmpegMergerPP(self)
1505 if not merger.available:
1507 self.report_warning('You have requested multiple '
1508 'formats but ffmpeg or avconv are not installed.'
1509 ' The formats won\'t be merged.')
1511 postprocessors = [merger]
1513 def compatible_formats(formats):
1514 video, audio = formats
# NOTE(review): video_ext is assigned audio.get('ext') and vice versa —
# looks swapped. It is harmless as written because both values are only
# tested for joint membership in the same exts tuple below (line 1523),
# but verify against upstream before relying on these names.
1516 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1517 if video_ext and audio_ext:
1519 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1522 for exts in COMPATIBLE_EXTS:
1523 if video_ext in exts and audio_ext in exts:
1525 # TODO: Check acodec/vcodec
1528 filename_real_ext = os.path.splitext(filename)[1][1:]
1530 os.path.splitext(filename)[0]
1531 if filename_real_ext == info_dict['ext']
1533 requested_formats = info_dict['requested_formats']
1534 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1535 info_dict['ext'] = 'mkv'
1536 self.report_warning(
1537 'Requested formats are incompatible for merge and will be merged into mkv.')
1538 # Ensure filename always has a correct extension for successful merge
1539 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1540 if os.path.exists(encodeFilename(filename)):
1542 '[download] %s has already been downloaded and '
1543 'merged' % filename)
# Download each requested format to an 'f<format_id>'-suffixed file.
1545 for f in requested_formats:
1546 new_info = dict(info_dict)
1548 fname = self.prepare_filename(new_info)
1549 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1550 downloaded.append(fname)
1551 partial_success = dl(fname, new_info)
1552 success = success and partial_success
1553 info_dict['__postprocessors'] = postprocessors
1554 info_dict['__files_to_merge'] = downloaded
1556 # Just a single file
1557 success = dl(filename, info_dict)
1558 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1559 self.report_error('unable to download video data: %s' % str(err))
1561 except (OSError, IOError) as err:
1562 raise UnavailableVideoError(err)
1563 except (ContentTooShortError, ) as err:
1564 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# Post-download fixups (aspect ratio, DASH m4a container).
1569 fixup_policy = self.params.get('fixup')
1570 if fixup_policy is None:
1571 fixup_policy = 'detect_or_warn'
1573 stretched_ratio = info_dict.get('stretched_ratio')
1574 if stretched_ratio is not None and stretched_ratio != 1:
1575 if fixup_policy == 'warn':
1576 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1577 info_dict['id'], stretched_ratio))
1578 elif fixup_policy == 'detect_or_warn':
1579 stretched_pp = FFmpegFixupStretchedPP(self)
1580 if stretched_pp.available:
1581 info_dict.setdefault('__postprocessors', [])
1582 info_dict['__postprocessors'].append(stretched_pp)
1584 self.report_warning(
1585 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1586 info_dict['id'], stretched_ratio))
1588 assert fixup_policy in ('ignore', 'never')
1590 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1591 if fixup_policy == 'warn':
1592 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1594 elif fixup_policy == 'detect_or_warn':
1595 fixup_pp = FFmpegFixupM4aPP(self)
1596 if fixup_pp.available:
1597 info_dict.setdefault('__postprocessors', [])
1598 info_dict['__postprocessors'].append(fixup_pp)
1600 self.report_warning(
1601 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1604 assert fixup_policy in ('ignore', 'never')
1607 self.post_process(filename, info_dict)
1608 except (PostProcessingError) as err:
1609 self.report_error('postprocessing: %s' % str(err))
1611 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns the accumulated download return code. Raises SameFileError
    when multiple URLs would be written to a single fixed filename.

    Fix: the pasted span had dangling `except` clauses — the `try:`,
    the re-`raise` after MaxDownloadsReached and the `else:` branch were
    elided; restored together with proper indentation.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # A template without '%' expands to the same name for every URL, so
    # downloading more than one video would overwrite the first.
    if (len(url_list) > 1 and
            '%' not in outtmpl and
            self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using an info dict previously saved as JSON (.info.json).

    On DownloadError, falls back to re-extracting from 'webpage_url' when
    the stored info has one; otherwise the error propagates.

    Fix: the pasted span had a dangling `except DownloadError` — the
    `try:` and the `else: raise` fallback were elided; restored.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(json.loads('\n'.join(f)))
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def filter_requested_info(info_dict):
    """Return a copy of info_dict without the per-download bookkeeping keys
    ('requested_formats', 'requested_subtitles'), e.g. before JSON export.

    Fix: the pasted span had a dangling generator expression — the
    `return dict(` line was elided; restored.
    """
    # NOTE(review): upstream declares this as a @staticmethod on the class;
    # the decorator line was lost in this paste — confirm when merging.
    return dict(
        (k, v) for k, v in info_dict.items()
        if k not in ['requested_formats', 'requested_subtitles'])
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Fix: in the pasted span `pps_chain` was extended without ever being
    initialized and both `except` clauses dangled without their `try:`
    lines (elided in the paste); restored.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    pps_chain = []
    # Per-download postprocessors (e.g. the merger) run before global ones.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        files_to_delete = []
        try:
            files_to_delete, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if files_to_delete and not self.params.get('keepvideo', False):
            for old_filename in files_to_delete:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                try:
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded original file')
1682 def _make_archive_id(self, info_dict):
1683 # Future-proof against any change in case
1684 # and backwards compatibility with prior versions
1685 extractor = info_dict.get('extractor_key')
1686 if extractor is None:
1687 if 'id' in info_dict:
1688 extractor = info_dict.get('ie_key') # key in a playlist
1689 if extractor is None:
1690 return None # Incomplete video information
1691 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if this video's archive id is already recorded in the
    --download-archive file; False when no archive is configured, the info
    is incomplete, or the id is absent.

    Fix: the pasted span elided the guard returns, the `try:`, the match
    `return True` and the final `return False`; restored (the surviving
    `except IOError ... errno.ENOENT` and the 'Incomplete video
    information' comment pin the intended structure).
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append this video's archive id to the --download-archive file (no-op
    when no archive file is configured).

    Fix: the pasted span elided the `if fn is None: return` guard and the
    `assert vid_id` between the surviving lines; restored.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict:
    'audio only', an explicit 'resolution' value, 'WxH', 'Hp', '?xW',
    or `default` when nothing is known.

    Fix: the pasted span elided `return 'audio only'`, the `else:`
    branches and the final `return res`; restored.
    """
    # NOTE(review): upstream declares this @staticmethod on the class; the
    # decorator line was lost in this paste — confirm when merging.
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '?x%d' % format['width']
    else:
        res = default
    return res
# Builds the free-text 'note' column shown by list_formats for one format
# dict (bitrates, codecs, fps, sample rate, filesize).
# NOTE(review): extraction-garbled paste — fused line numbers; gaps (1739,
# 1747-1748, 1752-1753, 1756, 1758, 1764-1765, 1767-1768, 1771-1773,
# 1779-1780, 1783-1784, 1786-1787) mark elided lines, including the
# `res = ''` initialization and the final `return res`. Kept byte-for-byte.
1738 def _format_note(self, fdict):
1740 if fdict.get('ext') in ['f4f', 'f4m']:
1741 res += '(unsupported) '
1742 if fdict.get('format_note') is not None:
1743 res += fdict['format_note'] + ' '
1744 if fdict.get('tbr') is not None:
1745 res += '%4dk ' % fdict['tbr']
1746 if fdict.get('container') is not None:
1749 res += '%s container' % fdict['container']
# Video codec and video bitrate.
1750 if (fdict.get('vcodec') is not None and
1751 fdict.get('vcodec') != 'none'):
1754 res += fdict['vcodec']
1755 if fdict.get('vbr') is not None:
1757 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1759 if fdict.get('vbr') is not None:
1760 res += '%4dk' % fdict['vbr']
1761 if fdict.get('fps') is not None:
1762 res += ', %sfps' % fdict['fps']
# Audio codec, audio bitrate, sample rate.
1763 if fdict.get('acodec') is not None:
1766 if fdict['acodec'] == 'none':
1769 res += '%-5s' % fdict['acodec']
1770 elif fdict.get('abr') is not None:
1774 if fdict.get('abr') is not None:
1775 res += '@%3dk' % fdict['abr']
1776 if fdict.get('asr') is not None:
1777 res += ' (%5dHz)' % fdict['asr']
# Exact or approximate filesize.
1778 if fdict.get('filesize') is not None:
1781 res += format_bytes(fdict['filesize'])
1782 elif fdict.get('filesize_approx') is not None:
1785 res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of all available formats for the video.

    Fix: the pasted span elided the `table = [` opener and the
    `for f in formats` line of the comprehension (the surviving element
    row and `if` clause pin the structure); restored.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    if len(formats) > 1:
        # Formats are ordered worst-to-best (see the 'best' selector using
        # index -1), so the last row is tagged '(best)'.
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
    """Print a table of the video's thumbnails, falling back to the single
    'thumbnail' field, or a notice when none are present.

    Fix: the pasted span elided the branching lines (`if not thumbnails:`,
    `if tn_url:`, the `else:` with its `self.to_screen(` opener and the
    `return`); restored — the surviving fallback-dict and 'No thumbnails
    present' message pin the structure.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        tn_url = info_dict.get('thumbnail')
        if tn_url:
            thumbnails = [{'id': '0', 'url': tn_url}]
        else:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of available subtitle languages and their formats,
    or a 'has no <name>' notice when none exist.

    Fix: the pasted span elided the `if not subtitles:` guard, its
    `return`, and the `self.to_screen(` opener of the header message;
    restored.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    self.to_screen(render_table(
        ['Language', 'formats'],
        [[lang, ', '.join(f['ext'] for f in reversed(formats))]
            for lang, formats in subtitles.items()]))
def urlopen(self, req):
    """ Start an HTTP download """
    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    #
    # Fix: the pasted span elided the `if req_is_string:`/`else:` branch
    # lines and the `req = req_type(` opener; restored — the surviving
    # keyword-argument lines pin the rebuilt-request structure.
    req_is_string = isinstance(req, compat_basestring)
    url = req if req_is_string else req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        if req_is_string:
            req = url_escaped
        else:
            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
            req = req_type(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
# Writes the '[debug] ...' header (versions, encodings, proxies, optional
# call-home) when --verbose is set.
# NOTE(review): extraction-garbled paste — fused line numbers; gaps (1857-
# 1858, 1863, 1866, 1870, 1873, 1875, 1884-1888, 1891, 1895, 1897-1900,
# 1902-1903, 1908, 1918-1919) mark elided lines (the early return, the
# `try:` around the git probe, the exe_str fallback, etc.). Kept
# byte-for-byte; comments only added.
1855 def print_debug_header(self):
1856 if not self.params.get('verbose'):
# On some broken Python builds str is not the expected text type.
1859 if type('') is not compat_str:
1860 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1861 self.report_warning(
1862 'Your Python is broken! Update to a newer and supported version')
1864 stdout_encoding = getattr(
1865 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1867 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1868 locale.getpreferredencoding(),
1869 sys.getfilesystemencoding(),
1871 self.get_encoding()))
1872 write_string(encoding_str, encoding=None)
1874 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git HEAD when running from a checkout.
1876 sp = subprocess.Popen(
1877 ['git', 'rev-parse', '--short', 'HEAD'],
1878 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1879 cwd=os.path.dirname(os.path.abspath(__file__)))
1880 out, err = sp.communicate()
1881 out = out.decode().strip()
1882 if re.match('[0-9a-f]+', out):
1883 self._write_string('[debug] Git HEAD: ' + out + '\n')
1889 self._write_string('[debug] Python version %s - %s\n' % (
1890 platform.python_version(), platform_name()))
# External helper versions (ffmpeg/avconv family plus rtmpdump).
1892 exe_versions = FFmpegPostProcessor.get_versions(self)
1893 exe_versions['rtmpdump'] = rtmpdump_version()
1894 exe_str = ', '.join(
1896 for exe, v in sorted(exe_versions.items())
1901 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the effective proxy map from the opener's handlers.
1904 for handler in self._opener.handlers:
1905 if hasattr(handler, 'proxies'):
1906 proxy_map.update(handler.proxies)
1907 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# Opt-in call home: report public IP and check for a newer release.
1909 if self.params.get('call_home', False):
1910 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1911 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1912 latest_version = self.urlopen(
1913 'https://yt-dl.org/latest/version').read().decode('utf-8')
1914 if version_tuple(latest_version) > version_tuple(__version__):
1915 self.report_warning(
1916 'You are using an outdated version (newest version: %s)! '
1917 'See https://yt-dl.org/update if you need help updating.' %
# Builds self._opener (cookie jar, proxy handling, HTTPS handler, custom
# YoutubeDLHandler) and self._socket_timeout from self.params.
# NOTE(review): extraction-garbled paste — fused line numbers; gaps (1923,
# 1926, 1929, 1931, 1934, 1936, 1939-1940, 1942, 1948, 1954) mark elided
# lines (`else:` branches, the empty-proxy case, blank lines). Kept
# byte-for-byte; comments only added.
1920 def _setup_opener(self):
1921 timeout_val = self.params.get('socket_timeout')
1922 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1924 opts_cookiefile = self.params.get('cookiefile')
1925 opts_proxy = self.params.get('proxy')
# Cookie jar: in-memory by default, Mozilla-format file when --cookies set.
1927 if opts_cookiefile is None:
1928 self.cookiejar = compat_cookiejar.CookieJar()
1930 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1932 if os.access(opts_cookiefile, os.R_OK):
1933 self.cookiejar.load()
1935 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# Proxy selection: explicit --proxy (same for http/https), else environment.
1937 if opts_proxy is not None:
1938 if opts_proxy == '':
1941 proxies = {'http': opts_proxy, 'https': opts_proxy}
1943 proxies = compat_urllib_request.getproxies()
1944 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1945 if 'http' in proxies and 'https' not in proxies:
1946 proxies['https'] = proxies['http']
1947 proxy_handler = PerRequestProxyHandler(proxies)
1949 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1950 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1951 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1952 opener = compat_urllib_request.build_opener(
1953 proxy_handler, https_handler, cookie_processor, ydlh)
1955 # Delete the default user-agent header, which would otherwise apply in
1956 # cases where our custom HTTP handler doesn't come into play
1957 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1958 opener.addheaders = []
1959 self._opener = opener
def encode(self, s):
    """Encode text to bytes using the configured output encoding; bytes
    pass through unchanged. UnicodeEncodeError is re-raised with a hint
    appended to its reason.

    Fix: the pasted span had a dangling `except UnicodeEncodeError` — the
    `try:` and the trailing `raise` were elided; restored.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the output encoding: the 'encoding' param if set, otherwise
    the locale's preferred encoding.

    Fix: the pasted span computed `encoding` but the `return encoding`
    line was elided; restored.
    """
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding
# Downloads the video's thumbnail(s) next to the output file: the last
# (preferred) one for --write-thumbnail, all of them for
# --write-all-thumbnails.
# NOTE(review): extraction-garbled paste — fused line numbers; gaps (1980,
# 1984-1987, 1989-1990, 1996, 2000, 2003) mark elided lines (the `else:
# return` branches, the early 'no thumbnails' return, the `try:`). Kept
# byte-for-byte; comments only added.
1977 def _write_thumbnails(self, info_dict, filename):
1978 if self.params.get('writethumbnail', False):
1979 thumbnails = info_dict.get('thumbnails')
# Thumbnails are ordered with the preferred one last (see the sort in
# process_video_result), hence [-1].
1981 thumbnails = [thumbnails[-1]]
1982 elif self.params.get('write_all_thumbnails', False):
1983 thumbnails = info_dict.get('thumbnails')
1988 # No thumbnails present, so return immediately
1991 for t in thumbnails:
1992 thumb_ext = determine_ext(t['url'], 'jpg')
# Only disambiguate filenames/messages when writing several thumbnails.
1993 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1994 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1995 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1997 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1998 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1999 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2001 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2002 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2004 uf = self.urlopen(t['url'])
2005 with open(thumb_filename, 'wb') as thumbf:
2006 shutil.copyfileobj(uf, thumbf)
2007 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2008 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
# Network failures while fetching a thumbnail are non-fatal.
2009 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2010 self.report_warning('Unable to download thumbnail "%s": %s' %
2011 (t['url'], compat_str(err)))