2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_get_terminal_size,
38 compat_tokenize_tokenize,
40 compat_urllib_request,
60 PerRequestProxyHandler,
70 UnavailableVideoError,
81 from .cache import Cache
82 from .extractor import get_info_extractor, gen_extractors
83 from .downloader import get_suitable_downloader
84 from .downloader.rtmp import rtmpdump_version
85 from .postprocessor import (
87 FFmpegFixupStretchedPP,
92 from .version import __version__
95 class YoutubeDL(object):
98 YoutubeDL objects are the ones responsible for downloading the
99 actual video file and writing it to disk if the user has requested
100 it, among some other tasks. In most cases there should be one per
101 program. As, given a video URL, the downloader doesn't know how to
102 extract all the needed information, task that InfoExtractors do, it
103 has to pass the URL to one of them.
105 For this, YoutubeDL objects have a method that allows
106 InfoExtractors to be registered in a given order. When it is passed
107 a URL, the YoutubeDL object hands it to the first InfoExtractor it
108 finds that reports being able to handle it. The InfoExtractor extracts
109 all the information about the video or videos the URL refers to, and
110 YoutubeDL processes the extracted information, possibly using a File
111 Downloader to download the video.
113 YoutubeDL objects accept a lot of parameters. In order not to saturate
114 the object constructor with arguments, it receives a dictionary of
115 options instead. These options are available through the params
116 attribute for the InfoExtractors to use. The YoutubeDL also
117 registers itself as the downloader in charge for the InfoExtractors
118 that are added to it, so this is a "mutual registration".
122 username: Username for authentication purposes.
123 password: Password for authentication purposes.
124 videopassword: Password for accessing a video.
125 usenetrc: Use netrc for authentication instead.
126 verbose: Print additional info to stdout.
127 quiet: Do not print messages to stdout.
128 no_warnings: Do not print out anything for warnings.
129 forceurl: Force printing final URL.
130 forcetitle: Force printing title.
131 forceid: Force printing ID.
132 forcethumbnail: Force printing thumbnail URL.
133 forcedescription: Force printing description.
134 forcefilename: Force printing final filename.
135 forceduration: Force printing duration.
136 forcejson: Force printing info_dict as JSON.
137 dump_single_json: Force printing the info_dict of the whole playlist
138 (or video) as a single JSON line.
139 simulate: Do not download the video files.
140 format: Video format code. See options.py for more information.
141 outtmpl: Template for output names.
142 restrictfilenames: Do not allow "&" and spaces in file names
143 ignoreerrors: Do not stop on download errors.
144 force_generic_extractor: Force downloader to use the generic extractor
145 nooverwrites: Prevent overwriting files.
146 playliststart: Playlist item to start at.
147 playlistend: Playlist item to end at.
148 playlist_items: Specific indices of playlist to download.
149 playlistreverse: Download playlist items in reverse order.
150 matchtitle: Download only matching titles.
151 rejecttitle: Reject downloads for matching titles.
152 logger: Log messages to a logging.Logger instance.
153 logtostderr: Log messages to stderr instead of stdout.
154 writedescription: Write the video description to a .description file
155 writeinfojson: Write the video metadata to a .info.json file
156 writeannotations: Write the video annotations to a .annotations.xml file
157 writethumbnail: Write the thumbnail image to a file
158 write_all_thumbnails: Write all thumbnail formats to files
159 writesubtitles: Write the video subtitles to a file
160 writeautomaticsub: Write the automatic subtitles to a file
161 allsubtitles: Downloads all the subtitles of the video
162 (requires writesubtitles or writeautomaticsub)
163 listsubtitles: Lists all available subtitles for the video
164 subtitlesformat: The format code for subtitles
165 subtitleslangs: List of languages of the subtitles to download
166 keepvideo: Keep the video file after post-processing
167 daterange: A DateRange object, download only if the upload_date is in the range.
168 skip_download: Skip the actual download of the video file
169 cachedir: Location of the cache files in the filesystem.
170 False to disable filesystem cache.
171 noplaylist: Download single video instead of a playlist if in doubt.
172 age_limit: An integer representing the user's age in years.
173 Unsuitable videos for the given age are skipped.
174 min_views: An integer representing the minimum view count the video
175 must have in order to not be skipped.
176 Videos without view count information are always
177 downloaded. None for no limit.
178 max_views: An integer representing the maximum view count.
179 Videos that are more popular than that are not
181 Videos without view count information are always
182 downloaded. None for no limit.
183 download_archive: File name of a file where all downloads are recorded.
184 Videos already present in the file are not downloaded
186 cookiefile: File name where cookies should be read from and dumped to.
187 nocheckcertificate:Do not verify SSL certificates
188 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
189 At the moment, this is only supported by YouTube.
190 proxy: URL of the proxy server to use
191 cn_verification_proxy: URL of the proxy to use for IP address verification
192 on Chinese sites. (Experimental)
193 socket_timeout: Time to wait for unresponsive hosts, in seconds
194 bidi_workaround: Work around buggy terminals without bidirectional text
195 support, using fribidi
196 debug_printtraffic:Print out sent and received HTTP traffic
197 include_ads: Download ads as well
198 default_search: Prepend this string if an input url is not valid.
199 'auto' for elaborate guessing
200 encoding: Use this encoding instead of the system-specified.
201 extract_flat: Do not resolve URLs, return the immediate result.
202 Pass in 'in_playlist' to only show this behavior for
204 postprocessors: A list of dictionaries, each with an entry
205 * key: The name of the postprocessor. See
206 youtube_dl/postprocessor/__init__.py for a list.
207 as well as any further keyword arguments for the
209 progress_hooks: A list of functions that get called on download
210 progress, with a dictionary with the entries
211 * status: One of "downloading", "error", or "finished".
212 Check this first and ignore unknown values.
214 If status is one of "downloading", or "finished", the
215 following properties may also be present:
216 * filename: The final filename (always present)
217 * tmpfilename: The filename we're currently writing to
218 * downloaded_bytes: Bytes on disk
219 * total_bytes: Size of the whole file, None if unknown
220 * total_bytes_estimate: Guess of the eventual file size,
222 * elapsed: The number of seconds since download started.
223 * eta: The estimated time in seconds, None if unknown
224 * speed: The download speed in bytes/second, None if
226 * fragment_index: The counter of the currently
227 downloaded video fragment.
228 * fragment_count: The number of fragments (= individual
229 files that will be merged)
231 Progress hooks are guaranteed to be called at least once
232 (with status "finished") if the download is successful.
233 merge_output_format: Extension to use when merging formats.
234 fixup: Automatically correct known faults of the file.
236 - "never": do nothing
237 - "warn": only emit a warning
238 - "detect_or_warn": check whether we can do anything
239 about it, warn otherwise (default)
240 source_address: (Experimental) Client-side IP address to bind to.
241 call_home: Boolean, true iff we are allowed to contact the
242 youtube-dl servers for debugging.
243 sleep_interval: Number of seconds to sleep before each download.
244 listformats: Print an overview of available video formats and exit.
245 list_thumbnails: Print a table of all thumbnails and exit.
246 match_filter: A function that gets called with the info_dict of
248 If it returns a message, the video is ignored.
249 If it returns None, the video is downloaded.
250 match_filter_func in utils.py is one example for this.
251 no_color: Do not emit color codes in output.
253 The following options determine which downloader is picked:
254 external_downloader: Executable of the external downloader to call.
255 None or unset for standard (built-in) downloader.
256 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
258 The following parameters are not used by YoutubeDL itself, they are used by
259 the downloader (see youtube_dl/downloader/common.py):
260 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
261 noresizebuffer, retries, continuedl, noprogress, consoletitle,
262 xattr_set_filesize, external_downloader_args.
264 The following options are used by the post processors:
265 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
266 otherwise prefer avconv.
267 postprocessor_args: A list of additional command-line arguments for the
274 _download_retcode = None
275 _num_downloads = None
278 def __init__(self, params=None, auto_init=True):
279 """Create a YoutubeDL object with the given options."""
283 self._ies_instances = {}
285 self._progress_hooks = []
286 self._download_retcode = 0
287 self._num_downloads = 0
288 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
289 self._err_file = sys.stderr
291 self.cache = Cache(self)
293 if params.get('bidi_workaround', False):
296 master, slave = pty.openpty()
297 width = compat_get_terminal_size().columns
301 width_args = ['-w', str(width)]
303 stdin=subprocess.PIPE,
305 stderr=self._err_file)
307 self._output_process = subprocess.Popen(
308 ['bidiv'] + width_args, **sp_kwargs
311 self._output_process = subprocess.Popen(
312 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
313 self._output_channel = os.fdopen(master, 'rb')
314 except OSError as ose:
316 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
320 if (sys.version_info >= (3,) and sys.platform != 'win32' and
321 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
322 not params.get('restrictfilenames', False)):
323 # On Python 3, the Unicode filesystem API will throw errors (#1474)
325 'Assuming --restrict-filenames since file system encoding '
326 'cannot encode all characters. '
327 'Set the LC_ALL environment variable to fix this.')
328 self.params['restrictfilenames'] = True
330 if isinstance(params.get('outtmpl'), bytes):
332 'Parameter outtmpl is bytes, but should be a unicode string. '
333 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
338 self.print_debug_header()
339 self.add_default_info_extractors()
341 for pp_def_raw in self.params.get('postprocessors', []):
342 pp_class = get_postprocessor(pp_def_raw['key'])
343 pp_def = dict(pp_def_raw)
345 pp = pp_class(self, **compat_kwargs(pp_def))
346 self.add_post_processor(pp)
348 for ph in self.params.get('progress_hooks', []):
349 self.add_progress_hook(ph)
351 def warn_if_short_id(self, argv):
352 # short YouTube ID starting with dash?
354 i for i, a in enumerate(argv)
355 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
359 [a for i, a in enumerate(argv) if i not in idxs] +
360 ['--'] + [argv[i] for i in idxs]
363 'Long argument string detected. '
364 'Use -- to separate parameters and URLs, like this:\n%s\n' %
365 args_to_str(correct_argv))
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is also indexed by its ie_key() in _ies_instances so
    that get_info_extractor() can find it again, and this YoutubeDL
    object is registered as the extractor's downloader.
    """
    # NOTE(review): this append was missing from the extract; the list
    # name follows the "_ies list" mentioned by get_info_extractor().
    self._ies.append(ie)
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key.

    An already registered instance is returned when there is one;
    otherwise a new instance is created, added to the extractor list
    through add_info_extractor() and returned.
    """
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        # The bare name is the module-level factory imported from
        # .extractor (it returns the IE class), not this method.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain.

    This YoutubeDL object is also registered as the post processor's
    downloader.
    """
    # NOTE(review): the append was missing from the extract and the chain
    # attribute is not visible here; `_pps` is assumed -- confirm against
    # the attribute initialised in __init__.
    self._pps.append(pp)
    pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader)

    ph is appended to the list of registered hooks; registration order
    is preserved.
    """
    self._progress_hooks.append(ph)
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one is running.

    When no output channel has been set up (the 'bidi_workaround' option
    is off or the helper could not be started), message is returned
    unchanged.  Otherwise the text is written to the helper's stdin and
    as many lines as were sent are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    line_count = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    # Drop the newline that was appended before sending.
    return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode.

    Thin wrapper around to_stdout() with check_quiet=True; skip_eol
    suppresses the trailing newline.
    """
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' param if set."""
    write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file (stdout, or stderr with 'logtostderr').

    If a 'logger' param is set, the message is sent to its debug()
    method instead and nothing is written.  With check_quiet=True the
    message is dropped when the 'quiet' param is set.  skip_eol
    suppresses the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        # A conditional expression (rather than indexing a two-item list
        # with skip_eol) also accepts truthy values other than True.
        terminator = '' if skip_eol else '\n'
        output = message + terminator

        self._write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr.

    If a 'logger' param is set, the message is sent to its error()
    method instead.  Otherwise it is passed through the bidi workaround
    and written, followed by a newline, to the error file.
    """
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
    else:
        message = self._bidi_workaround(message)
        output = message + '\n'
        self._write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the console title to message if the 'consoletitle' param is set.

    On Windows (os.name == 'nt') with a console window the title is set
    through SetConsoleTitleW; otherwise, when TERM is present in the
    environment, a title escape sequence is written to the screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title if the 'consoletitle' param is set.

    Nothing is written unless TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the saved terminal title if the 'consoletitle' param is set.

    Nothing is written unless TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
467 self.save_console_title()
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The cookie jar is only saved when a 'cookiefile' param is configured.
    Returns None, so an exception raised inside the with-block propagates.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.

    With the 'verbose' param set, tb is printed to stderr as well; when
    it is not given it is built from the exception being handled (plus
    the exc_info that exception carries, if any) or, outside of an
    except block, from the current call stack.

    Raises DownloadError unless the 'ignoreerrors' param is set, in which
    case the download return code is set to 1 instead.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        # Prefer the exc_info carried by the exception being handled over
        # the exc_info of that exception itself.
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    """
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    If a 'logger' param is set, the message is sent to its warning()
    method instead.  Nothing is printed when the 'no_warnings' param is
    set; colouring is skipped with 'no_color' and on Windows.
    """
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
    else:
        if self.params.get('no_warnings'):
            return
        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    """
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.

    Colouring is skipped when the 'no_color' param is set and on Windows.
    tb is passed on to trouble() unchanged.
    """
    if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;31mERROR:\033[0m'
    else:
        _msg_header = 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name when printing the
    name raises UnicodeEncodeError.
    """
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
542 def prepare_filename(self, info_dict):
543 """Generate the output filename."""
545 template_dict = dict(info_dict)
547 template_dict['epoch'] = int(time.time())
548 autonumber_size = self.params.get('autonumber_size')
549 if autonumber_size is None:
551 autonumber_templ = '%0' + str(autonumber_size) + 'd'
552 template_dict['autonumber'] = autonumber_templ % self._num_downloads
553 if template_dict.get('playlist_index') is not None:
554 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
555 if template_dict.get('resolution') is None:
556 if template_dict.get('width') and template_dict.get('height'):
557 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
558 elif template_dict.get('height'):
559 template_dict['resolution'] = '%sp' % template_dict['height']
560 elif template_dict.get('width'):
561 template_dict['resolution'] = '?x%d' % template_dict['width']
563 sanitize = lambda k, v: sanitize_filename(
565 restricted=self.params.get('restrictfilenames'),
567 template_dict = dict((k, sanitize(k, v))
568 for k, v in template_dict.items()
570 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
572 outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
573 tmpl = compat_expanduser(outtmpl)
574 filename = tmpl % template_dict
575 # Temporary fix for #4787
576 # 'Treat' all problem characters by passing filename through preferredencoding
577 # to workaround encoding issues with subprocess on python2 @ Windows
578 if sys.version_info < (3, 0) and sys.platform == 'win32':
579 filename = encodeFilename(filename, True).decode(preferredencoding())
581 except ValueError as err:
582 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
585 def _match_entry(self, info_dict, incomplete):
586 """ Returns None iff the file should be downloaded """
588 video_title = info_dict.get('title', info_dict.get('id', 'video'))
589 if 'title' in info_dict:
590 # This can happen when we're just evaluating the playlist
591 title = info_dict['title']
592 matchtitle = self.params.get('matchtitle', False)
594 if not re.search(matchtitle, title, re.IGNORECASE):
595 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
596 rejecttitle = self.params.get('rejecttitle', False)
598 if re.search(rejecttitle, title, re.IGNORECASE):
599 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
600 date = info_dict.get('upload_date', None)
602 dateRange = self.params.get('daterange', DateRange())
603 if date not in dateRange:
604 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
605 view_count = info_dict.get('view_count', None)
606 if view_count is not None:
607 min_views = self.params.get('min_views')
608 if min_views is not None and view_count < min_views:
609 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
610 max_views = self.params.get('max_views')
611 if max_views is not None and view_count > max_views:
612 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
613 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
614 return 'Skipping "%s" because it is age restricted' % video_title
615 if self.in_download_archive(info_dict):
616 return '%s has already been recorded in archive' % video_title
619 match_filter = self.params.get('match_filter')
620 if match_filter is not None:
621 ret = match_filter(info_dict)
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing

    info_dict is modified in place; keys it already contains keep their
    current value, even when that value is None.
    '''
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
633 def extract_info(self, url, download=True, ie_key=None, extra_info={},
634 process=True, force_generic_extractor=False):
636 Returns a list with a dictionary for each video we find.
637 If 'download', also downloads the videos.
638 extra_info is a dict containing the extra values to add to each result
641 if not ie_key and force_generic_extractor:
645 ies = [self.get_info_extractor(ie_key)]
650 if not ie.suitable(url):
654 self.report_warning('The program functionality for this site has been marked as broken, '
655 'and will probably not work.')
658 ie_result = ie.extract(url)
659 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
661 if isinstance(ie_result, list):
662 # Backwards compatibility: old IE result format
664 '_type': 'compat_list',
665 'entries': ie_result,
667 self.add_default_extra_info(ie_result, ie, url)
669 return self.process_ie_result(ie_result, download, extra_info)
672 except ExtractorError as de: # An error we somewhat expected
673 self.report_error(compat_str(de), de.format_traceback())
675 except MaxDownloadsReached:
677 except Exception as e:
678 if self.params.get('ignoreerrors', False):
679 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
684 self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and page-URL fields that ie_result lacks.

    Values already present in ie_result are kept (see add_extra_info).
    """
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        # NOTE(review): this entry was missing from the extract; it mirrors
        # the key set used for playlist entries in process_ie_result.
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
694 def process_ie_result(self, ie_result, download=True, extra_info={}):
696 Take the result of the ie(may be modified) and resolve all unresolved
697 references (URLs, playlist items).
699 It will also download the videos if 'download'.
700 Returns the resolved ie_result.
703 result_type = ie_result.get('_type', 'video')
705 if result_type in ('url', 'url_transparent'):
706 extract_flat = self.params.get('extract_flat', False)
707 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
708 extract_flat is True):
709 if self.params.get('forcejson', False):
710 self.to_stdout(json.dumps(ie_result))
713 if result_type == 'video':
714 self.add_extra_info(ie_result, extra_info)
715 return self.process_video_result(ie_result, download=download)
716 elif result_type == 'url':
717 # We have to add extra_info to the results because it may be
718 # contained in a playlist
719 return self.extract_info(ie_result['url'],
721 ie_key=ie_result.get('ie_key'),
722 extra_info=extra_info)
723 elif result_type == 'url_transparent':
724 # Use the information from the embedding page
725 info = self.extract_info(
726 ie_result['url'], ie_key=ie_result.get('ie_key'),
727 extra_info=extra_info, download=False, process=False)
729 force_properties = dict(
730 (k, v) for k, v in ie_result.items() if v is not None)
731 for f in ('_type', 'url'):
732 if f in force_properties:
733 del force_properties[f]
734 new_result = info.copy()
735 new_result.update(force_properties)
737 assert new_result.get('_type') != 'url_transparent'
739 return self.process_ie_result(
740 new_result, download=download, extra_info=extra_info)
741 elif result_type == 'playlist' or result_type == 'multi_video':
742 # We process each entry in the playlist
743 playlist = ie_result.get('title', None) or ie_result.get('id', None)
744 self.to_screen('[download] Downloading playlist: %s' % playlist)
746 playlist_results = []
748 playliststart = self.params.get('playliststart', 1) - 1
749 playlistend = self.params.get('playlistend', None)
750 # For backwards compatibility, interpret -1 as whole list
751 if playlistend == -1:
754 playlistitems_str = self.params.get('playlist_items', None)
756 if playlistitems_str is not None:
757 def iter_playlistitems(format):
758 for string_segment in format.split(','):
759 if '-' in string_segment:
760 start, end = string_segment.split('-')
761 for item in range(int(start), int(end) + 1):
764 yield int(string_segment)
765 playlistitems = iter_playlistitems(playlistitems_str)
767 ie_entries = ie_result['entries']
768 if isinstance(ie_entries, list):
769 n_all_entries = len(ie_entries)
772 ie_entries[i - 1] for i in playlistitems
773 if -n_all_entries <= i - 1 < n_all_entries]
775 entries = ie_entries[playliststart:playlistend]
776 n_entries = len(entries)
778 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
779 (ie_result['extractor'], playlist, n_all_entries, n_entries))
780 elif isinstance(ie_entries, PagedList):
783 for item in playlistitems:
784 entries.extend(ie_entries.getslice(
788 entries = ie_entries.getslice(
789 playliststart, playlistend)
790 n_entries = len(entries)
792 "[%s] playlist %s: Downloading %d videos" %
793 (ie_result['extractor'], playlist, n_entries))
796 entry_list = list(ie_entries)
797 entries = [entry_list[i - 1] for i in playlistitems]
799 entries = list(itertools.islice(
800 ie_entries, playliststart, playlistend))
801 n_entries = len(entries)
803 "[%s] playlist %s: Downloading %d videos" %
804 (ie_result['extractor'], playlist, n_entries))
806 if self.params.get('playlistreverse', False):
807 entries = entries[::-1]
809 for i, entry in enumerate(entries, 1):
810 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
812 'n_entries': n_entries,
813 'playlist': playlist,
814 'playlist_id': ie_result.get('id'),
815 'playlist_title': ie_result.get('title'),
816 'playlist_index': i + playliststart,
817 'extractor': ie_result['extractor'],
818 'webpage_url': ie_result['webpage_url'],
819 'webpage_url_basename': url_basename(ie_result['webpage_url']),
820 'extractor_key': ie_result['extractor_key'],
823 reason = self._match_entry(entry, incomplete=True)
824 if reason is not None:
825 self.to_screen('[download] ' + reason)
828 entry_result = self.process_ie_result(entry,
831 playlist_results.append(entry_result)
832 ie_result['entries'] = playlist_results
834 elif result_type == 'compat_list':
836 'Extractor %s returned a compat_list result. '
837 'It needs to be updated.' % ie_result.get('extractor'))
843 'extractor': ie_result['extractor'],
844 'webpage_url': ie_result['webpage_url'],
845 'webpage_url_basename': url_basename(ie_result['webpage_url']),
846 'extractor_key': ie_result['extractor_key'],
850 ie_result['entries'] = [
851 self.process_ie_result(_fixup(r), download, extra_info)
852 for r in ie_result['entries']
856 raise Exception('Invalid result type: %s' % result_type)
858 def _build_format_filter(self, filter_spec):
859 " Returns a function to filter the formats according to the filter_spec "
869 operator_rex = re.compile(r'''(?x)\s*
870 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
871 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
872 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
874 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
875 m = operator_rex.search(filter_spec)
878 comparison_value = int(m.group('value'))
880 comparison_value = parse_filesize(m.group('value'))
881 if comparison_value is None:
882 comparison_value = parse_filesize(m.group('value') + 'B')
883 if comparison_value is None:
885 'Invalid value %r in format specification %r' % (
886 m.group('value'), filter_spec))
887 op = OPERATORS[m.group('op')]
894 str_operator_rex = re.compile(r'''(?x)
895 \s*(?P<key>ext|acodec|vcodec|container|protocol)
896 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
897 \s*(?P<value>[a-zA-Z0-9_-]+)
899 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
900 m = str_operator_rex.search(filter_spec)
902 comparison_value = m.group('value')
903 op = STR_OPERATORS[m.group('op')]
906 raise ValueError('Invalid filter specification %r' % filter_spec)
909 actual_value = f.get(m.group('key'))
910 if actual_value is None:
911 return m.group('none_inclusive')
912 return op(actual_value, comparison_value)
915 def build_format_selector(self, format_spec):
916 def syntax_error(note, start):
918 'Invalid format specification: '
919 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
920 return SyntaxError(message)
922 PICKFIRST = 'PICKFIRST'
926 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
928 def _parse_filter(tokens):
930 for type, string, start, _, _ in tokens:
931 if type == tokenize.OP and string == ']':
932 return ''.join(filter_parts)
934 filter_parts.append(string)
936 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
938 current_selector = None
939 for type, string, start, _, _ in tokens:
940 # ENCODING is only defined in python 3.x
941 if type == getattr(tokenize, 'ENCODING', None):
943 elif type in [tokenize.NAME, tokenize.NUMBER]:
944 current_selector = FormatSelector(SINGLE, string, [])
945 elif type == tokenize.OP:
948 # ')' will be handled by the parentheses group
949 tokens.restore_last_token()
951 elif inside_merge and string in ['/', ',']:
952 tokens.restore_last_token()
954 elif inside_choice and string == ',':
955 tokens.restore_last_token()
958 if not current_selector:
959 raise syntax_error('"," must follow a format selector', start)
960 selectors.append(current_selector)
961 current_selector = None
963 first_choice = current_selector
964 second_choice = _parse_format_selection(tokens, inside_choice=True)
965 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
967 if not current_selector:
968 current_selector = FormatSelector(SINGLE, 'best', [])
969 format_filter = _parse_filter(tokens)
970 current_selector.filters.append(format_filter)
973 raise syntax_error('Unexpected "("', start)
974 group = _parse_format_selection(tokens, inside_group=True)
975 current_selector = FormatSelector(GROUP, group, [])
977 video_selector = current_selector
978 audio_selector = _parse_format_selection(tokens, inside_merge=True)
979 if not video_selector or not audio_selector:
980 raise syntax_error('"+" must be between two format selectors', start)
981 current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
983 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
984 elif type == tokenize.ENDMARKER:
987 selectors.append(current_selector)
990 def _build_selector_function(selector):
991 if isinstance(selector, list):
992 fs = [_build_selector_function(s) for s in selector]
994 def selector_function(formats):
996 for format in f(formats):
998 return selector_function
999 elif selector.type == GROUP:
1000 selector_function = _build_selector_function(selector.selector)
1001 elif selector.type == PICKFIRST:
1002 fs = [_build_selector_function(s) for s in selector.selector]
1004 def selector_function(formats):
1006 picked_formats = list(f(formats))
1008 return picked_formats
1010 elif selector.type == SINGLE:
1011 format_spec = selector.selector
1013 def selector_function(formats):
1014 formats = list(formats)
1017 if format_spec == 'all':
1020 elif format_spec in ['best', 'worst', None]:
1021 format_idx = 0 if format_spec == 'worst' else -1
1022 audiovideo_formats = [
1024 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1025 if audiovideo_formats:
1026 yield audiovideo_formats[format_idx]
1027 # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1028 elif (all(f.get('acodec') != 'none' for f in formats) or
1029 all(f.get('vcodec') != 'none' for f in formats)):
1030 yield formats[format_idx]
1031 elif format_spec == 'bestaudio':
1034 if f.get('vcodec') == 'none']
1036 yield audio_formats[-1]
1037 elif format_spec == 'worstaudio':
1040 if f.get('vcodec') == 'none']
1042 yield audio_formats[0]
1043 elif format_spec == 'bestvideo':
1046 if f.get('acodec') == 'none']
1048 yield video_formats[-1]
1049 elif format_spec == 'worstvideo':
1052 if f.get('acodec') == 'none']
1054 yield video_formats[0]
1056 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1057 if format_spec in extensions:
1058 filter_f = lambda f: f['ext'] == format_spec
1060 filter_f = lambda f: f['format_id'] == format_spec
1061 matches = list(filter(filter_f, formats))
1064 elif selector.type == MERGE:
1065 def _merge(formats_info):
1066 format_1, format_2 = [f['format_id'] for f in formats_info]
1067 # The first format must contain the video and the
1069 if formats_info[0].get('vcodec') == 'none':
1070 self.report_error('The first format must '
1071 'contain the video, try using '
1072 '"-f %s+%s"' % (format_2, format_1))
1075 formats_info[0]['ext']
1076 if self.params.get('merge_output_format') is None
1077 else self.params['merge_output_format'])
1079 'requested_formats': formats_info,
1080 'format': '%s+%s' % (formats_info[0].get('format'),
1081 formats_info[1].get('format')),
1082 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1083 formats_info[1].get('format_id')),
1084 'width': formats_info[0].get('width'),
1085 'height': formats_info[0].get('height'),
1086 'resolution': formats_info[0].get('resolution'),
1087 'fps': formats_info[0].get('fps'),
1088 'vcodec': formats_info[0].get('vcodec'),
1089 'vbr': formats_info[0].get('vbr'),
1090 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1091 'acodec': formats_info[1].get('acodec'),
1092 'abr': formats_info[1].get('abr'),
1095 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1097 def selector_function(formats):
1098 formats = list(formats)
1099 for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1102 filters = [self._build_format_filter(f) for f in selector.filters]
1104 def final_selector(formats):
1105 for _filter in filters:
1106 formats = list(filter(_filter, formats))
1107 return selector_function(formats)
1108 return final_selector
1110 stream = io.BytesIO(format_spec.encode('utf-8'))
1112 tokens = list(compat_tokenize_tokenize(stream.readline))
1113 except tokenize.TokenError:
1114 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1116 class TokenIterator(object):
1117 def __init__(self, tokens):
1118 self.tokens = tokens
1125 if self.counter >= len(self.tokens):
1126 raise StopIteration()
1127 value = self.tokens[self.counter]
1133 def restore_last_token(self):
1136 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1137 return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for the entry described by info_dict.

    The result is a copy of std_headers, overlaid with the entry's own
    'http_headers' (when present and non-empty) and with a 'Cookie' header
    when self._calc_cookies() yields a non-empty value.
    """
    headers = std_headers.copy()
    # A missing or empty 'http_headers' entry contributes nothing
    headers.update(info_dict.get('http_headers') or {})

    cookie_value = self._calc_cookies(info_dict)
    if cookie_value:
        headers['Cookie'] = cookie_value

    return headers
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header self.cookiejar attaches to info_dict['url'].

    A throw-away request object is created for the URL, handed to the cookie
    jar, and its resulting 'Cookie' header (if any) is returned.
    """
    probe_request = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
1157 def process_video_result(self, info_dict, download=True):
1158 assert info_dict.get('_type', 'video') == 'video'
1160 if 'id' not in info_dict:
1161 raise ExtractorError('Missing "id" field in extractor result')
1162 if 'title' not in info_dict:
1163 raise ExtractorError('Missing "title" field in extractor result')
1165 if 'playlist' not in info_dict:
1166 # It isn't part of a playlist
1167 info_dict['playlist'] = None
1168 info_dict['playlist_index'] = None
1170 thumbnails = info_dict.get('thumbnails')
1171 if thumbnails is None:
1172 thumbnail = info_dict.get('thumbnail')
1174 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1176 thumbnails.sort(key=lambda t: (
1177 t.get('preference'), t.get('width'), t.get('height'),
1178 t.get('id'), t.get('url')))
1179 for i, t in enumerate(thumbnails):
1180 if t.get('width') and t.get('height'):
1181 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1182 if t.get('id') is None:
1185 if thumbnails and 'thumbnail' not in info_dict:
1186 info_dict['thumbnail'] = thumbnails[-1]['url']
1188 if 'display_id' not in info_dict and 'id' in info_dict:
1189 info_dict['display_id'] = info_dict['id']
1191 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1192 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1193 # see http://bugs.python.org/issue1646728)
1195 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1196 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1197 except (ValueError, OverflowError, OSError):
1200 if self.params.get('listsubtitles', False):
1201 if 'automatic_captions' in info_dict:
1202 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1203 self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1205 info_dict['requested_subtitles'] = self.process_subtitles(
1206 info_dict['id'], info_dict.get('subtitles'),
1207 info_dict.get('automatic_captions'))
1209 # We now pick which formats have to be downloaded
1210 if info_dict.get('formats') is None:
1211 # There's only one format available
1212 formats = [info_dict]
1214 formats = info_dict['formats']
1217 raise ExtractorError('No video formats found!')
1221 # We check that all the formats have the format and format_id fields
1222 for i, format in enumerate(formats):
1223 if 'url' not in format:
1224 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1226 if format.get('format_id') is None:
1227 format['format_id'] = compat_str(i)
1228 format_id = format['format_id']
1229 if format_id not in formats_dict:
1230 formats_dict[format_id] = []
1231 formats_dict[format_id].append(format)
1233 # Make sure all formats have unique format_id
1234 for format_id, ambiguous_formats in formats_dict.items():
1235 if len(ambiguous_formats) > 1:
1236 for i, format in enumerate(ambiguous_formats):
1237 format['format_id'] = '%s-%d' % (format_id, i)
1239 for i, format in enumerate(formats):
1240 if format.get('format') is None:
1241 format['format'] = '{id} - {res}{note}'.format(
1242 id=format['format_id'],
1243 res=self.format_resolution(format),
1244 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1246 # Automatically determine file extension if missing
1247 if 'ext' not in format:
1248 format['ext'] = determine_ext(format['url']).lower()
1249 # Add HTTP headers, so that external programs can use them from the
1251 full_format_info = info_dict.copy()
1252 full_format_info.update(format)
1253 format['http_headers'] = self._calc_headers(full_format_info)
1255 # TODO Central sorting goes here
1257 if formats[0] is not info_dict:
1258 # only set the 'formats' fields if the original info_dict list them
1259 # otherwise we end up with a circular reference, the first (and unique)
1260 # element in the 'formats' field in info_dict is info_dict itself,
1261 # wich can't be exported to json
1262 info_dict['formats'] = formats
1263 if self.params.get('listformats'):
1264 self.list_formats(info_dict)
1266 if self.params.get('list_thumbnails'):
1267 self.list_thumbnails(info_dict)
1270 req_format = self.params.get('format')
1271 if req_format is None:
1272 req_format_list = []
1273 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1274 info_dict['extractor'] in ['youtube', 'ted'] and
1275 not info_dict.get('is_live')):
1276 merger = FFmpegMergerPP(self)
1277 if merger.available and merger.can_merge():
1278 req_format_list.append('bestvideo+bestaudio')
1279 req_format_list.append('best')
1280 req_format = '/'.join(req_format_list)
1281 format_selector = self.build_format_selector(req_format)
1282 formats_to_download = list(format_selector(formats))
1283 if not formats_to_download:
1284 raise ExtractorError('requested format not available',
1288 if len(formats_to_download) > 1:
1289 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1290 for format in formats_to_download:
1291 new_info = dict(info_dict)
1292 new_info.update(format)
1293 self.process_info(new_info)
1294 # We update the info dict with the best quality format (backwards compatibility)
1295 info_dict.update(formats_to_download[-1])
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Returns None when no subtitles were requested or none are available,
    otherwise a dict mapping each selected language to one format dict.
    Regular subtitles take precedence over automatic captions for the same
    language.
    """
    want_normal = self.params.get('writesubtitles')
    want_automatic = self.params.get('writeautomaticsub')

    candidates = {}
    if normal_subtitles and want_normal:
        candidates.update(normal_subtitles)
    if automatic_captions and want_automatic:
        for code, captions in automatic_captions.items():
            # Never override a regular subtitle track with captions
            candidates.setdefault(code, captions)

    if not candidates or not (want_normal or want_automatic):
        return None

    # Language choice: everything, the explicit list, English, or whatever
    # language happens to come first.
    if self.params.get('allsubtitles', False):
        wanted_langs = candidates.keys()
    elif self.params.get('subtitleslangs', False):
        wanted_langs = self.params.get('subtitleslangs')
    elif 'en' in candidates:
        wanted_langs = ['en']
    else:
        wanted_langs = [list(candidates.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    preference = formats_query.split('/') if formats_query else []

    selected = {}
    for code in wanted_langs:
        available = candidates.get(code)
        if available is None:
            self.report_warning('%s subtitles not available for %s' % (code, video_id))
            continue
        for ext in preference:
            if ext == 'best':
                picked = available[-1]
                break
            same_ext = [entry for entry in available if entry['ext'] == ext]
            if same_ext:
                picked = same_ext[-1]
                break
        else:
            # Nothing in the preference list matched: fall back to the last
            # (best) format and tell the user about it.
            picked = available[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, code, picked['ext']))
        selected[code] = picked
    return selected
1347 def process_info(self, info_dict):
1348 """Process a single resolved IE result."""
1350 assert info_dict.get('_type', 'video') == 'video'
1352 max_downloads = self.params.get('max_downloads')
1353 if max_downloads is not None:
1354 if self._num_downloads >= int(max_downloads):
1355 raise MaxDownloadsReached()
1357 info_dict['fulltitle'] = info_dict['title']
1358 if len(info_dict['title']) > 200:
1359 info_dict['title'] = info_dict['title'][:197] + '...'
1361 if 'format' not in info_dict:
1362 info_dict['format'] = info_dict['ext']
1364 reason = self._match_entry(info_dict, incomplete=False)
1365 if reason is not None:
1366 self.to_screen('[download] ' + reason)
1369 self._num_downloads += 1
1371 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1374 if self.params.get('forcetitle', False):
1375 self.to_stdout(info_dict['fulltitle'])
1376 if self.params.get('forceid', False):
1377 self.to_stdout(info_dict['id'])
1378 if self.params.get('forceurl', False):
1379 if info_dict.get('requested_formats') is not None:
1380 for f in info_dict['requested_formats']:
1381 self.to_stdout(f['url'] + f.get('play_path', ''))
1383 # For RTMP URLs, also include the playpath
1384 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1385 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1386 self.to_stdout(info_dict['thumbnail'])
1387 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1388 self.to_stdout(info_dict['description'])
1389 if self.params.get('forcefilename', False) and filename is not None:
1390 self.to_stdout(filename)
1391 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1392 self.to_stdout(formatSeconds(info_dict['duration']))
1393 if self.params.get('forceformat', False):
1394 self.to_stdout(info_dict['format'])
1395 if self.params.get('forcejson', False):
1396 self.to_stdout(json.dumps(info_dict))
1398 # Do nothing else if in simulate mode
1399 if self.params.get('simulate', False):
1402 if filename is None:
1406 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1407 if dn and not os.path.exists(dn):
1409 except (OSError, IOError) as err:
1410 self.report_error('unable to create directory ' + compat_str(err))
1413 if self.params.get('writedescription', False):
1414 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1415 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1416 self.to_screen('[info] Video description is already present')
1417 elif info_dict.get('description') is None:
1418 self.report_warning('There\'s no description to write.')
1421 self.to_screen('[info] Writing video description to: ' + descfn)
1422 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1423 descfile.write(info_dict['description'])
1424 except (OSError, IOError):
1425 self.report_error('Cannot write description file ' + descfn)
1428 if self.params.get('writeannotations', False):
1429 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1430 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1431 self.to_screen('[info] Video annotations are already present')
1434 self.to_screen('[info] Writing video annotations to: ' + annofn)
1435 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1436 annofile.write(info_dict['annotations'])
1437 except (KeyError, TypeError):
1438 self.report_warning('There are no annotations to write.')
1439 except (OSError, IOError):
1440 self.report_error('Cannot write annotations file: ' + annofn)
1443 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1444 self.params.get('writeautomaticsub')])
1446 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1447 # subtitles download errors are already managed as troubles in relevant IE
1448 # that way it will silently go on when used with unsupporting IE
1449 subtitles = info_dict['requested_subtitles']
1450 ie = self.get_info_extractor(info_dict['extractor_key'])
1451 for sub_lang, sub_info in subtitles.items():
1452 sub_format = sub_info['ext']
1453 if sub_info.get('data') is not None:
1454 sub_data = sub_info['data']
1457 sub_data = ie._download_webpage(
1458 sub_info['url'], info_dict['id'], note=False)
1459 except ExtractorError as err:
1460 self.report_warning('Unable to download subtitle for "%s": %s' %
1461 (sub_lang, compat_str(err.cause)))
1464 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1465 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1466 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1468 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1469 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1470 subfile.write(sub_data)
1471 except (OSError, IOError):
1472 self.report_error('Cannot write subtitles file ' + sub_filename)
1475 if self.params.get('writeinfojson', False):
1476 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1477 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1478 self.to_screen('[info] Video description metadata is already present')
1480 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1482 write_json_file(self.filter_requested_info(info_dict), infofn)
1483 except (OSError, IOError):
1484 self.report_error('Cannot write metadata to JSON file ' + infofn)
1487 self._write_thumbnails(info_dict, filename)
1489 if not self.params.get('skip_download', False):
1492 fd = get_suitable_downloader(info, self.params)(self, self.params)
1493 for ph in self._progress_hooks:
1494 fd.add_progress_hook(ph)
1495 if self.params.get('verbose'):
1496 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1497 return fd.download(name, info)
1499 if info_dict.get('requested_formats') is not None:
1502 merger = FFmpegMergerPP(self)
1503 if not merger.available:
1505 self.report_warning('You have requested multiple '
1506 'formats but ffmpeg or avconv are not installed.'
1507 ' The formats won\'t be merged.')
1509 postprocessors = [merger]
1511 def compatible_formats(formats):
1512 video, audio = formats
1514 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1515 if video_ext and audio_ext:
1517 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1520 for exts in COMPATIBLE_EXTS:
1521 if video_ext in exts and audio_ext in exts:
1523 # TODO: Check acodec/vcodec
1526 filename_real_ext = os.path.splitext(filename)[1][1:]
1528 os.path.splitext(filename)[0]
1529 if filename_real_ext == info_dict['ext']
1531 requested_formats = info_dict['requested_formats']
1532 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1533 info_dict['ext'] = 'mkv'
1534 self.report_warning(
1535 'Requested formats are incompatible for merge and will be merged into mkv.')
1536 # Ensure filename always has a correct extension for successful merge
1537 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1538 if os.path.exists(encodeFilename(filename)):
1540 '[download] %s has already been downloaded and '
1541 'merged' % filename)
1543 for f in requested_formats:
1544 new_info = dict(info_dict)
1546 fname = self.prepare_filename(new_info)
1547 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1548 downloaded.append(fname)
1549 partial_success = dl(fname, new_info)
1550 success = success and partial_success
1551 info_dict['__postprocessors'] = postprocessors
1552 info_dict['__files_to_merge'] = downloaded
1554 # Just a single file
1555 success = dl(filename, info_dict)
1556 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1557 self.report_error('unable to download video data: %s' % str(err))
1559 except (OSError, IOError) as err:
1560 raise UnavailableVideoError(err)
1561 except (ContentTooShortError, ) as err:
1562 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1567 fixup_policy = self.params.get('fixup')
1568 if fixup_policy is None:
1569 fixup_policy = 'detect_or_warn'
1571 stretched_ratio = info_dict.get('stretched_ratio')
1572 if stretched_ratio is not None and stretched_ratio != 1:
1573 if fixup_policy == 'warn':
1574 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1575 info_dict['id'], stretched_ratio))
1576 elif fixup_policy == 'detect_or_warn':
1577 stretched_pp = FFmpegFixupStretchedPP(self)
1578 if stretched_pp.available:
1579 info_dict.setdefault('__postprocessors', [])
1580 info_dict['__postprocessors'].append(stretched_pp)
1582 self.report_warning(
1583 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1584 info_dict['id'], stretched_ratio))
1586 assert fixup_policy in ('ignore', 'never')
1588 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1589 if fixup_policy == 'warn':
1590 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1592 elif fixup_policy == 'detect_or_warn':
1593 fixup_pp = FFmpegFixupM4aPP(self)
1594 if fixup_pp.available:
1595 info_dict.setdefault('__postprocessors', [])
1596 info_dict['__postprocessors'].append(fixup_pp)
1598 self.report_warning(
1599 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1602 assert fixup_policy in ('ignore', 'never')
1605 self.post_process(filename, info_dict)
1606 except (PostProcessingError) as err:
1607 self.report_error('postprocessing: %s' % str(err))
1609 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written through an
    output template without any '%' field (unless max_downloads is 1).
    Returns self._download_retcode.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        # A template without '%' always expands to the same file name
        if '%' not in outtmpl and self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file as if it was just extracted.

    The file is read as UTF-8, stripped of the 'requested_*' keys through
    self.filter_requested_info() and passed to self.process_ie_result().
    If that raises DownloadError and the info has a 'webpage_url', the URL
    is downloaded from scratch instead; otherwise the error propagates.
    Returns self._download_retcode (or the result of self.download()).
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, so json.load can't be used
        raw_json = '\n'.join(lines)
    info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
def filter_requested_info(info_dict):
    """Return a shallow copy of info_dict without the 'requested_formats'
    and 'requested_subtitles' keys. The input dict is not modified."""
    dropped_keys = ['requested_formats', 'requested_subtitles']
    filtered = {}
    for key, value in info_dict.items():
        if key not in dropped_keys:
            filtered[key] = value
    return filtered
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is the entry's own '__postprocessors' (if any) followed by
    self._pps. Each postprocessor receives the info dict returned by the
    previous one; files it reports as obsolete are removed unless the
    'keepvideo' param is set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    entry_pps = ie_info.get('__postprocessors')
    chain = list(entry_pps) if entry_pps is not None else []
    chain.extend(self._pps)

    for pp in chain:
        obsolete_files = []
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            # Report and carry on with the next postprocessor
            self.report_error(e.msg)

        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for old_filename in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
    """Return the download-archive identifier "<extractor> <id>" for an entry.

    The extractor name is lower-cased. Returns None when neither
    'extractor_key' nor (for entries carrying an 'id') 'ie_key' is set.
    """
    # Lower-casing is future-proofing against any change in case and keeps
    # backwards compatibility with prior versions
    key = info_dict.get('extractor_key')
    if key is None and 'id' in info_dict:
        key = info_dict.get('ie_key')  # key in a playlist
    if key is None:
        return None  # Incomplete video information
    return ' '.join((key.lower(), info_dict['id']))
def in_download_archive(self, info_dict):
    """Tell whether the entry is already listed in the download archive.

    Returns False when no 'download_archive' param is configured, when the
    entry has no archive id, or when the archive file does not exist yet.
    Any other IOError is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted_id = self._make_archive_id(info_dict)
    if wanted_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(line.strip() == wanted_id for line in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the entry's archive id to the download archive file.

    Does nothing when no 'download_archive' param is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry_id = self._make_archive_id(info_dict)
        assert entry_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    Precedence:
      * 'audio only' when the format has no video ('vcodec' == 'none');
      * the explicit 'resolution' field when set;
      * '<width>x<height>' when both dimensions are known;
      * '<height>p' when only the height is known;
      * '<width>x?' when only the width is known;
      * `default` otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # The known value is the width, so it goes before the 'x' (the
        # previous '?x<width>' put it in the height position).
        return '%sx?' % width
    return default
def _format_note(self, fdict):
    """Build the free-form note shown for a format in the formats table.

    The note is assembled from whichever of these fields are set on fdict:
    ext (f4f/f4m are flagged unsupported), format_note, tbr, container,
    vcodec/vbr, fps, acodec/abr, asr and filesize/filesize_approx.
    """
    def sep(text):
        # Sections are comma-separated, except for the very first one
        return ', ' if text else ''

    note = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '
    if fdict.get('format_note') is not None:
        note += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        note += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        note += sep(note) + '%s container' % fdict['container']

    vcodec = fdict.get('vcodec')
    vbr = fdict.get('vbr')
    abr = fdict.get('abr')
    if vcodec is not None and vcodec != 'none':
        note += sep(note) + vcodec
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        # No codec name, but both bitrates are known: label the video one
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr
    if fdict.get('fps') is not None:
        note += ', %sfps' % fdict['fps']

    acodec = fdict.get('acodec')
    if acodec is not None:
        note += sep(note) + ('video only' if acodec == 'none' else '%-5s' % acodec)
    elif abr is not None:
        note += sep(note) + 'audio'
    if abr is not None:
        note += '@%3dk' % abr
    if fdict.get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    if fdict.get('filesize') is not None:
        note += sep(note) + format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        note += sep(note) + '~' + format_bytes(fdict['filesize_approx'])
    return note
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'], or info_dict itself as the only format when
    that key is absent. Formats whose 'preference' is below -1000 are not
    shown. When there is more than one format, the last displayed row is
    tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # `table` can be empty even with several formats when all of them are
    # hidden by the preference filter; there is no row to tag in that case.
    if len(formats) > 1 and table:
        last_row = table[-1]
        last_row[-1] += (' ' if last_row[-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for info_dict.

    Falls back to the single 'thumbnail' URL (listed with id '0') when there
    is no 'thumbnails' list, and prints a notice when neither is available.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        thumbnails = [{'id': '0', 'url': single_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    rows = []
    for t in thumbnails:
        rows.append([t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitle languages and their formats.

    `name` is only used in the printed messages. Formats are listed per
    language in reverse order of the given list.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        extensions = [f['ext'] for f in reversed(formats)]
        rows.append([lang, ', '.join(extensions)])
    self.to_screen(render_table(['Language', 'formats'], rows))
def urlopen(self, req):
    """Start an HTTP download.

    `req` is either a URL string or a request object. The URL is passed
    through escape_url() first; the request is then opened with self._opener
    using self._socket_timeout.
    """
    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    is_plain_url = isinstance(req, compat_basestring)
    original_url = req if is_plain_url else req.get_full_url()
    escaped_url = escape_url(original_url)

    # Only rebuild the request when escaping actually changed the URL
    if original_url != escaped_url:
        if is_plain_url:
            req = escaped_url
        else:
            # Keep HEAD requests as HEAD requests
            if req.get_method() == 'HEAD':
                request_class = HEADRequest
            else:
                request_class = compat_urllib_request.Request
            req = request_class(
                escaped_url, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write environment details useful for bug reports (verbose mode only).

    Prints the encodings in use, the program version, the git HEAD when run
    from a checkout, the Python version and platform, external program
    versions and the proxy map. With the 'call_home' param it also prints
    the public IP address and warns when a newer version is available.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_encoding,
        self.get_encoding())
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        git_stdout, _git_stderr = git.communicate()
        head = git_stdout.decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: ' + head + '\n')
    except Exception:
        # Best effort only: not running from a git checkout is fine.
        try:
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    # Only programs with a detected version are listed
    found = ['%s %s' % (exe, v) for exe, v in sorted(exe_versions.items()) if v]
    exe_str = ', '.join(found) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Create the URL opener used for all requests and store it on self.

    Sets self._socket_timeout (600 seconds unless the 'socket_timeout' param
    is given), self.cookiejar (loaded from the 'cookiefile' param when that
    file is readable) and self._opener.
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_param = self.params.get('proxy')

    if cookie_path is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_param is None:
        # No explicit setting: use the proxies detected from the environment
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_param == '':
        # An empty string explicitly disables proxying
        proxies = {}
    else:
        proxies = {'http': proxy_param, 'https': proxy_param}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Return `s` as bytes, encoding text with self.get_encoding().

    Bytes are returned unchanged. A UnicodeEncodeError is re-raised with a
    hint about the --encoding option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
def _write_thumbnails(self, info_dict, filename):
    """Download the entry's thumbnail(s) next to `filename`.

    With the 'writethumbnail' param only the last thumbnail in
    info_dict['thumbnails'] is written; with 'write_all_thumbnails' all of
    them are. Nothing happens when neither param is set or there are no
    thumbnails. Each written thumbnail dict gets a 'filename' key. Download
    errors are reported as warnings and do not abort the remaining
    thumbnails.
    """
    if self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            thumbnails = [thumbnails[-1]]
    elif self.params.get('write_all_thumbnails', False):
        thumbnails = info_dict.get('thumbnails')
    else:
        return

    if not thumbnails:
        # No thumbnails present, so return immediately
        return

    for t in thumbnails:
        thumb_ext = determine_ext(t['url'], 'jpg')
        # The id is only needed to tell several thumbnails apart
        suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
        thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
        t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                # Close the HTTP response once copied instead of leaking it
                with contextlib.closing(self.urlopen(t['url'])) as uf:
                    # Same name encoding as the existence check above
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], compat_str(err)))