2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
33 compat_get_terminal_size,
37 compat_tokenize_tokenize,
39 compat_urllib_request,
57 PerRequestProxyHandler,
67 UnavailableVideoError,
78 from .cache import Cache
79 from .extractor import get_info_extractor, gen_extractors
80 from .downloader import get_suitable_downloader
81 from .downloader.rtmp import rtmpdump_version
82 from .postprocessor import (
84 FFmpegFixupStretchedPP,
89 from .version import __version__
92 class YoutubeDL(object):
95 YoutubeDL objects are the ones responsible for downloading the
96 actual video file and writing it to disk if the user has requested
97 it, among some other tasks. In most cases there should be one per
98 program. As, given a video URL, the downloader doesn't know how to
99 extract all the needed information, task that InfoExtractors do, it
100 has to pass the URL to one of them.
102 For this, YoutubeDL objects have a method that allows
103 InfoExtractors to be registered in a given order. When it is passed
104 a URL, the YoutubeDL object hands it to the first InfoExtractor it
105 finds that reports being able to handle it. The InfoExtractor extracts
106 all the information about the video or videos the URL refers to, and
107 YoutubeDL processes the extracted information, possibly using a File
108 Downloader to download the video.
110 YoutubeDL objects accept a lot of parameters. In order not to saturate
111 the object constructor with arguments, it receives a dictionary of
112 options instead. These options are available through the params
113 attribute for the InfoExtractors to use. The YoutubeDL also
114 registers itself as the downloader in charge for the InfoExtractors
115 that are added to it, so this is a "mutual registration".
119 username: Username for authentication purposes.
120 password: Password for authentication purposes.
121 videopassword: Password for accessing a video.
122 usenetrc: Use netrc for authentication instead.
123 verbose: Print additional info to stdout.
124 quiet: Do not print messages to stdout.
125 no_warnings: Do not print out anything for warnings.
126 forceurl: Force printing final URL.
127 forcetitle: Force printing title.
128 forceid: Force printing ID.
129 forcethumbnail: Force printing thumbnail URL.
130 forcedescription: Force printing description.
131 forcefilename: Force printing final filename.
132 forceduration: Force printing duration.
133 forcejson: Force printing info_dict as JSON.
134 dump_single_json: Force printing the info_dict of the whole playlist
135 (or video) as a single JSON line.
136 simulate: Do not download the video files.
137 format: Video format code. See options.py for more information.
138 outtmpl: Template for output names.
139 restrictfilenames: Do not allow "&" and spaces in file names
140 ignoreerrors: Do not stop on download errors.
141 force_generic_extractor: Force downloader to use the generic extractor
142 nooverwrites: Prevent overwriting files.
143 playliststart: Playlist item to start at.
144 playlistend: Playlist item to end at.
145 playlist_items: Specific indices of playlist to download.
146 playlistreverse: Download playlist items in reverse order.
147 matchtitle: Download only matching titles.
148 rejecttitle: Reject downloads for matching titles.
149 logger: Log messages to a logging.Logger instance.
150 logtostderr: Log messages to stderr instead of stdout.
151 writedescription: Write the video description to a .description file
152 writeinfojson: Write the video description to a .info.json file
153 writeannotations: Write the video annotations to a .annotations.xml file
154 writethumbnail: Write the thumbnail image to a file
155 write_all_thumbnails: Write all thumbnail formats to files
156 writesubtitles: Write the video subtitles to a file
157 writeautomaticsub: Write the automatic subtitles to a file
158 allsubtitles: Downloads all the subtitles of the video
159 (requires writesubtitles or writeautomaticsub)
160 listsubtitles: Lists all available subtitles for the video
161 subtitlesformat: The format code for subtitles
162 subtitleslangs: List of languages of the subtitles to download
163 keepvideo: Keep the video file after post-processing
164 daterange: A DateRange object, download only if the upload_date is in the range.
165 skip_download: Skip the actual download of the video file
166 cachedir: Location of the cache files in the filesystem.
167 False to disable filesystem cache.
168 noplaylist: Download single video instead of a playlist if in doubt.
169 age_limit: An integer representing the user's age in years.
170 Unsuitable videos for the given age are skipped.
171 min_views: An integer representing the minimum view count the video
172 must have in order to not be skipped.
173 Videos without view count information are always
174 downloaded. None for no limit.
175 max_views: An integer representing the maximum view count.
176 Videos that are more popular than that are not
178 Videos without view count information are always
179 downloaded. None for no limit.
180 download_archive: File name of a file where all downloads are recorded.
181 Videos already present in the file are not downloaded
183 cookiefile: File name where cookies should be read from and dumped to.
184 nocheckcertificate:Do not verify SSL certificates
185 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
186 At the moment, this is only supported by YouTube.
187 proxy: URL of the proxy server to use
188 cn_verification_proxy: URL of the proxy to use for IP address verification
189 on Chinese sites. (Experimental)
190 socket_timeout: Time to wait for unresponsive hosts, in seconds
191 bidi_workaround: Work around buggy terminals without bidirectional text
192 support, using fribidi
193 debug_printtraffic:Print out sent and received HTTP traffic
194 include_ads: Download ads as well
195 default_search: Prepend this string if an input url is not valid.
196 'auto' for elaborate guessing
197 encoding: Use this encoding instead of the system-specified.
198 extract_flat: Do not resolve URLs, return the immediate result.
199 Pass in 'in_playlist' to only show this behavior for
201 postprocessors: A list of dictionaries, each with an entry
202 * key: The name of the postprocessor. See
203 youtube_dl/postprocessor/__init__.py for a list.
204 as well as any further keyword arguments for the
206 progress_hooks: A list of functions that get called on download
207 progress, with a dictionary with the entries
208 * status: One of "downloading", "error", or "finished".
209 Check this first and ignore unknown values.
211 If status is one of "downloading", or "finished", the
212 following properties may also be present:
213 * filename: The final filename (always present)
214 * tmpfilename: The filename we're currently writing to
215 * downloaded_bytes: Bytes on disk
216 * total_bytes: Size of the whole file, None if unknown
217 * total_bytes_estimate: Guess of the eventual file size,
219 * elapsed: The number of seconds since download started.
220 * eta: The estimated time in seconds, None if unknown
221 * speed: The download speed in bytes/second, None if
223 * fragment_index: The counter of the currently
224 downloaded video fragment.
225 * fragment_count: The number of fragments (= individual
226 files that will be merged)
228 Progress hooks are guaranteed to be called at least once
229 (with status "finished") if the download is successful.
230 merge_output_format: Extension to use when merging formats.
231 fixup: Automatically correct known faults of the file.
233 - "never": do nothing
234 - "warn": only emit a warning
235 - "detect_or_warn": check whether we can do anything
236 about it, warn otherwise (default)
237 source_address: (Experimental) Client-side IP address to bind to.
238 call_home: Boolean, true iff we are allowed to contact the
239 youtube-dl servers for debugging.
240 sleep_interval: Number of seconds to sleep before each download.
241 listformats: Print an overview of available video formats and exit.
242 list_thumbnails: Print a table of all thumbnails and exit.
243 match_filter: A function that gets called with the info_dict of
245 If it returns a message, the video is ignored.
246 If it returns None, the video is downloaded.
247 match_filter_func in utils.py is one example for this.
248 no_color: Do not emit color codes in output.
250 The following options determine which downloader is picked:
251 external_downloader: Executable of the external downloader to call.
252 None or unset for standard (built-in) downloader.
253 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
255 The following parameters are not used by YoutubeDL itself, they are used by
256 the downloader (see youtube_dl/downloader/common.py):
257 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
258 noresizebuffer, retries, continuedl, noprogress, consoletitle,
259 xattr_set_filesize, external_downloader_args.
261 The following options are used by the post processors:
262 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
263 otherwise prefer avconv.
264 postprocessor_args: A list of additional command-line arguments for the
271 _download_retcode = None
272 _num_downloads = None
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the option dictionary described in the class docstring;
    None is treated as an empty dictionary.  When auto_init is true the
    debug header is printed and the default InfoExtractors are added.
    """
    if params is None:
        params = {}

    # Registries and counters start out empty.
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0

    # Regular output goes to stderr only when 'logtostderr' is set.
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr

    # Built-in defaults first, the caller's options on top.
    self.params = {
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                # Try bidiv first; fall back to fribidi if it cannot start.
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # errno 2: the executable was not found; anything else is fatal.
            if ose.errno != 2:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    ascii_only_fs = (
        sys.version_info >= (3,) and sys.platform != 'win32' and
        sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'])
    if ascii_only_fs and not params.get('restrictfilenames', False):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Instantiate every configured postprocessor; 'key' selects the class,
    # the remaining entries are passed on as keyword arguments.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        pp_kwargs = dict(pp_def_raw)
        del pp_kwargs['key']
        self.add_post_processor(pp_class(self, **compat_kwargs(pp_kwargs)))

    for hook in self.params.get('progress_hooks', []):
        self.add_progress_hook(hook)
def warn_if_short_id(self, argv):
    """Warn if an argument looks like a short YouTube ID starting with a dash.

    Such arguments would be parsed as options; the warning shows the
    command line rewritten with the IDs moved behind a '--' separator.
    """
    idxs = []
    for pos, arg in enumerate(argv):
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg):
            idxs.append(pos)
    if not idxs:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in idxs]
    dashed_ids = [argv[pos] for pos in idxs]
    correct_argv = ['youtube-dl'] + regular_args + ['--'] + dashed_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
def add_info_extractor(self, ie):
    """Append an InfoExtractor to the list and register it under its key.

    The extractor is also told that this object is its downloader.
    """
    self._ies.append(ie)
    self._ies_instances.update({ie.ie_key(): ie})
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    If no instance is registered yet, a new one is created, added to the
    extractor list and returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
def add_default_info_extractors(self):
    """
    Append every InfoExtractor yielded by gen_extractors to the list.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
402 def _bidi_workaround(self, message):
403 if not hasattr(self, '_output_channel'):
406 assert hasattr(self, '_output_process')
407 assert isinstance(message, compat_str)
408 line_count = message.count('\n') + 1
409 self._output_process.stdin.write((message + '\n').encode('utf-8'))
410 self._output_process.stdin.flush()
411 res = ''.join(self._output_channel.readline().decode('utf-8')
412 for _ in range(line_count))
413 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout, honouring the 'quiet' option."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' option."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file.

    If a 'logger' option is set, the message goes to logger.debug instead.
    With check_quiet the message is dropped when the 'quiet' option is set.
    A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    text = self._bidi_workaround(message)
    text += ['\n', ''][skip_eol]
    self._write_string(text, self._screen_file)
def to_stderr(self, message):
    """Print message to the error file, or to logger.error if a logger is set."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = self._bidi_workaround(message) + '\n'
    self._write_string(line, self._err_file)
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on."""
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        # Terminal escape sequence that sets the window title
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless the 'consoletitle' option is on and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Does nothing unless the 'consoletitle' option is on and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
468 self.save_console_title()
471 def __exit__(self, *args):
472 self.restore_console_title()
474 if self.params.get('cookiefile') is not None:
475 self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr.  In verbose mode the
    traceback is printed as well; if tb is not given it is built from
    the exception being handled, or from the current stack otherwise.
    Unless the 'ignoreerrors' option is set a DownloadError is raised;
    with it, the return code is set to 1 instead.
    """
    def _wrapped_exc_info():
        # exc_info carried by the exception being handled, or None
        active = sys.exc_info()[1]
        if hasattr(active, 'exc_info') and active.exc_info[0]:
            return active.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                pieces = []
                wrapped = _wrapped_exc_info()
                if wrapped:
                    pieces.append(''.join(traceback.format_exception(*wrapped)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    # Prefer the original exception info wrapped in the active exception.
    exc_info = _wrapped_exc_info() or sys.exc_info()
    raise DownloadError(message, exc_info)
def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty (not on Windows) and
    colors are not disabled.  With a 'logger' option the message goes to
    logger.warning instead; the 'no_warnings' option suppresses it.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefix the message with 'ERROR:'.
    The prefix is colored in red when stderr is a tty (not on Windows)
    and colors are not disabled.
    '''
    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
def report_file_already_downloaded(self, file_name):
    """Report that the file has already been fully downloaded.

    Falls back to a message without the file name if the name cannot
    be encoded for output.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
def prepare_filename(self, info_dict):
    """Generate the output filename from the 'outtmpl' option and info_dict.

    Returns the filename, or None (after reporting an error) when the
    output template cannot be applied.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        fields['autonumber'] = autonumber_templ % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist length
            index_width = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (index_width, fields['playlist_index'])
        if fields.get('resolution') is None:
            width, height = fields.get('width'), fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        def _sanitize(key, value):
            return sanitize_filename(
                compat_str(value),
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == 'id'))

        sanitized = dict(
            (key, _sanitize(key, value))
            for key, value in fields.items()
            if value is not None)
        # Fields missing from the info are rendered as 'NA'
        template_dict = collections.defaultdict(lambda: 'NA', sanitized)

        outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        filename = compat_expanduser(outtmpl) % template_dict
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
586 def _match_entry(self, info_dict, incomplete):
587 """ Returns None iff the file should be downloaded """
589 video_title = info_dict.get('title', info_dict.get('id', 'video'))
590 if 'title' in info_dict:
591 # This can happen when we're just evaluating the playlist
592 title = info_dict['title']
593 matchtitle = self.params.get('matchtitle', False)
595 if not re.search(matchtitle, title, re.IGNORECASE):
596 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
597 rejecttitle = self.params.get('rejecttitle', False)
599 if re.search(rejecttitle, title, re.IGNORECASE):
600 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
601 date = info_dict.get('upload_date', None)
603 dateRange = self.params.get('daterange', DateRange())
604 if date not in dateRange:
605 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
606 view_count = info_dict.get('view_count', None)
607 if view_count is not None:
608 min_views = self.params.get('min_views')
609 if min_views is not None and view_count < min_views:
610 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
611 max_views = self.params.get('max_views')
612 if max_views is not None and view_count > max_views:
613 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
614 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
615 return 'Skipping "%s" because it is age restricted' % video_title
616 if self.in_download_archive(info_dict):
617 return '%s has already been recorded in archive' % video_title
620 match_filter = self.params.get('match_filter')
621 if match_filter is not None:
622 ret = match_filter(info_dict)
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that it does not have yet'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    (None means no extra values).
    ie_key restricts extraction to that extractor; force_generic_extractor
    selects the 'Generic' extractor when no ie_key is given.

    Returns the processed result, or the raw extractor result when
    process is false.  Returns None when extraction finished early,
    failed with a reported error, or no extractor is suitable.
    '''
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in extractor and webpage URL fields missing from ie_result."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict of extra values to add to the results that do
    not have them yet (None means no extra values).
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: hand back the unresolved reference as is
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # Every non-None field of the embedding result except '_type'
        # and 'url' overrides the extracted information.
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(items_spec):
                # Yield the 1-based indices of a spec such as '1-3,7'
                for string_segment in items_spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = [
                    ie_entries[i - 1] for i in playlistitems
                    if -n_all_entries <= i - 1 < n_all_entries]
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                entry_list = list(ie_entries)
                entries = [entry_list[i - 1] for i in playlistitems]
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # Playlist context passed down to every entry
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each legacy entry the extractor fields of its parent
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
859 def _build_format_filter(self, filter_spec):
860 " Returns a function to filter the formats according to the filter_spec "
870 operator_rex = re.compile(r'''(?x)\s*
871 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
872 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
873 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
875 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
876 m = operator_rex.search(filter_spec)
879 comparison_value = int(m.group('value'))
881 comparison_value = parse_filesize(m.group('value'))
882 if comparison_value is None:
883 comparison_value = parse_filesize(m.group('value') + 'B')
884 if comparison_value is None:
886 'Invalid value %r in format specification %r' % (
887 m.group('value'), filter_spec))
888 op = OPERATORS[m.group('op')]
895 str_operator_rex = re.compile(r'''(?x)
896 \s*(?P<key>ext|acodec|vcodec|container|protocol)
897 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
898 \s*(?P<value>[a-zA-Z0-9_-]+)
900 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
901 m = str_operator_rex.search(filter_spec)
903 comparison_value = m.group('value')
904 op = STR_OPERATORS[m.group('op')]
907 raise ValueError('Invalid filter specification %r' % filter_spec)
910 actual_value = f.get(m.group('key'))
911 if actual_value is None:
912 return m.group('none_inclusive')
913 return op(actual_value, comparison_value)
916 def build_format_selector(self, format_spec):
917 def syntax_error(note, start):
919 'Invalid format specification: '
920 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
921 return SyntaxError(message)
923 PICKFIRST = 'PICKFIRST'
927 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
929 def _parse_filter(tokens):
931 for type, string, start, _, _ in tokens:
932 if type == tokenize.OP and string == ']':
933 return ''.join(filter_parts)
935 filter_parts.append(string)
937 def _remove_unused_ops(tokens):
938 # Remove operators that we don't use and join them with the sourrounding strings
939 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
940 ALLOWED_OPS = ('/', '+', ',', '(', ')')
941 last_string, last_start, last_end, last_line = None, None, None, None
942 for type, string, start, end, line in tokens:
943 if type == tokenize.OP and string == '[':
945 yield tokenize.NAME, last_string, last_start, last_end, last_line
947 yield type, string, start, end, line
948 # everything inside brackets will be handled by _parse_filter
949 for type, string, start, end, line in tokens:
950 yield type, string, start, end, line
951 if type == tokenize.OP and string == ']':
953 elif type == tokenize.OP and string in ALLOWED_OPS:
955 yield tokenize.NAME, last_string, last_start, last_end, last_line
957 yield type, string, start, end, line
958 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
964 last_string += string
966 yield tokenize.NAME, last_string, last_start, last_end, last_line
968 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
970 current_selector = None
971 for type, string, start, _, _ in tokens:
972 # ENCODING is only defined in python 3.x
973 if type == getattr(tokenize, 'ENCODING', None):
975 elif type in [tokenize.NAME, tokenize.NUMBER]:
976 current_selector = FormatSelector(SINGLE, string, [])
977 elif type == tokenize.OP:
980 # ')' will be handled by the parentheses group
981 tokens.restore_last_token()
983 elif inside_merge and string in ['/', ',']:
984 tokens.restore_last_token()
986 elif inside_choice and string == ',':
987 tokens.restore_last_token()
990 if not current_selector:
991 raise syntax_error('"," must follow a format selector', start)
992 selectors.append(current_selector)
993 current_selector = None
995 if not current_selector:
996 raise syntax_error('"/" must follow a format selector', start)
997 first_choice = current_selector
998 second_choice = _parse_format_selection(tokens, inside_choice=True)
999 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1001 if not current_selector:
1002 current_selector = FormatSelector(SINGLE, 'best', [])
1003 format_filter = _parse_filter(tokens)
1004 current_selector.filters.append(format_filter)
1006 if current_selector:
1007 raise syntax_error('Unexpected "("', start)
1008 group = _parse_format_selection(tokens, inside_group=True)
1009 current_selector = FormatSelector(GROUP, group, [])
1011 video_selector = current_selector
1012 audio_selector = _parse_format_selection(tokens, inside_merge=True)
1013 if not video_selector or not audio_selector:
1014 raise syntax_error('"+" must be between two format selectors', start)
1015 current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1017 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1018 elif type == tokenize.ENDMARKER:
1020 if current_selector:
1021 selectors.append(current_selector)
1024 def _build_selector_function(selector):
1025 if isinstance(selector, list):
1026 fs = [_build_selector_function(s) for s in selector]
1028 def selector_function(formats):
1030 for format in f(formats):
1032 return selector_function
1033 elif selector.type == GROUP:
1034 selector_function = _build_selector_function(selector.selector)
1035 elif selector.type == PICKFIRST:
1036 fs = [_build_selector_function(s) for s in selector.selector]
1038 def selector_function(formats):
1040 picked_formats = list(f(formats))
1042 return picked_formats
1044 elif selector.type == SINGLE:
1045 format_spec = selector.selector
1047 def selector_function(formats):
1048 formats = list(formats)
1051 if format_spec == 'all':
1054 elif format_spec in ['best', 'worst', None]:
1055 format_idx = 0 if format_spec == 'worst' else -1
1056 audiovideo_formats = [
1058 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1059 if audiovideo_formats:
1060 yield audiovideo_formats[format_idx]
1061 # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1062 elif (all(f.get('acodec') != 'none' for f in formats) or
1063 all(f.get('vcodec') != 'none' for f in formats)):
1064 yield formats[format_idx]
1065 elif format_spec == 'bestaudio':
1068 if f.get('vcodec') == 'none']
1070 yield audio_formats[-1]
1071 elif format_spec == 'worstaudio':
1074 if f.get('vcodec') == 'none']
1076 yield audio_formats[0]
1077 elif format_spec == 'bestvideo':
1080 if f.get('acodec') == 'none']
1082 yield video_formats[-1]
1083 elif format_spec == 'worstvideo':
1086 if f.get('acodec') == 'none']
1088 yield video_formats[0]
1090 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1091 if format_spec in extensions:
1092 filter_f = lambda f: f['ext'] == format_spec
1094 filter_f = lambda f: f['format_id'] == format_spec
1095 matches = list(filter(filter_f, formats))
1098 elif selector.type == MERGE:
1099 def _merge(formats_info):
1100 format_1, format_2 = [f['format_id'] for f in formats_info]
1101 # The first format must contain the video and the
1103 if formats_info[0].get('vcodec') == 'none':
1104 self.report_error('The first format must '
1105 'contain the video, try using '
1106 '"-f %s+%s"' % (format_2, format_1))
1109 formats_info[0]['ext']
1110 if self.params.get('merge_output_format') is None
1111 else self.params['merge_output_format'])
1113 'requested_formats': formats_info,
1114 'format': '%s+%s' % (formats_info[0].get('format'),
1115 formats_info[1].get('format')),
1116 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1117 formats_info[1].get('format_id')),
1118 'width': formats_info[0].get('width'),
1119 'height': formats_info[0].get('height'),
1120 'resolution': formats_info[0].get('resolution'),
1121 'fps': formats_info[0].get('fps'),
1122 'vcodec': formats_info[0].get('vcodec'),
1123 'vbr': formats_info[0].get('vbr'),
1124 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1125 'acodec': formats_info[1].get('acodec'),
1126 'abr': formats_info[1].get('abr'),
1129 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1131 def selector_function(formats):
1132 formats = list(formats)
1133 for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1136 filters = [self._build_format_filter(f) for f in selector.filters]
1138 def final_selector(formats):
1139 for _filter in filters:
1140 formats = list(filter(_filter, formats))
1141 return selector_function(formats)
1142 return final_selector
1144 stream = io.BytesIO(format_spec.encode('utf-8'))
1146 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1147 except tokenize.TokenError:
1148 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1150 class TokenIterator(object):
1151 def __init__(self, tokens):
1152 self.tokens = tokens
1159 if self.counter >= len(self.tokens):
1160 raise StopIteration()
1161 value = self.tokens[self.counter]
1167 def restore_last_token(self):
1170 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1171 return _build_selector_function(parsed_selector)
1173 def _calc_headers(self, info_dict):
1174 res = std_headers.copy()
1176 add_headers = info_dict.get('http_headers')
1178 res.update(add_headers)
1180 cookies = self._calc_cookies(info_dict)
1182 res['Cookie'] = cookies
def _calc_cookies(self, info_dict):
    """Return the Cookie header the cookie jar yields for info_dict['url'].

    A throwaway request object is built for the URL purely so that
    self.cookiejar can attach its matching cookies to it; the request
    is never sent.  The value of the resulting 'Cookie' header is
    returned.
    """
    # NOTE(review): presumably get_header() yields None when the jar added
    # no Cookie header (standard urllib Request behaviour) -- confirm
    # against the compat layer.
    probe_request = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
1191 def process_video_result(self, info_dict, download=True):
1192 assert info_dict.get('_type', 'video') == 'video'
1194 if 'id' not in info_dict:
1195 raise ExtractorError('Missing "id" field in extractor result')
1196 if 'title' not in info_dict:
1197 raise ExtractorError('Missing "title" field in extractor result')
1199 if 'playlist' not in info_dict:
1200 # It isn't part of a playlist
1201 info_dict['playlist'] = None
1202 info_dict['playlist_index'] = None
1204 thumbnails = info_dict.get('thumbnails')
1205 if thumbnails is None:
1206 thumbnail = info_dict.get('thumbnail')
1208 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1210 thumbnails.sort(key=lambda t: (
1211 t.get('preference'), t.get('width'), t.get('height'),
1212 t.get('id'), t.get('url')))
1213 for i, t in enumerate(thumbnails):
1214 if t.get('width') and t.get('height'):
1215 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1216 if t.get('id') is None:
1219 if thumbnails and 'thumbnail' not in info_dict:
1220 info_dict['thumbnail'] = thumbnails[-1]['url']
1222 if 'display_id' not in info_dict and 'id' in info_dict:
1223 info_dict['display_id'] = info_dict['id']
1225 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1226 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1227 # see http://bugs.python.org/issue1646728)
1229 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1230 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1231 except (ValueError, OverflowError, OSError):
1234 if self.params.get('listsubtitles', False):
1235 if 'automatic_captions' in info_dict:
1236 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1237 self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1239 info_dict['requested_subtitles'] = self.process_subtitles(
1240 info_dict['id'], info_dict.get('subtitles'),
1241 info_dict.get('automatic_captions'))
1243 # We now pick which formats have to be downloaded
1244 if info_dict.get('formats') is None:
1245 # There's only one format available
1246 formats = [info_dict]
1248 formats = info_dict['formats']
1251 raise ExtractorError('No video formats found!')
1255 # We check that all the formats have the format and format_id fields
1256 for i, format in enumerate(formats):
1257 if 'url' not in format:
1258 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1260 if format.get('format_id') is None:
1261 format['format_id'] = compat_str(i)
1262 format_id = format['format_id']
1263 if format_id not in formats_dict:
1264 formats_dict[format_id] = []
1265 formats_dict[format_id].append(format)
1267 # Make sure all formats have unique format_id
1268 for format_id, ambiguous_formats in formats_dict.items():
1269 if len(ambiguous_formats) > 1:
1270 for i, format in enumerate(ambiguous_formats):
1271 format['format_id'] = '%s-%d' % (format_id, i)
1273 for i, format in enumerate(formats):
1274 if format.get('format') is None:
1275 format['format'] = '{id} - {res}{note}'.format(
1276 id=format['format_id'],
1277 res=self.format_resolution(format),
1278 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1280 # Automatically determine file extension if missing
1281 if 'ext' not in format:
1282 format['ext'] = determine_ext(format['url']).lower()
1283 # Add HTTP headers, so that external programs can use them from the
1285 full_format_info = info_dict.copy()
1286 full_format_info.update(format)
1287 format['http_headers'] = self._calc_headers(full_format_info)
1289 # TODO Central sorting goes here
1291 if formats[0] is not info_dict:
1292 # only set the 'formats' fields if the original info_dict list them
1293 # otherwise we end up with a circular reference, the first (and unique)
1294 # element in the 'formats' field in info_dict is info_dict itself,
1295 # which can't be exported to json
1296 info_dict['formats'] = formats
1297 if self.params.get('listformats'):
1298 self.list_formats(info_dict)
1300 if self.params.get('list_thumbnails'):
1301 self.list_thumbnails(info_dict)
1304 req_format = self.params.get('format')
1305 if req_format is None:
1306 req_format_list = []
1307 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1308 info_dict['extractor'] in ['youtube', 'ted'] and
1309 not info_dict.get('is_live')):
1310 merger = FFmpegMergerPP(self)
1311 if merger.available and merger.can_merge():
1312 req_format_list.append('bestvideo+bestaudio')
1313 req_format_list.append('best')
1314 req_format = '/'.join(req_format_list)
1315 format_selector = self.build_format_selector(req_format)
1316 formats_to_download = list(format_selector(formats))
1317 if not formats_to_download:
1318 raise ExtractorError('requested format not available',
1322 if len(formats_to_download) > 1:
1323 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1324 for format in formats_to_download:
1325 new_info = dict(info_dict)
1326 new_info.update(format)
1327 self.process_info(new_info)
1328 # We update the info dict with the best quality format (backwards compatibility)
1329 info_dict.update(formats_to_download[-1])
1332 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1333 """Select the requested subtitles and their format"""
1335 if normal_subtitles and self.params.get('writesubtitles'):
1336 available_subs.update(normal_subtitles)
1337 if automatic_captions and self.params.get('writeautomaticsub'):
1338 for lang, cap_info in automatic_captions.items():
1339 if lang not in available_subs:
1340 available_subs[lang] = cap_info
1342 if (not self.params.get('writesubtitles') and not
1343 self.params.get('writeautomaticsub') or not
1347 if self.params.get('allsubtitles', False):
1348 requested_langs = available_subs.keys()
1350 if self.params.get('subtitleslangs', False):
1351 requested_langs = self.params.get('subtitleslangs')
1352 elif 'en' in available_subs:
1353 requested_langs = ['en']
1355 requested_langs = [list(available_subs.keys())[0]]
1357 formats_query = self.params.get('subtitlesformat', 'best')
1358 formats_preference = formats_query.split('/') if formats_query else []
1360 for lang in requested_langs:
1361 formats = available_subs.get(lang)
1363 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1365 for ext in formats_preference:
1369 matches = list(filter(lambda f: f['ext'] == ext, formats))
1375 self.report_warning(
1376 'No subtitle format found matching "%s" for language %s, '
1377 'using %s' % (formats_query, lang, f['ext']))
1381 def process_info(self, info_dict):
1382 """Process a single resolved IE result."""
1384 assert info_dict.get('_type', 'video') == 'video'
1386 max_downloads = self.params.get('max_downloads')
1387 if max_downloads is not None:
1388 if self._num_downloads >= int(max_downloads):
1389 raise MaxDownloadsReached()
1391 info_dict['fulltitle'] = info_dict['title']
1392 if len(info_dict['title']) > 200:
1393 info_dict['title'] = info_dict['title'][:197] + '...'
1395 if 'format' not in info_dict:
1396 info_dict['format'] = info_dict['ext']
1398 reason = self._match_entry(info_dict, incomplete=False)
1399 if reason is not None:
1400 self.to_screen('[download] ' + reason)
1403 self._num_downloads += 1
1405 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1408 if self.params.get('forcetitle', False):
1409 self.to_stdout(info_dict['fulltitle'])
1410 if self.params.get('forceid', False):
1411 self.to_stdout(info_dict['id'])
1412 if self.params.get('forceurl', False):
1413 if info_dict.get('requested_formats') is not None:
1414 for f in info_dict['requested_formats']:
1415 self.to_stdout(f['url'] + f.get('play_path', ''))
1417 # For RTMP URLs, also include the playpath
1418 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1419 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1420 self.to_stdout(info_dict['thumbnail'])
1421 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1422 self.to_stdout(info_dict['description'])
1423 if self.params.get('forcefilename', False) and filename is not None:
1424 self.to_stdout(filename)
1425 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1426 self.to_stdout(formatSeconds(info_dict['duration']))
1427 if self.params.get('forceformat', False):
1428 self.to_stdout(info_dict['format'])
1429 if self.params.get('forcejson', False):
1430 self.to_stdout(json.dumps(info_dict))
1432 # Do nothing else if in simulate mode
1433 if self.params.get('simulate', False):
1436 if filename is None:
1440 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1441 if dn and not os.path.exists(dn):
1443 except (OSError, IOError) as err:
1444 self.report_error('unable to create directory ' + compat_str(err))
1447 if self.params.get('writedescription', False):
1448 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1449 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1450 self.to_screen('[info] Video description is already present')
1451 elif info_dict.get('description') is None:
1452 self.report_warning('There\'s no description to write.')
1455 self.to_screen('[info] Writing video description to: ' + descfn)
1456 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1457 descfile.write(info_dict['description'])
1458 except (OSError, IOError):
1459 self.report_error('Cannot write description file ' + descfn)
1462 if self.params.get('writeannotations', False):
1463 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1464 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1465 self.to_screen('[info] Video annotations are already present')
1468 self.to_screen('[info] Writing video annotations to: ' + annofn)
1469 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1470 annofile.write(info_dict['annotations'])
1471 except (KeyError, TypeError):
1472 self.report_warning('There are no annotations to write.')
1473 except (OSError, IOError):
1474 self.report_error('Cannot write annotations file: ' + annofn)
1477 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1478 self.params.get('writeautomaticsub')])
1480 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1481 # subtitles download errors are already managed as troubles in relevant IE
1482 # that way it will silently go on when used with unsupporting IE
1483 subtitles = info_dict['requested_subtitles']
1484 ie = self.get_info_extractor(info_dict['extractor_key'])
1485 for sub_lang, sub_info in subtitles.items():
1486 sub_format = sub_info['ext']
1487 if sub_info.get('data') is not None:
1488 sub_data = sub_info['data']
1491 sub_data = ie._download_webpage(
1492 sub_info['url'], info_dict['id'], note=False)
1493 except ExtractorError as err:
1494 self.report_warning('Unable to download subtitle for "%s": %s' %
1495 (sub_lang, compat_str(err.cause)))
1498 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1499 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1500 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1502 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1503 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1504 subfile.write(sub_data)
1505 except (OSError, IOError):
1506 self.report_error('Cannot write subtitles file ' + sub_filename)
1509 if self.params.get('writeinfojson', False):
1510 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1511 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1512 self.to_screen('[info] Video description metadata is already present')
1514 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1516 write_json_file(self.filter_requested_info(info_dict), infofn)
1517 except (OSError, IOError):
1518 self.report_error('Cannot write metadata to JSON file ' + infofn)
1521 self._write_thumbnails(info_dict, filename)
1523 if not self.params.get('skip_download', False):
1526 fd = get_suitable_downloader(info, self.params)(self, self.params)
1527 for ph in self._progress_hooks:
1528 fd.add_progress_hook(ph)
1529 if self.params.get('verbose'):
1530 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1531 return fd.download(name, info)
1533 if info_dict.get('requested_formats') is not None:
1536 merger = FFmpegMergerPP(self)
1537 if not merger.available:
1539 self.report_warning('You have requested multiple '
1540 'formats but ffmpeg or avconv are not installed.'
1541 ' The formats won\'t be merged.')
1543 postprocessors = [merger]
1545 def compatible_formats(formats):
1546 video, audio = formats
1548 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1549 if video_ext and audio_ext:
1551 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1554 for exts in COMPATIBLE_EXTS:
1555 if video_ext in exts and audio_ext in exts:
1557 # TODO: Check acodec/vcodec
1560 filename_real_ext = os.path.splitext(filename)[1][1:]
1562 os.path.splitext(filename)[0]
1563 if filename_real_ext == info_dict['ext']
1565 requested_formats = info_dict['requested_formats']
1566 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1567 info_dict['ext'] = 'mkv'
1568 self.report_warning(
1569 'Requested formats are incompatible for merge and will be merged into mkv.')
1570 # Ensure filename always has a correct extension for successful merge
1571 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1572 if os.path.exists(encodeFilename(filename)):
1574 '[download] %s has already been downloaded and '
1575 'merged' % filename)
1577 for f in requested_formats:
1578 new_info = dict(info_dict)
1580 fname = self.prepare_filename(new_info)
1581 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1582 downloaded.append(fname)
1583 partial_success = dl(fname, new_info)
1584 success = success and partial_success
1585 info_dict['__postprocessors'] = postprocessors
1586 info_dict['__files_to_merge'] = downloaded
1588 # Just a single file
1589 success = dl(filename, info_dict)
1590 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1591 self.report_error('unable to download video data: %s' % str(err))
1593 except (OSError, IOError) as err:
1594 raise UnavailableVideoError(err)
1595 except (ContentTooShortError, ) as err:
1596 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1601 fixup_policy = self.params.get('fixup')
1602 if fixup_policy is None:
1603 fixup_policy = 'detect_or_warn'
1605 stretched_ratio = info_dict.get('stretched_ratio')
1606 if stretched_ratio is not None and stretched_ratio != 1:
1607 if fixup_policy == 'warn':
1608 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1609 info_dict['id'], stretched_ratio))
1610 elif fixup_policy == 'detect_or_warn':
1611 stretched_pp = FFmpegFixupStretchedPP(self)
1612 if stretched_pp.available:
1613 info_dict.setdefault('__postprocessors', [])
1614 info_dict['__postprocessors'].append(stretched_pp)
1616 self.report_warning(
1617 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1618 info_dict['id'], stretched_ratio))
1620 assert fixup_policy in ('ignore', 'never')
1622 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1623 if fixup_policy == 'warn':
1624 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1626 elif fixup_policy == 'detect_or_warn':
1627 fixup_pp = FFmpegFixupM4aPP(self)
1628 if fixup_pp.available:
1629 info_dict.setdefault('__postprocessors', [])
1630 info_dict['__postprocessors'].append(fixup_pp)
1632 self.report_warning(
1633 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1636 assert fixup_policy in ('ignore', 'never')
1639 self.post_process(filename, info_dict)
1640 except (PostProcessingError) as err:
1641 self.report_error('postprocessing: %s' % str(err))
1643 self.record_download_archive(info_dict)
1645 def download(self, url_list):
1646 """Download a given list of URLs."""
1647 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1648 if (len(url_list) > 1 and
1649 '%' not in outtmpl and
1650 self.params.get('max_downloads') != 1):
1651 raise SameFileError(outtmpl)
1653 for url in url_list:
1655 # It also downloads the videos
1656 res = self.extract_info(
1657 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1658 except UnavailableVideoError:
1659 self.report_error('unable to download video')
1660 except MaxDownloadsReached:
1661 self.to_screen('[info] Maximum number of downloaded files reached.')
1664 if self.params.get('dump_single_json', False):
1665 self.to_stdout(json.dumps(res))
1667 return self._download_retcode
1669 def download_with_info_file(self, info_filename):
1670 with contextlib.closing(fileinput.FileInput(
1671 [info_filename], mode='r',
1672 openhook=fileinput.hook_encoded('utf-8'))) as f:
1673 # FileInput doesn't have a read method, we can't call json.load
1674 info = self.filter_requested_info(json.loads('\n'.join(f)))
1676 self.process_ie_result(info, download=True)
1677 except DownloadError:
1678 webpage_url = info.get('webpage_url')
1679 if webpage_url is not None:
1680 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1681 return self.download([webpage_url])
1684 return self._download_retcode
1687 def filter_requested_info(info_dict):
1689 (k, v) for k, v in info_dict.items()
1690 if k not in ['requested_formats', 'requested_subtitles'])
1692 def post_process(self, filename, ie_info):
1693 """Run all the postprocessors on the given file."""
1694 info = dict(ie_info)
1695 info['filepath'] = filename
1697 if ie_info.get('__postprocessors') is not None:
1698 pps_chain.extend(ie_info['__postprocessors'])
1699 pps_chain.extend(self._pps)
1700 for pp in pps_chain:
1701 files_to_delete = []
1703 files_to_delete, info = pp.run(info)
1704 except PostProcessingError as e:
1705 self.report_error(e.msg)
1706 if files_to_delete and not self.params.get('keepvideo', False):
1707 for old_filename in files_to_delete:
1708 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1710 os.remove(encodeFilename(old_filename))
1711 except (IOError, OSError):
1712 self.report_warning('Unable to remove downloaded original file')
1714 def _make_archive_id(self, info_dict):
1715 # Future-proof against any change in case
1716 # and backwards compatibility with prior versions
1717 extractor = info_dict.get('extractor_key')
1718 if extractor is None:
1719 if 'id' in info_dict:
1720 extractor = info_dict.get('ie_key') # key in a playlist
1721 if extractor is None:
1722 return None # Incomplete video information
1723 return extractor.lower() + ' ' + info_dict['id']
1725 def in_download_archive(self, info_dict):
1726 fn = self.params.get('download_archive')
1730 vid_id = self._make_archive_id(info_dict)
1732 return False # Incomplete video information
1735 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1736 for line in archive_file:
1737 if line.strip() == vid_id:
1739 except IOError as ioe:
1740 if ioe.errno != errno.ENOENT:
1744 def record_download_archive(self, info_dict):
1745 fn = self.params.get('download_archive')
1748 vid_id = self._make_archive_id(info_dict)
1750 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1751 archive_file.write(vid_id + '\n')
1754 def format_resolution(format, default='unknown'):
1755 if format.get('vcodec') == 'none':
1757 if format.get('resolution') is not None:
1758 return format['resolution']
1759 if format.get('height') is not None:
1760 if format.get('width') is not None:
1761 res = '%sx%s' % (format['width'], format['height'])
1763 res = '%sp' % format['height']
1764 elif format.get('width') is not None:
1765 res = '?x%d' % format['width']
1770 def _format_note(self, fdict):
1772 if fdict.get('ext') in ['f4f', 'f4m']:
1773 res += '(unsupported) '
1774 if fdict.get('format_note') is not None:
1775 res += fdict['format_note'] + ' '
1776 if fdict.get('tbr') is not None:
1777 res += '%4dk ' % fdict['tbr']
1778 if fdict.get('container') is not None:
1781 res += '%s container' % fdict['container']
1782 if (fdict.get('vcodec') is not None and
1783 fdict.get('vcodec') != 'none'):
1786 res += fdict['vcodec']
1787 if fdict.get('vbr') is not None:
1789 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1791 if fdict.get('vbr') is not None:
1792 res += '%4dk' % fdict['vbr']
1793 if fdict.get('fps') is not None:
1794 res += ', %sfps' % fdict['fps']
1795 if fdict.get('acodec') is not None:
1798 if fdict['acodec'] == 'none':
1801 res += '%-5s' % fdict['acodec']
1802 elif fdict.get('abr') is not None:
1806 if fdict.get('abr') is not None:
1807 res += '@%3dk' % fdict['abr']
1808 if fdict.get('asr') is not None:
1809 res += ' (%5dHz)' % fdict['asr']
1810 if fdict.get('filesize') is not None:
1813 res += format_bytes(fdict['filesize'])
1814 elif fdict.get('filesize_approx') is not None:
1817 res += '~' + format_bytes(fdict['filesize_approx'])
1820 def list_formats(self, info_dict):
1821 formats = info_dict.get('formats', [info_dict])
1823 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1825 if f.get('preference') is None or f['preference'] >= -1000]
1826 if len(formats) > 1:
1827 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1829 header_line = ['format code', 'extension', 'resolution', 'note']
1831 '[info] Available formats for %s:\n%s' %
1832 (info_dict['id'], render_table(header_line, table)))
1834 def list_thumbnails(self, info_dict):
1835 thumbnails = info_dict.get('thumbnails')
1837 tn_url = info_dict.get('thumbnail')
1839 thumbnails = [{'id': '0', 'url': tn_url}]
1842 '[info] No thumbnails present for %s' % info_dict['id'])
1846 '[info] Thumbnails for %s:' % info_dict['id'])
1847 self.to_screen(render_table(
1848 ['ID', 'width', 'height', 'URL'],
1849 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1851 def list_subtitles(self, video_id, subtitles, name='subtitles'):
1853 self.to_screen('%s has no %s' % (video_id, name))
1856 'Available %s for %s:' % (name, video_id))
1857 self.to_screen(render_table(
1858 ['Language', 'formats'],
1859 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1860 for lang, formats in subtitles.items()]))
def urlopen(self, req):
    """Open req with this object's opener and return the result.

    The call is delegated to self._opener.open(), passing
    self._socket_timeout as the timeout keyword argument.
    """
    opener = self._opener
    timeout = self._socket_timeout
    return opener.open(req, timeout=timeout)
1866 def print_debug_header(self):
1867 if not self.params.get('verbose'):
1870 if type('') is not compat_str:
1871 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1872 self.report_warning(
1873 'Your Python is broken! Update to a newer and supported version')
1875 stdout_encoding = getattr(
1876 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1878 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1879 locale.getpreferredencoding(),
1880 sys.getfilesystemencoding(),
1882 self.get_encoding()))
1883 write_string(encoding_str, encoding=None)
1885 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1887 sp = subprocess.Popen(
1888 ['git', 'rev-parse', '--short', 'HEAD'],
1889 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1890 cwd=os.path.dirname(os.path.abspath(__file__)))
1891 out, err = sp.communicate()
1892 out = out.decode().strip()
1893 if re.match('[0-9a-f]+', out):
1894 self._write_string('[debug] Git HEAD: ' + out + '\n')
1900 self._write_string('[debug] Python version %s - %s\n' % (
1901 platform.python_version(), platform_name()))
1903 exe_versions = FFmpegPostProcessor.get_versions(self)
1904 exe_versions['rtmpdump'] = rtmpdump_version()
1905 exe_str = ', '.join(
1907 for exe, v in sorted(exe_versions.items())
1912 self._write_string('[debug] exe versions: %s\n' % exe_str)
1915 for handler in self._opener.handlers:
1916 if hasattr(handler, 'proxies'):
1917 proxy_map.update(handler.proxies)
1918 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1920 if self.params.get('call_home', False):
1921 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1922 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1923 latest_version = self.urlopen(
1924 'https://yt-dl.org/latest/version').read().decode('utf-8')
1925 if version_tuple(latest_version) > version_tuple(__version__):
1926 self.report_warning(
1927 'You are using an outdated version (newest version: %s)! '
1928 'See https://yt-dl.org/update if you need help updating.' %
def _setup_opener(self):
    """Create the cookie jar and URL opener described by self.params.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' options and stores the results in
    self._socket_timeout, self.cookiejar and self._opener.
    """
    raw_timeout = self.params.get('socket_timeout')
    if raw_timeout is None:
        # Fallback used when no timeout was configured
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(raw_timeout)

    cookie_path = self.params.get('cookiefile')
    explicit_proxy = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Existing cookies are only read when the file is accessible
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(self.cookiejar)

    if explicit_proxy is None:
        # No 'proxy' option: fall back to what getproxies() reports
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif explicit_proxy == '':
        # NOTE(review): an empty 'proxy' option is taken to mean "use no
        # proxies at all" -- confirm against the upstream file.
        proxies = {}
    else:
        proxies = {'http': explicit_proxy, 'https': explicit_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    else:
        debuglevel = 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    built_opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    built_opener.addheaders = []
    self._opener = built_opener
def encode(self, s):
    """Return s as bytes, encoding it with self.get_encoding() if needed.

    Byte strings are passed through untouched.  When encoding fails, the
    UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as enc_error:
            # Extend the message in place so the original traceback is kept
            enc_error.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
def get_encoding(self):
    """Return the 'encoding' option, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
1988 def _write_thumbnails(self, info_dict, filename):
1989 if self.params.get('writethumbnail', False):
1990 thumbnails = info_dict.get('thumbnails')
1992 thumbnails = [thumbnails[-1]]
1993 elif self.params.get('write_all_thumbnails', False):
1994 thumbnails = info_dict.get('thumbnails')
1999 # No thumbnails present, so return immediately
2002 for t in thumbnails:
2003 thumb_ext = determine_ext(t['url'], 'jpg')
2004 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2005 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2006 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2008 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2009 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2010 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2012 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2013 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2015 uf = self.urlopen(t['url'])
2016 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2017 shutil.copyfileobj(uf, thumbf)
2018 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2019 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2020 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2021 self.report_warning('Unable to download thumbnail "%s": %s' %
2022 (t['url'], compat_str(err)))