2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_get_terminal_size,
36 compat_tokenize_tokenize,
38 compat_urllib_request,
39 compat_urllib_request_DataHandler,
62 PerRequestProxyHandler,
76 UnavailableVideoError,
81 YoutubeDLCookieProcessor,
84 from .cache import Cache
85 from .extractor import get_info_extractor, gen_extractors
86 from .downloader import get_suitable_downloader
87 from .downloader.rtmp import rtmpdump_version
88 from .postprocessor import (
91 FFmpegFixupStretchedPP,
96 from .version import __version__
98 if compat_os_name == 'nt':
102 class YoutubeDL(object):
105 YoutubeDL objects are the ones responsible for downloading the
106 actual video file and writing it to disk if the user has requested
107 it, among some other tasks. In most cases there should be one per
108 program. As, given a video URL, the downloader doesn't know how to
109 extract all the needed information, a task that InfoExtractors do, it
110 has to pass the URL to one of them.
112 For this, YoutubeDL objects have a method that allows
113 InfoExtractors to be registered in a given order. When it is passed
114 a URL, the YoutubeDL object hands it to the first InfoExtractor it
115 finds that reports being able to handle it. The InfoExtractor extracts
116 all the information about the video or videos the URL refers to, and
117 YoutubeDL processes the extracted information, possibly using a File
118 Downloader to download the video.
120 YoutubeDL objects accept a lot of parameters. In order not to saturate
121 the object constructor with arguments, it receives a dictionary of
122 options instead. These options are available through the params
123 attribute for the InfoExtractors to use. The YoutubeDL also
124 registers itself as the downloader in charge for the InfoExtractors
125 that are added to it, so this is a "mutual registration".
129 username: Username for authentication purposes.
130 password: Password for authentication purposes.
131 videopassword: Password for accessing a video.
132 usenetrc: Use netrc for authentication instead.
133 verbose: Print additional info to stdout.
134 quiet: Do not print messages to stdout.
135 no_warnings: Do not print out anything for warnings.
136 forceurl: Force printing final URL.
137 forcetitle: Force printing title.
138 forceid: Force printing ID.
139 forcethumbnail: Force printing thumbnail URL.
140 forcedescription: Force printing description.
141 forcefilename: Force printing final filename.
142 forceduration: Force printing duration.
143 forcejson: Force printing info_dict as JSON.
144 dump_single_json: Force printing the info_dict of the whole playlist
145 (or video) as a single JSON line.
146 simulate: Do not download the video files.
147 format: Video format code. See options.py for more information.
148 outtmpl: Template for output names.
149 restrictfilenames: Do not allow "&" and spaces in file names
150 ignoreerrors: Do not stop on download errors.
151 force_generic_extractor: Force downloader to use the generic extractor
152 nooverwrites: Prevent overwriting files.
153 playliststart: Playlist item to start at.
154 playlistend: Playlist item to end at.
155 playlist_items: Specific indices of playlist to download.
156 playlistreverse: Download playlist items in reverse order.
157 matchtitle: Download only matching titles.
158 rejecttitle: Reject downloads for matching titles.
159 logger: Log messages to a logging.Logger instance.
160 logtostderr: Log messages to stderr instead of stdout.
161 writedescription: Write the video description to a .description file
162 writeinfojson: Write the video description to a .info.json file
163 writeannotations: Write the video annotations to a .annotations.xml file
164 writethumbnail: Write the thumbnail image to a file
165 write_all_thumbnails: Write all thumbnail formats to files
166 writesubtitles: Write the video subtitles to a file
167 writeautomaticsub: Write the automatically generated subtitles to a file
168 allsubtitles: Downloads all the subtitles of the video
169 (requires writesubtitles or writeautomaticsub)
170 listsubtitles: Lists all available subtitles for the video
171 subtitlesformat: The format code for subtitles
172 subtitleslangs: List of languages of the subtitles to download
173 keepvideo: Keep the video file after post-processing
174 daterange: A DateRange object, download only if the upload_date is in the range.
175 skip_download: Skip the actual download of the video file
176 cachedir: Location of the cache files in the filesystem.
177 False to disable filesystem cache.
178 noplaylist: Download single video instead of a playlist if in doubt.
179 age_limit: An integer representing the user's age in years.
180 Unsuitable videos for the given age are skipped.
181 min_views: An integer representing the minimum view count the video
182 must have in order to not be skipped.
183 Videos without view count information are always
184 downloaded. None for no limit.
185 max_views: An integer representing the maximum view count.
186 Videos that are more popular than that are not
188 Videos without view count information are always
189 downloaded. None for no limit.
190 download_archive: File name of a file where all downloads are recorded.
191 Videos already present in the file are not downloaded
193 cookiefile: File name where cookies should be read from and dumped to.
194 nocheckcertificate:Do not verify SSL certificates
195 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
196 At the moment, this is only supported by YouTube.
197 proxy: URL of the proxy server to use
198 cn_verification_proxy: URL of the proxy to use for IP address verification
199 on Chinese sites. (Experimental)
200 socket_timeout: Time to wait for unresponsive hosts, in seconds
201 bidi_workaround: Work around buggy terminals without bidirectional text
202 support, using fribidi
203 debug_printtraffic:Print out sent and received HTTP traffic
204 include_ads: Download ads as well
205 default_search: Prepend this string if an input url is not valid.
206 'auto' for elaborate guessing
207 encoding: Use this encoding instead of the system-specified.
208 extract_flat: Do not resolve URLs, return the immediate result.
209 Pass in 'in_playlist' to only show this behavior for
211 postprocessors: A list of dictionaries, each with an entry
212 * key: The name of the postprocessor. See
213 youtube_dl/postprocessor/__init__.py for a list.
214 as well as any further keyword arguments for the
216 progress_hooks: A list of functions that get called on download
217 progress, with a dictionary with the entries
218 * status: One of "downloading", "error", or "finished".
219 Check this first and ignore unknown values.
221 If status is one of "downloading", or "finished", the
222 following properties may also be present:
223 * filename: The final filename (always present)
224 * tmpfilename: The filename we're currently writing to
225 * downloaded_bytes: Bytes on disk
226 * total_bytes: Size of the whole file, None if unknown
227 * total_bytes_estimate: Guess of the eventual file size,
229 * elapsed: The number of seconds since download started.
230 * eta: The estimated time in seconds, None if unknown
231 * speed: The download speed in bytes/second, None if
233 * fragment_index: The counter of the currently
234 downloaded video fragment.
235 * fragment_count: The number of fragments (= individual
236 files that will be merged)
238 Progress hooks are guaranteed to be called at least once
239 (with status "finished") if the download is successful.
240 merge_output_format: Extension to use when merging formats.
241 fixup: Automatically correct known faults of the file.
243 - "never": do nothing
244 - "warn": only emit a warning
245 - "detect_or_warn": check whether we can do anything
246 about it, warn otherwise (default)
247 source_address: (Experimental) Client-side IP address to bind to.
248 call_home: Boolean, true iff we are allowed to contact the
249 youtube-dl servers for debugging.
250 sleep_interval: Number of seconds to sleep before each download.
251 listformats: Print an overview of available video formats and exit.
252 list_thumbnails: Print a table of all thumbnails and exit.
253 match_filter: A function that gets called with the info_dict of
255 If it returns a message, the video is ignored.
256 If it returns None, the video is downloaded.
257 match_filter_func in utils.py is one example for this.
258 no_color: Do not emit color codes in output.
260 The following options determine which downloader is picked:
261 external_downloader: Executable of the external downloader to call.
262 None or unset for standard (built-in) downloader.
263 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
265 The following parameters are not used by YoutubeDL itself, they are used by
266 the downloader (see youtube_dl/downloader/common.py):
267 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
268 noresizebuffer, retries, continuedl, noprogress, consoletitle,
269 xattr_set_filesize, external_downloader_args, hls_use_mpegts.
271 The following options are used by the post processors:
272 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
273 otherwise prefer avconv.
274 postprocessor_args: A list of additional command-line arguments for the
# Class-level defaults; both are re-initialized per instance in __init__
# (0 for each). _download_retcode becomes the process exit status,
# _num_downloads counts completed downloads (used by %(autonumber)s).
281 _download_retcode = None
282 _num_downloads = None
# NOTE(review): this listing is a sampled excerpt with baked-in original
# line numbers; several interior lines of __init__ (e.g. 287-289, 291,
# 297-298, 300, 303, 305-306, 309-311, 313, 315, 317, 320-321, 326,
# 328-330, 335, 340, 342, 345-348, 351, 355, 358, 361) are absent here.
285 def __init__(self, params=None, auto_init=True):
286 """Create a FileDownloader object with the given options."""
# Per-instance state: extractor instances keyed by ie_key, progress
# hooks, exit code and download counter.
290 self._ies_instances = {}
292 self._progress_hooks = []
293 self._download_retcode = 0
294 self._num_downloads = 0
# Screen output goes to stderr when 'logtostderr' is set (bool indexes
# the two-element list), otherwise stdout.
295 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
296 self._err_file = sys.stderr
299 'nocheckcertificate': False,
301 self.params.update(params)
302 self.cache = Cache(self)
# Bidirectional-text workaround: pipe screen output through an external
# bidi filter ('bidiv', falling back to 'fribidi') over a pty.
304 if params.get('bidi_workaround', False):
307 master, slave = pty.openpty()
308 width = compat_get_terminal_size().columns
312 width_args = ['-w', str(width)]
314 stdin=subprocess.PIPE,
316 stderr=self._err_file)
318 self._output_process = subprocess.Popen(
319 ['bidiv'] + width_args, **sp_kwargs
322 self._output_process = subprocess.Popen(
323 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
324 self._output_channel = os.fdopen(master, 'rb')
325 except OSError as ose:
327 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Force restricted filenames when the filesystem encoding cannot
# represent arbitrary Unicode on Python 3 (issue referenced below).
331 if (sys.version_info >= (3,) and sys.platform != 'win32' and
332 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
333 not params.get('restrictfilenames', False)):
334 # On Python 3, the Unicode filesystem API will throw errors (#1474)
336 'Assuming --restrict-filenames since file system encoding '
337 'cannot encode all characters. '
338 'Set the LC_ALL environment variable to fix this.')
339 self.params['restrictfilenames'] = True
341 if isinstance(params.get('outtmpl'), bytes):
343 'Parameter outtmpl is bytes, but should be a unicode string. '
344 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
349 self.print_debug_header()
350 self.add_default_info_extractors()
# Instantiate configured postprocessors; each dict's 'key' selects the
# PP class, remaining entries become keyword arguments.
352 for pp_def_raw in self.params.get('postprocessors', []):
353 pp_class = get_postprocessor(pp_def_raw['key'])
354 pp_def = dict(pp_def_raw)
356 pp = pp_class(self, **compat_kwargs(pp_def))
357 self.add_post_processor(pp)
359 for ph in self.params.get('progress_hooks', []):
360 self.add_progress_hook(ph)
# Warn when a bare 11-char YouTube ID starting with '-' was likely
# parsed as an option; suggests an argv with '--' separating URLs.
# NOTE(review): lines 364, 367-369 and 372-373 are absent from this
# sampled listing (list construction and warning call are truncated).
362 def warn_if_short_id(self, argv):
363 # short YouTube ID starting with dash?
365 i for i, a in enumerate(argv)
366 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
370 [a for i, a in enumerate(argv) if i not in idxs] +
371 ['--'] + [argv[i] for i in idxs]
374 'Long argument string detected. '
375 'Use -- to separate parameters and URLs, like this:\n%s\n' %
376 args_to_str(correct_argv))
# Registers the extractor instance and gives it a back-reference to
# this downloader ("mutual registration").
# NOTE(review): original line 380 (presumably appending to self._ies —
# TODO confirm) is absent from this sampled listing.
378 def add_info_extractor(self, ie):
379 """Add an InfoExtractor object to the end of the list."""
381 self._ies_instances[ie.ie_key()] = ie
382 ie.set_downloader(self)
# Lazy lookup: reuse a cached extractor instance by key, otherwise
# instantiate and register a new one.
# NOTE(review): lines 385/389 (docstring quotes), 391 (the None check)
# and 394-395 (return) are absent from this sampled listing.
384 def get_info_extractor(self, ie_key):
386 Get an instance of an IE with name ie_key, it will try to get one from
387 the _ies list, if there's no instance it will create a new one and add
388 it to the extractor list.
390 ie = self._ies_instances.get(ie_key)
392 ie = get_info_extractor(ie_key)()
393 self.add_info_extractor(ie)
# Registers every extractor produced by gen_extractors().
# NOTE(review): lines 397/399 (docstring quotes) are absent here.
396 def add_default_info_extractors(self):
398 Add the InfoExtractors returned by gen_extractors to the end of the list
400 for ie in gen_extractors():
401 self.add_info_extractor(ie)
# NOTE(review): line 405 (presumably appending pp to a _pps list — TODO
# confirm) is absent from this sampled listing.
403 def add_post_processor(self, pp):
404 """Add a PostProcessor object to the end of the chain."""
406 pp.set_downloader(self)
# Hooks are called with a status dict during downloads (see the
# 'progress_hooks' option documented on the class).
408 def add_progress_hook(self, ph):
409 """Add the progress hook (currently only for the file downloader)"""
410 self._progress_hooks.append(ph)
# Filter `message` through the external bidi process started in
# __init__: write it to the child's stdin, read back the same number of
# lines from the pty, and strip the final newline. Pass-through when no
# bidi channel was set up.
# NOTE(review): lines 414-415 (presumably the early `return message` —
# TODO confirm) are absent from this sampled listing.
412 def _bidi_workaround(self, message):
413 if not hasattr(self, '_output_channel'):
416 assert hasattr(self, '_output_process')
417 assert isinstance(message, compat_str)
418 line_count = message.count('\n') + 1
419 self._output_process.stdin.write((message + '\n').encode('utf-8'))
420 self._output_process.stdin.flush()
421 res = ''.join(self._output_channel.readline().decode('utf-8')
422 for _ in range(line_count))
423 return res[:-len('\n')]
# Convenience wrapper: screen output honours the 'quiet' option.
425 def to_screen(self, message, skip_eol=False):
426 """Print message to stdout if not in quiet mode."""
427 return self.to_stdout(message, skip_eol, check_quiet=True)
# Low-level write honouring the user-selected output 'encoding'.
429 def _write_string(self, s, out=None):
430 write_string(s, out=out, encoding=self.params.get('encoding'))
# Routes to the configured logger when present; otherwise writes to
# self._screen_file unless quiet mode suppresses it.
# NOTE(review): line 440 is absent from this sampled listing —
# presumably a bidi_workaround condition guarding line 437; verify.
432 def to_stdout(self, message, skip_eol=False, check_quiet=False):
433 """Print message to stdout if not in quiet mode."""
434 if self.params.get('logger'):
435 self.params['logger'].debug(message)
436 elif not check_quiet or not self.params.get('quiet', False):
437 message = self._bidi_workaround(message)
438 terminator = ['\n', ''][skip_eol]
439 output = message + terminator
441 self._write_string(output, self._screen_file)
# Error-channel counterpart of to_stdout; never suppressed by 'quiet'.
# NOTE(review): line 448 (presumably the `else:` pairing lines 449-451
# against the logger branch) is absent from this sampled listing.
443 def to_stderr(self, message):
444 """Print message to stderr."""
445 assert isinstance(message, compat_str)
446 if self.params.get('logger'):
447 self.params['logger'].error(message)
449 message = self._bidi_workaround(message)
450 output = message + '\n'
451 self._write_string(output, self._err_file)
# Sets the terminal/console title: Win32 API on Windows consoles, the
# xterm OSC 0 escape sequence elsewhere when TERM is set. No-op unless
# the 'consoletitle' option is enabled (line 455, the early return, is
# absent from this sampled listing).
453 def to_console_title(self, message):
454 if not self.params.get('consoletitle', False):
456 if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
457 # c_wchar_p() might not be necessary if `message` is
458 # already of type unicode()
459 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
460 elif 'TERM' in os.environ:
461 self._write_string('\033]0;%s\007' % message, self._screen_file)
# Pushes the current terminal title onto the xterm title stack
# (CSI 22;0t). Early return at absent line 465 when 'consoletitle'
# is disabled.
463 def save_console_title(self):
464 if not self.params.get('consoletitle', False):
466 if 'TERM' in os.environ:
467 # Save the title on stack
468 self._write_string('\033[22;0t', self._screen_file)
# Pops the previously saved terminal title (CSI 23;0t); mirror of
# save_console_title. Early return at absent line 472.
470 def restore_console_title(self):
471 if not self.params.get('consoletitle', False):
473 if 'TERM' in os.environ:
474 # Restore the title from stack
475 self._write_string('\033[23;0t', self._screen_file)
# Context-manager protocol: line 478 below is the tail of __enter__
# (its `def` line, ~476-477, is absent from this sampled listing);
# __exit__ restores the title and persists cookies if a cookiefile
# was configured.
478 self.save_console_title()
481 def __exit__(self, *args):
482 self.restore_console_title()
484 if self.params.get('cookiefile') is not None:
485 self.cookiejar.save()
# Central error sink: prints `message`, optionally a traceback in
# verbose mode, then either raises DownloadError (default) or records
# a non-zero retcode when 'ignoreerrors' is set. Several interior lines
# (489, 493, 495, 499, 501, 505, 508, 512 — docstring quotes, blank
# lines and else branches) are absent from this sampled listing.
487 def trouble(self, message=None, tb=None):
488 """Determine action to take when a download problem appears.
490 Depending on if the downloader has been configured to ignore
491 download errors or not, this method may throw an exception or
492 not when errors are found, after printing the message.
494 tb, if given, is additional traceback information.
496 if message is not None:
497 self.to_stderr(message)
498 if self.params.get('verbose'):
500 if sys.exc_info()[0]:  # if .trouble has been called from an except block
502 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
503 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
504 tb += encode_compat_str(traceback.format_exc())
506 tb_data = traceback.format_list(traceback.extract_stack())
507 tb = ''.join(tb_data)
# Re-raise with the wrapped extractor exception's exc_info when one is
# chained on the current exception, otherwise the current exc_info.
509 if not self.params.get('ignoreerrors', False):
510 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
511 exc_info = sys.exc_info()[1].exc_info
513 exc_info = sys.exc_info()
514 raise DownloadError(message, exc_info)
515 self._download_retcode = 1
# Emits 'WARNING: <message>' to stderr (ANSI yellow on a colour-capable
# tty, never on Windows), or routes to the configured logger. Absent
# lines in this sampled listing: 518/521 (docstring quotes), 524/526
# (returns after the logger / no_warnings branches), 529 (else:).
517 def report_warning(self, message):
519 Print the message to stderr, it will be prefixed with 'WARNING:'
520 If stderr is a tty file the 'WARNING:' will be colored
522 if self.params.get('logger') is not None:
523 self.params['logger'].warning(message)
525 if self.params.get('no_warnings'):
527 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
528 _msg_header = '\033[0;33mWARNING:\033[0m'
530 _msg_header = 'WARNING:'
531 warning_message = '%s %s' % (_msg_header, message)
532 self.to_stderr(warning_message)
# Like trouble() but prefixes 'ERROR:' (ANSI red on a colour-capable
# tty, never on Windows) before delegating to self.trouble. Absent
# lines: 535/538 (docstring quotes), 541 (else:).
534 def report_error(self, message, tb=None):
536 Do the same as trouble, but prefixes the message with 'ERROR:', colored
537 in red if stderr is a tty file.
539 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
540 _msg_header = '\033[0;31mERROR:\033[0m'
542 _msg_header = 'ERROR:'
543 error_message = '%s %s' % (_msg_header, message)
544 self.trouble(error_message, tb)
# Falls back to a filename-free message when the filename cannot be
# encoded for the output stream. Line 548 (`try:`) is absent from this
# sampled listing.
546 def report_file_already_downloaded(self, file_name):
547 """Report file has already been fully downloaded."""
549 self.to_screen('[download] %s has already been downloaded' % file_name)
550 except UnicodeEncodeError:
551 self.to_screen('[download] The file has already been downloaded')
# Expands the 'outtmpl' template against a copy of info_dict:
# synthesizes epoch/autonumber/playlist_index/resolution fields,
# sanitizes every value for filesystem use, and substitutes 'NA' for
# missing keys via a defaultdict. Several interior lines (555, 557,
# 561, 573, 575, 577, 580, 582, 594-595) are absent from this sampled
# listing (try:, autonumber default, sanitize body, return None path).
553 def prepare_filename(self, info_dict):
554 """Generate the output filename."""
556 template_dict = dict(info_dict)
558 template_dict['epoch'] = int(time.time())
559 autonumber_size = self.params.get('autonumber_size')
560 if autonumber_size is None:
562 autonumber_templ = '%0' + str(autonumber_size) + 'd'
563 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Zero-pad playlist_index to the width of the playlist length.
564 if template_dict.get('playlist_index') is not None:
565 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive a human-readable 'resolution' from width/height when absent.
566 if template_dict.get('resolution') is None:
567 if template_dict.get('width') and template_dict.get('height'):
568 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
569 elif template_dict.get('height'):
570 template_dict['resolution'] = '%sp' % template_dict['height']
571 elif template_dict.get('width'):
572 template_dict['resolution'] = '%dx?' % template_dict['width']
574 sanitize = lambda k, v: sanitize_filename(
576 restricted=self.params.get('restrictfilenames'),
578 template_dict = dict((k, sanitize(k, v))
579 for k, v in template_dict.items()
581 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
583 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
584 tmpl = compat_expanduser(outtmpl)
585 filename = tmpl % template_dict
586 # Temporary fix for #4787
587 # 'Treat' all problem characters by passing filename through preferredencoding
588 # to workaround encoding issues with subprocess on python2 @ Windows
589 if sys.version_info < (3, 0) and sys.platform == 'win32':
590 filename = encodeFilename(filename, True).decode(preferredencoding())
591 return sanitize_path(filename)
592 except ValueError as err:
593 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Applies all download filters in order (title match/reject, date
# range, view-count bounds, age limit, download archive, custom
# match_filter); a non-None return value is the human-readable skip
# reason. Absent interior lines in this sampled listing include 598,
# 604, 608, 612, 628-629 and the tail 633-637 (the `if` guards,
# archive-hit handling and final return).
596 def _match_entry(self, info_dict, incomplete):
597 """ Returns None iff the file should be downloaded """
599 video_title = info_dict.get('title', info_dict.get('id', 'video'))
600 if 'title' in info_dict:
601 # This can happen when we're just evaluating the playlist
602 title = info_dict['title']
603 matchtitle = self.params.get('matchtitle', False)
605 if not re.search(matchtitle, title, re.IGNORECASE):
606 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
607 rejecttitle = self.params.get('rejecttitle', False)
609 if re.search(rejecttitle, title, re.IGNORECASE):
610 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
611 date = info_dict.get('upload_date')
613 dateRange = self.params.get('daterange', DateRange())
614 if date not in dateRange:
615 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
616 view_count = info_dict.get('view_count')
617 if view_count is not None:
618 min_views = self.params.get('min_views')
619 if min_views is not None and view_count < min_views:
620 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
621 max_views = self.params.get('max_views')
622 if max_views is not None and view_count > max_views:
623 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
624 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
625 return 'Skipping "%s" because it is age restricted' % video_title
626 if self.in_download_archive(info_dict):
627 return '%s has already been recorded in archive' % video_title
630 match_filter = self.params.get('match_filter')
631 if match_filter is not None:
632 ret = match_filter(info_dict)
# NOTE(review): the decorator line (638, presumably @staticmethod given
# the missing self parameter — confirm) is absent from this listing.
639 def add_extra_info(info_dict, extra_info):
640 '''Set the keys from extra_info in info dict if they are missing'''
641 for key, value in extra_info.items():
642 info_dict.setdefault(key, value)
# Dispatches `url` to a suitable InfoExtractor (a specific one when
# ie_key is given) and hands the raw result to process_ie_result.
# Interior lines absent from this sampled listing include 646/650
# (docstring quotes), 653-655 (forcing the Generic extractor), 657-660
# (iterating self._ies), 662-664 (continue / not-working check),
# 667-668 (try:), 671, 674, 677, 679, 681-682, 685, 687, 691-694
# (break/raise/else branches).
644 def extract_info(self, url, download=True, ie_key=None, extra_info={},
645 process=True, force_generic_extractor=False):
647 Returns a list with a dictionary for each video we find.
648 If 'download', also downloads the videos.
649 extra_info is a dict containing the extra values to add to each result
652 if not ie_key and force_generic_extractor:
656 ies = [self.get_info_extractor(ie_key)]
661 if not ie.suitable(url):
665 self.report_warning('The program functionality for this site has been marked as broken, '
666 'and will probably not work.')
669 ie_result = ie.extract(url)
670 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
672 if isinstance(ie_result, list):
673 # Backwards compatibility: old IE result format
675 '_type': 'compat_list',
676 'entries': ie_result,
678 self.add_default_extra_info(ie_result, ie, url)
680 return self.process_ie_result(ie_result, download, extra_info)
683 except ExtractorError as e:  # An error we somewhat expected
684 self.report_error(compat_str(e), e.format_traceback())
686 except MaxDownloadsReached:
688 except Exception as e:
689 if self.params.get('ignoreerrors', False):
690 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
695 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Stamps extractor identity and URL metadata onto a result without
# overwriting existing keys (via add_extra_info's setdefault). Line 700
# ('webpage_url': url — TODO confirm) and the closing 703 are absent
# from this sampled listing.
697 def add_default_extra_info(self, ie_result, ie, url):
698 self.add_extra_info(ie_result, {
699 'extractor': ie.IE_NAME,
701 'webpage_url_basename': url_basename(url),
702 'extractor_key': ie.ie_key(),
# Resolves an extractor result by its '_type': 'video' goes straight to
# process_video_result; 'url'/'url_transparent' recurse through
# extract_info; 'playlist'/'multi_video' slice, filter and iterate
# entries; 'compat_list' adapts the legacy list format. Many interior
# lines are absent from this sampled listing (docstring quotes, else
# branches, the playlistitems accumulation at 772-773, entry slicing
# fallbacks at 780-784, 791-797, 804-808, the per-entry extra dict
# header at 821, and the compat_list _fixup helper at 849-860).
705 def process_ie_result(self, ie_result, download=True, extra_info={}):
707 Take the result of the ie(may be modified) and resolve all unresolved
708 references (URLs, playlist items).
710 It will also download the videos if 'download'.
711 Returns the resolved ie_result.
713 result_type = ie_result.get('_type', 'video')
# In flat-extraction mode unresolved URL results are returned (or
# dumped as JSON) instead of being followed.
715 if result_type in ('url', 'url_transparent'):
716 extract_flat = self.params.get('extract_flat', False)
717 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
718 extract_flat is True):
719 if self.params.get('forcejson', False):
720 self.to_stdout(json.dumps(ie_result))
723 if result_type == 'video':
724 self.add_extra_info(ie_result, extra_info)
725 return self.process_video_result(ie_result, download=download)
726 elif result_type == 'url':
727 # We have to add extra_info to the results because it may be
728 # contained in a playlist
729 return self.extract_info(ie_result['url'],
731 ie_key=ie_result.get('ie_key'),
732 extra_info=extra_info)
733 elif result_type == 'url_transparent':
734 # Use the information from the embedding page
735 info = self.extract_info(
736 ie_result['url'], ie_key=ie_result.get('ie_key'),
737 extra_info=extra_info, download=False, process=False)
# Non-None fields of the embedding result override the resolved info,
# except the routing fields _type/url/ie_key.
739 force_properties = dict(
740 (k, v) for k, v in ie_result.items() if v is not None)
741 for f in ('_type', 'url', 'ie_key'):
742 if f in force_properties:
743 del force_properties[f]
744 new_result = info.copy()
745 new_result.update(force_properties)
747 assert new_result.get('_type') != 'url_transparent'
749 return self.process_ie_result(
750 new_result, download=download, extra_info=extra_info)
751 elif result_type == 'playlist' or result_type == 'multi_video':
752 # We process each entry in the playlist
753 playlist = ie_result.get('title') or ie_result.get('id')
754 self.to_screen('[download] Downloading playlist: %s' % playlist)
756 playlist_results = []
# Selection: playliststart is 1-based in params, 0-based here.
758 playliststart = self.params.get('playliststart', 1) - 1
759 playlistend = self.params.get('playlistend')
760 # For backwards compatibility, interpret -1 as whole list
761 if playlistend == -1:
764 playlistitems_str = self.params.get('playlist_items')
766 if playlistitems_str is not None:
767 def iter_playlistitems(format):
# Expands a spec like "1-3,7" into individual 1-based indices.
768 for string_segment in format.split(','):
769 if '-' in string_segment:
770 start, end = string_segment.split('-')
771 for item in range(int(start), int(end) + 1):
774 yield int(string_segment)
775 playlistitems = iter_playlistitems(playlistitems_str)
# Entries may be a plain list, a PagedList, or an arbitrary iterator;
# each case is sliced/materialized differently.
777 ie_entries = ie_result['entries']
778 if isinstance(ie_entries, list):
779 n_all_entries = len(ie_entries)
782 ie_entries[i - 1] for i in playlistitems
783 if -n_all_entries <= i - 1 < n_all_entries]
785 entries = ie_entries[playliststart:playlistend]
786 n_entries = len(entries)
788 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
789 (ie_result['extractor'], playlist, n_all_entries, n_entries))
790 elif isinstance(ie_entries, PagedList):
793 for item in playlistitems:
794 entries.extend(ie_entries.getslice(
798 entries = ie_entries.getslice(
799 playliststart, playlistend)
800 n_entries = len(entries)
802 '[%s] playlist %s: Downloading %d videos' %
803 (ie_result['extractor'], playlist, n_entries))
806 entry_list = list(ie_entries)
807 entries = [entry_list[i - 1] for i in playlistitems]
809 entries = list(itertools.islice(
810 ie_entries, playliststart, playlistend))
811 n_entries = len(entries)
813 '[%s] playlist %s: Downloading %d videos' %
814 (ie_result['extractor'], playlist, n_entries))
816 if self.params.get('playlistreverse', False):
817 entries = entries[::-1]
819 for i, entry in enumerate(entries, 1):
820 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
822 'n_entries': n_entries,
823 'playlist': playlist,
824 'playlist_id': ie_result.get('id'),
825 'playlist_title': ie_result.get('title'),
826 'playlist_index': i + playliststart,
827 'extractor': ie_result['extractor'],
828 'webpage_url': ie_result['webpage_url'],
829 'webpage_url_basename': url_basename(ie_result['webpage_url']),
830 'extractor_key': ie_result['extractor_key'],
# Filters are re-checked per entry; a non-None reason skips it.
833 reason = self._match_entry(entry, incomplete=True)
834 if reason is not None:
835 self.to_screen('[download] ' + reason)
838 entry_result = self.process_ie_result(entry,
841 playlist_results.append(entry_result)
842 ie_result['entries'] = playlist_results
843 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
845 elif result_type == 'compat_list':
847 'Extractor %s returned a compat_list result. '
848 'It needs to be updated.' % ie_result.get('extractor'))
854 'extractor': ie_result['extractor'],
855 'webpage_url': ie_result['webpage_url'],
856 'webpage_url_basename': url_basename(ie_result['webpage_url']),
857 'extractor_key': ie_result['extractor_key'],
861 ie_result['entries'] = [
862 self.process_ie_result(_fixup(r), download, extra_info)
863 for r in ie_result['entries']
867 raise Exception('Invalid result type: %s' % result_type)
# Compiles a format-filter spec (e.g. "height<=480", "ext=mp4") into a
# predicate over format dicts. Numeric keys take comparison operators
# with optional filesize suffixes; string keys take =/^=/$=/*=. A
# trailing '?' ('none_inclusive') accepts formats missing the key.
# Absent interior lines in this sampled listing: 871-879 (the numeric
# OPERATORS table), 884, 887-888, 890, 895, 899-903 (STR_OPERATORS
# head), 907, 912, 915, 918-919, 921-922 (the _filter def line) and
# the closing 927-928 (return _filter).
869 def _build_format_filter(self, filter_spec):
870 " Returns a function to filter the formats according to the filter_spec "
880 operator_rex = re.compile(r'''(?x)\s*
881 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
882 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
883 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
885 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
886 m = operator_rex.search(filter_spec)
889 comparison_value = int(m.group('value'))
# Fall back to filesize parsing ("500K", "1.2MiB"), trying a bare
# suffix plus 'B' as a second attempt.
891 comparison_value = parse_filesize(m.group('value'))
892 if comparison_value is None:
893 comparison_value = parse_filesize(m.group('value') + 'B')
894 if comparison_value is None:
896 'Invalid value %r in format specification %r' % (
897 m.group('value'), filter_spec))
898 op = OPERATORS[m.group('op')]
904 '^=': lambda attr, value: attr.startswith(value),
905 '$=': lambda attr, value: attr.endswith(value),
906 '*=': lambda attr, value: value in attr,
908 str_operator_rex = re.compile(r'''(?x)
909 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
910 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
911 \s*(?P<value>[a-zA-Z0-9._-]+)
913 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
914 m = str_operator_rex.search(filter_spec)
916 comparison_value = m.group('value')
917 op = STR_OPERATORS[m.group('op')]
920 raise ValueError('Invalid filter specification %r' % filter_spec)
923 actual_value = f.get(m.group('key'))
924 if actual_value is None:
925 return m.group('none_inclusive')
926 return op(actual_value, comparison_value)
929 def build_format_selector(self, format_spec):
930 def syntax_error(note, start):
932 'Invalid format specification: '
933 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
934 return SyntaxError(message)
936 PICKFIRST = 'PICKFIRST'
940 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
942 def _parse_filter(tokens):
944 for type, string, start, _, _ in tokens:
945 if type == tokenize.OP and string == ']':
946 return ''.join(filter_parts)
948 filter_parts.append(string)
950 def _remove_unused_ops(tokens):
951 # Remove operators that we don't use and join them with the surrounding strings
952 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
953 ALLOWED_OPS = ('/', '+', ',', '(', ')')
954 last_string, last_start, last_end, last_line = None, None, None, None
955 for type, string, start, end, line in tokens:
956 if type == tokenize.OP and string == '[':
958 yield tokenize.NAME, last_string, last_start, last_end, last_line
960 yield type, string, start, end, line
961 # everything inside brackets will be handled by _parse_filter
962 for type, string, start, end, line in tokens:
963 yield type, string, start, end, line
964 if type == tokenize.OP and string == ']':
966 elif type == tokenize.OP and string in ALLOWED_OPS:
968 yield tokenize.NAME, last_string, last_start, last_end, last_line
970 yield type, string, start, end, line
971 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
977 last_string += string
979 yield tokenize.NAME, last_string, last_start, last_end, last_line
# Recursive-descent parser for a format-selection expression, returning a
# list of FormatSelector nodes. The inside_* flags tell the recursion which
# delimiters terminate the current sub-expression ('+' merge, '/' choice,
# '(...)' group).
# NOTE(review): the per-operator dispatch lines (e.g. 991-992 `if string ==
# ')':` etc.), `break`/`continue` statements, and the final `return selectors`
# are elided from this extract -- confirm against the original file.
981 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
983 current_selector = None
984 for type, string, start, _, _ in tokens:
985 # ENCODING is only defined in python 3.x
986 if type == getattr(tokenize, 'ENCODING', None):
988 elif type in [tokenize.NAME, tokenize.NUMBER]:
# A bare name/number is a SINGLE selector (a format id or keyword).
989 current_selector = FormatSelector(SINGLE, string, [])
990 elif type == tokenize.OP:
993 # ')' will be handled by the parentheses group
994 tokens.restore_last_token()
996 elif inside_merge and string in ['/', ',']:
997 tokens.restore_last_token()
999 elif inside_choice and string == ',':
1000 tokens.restore_last_token()
# ',' separates top-level alternatives: commit the current selector.
1003 if not current_selector:
1004 raise syntax_error('"," must follow a format selector', start)
1005 selectors.append(current_selector)
1006 current_selector = None
# '/' builds a PICKFIRST (fallback) pair.
1008 if not current_selector:
1009 raise syntax_error('"/" must follow a format selector', start)
1010 first_choice = current_selector
1011 second_choice = _parse_format_selection(tokens, inside_choice=True)
1012 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
# '[' starts a filter; a filter with no preceding selector means 'best'.
1014 if not current_selector:
1015 current_selector = FormatSelector(SINGLE, 'best', [])
1016 format_filter = _parse_filter(tokens)
1017 current_selector.filters.append(format_filter)
# '(' opens a parenthesised GROUP sub-expression.
1019 if current_selector:
1020 raise syntax_error('Unexpected "("', start)
1021 group = _parse_format_selection(tokens, inside_group=True)
1022 current_selector = FormatSelector(GROUP, group, [])
# '+' builds a MERGE (video+audio) pair.
1024 video_selector = current_selector
1025 audio_selector = _parse_format_selection(tokens, inside_merge=True)
1026 if not video_selector or not audio_selector:
1027 raise syntax_error('"+" must be between two format selectors', start)
1028 current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1030 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1031 elif type == tokenize.ENDMARKER:
1033 if current_selector:
1034 selectors.append(current_selector)
# Compiles a FormatSelector AST node (or a list of them) into a callable
# that takes the list of available formats and yields the chosen one(s).
# NOTE(review): numerous lines are elided throughout (loop bodies, `else:`
# arms, list-comprehension headers, yields) -- the structure below is
# incomplete and should be checked against the original before editing.
1037 def _build_selector_function(selector):
# A list of selectors: chain each compiled selector's output.
1038 if isinstance(selector, list):
1039 fs = [_build_selector_function(s) for s in selector]
1041 def selector_function(formats):
1043 for format in f(formats):
1045 return selector_function
1046 elif selector.type == GROUP:
# A parenthesised group compiles to exactly its inner selector.
1047 selector_function = _build_selector_function(selector.selector)
1048 elif selector.type == PICKFIRST:
# 'a/b' fallback: return the first alternative that yields anything.
1049 fs = [_build_selector_function(s) for s in selector.selector]
1051 def selector_function(formats):
1053 picked_formats = list(f(formats))
1055 return picked_formats
1057 elif selector.type == SINGLE:
1058 format_spec = selector.selector
1060 def selector_function(formats):
1061 formats = list(formats)
# Keyword specs: 'all', 'best'/'worst', 'bestaudio'/'worstaudio',
# 'bestvideo'/'worstvideo'; formats are assumed sorted worst-to-best,
# so index -1 is best and 0 is worst.
1064 if format_spec == 'all':
1067 elif format_spec in ['best', 'worst', None]:
1068 format_idx = 0 if format_spec == 'worst' else -1
1069 audiovideo_formats = [
1071 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1072 if audiovideo_formats:
1073 yield audiovideo_formats[format_idx]
1074 # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1075 elif (all(f.get('acodec') != 'none' for f in formats) or
1076 all(f.get('vcodec') != 'none' for f in formats)):
1077 yield formats[format_idx]
1078 elif format_spec == 'bestaudio':
1081 if f.get('vcodec') == 'none']
1083 yield audio_formats[-1]
1084 elif format_spec == 'worstaudio':
1087 if f.get('vcodec') == 'none']
1089 yield audio_formats[0]
1090 elif format_spec == 'bestvideo':
1093 if f.get('acodec') == 'none']
1095 yield video_formats[-1]
1096 elif format_spec == 'worstvideo':
1099 if f.get('acodec') == 'none']
1101 yield video_formats[0]
# Otherwise the spec is a file extension or a literal format_id.
1103 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1104 if format_spec in extensions:
1105 filter_f = lambda f: f['ext'] == format_spec
1107 filter_f = lambda f: f['format_id'] == format_spec
1108 matches = list(filter(filter_f, formats))
1111 elif selector.type == MERGE:
# 'video+audio' merge: combine the two picked formats into one synthetic
# info dict describing the merged download.
1112 def _merge(formats_info):
1113 format_1, format_2 = [f['format_id'] for f in formats_info]
1114 # The first format must contain the video and the
1116 if formats_info[0].get('vcodec') == 'none':
1117 self.report_error('The first format must '
1118 'contain the video, try using '
1119 '"-f %s+%s"' % (format_2, format_1))
1121 # Formats must be opposite (video+audio)
1122 if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1124 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1125 % (format_1, format_2))
# Output container comes from the first format unless overridden
# by the merge_output_format option.
1128 formats_info[0]['ext']
1129 if self.params.get('merge_output_format') is None
1130 else self.params['merge_output_format'])
1132 'requested_formats': formats_info,
1133 'format': '%s+%s' % (formats_info[0].get('format'),
1134 formats_info[1].get('format')),
1135 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1136 formats_info[1].get('format_id')),
1137 'width': formats_info[0].get('width'),
1138 'height': formats_info[0].get('height'),
1139 'resolution': formats_info[0].get('resolution'),
1140 'fps': formats_info[0].get('fps'),
1141 'vcodec': formats_info[0].get('vcodec'),
1142 'vbr': formats_info[0].get('vbr'),
1143 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1144 'acodec': formats_info[1].get('acodec'),
1145 'abr': formats_info[1].get('abr'),
1148 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1150 def selector_function(formats):
1151 formats = list(formats)
1152 for pair in itertools.product(video_selector(formats), audio_selector(formats)):
# Any attached "[...]" filters are applied before the selector runs.
1155 filters = [self._build_format_filter(f) for f in selector.filters]
1157 def final_selector(formats):
1158 for _filter in filters:
1159 formats = list(filter(_filter, formats))
1160 return selector_function(formats)
1161 return final_selector
# Entry point of the parse: tokenize the user's format_spec string, wrap the
# token list in a one-token-pushback iterator, parse, then compile.
# NOTE(review): the `try:` before tokenization and several TokenIterator
# methods (__iter__/__next__ scaffolding, counter bookkeeping, the body of
# restore_last_token) are elided from this extract.
1163 stream = io.BytesIO(format_spec.encode('utf-8'))
1165 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1166 except tokenize.TokenError:
1167 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
# Iterator with one-token lookback so the recursive parser can "un-read"
# a delimiter token that belongs to the caller's level.
1169 class TokenIterator(object):
1170 def __init__(self, tokens):
1171 self.tokens = tokens
1178 if self.counter >= len(self.tokens):
1179 raise StopIteration()
1180 value = self.tokens[self.counter]
1186 def restore_last_token(self):
1189 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1190 return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use when downloading info_dict['url'].

    Starts from the global std_headers defaults, overlays any
    extractor-supplied 'http_headers', and attaches the cookies that
    apply to the URL (via _calc_cookies) as a Cookie header.
    """
    res = std_headers.copy()

    # Extractor-provided headers take precedence over the defaults.
    add_headers = info_dict.get('http_headers')
    if add_headers:
        res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    if cookies:
        res['Cookie'] = cookies

    return res
def _calc_cookies(self, info_dict):
    """Return the Cookie header value applying to info_dict['url'], or None."""
    # Build a throwaway request for the URL and let the cookie jar attach
    # whatever cookies match, then read the resulting header back out.
    pr = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(pr)
    return pr.get_header('Cookie')
# Post-extraction normalisation of a single video result: validates required
# fields, sanitises thumbnails/subtitles/format entries, selects the
# format(s) to download per the user's format spec, and hands each off to
# process_info.
# NOTE(review): this extract has fused line numbers and elided lines
# (e.g. 1226 `if thumbnail:`, 1289 `else:`, 1291-1292 format emptiness check,
# 1366-1368 the expected=True kwargs of the ExtractorError, 1377-1378
# `return info_dict`) -- recover pristine source before editing logic.
1210 def process_video_result(self, info_dict, download=True):
1211 assert info_dict.get('_type', 'video') == 'video'
1213 if 'id' not in info_dict:
1214 raise ExtractorError('Missing "id" field in extractor result')
1215 if 'title' not in info_dict:
1216 raise ExtractorError('Missing "title" field in extractor result')
1218 if 'playlist' not in info_dict:
1219 # It isn't part of a playlist
1220 info_dict['playlist'] = None
1221 info_dict['playlist_index'] = None
# --- Thumbnails: normalise to a sorted, sanitised list ---
1223 thumbnails = info_dict.get('thumbnails')
1224 if thumbnails is None:
1225 thumbnail = info_dict.get('thumbnail')
1227 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1229 thumbnails.sort(key=lambda t: (
1230 t.get('preference'), t.get('width'), t.get('height'),
1231 t.get('id'), t.get('url')))
1232 for i, t in enumerate(thumbnails):
1233 t['url'] = sanitize_url(t['url'])
1234 if t.get('width') and t.get('height'):
1235 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1236 if t.get('id') is None:
1239 if self.params.get('list_thumbnails'):
1240 self.list_thumbnails(info_dict)
1243 thumbnail = info_dict.get('thumbnail')
1245 info_dict['thumbnail'] = sanitize_url(thumbnail)
# Fall back to the best (last, after sorting) thumbnail URL.
1247 info_dict['thumbnail'] = thumbnails[-1]['url']
1249 if 'display_id' not in info_dict and 'id' in info_dict:
1250 info_dict['display_id'] = info_dict['id']
# --- Derive upload_date from timestamp when absent ---
1252 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1253 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1254 # see http://bugs.python.org/issue1646728)
1256 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1257 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1258 except (ValueError, OverflowError, OSError):
1261 # Auto generate title fields corresponding to the *_number fields when missing
1262 # in order to always have clean titles. This is very common for TV series.
1263 for field in ('chapter', 'season', 'episode'):
1264 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1265 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
# --- Subtitles: sanitise URLs/exts, list or select per user options ---
1267 subtitles = info_dict.get('subtitles')
1269 for _, subtitle in subtitles.items():
1270 for subtitle_format in subtitle:
1271 if subtitle_format.get('url'):
1272 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1273 if 'ext' not in subtitle_format:
1274 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1276 if self.params.get('listsubtitles', False):
1277 if 'automatic_captions' in info_dict:
1278 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1279 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1281 info_dict['requested_subtitles'] = self.process_subtitles(
1282 info_dict['id'], subtitles,
1283 info_dict.get('automatic_captions'))
1285 # We now pick which formats have to be downloaded
1286 if info_dict.get('formats') is None:
1287 # There's only one format available
1288 formats = [info_dict]
1290 formats = info_dict['formats']
1293 raise ExtractorError('No video formats found!')
1297 # We check that all the formats have the format and format_id fields
1298 for i, format in enumerate(formats):
1299 if 'url' not in format:
1300 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1302 format['url'] = sanitize_url(format['url'])
1304 if format.get('format_id') is None:
1305 format['format_id'] = compat_str(i)
1307 # Sanitize format_id from characters used in format selector expression
1308 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1309 format_id = format['format_id']
1310 if format_id not in formats_dict:
1311 formats_dict[format_id] = []
1312 formats_dict[format_id].append(format)
1314 # Make sure all formats have unique format_id
1315 for format_id, ambiguous_formats in formats_dict.items():
1316 if len(ambiguous_formats) > 1:
1317 for i, format in enumerate(ambiguous_formats):
1318 format['format_id'] = '%s-%d' % (format_id, i)
1320 for i, format in enumerate(formats):
1321 if format.get('format') is None:
1322 format['format'] = '{id} - {res}{note}'.format(
1323 id=format['format_id'],
1324 res=self.format_resolution(format),
1325 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1327 # Automatically determine file extension if missing
1328 if 'ext' not in format:
1329 format['ext'] = determine_ext(format['url']).lower()
1330 # Automatically determine protocol if missing (useful for format
1331 # selection purposes)
1332 if 'protocol' not in format:
1333 format['protocol'] = determine_protocol(format)
1334 # Add HTTP headers, so that external programs can use them from the
1336 full_format_info = info_dict.copy()
1337 full_format_info.update(format)
1338 format['http_headers'] = self._calc_headers(full_format_info)
1340 # TODO Central sorting goes here
1342 if formats[0] is not info_dict:
1343 # only set the 'formats' fields if the original info_dict list them
1344 # otherwise we end up with a circular reference, the first (and unique)
1345 # element in the 'formats' field in info_dict is info_dict itself,
1346 # which can't be exported to json
1347 info_dict['formats'] = formats
1348 if self.params.get('listformats'):
1349 self.list_formats(info_dict)
# --- Format selection: default spec prefers bestvideo+bestaudio when a
# merger (ffmpeg/avconv) is usable and output is not stdout/live ---
1352 req_format = self.params.get('format')
1353 if req_format is None:
1354 req_format_list = []
1355 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1356 not info_dict.get('is_live')):
1357 merger = FFmpegMergerPP(self)
1358 if merger.available and merger.can_merge():
1359 req_format_list.append('bestvideo+bestaudio')
1360 req_format_list.append('best')
1361 req_format = '/'.join(req_format_list)
1362 format_selector = self.build_format_selector(req_format)
1363 formats_to_download = list(format_selector(formats))
1364 if not formats_to_download:
1365 raise ExtractorError('requested format not available',
1369 if len(formats_to_download) > 1:
1370 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1371 for format in formats_to_download:
1372 new_info = dict(info_dict)
1373 new_info.update(format)
1374 self.process_info(new_info)
1375 # We update the info dict with the best quality format (backwards compatibility)
1376 info_dict.update(formats_to_download[-1])
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    Merges normal subtitles and automatic captions (normal ones win per
    language), picks the requested languages, and for each picks one
    format according to the 'subtitlesformat' preference list.
    Returns a dict lang -> chosen subtitle format dict, or None when no
    subtitles were requested or none are available.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Automatic captions never override real subtitles.
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if (not self.params.get('writesubtitles') and not
            self.params.get('writeautomaticsub') or not
            available_subs):
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    else:
        if self.params.get('subtitleslangs', False):
            requested_langs = self.params.get('subtitleslangs')
        elif 'en' in available_subs:
            requested_langs = ['en']
        else:
            requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        # Formats are assumed sorted worst-to-best, so [-1] is best.
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            if matches:
                f = matches[-1]
                break
        else:
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
# Performs the actual download (and pre/post bookkeeping) for one fully
# resolved video result: enforces max-downloads, prints forced fields,
# writes description/annotations/subtitles/info-json/thumbnails, downloads
# the media (merging video+audio when requested), applies fixups, runs
# postprocessors and records the archive entry.
# NOTE(review): this extract has fused line numbers and many elided lines
# (returns after early exits, `try:` lines, `else:` arms, whole statements
# such as the `dl` helper's opening lines) -- recover pristine source
# before editing logic.
1428 def process_info(self, info_dict):
1429 """Process a single resolved IE result."""
1431 assert info_dict.get('_type', 'video') == 'video'
1433 max_downloads = self.params.get('max_downloads')
1434 if max_downloads is not None:
1435 if self._num_downloads >= int(max_downloads):
1436 raise MaxDownloadsReached()
1438 info_dict['fulltitle'] = info_dict['title']
1439 if len(info_dict['title']) > 200:
1440 info_dict['title'] = info_dict['title'][:197] + '...'
1442 if 'format' not in info_dict:
1443 info_dict['format'] = info_dict['ext']
1445 reason = self._match_entry(info_dict, incomplete=False)
1446 if reason is not None:
1447 self.to_screen('[download] ' + reason)
1450 self._num_downloads += 1
1452 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# --- Forced printings (the --print-style options) ---
1455 if self.params.get('forcetitle', False):
1456 self.to_stdout(info_dict['fulltitle'])
1457 if self.params.get('forceid', False):
1458 self.to_stdout(info_dict['id'])
1459 if self.params.get('forceurl', False):
1460 if info_dict.get('requested_formats') is not None:
1461 for f in info_dict['requested_formats']:
1462 self.to_stdout(f['url'] + f.get('play_path', ''))
1464 # For RTMP URLs, also include the playpath
1465 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1466 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1467 self.to_stdout(info_dict['thumbnail'])
1468 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1469 self.to_stdout(info_dict['description'])
1470 if self.params.get('forcefilename', False) and filename is not None:
1471 self.to_stdout(filename)
1472 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1473 self.to_stdout(formatSeconds(info_dict['duration']))
1474 if self.params.get('forceformat', False):
1475 self.to_stdout(info_dict['format'])
1476 if self.params.get('forcejson', False):
1477 self.to_stdout(json.dumps(info_dict))
1479 # Do nothing else if in simulate mode
1480 if self.params.get('simulate', False):
1483 if filename is None:
# Ensure the target directory exists before writing anything into it.
1487 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1488 if dn and not os.path.exists(dn):
1490 except (OSError, IOError) as err:
1491 self.report_error('unable to create directory ' + error_to_compat_str(err))
# --- Side files: description, annotations ---
1494 if self.params.get('writedescription', False):
1495 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1496 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1497 self.to_screen('[info] Video description is already present')
1498 elif info_dict.get('description') is None:
1499 self.report_warning('There\'s no description to write.')
1502 self.to_screen('[info] Writing video description to: ' + descfn)
1503 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1504 descfile.write(info_dict['description'])
1505 except (OSError, IOError):
1506 self.report_error('Cannot write description file ' + descfn)
1509 if self.params.get('writeannotations', False):
1510 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1511 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1512 self.to_screen('[info] Video annotations are already present')
1515 self.to_screen('[info] Writing video annotations to: ' + annofn)
1516 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1517 annofile.write(info_dict['annotations'])
1518 except (KeyError, TypeError):
1519 self.report_warning('There are no annotations to write.')
1520 except (OSError, IOError):
1521 self.report_error('Cannot write annotations file: ' + annofn)
# --- Subtitles: download/write the requested subtitle files ---
1524 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1525 self.params.get('writeautomaticsub')])
1527 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1528 # subtitles download errors are already managed as troubles in relevant IE
1529 # that way it will silently go on when used with unsupporting IE
1530 subtitles = info_dict['requested_subtitles']
1531 ie = self.get_info_extractor(info_dict['extractor_key'])
1532 for sub_lang, sub_info in subtitles.items():
1533 sub_format = sub_info['ext']
1534 if sub_info.get('data') is not None:
1535 sub_data = sub_info['data']
1538 sub_data = ie._download_webpage(
1539 sub_info['url'], info_dict['id'], note=False)
1540 except ExtractorError as err:
1541 self.report_warning('Unable to download subtitle for "%s": %s' %
1542 (sub_lang, error_to_compat_str(err.cause)))
1545 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1546 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1547 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1549 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1550 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1551 subfile.write(sub_data)
1552 except (OSError, IOError):
1553 self.report_error('Cannot write subtitles file ' + sub_filename)
# --- Info JSON and thumbnails ---
1556 if self.params.get('writeinfojson', False):
1557 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1558 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1559 self.to_screen('[info] Video description metadata is already present')
1561 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1563 write_json_file(self.filter_requested_info(info_dict), infofn)
1564 except (OSError, IOError):
1565 self.report_error('Cannot write metadata to JSON file ' + infofn)
1568 self._write_thumbnails(info_dict, filename)
# --- Actual media download (with optional video+audio merge) ---
1570 if not self.params.get('skip_download', False):
# Local helper: run the suitable FileDownloader with progress hooks.
1573 fd = get_suitable_downloader(info, self.params)(self, self.params)
1574 for ph in self._progress_hooks:
1575 fd.add_progress_hook(ph)
1576 if self.params.get('verbose'):
1577 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1578 return fd.download(name, info)
1580 if info_dict.get('requested_formats') is not None:
1583 merger = FFmpegMergerPP(self)
1584 if not merger.available:
1586 self.report_warning('You have requested multiple '
1587 'formats but ffmpeg or avconv are not installed.'
1588 ' The formats won\'t be merged.')
1590 postprocessors = [merger]
# Decide whether the two requested formats can share a container.
1592 def compatible_formats(formats):
1593 video, audio = formats
1595 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1596 if video_ext and audio_ext:
1598 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1601 for exts in COMPATIBLE_EXTS:
1602 if video_ext in exts and audio_ext in exts:
1604 # TODO: Check acodec/vcodec
1607 filename_real_ext = os.path.splitext(filename)[1][1:]
1609 os.path.splitext(filename)[0]
1610 if filename_real_ext == info_dict['ext']
1612 requested_formats = info_dict['requested_formats']
1613 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1614 info_dict['ext'] = 'mkv'
1615 self.report_warning(
1616 'Requested formats are incompatible for merge and will be merged into mkv.')
1617 # Ensure filename always has a correct extension for successful merge
1618 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1619 if os.path.exists(encodeFilename(filename)):
1621 '[download] %s has already been downloaded and '
1622 'merged' % filename)
# Download each requested format to its own 'f<format_id>' file.
1624 for f in requested_formats:
1625 new_info = dict(info_dict)
1627 fname = self.prepare_filename(new_info)
1628 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1629 downloaded.append(fname)
1630 partial_success = dl(fname, new_info)
1631 success = success and partial_success
1632 info_dict['__postprocessors'] = postprocessors
1633 info_dict['__files_to_merge'] = downloaded
1635 # Just a single file
1636 success = dl(filename, info_dict)
1637 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1638 self.report_error('unable to download video data: %s' % str(err))
1640 except (OSError, IOError) as err:
1641 raise UnavailableVideoError(err)
1642 except (ContentTooShortError, ) as err:
1643 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# --- Post-download fixups (stretched ratio, DASH m4a, HLS aac) ---
1646 if success and filename != '-':
1648 fixup_policy = self.params.get('fixup')
1649 if fixup_policy is None:
1650 fixup_policy = 'detect_or_warn'
1652 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1654 stretched_ratio = info_dict.get('stretched_ratio')
1655 if stretched_ratio is not None and stretched_ratio != 1:
1656 if fixup_policy == 'warn':
1657 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1658 info_dict['id'], stretched_ratio))
1659 elif fixup_policy == 'detect_or_warn':
1660 stretched_pp = FFmpegFixupStretchedPP(self)
1661 if stretched_pp.available:
1662 info_dict.setdefault('__postprocessors', [])
1663 info_dict['__postprocessors'].append(stretched_pp)
1665 self.report_warning(
1666 '%s: Non-uniform pixel ratio (%s). %s'
1667 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1669 assert fixup_policy in ('ignore', 'never')
1671 if (info_dict.get('requested_formats') is None and
1672 info_dict.get('container') == 'm4a_dash'):
1673 if fixup_policy == 'warn':
1674 self.report_warning(
1675 '%s: writing DASH m4a. '
1676 'Only some players support this container.'
1678 elif fixup_policy == 'detect_or_warn':
1679 fixup_pp = FFmpegFixupM4aPP(self)
1680 if fixup_pp.available:
1681 info_dict.setdefault('__postprocessors', [])
1682 info_dict['__postprocessors'].append(fixup_pp)
1684 self.report_warning(
1685 '%s: writing DASH m4a. '
1686 'Only some players support this container. %s'
1687 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1689 assert fixup_policy in ('ignore', 'never')
1691 if (info_dict.get('protocol') == 'm3u8_native' or
1692 info_dict.get('protocol') == 'm3u8' and
1693 self.params.get('hls_prefer_native')):
1694 if fixup_policy == 'warn':
1695 self.report_warning('%s: malformated aac bitstream.' % (
1697 elif fixup_policy == 'detect_or_warn':
1698 fixup_pp = FFmpegFixupM3u8PP(self)
1699 if fixup_pp.available:
1700 info_dict.setdefault('__postprocessors', [])
1701 info_dict['__postprocessors'].append(fixup_pp)
1703 self.report_warning(
1704 '%s: malformated aac bitstream. %s'
1705 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1707 assert fixup_policy in ('ignore', 'never')
1710 self.post_process(filename, info_dict)
1711 except (PostProcessingError) as err:
1712 self.report_error('postprocessing: %s' % str(err))
1714 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Refuses to run when several URLs would all be written to the same
    fixed output file. Returns the accumulated download return code.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1 and
            '%' not in outtmpl and
            self.params.get('max_downloads') != 1):
        # A template without any substitution would overwrite itself.
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using a previously written .info.json file.

    Loads the JSON metadata from info_filename and processes it as an
    already-extracted result; on DownloadError, falls back to a fresh
    extraction of the recorded webpage_url when one is present.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(json.loads('\n'.join(f)))
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
@staticmethod
def filter_requested_info(info_dict):
    """Return a copy of info_dict without the per-download bookkeeping
    keys that must not be serialized (they may hold circular or
    non-JSON-safe structures)."""
    return dict(
        (k, v) for k, v in info_dict.items()
        if k not in ['requested_formats', 'requested_subtitles'])
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Chains the per-download postprocessors stored on the info dict
    (e.g. the merger/fixups) before the user-configured ones, threading
    the possibly-updated info dict from one to the next and deleting
    intermediate files a postprocessor marks as obsolete (unless -k).
    """
    info = dict(ie_info)
    info['filepath'] = filename
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        files_to_delete = []
        try:
            files_to_delete, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if files_to_delete and not self.params.get('keepvideo', False):
            for old_filename in files_to_delete:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                try:
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
    """Return the download-archive key '<extractor> <id>' for info_dict,
    or None when the information is too incomplete to build one."""
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        if 'id' in info_dict:
            extractor = info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
            return None  # Incomplete video information
    return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True when info_dict's archive id is already recorded in the
    download-archive file; False when no archive is configured, the video
    information is incomplete, or the entry is absent."""
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download-archive file.

    No-op when no archive file is configured.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Prefers an explicit 'resolution', then 'WxH', then 'Hp'/'Wx?' when
    only one dimension is known; audio-only formats get 'audio only'.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '%dx?' % format['width']
    else:
        res = default
    return res
# Builds the free-text "note" column shown by --list-formats for one format
# dict: language, format_note, total bitrate, container, codecs with their
# bitrates/fps/sample rate, and the (approximate) file size.
# NOTE(review): this extract has fused line numbers and elided lines
# (`res = ''` init, several `if res:` separator guards, `else:` arms and the
# final `return res`) -- recover pristine source before editing logic.
1841 def _format_note(self, fdict):
1843 if fdict.get('ext') in ['f4f', 'f4m']:
1844 res += '(unsupported) '
1845 if fdict.get('language'):
1848 res += '[%s] ' % fdict['language']
1849 if fdict.get('format_note') is not None:
1850 res += fdict['format_note'] + ' '
1851 if fdict.get('tbr') is not None:
1852 res += '%4dk ' % fdict['tbr']
1853 if fdict.get('container') is not None:
1856 res += '%s container' % fdict['container']
1857 if (fdict.get('vcodec') is not None and
1858 fdict.get('vcodec') != 'none'):
1861 res += fdict['vcodec']
1862 if fdict.get('vbr') is not None:
# The '@' separator between codec and bitrate is appended here
# (elided line), then again for the bitrate-only case below.
1864 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1866 if fdict.get('vbr') is not None:
1867 res += '%4dk' % fdict['vbr']
1868 if fdict.get('fps') is not None:
1871 res += '%sfps' % fdict['fps']
1872 if fdict.get('acodec') is not None:
1875 if fdict['acodec'] == 'none':
1878 res += '%-5s' % fdict['acodec']
1879 elif fdict.get('abr') is not None:
1883 if fdict.get('abr') is not None:
1884 res += '@%3dk' % fdict['abr']
1885 if fdict.get('asr') is not None:
1886 res += ' (%5dHz)' % fdict['asr']
1887 if fdict.get('filesize') is not None:
1890 res += format_bytes(fdict['filesize'])
1891 elif fdict.get('filesize_approx') is not None:
1894 res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of the available formats for info_dict's video."""
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        # Hide formats de-prioritised below the listing threshold.
        if f.get('preference') is None or f['preference'] >= -1000]
    if len(formats) > 1:
        # Formats are sorted worst-to-best, so the last row is the best.
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
    """Print a table of the available thumbnails for info_dict's video."""
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the available subtitle languages and formats for video_id."""
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    self.to_screen(render_table(
        ['Language', 'formats'],
        # Formats are stored worst-to-best; list best first.
        [[lang, ', '.join(f['ext'] for f in reversed(formats))]
            for lang, formats in subtitles.items()]))
def urlopen(self, req):
    """ Start an HTTP download """
    # Accept both plain URL strings and prepared Request objects.
    if isinstance(req, compat_basestring):
        req = sanitized_Request(req)
    return self._opener.open(req, timeout=self._socket_timeout)
# In verbose mode, prints diagnostic information: encodings, version,
# git revision (when running from a checkout), Python/platform, external
# helper versions, proxy map, and optionally the public IP / latest-version
# check when call_home is enabled.
# NOTE(review): this extract has fused line numbers and elided lines
# (the early `return`, `try:` lines around the git probe, `except` arms,
# the exe_versions formatting pieces, `proxy_map = {}` init) -- recover
# pristine source before editing logic.
1940 def print_debug_header(self):
1941 if not self.params.get('verbose'):
1944 if type('') is not compat_str:
1945 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1946 self.report_warning(
1947 'Your Python is broken! Update to a newer and supported version')
1949 stdout_encoding = getattr(
1950 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1952 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1953 locale.getpreferredencoding(),
1954 sys.getfilesystemencoding(),
1956 self.get_encoding()))
1957 write_string(encoding_str, encoding=None)
1959 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort git revision probe; failures are presumably swallowed by
# the elided try/except around this call.
1961 sp = subprocess.Popen(
1962 ['git', 'rev-parse', '--short', 'HEAD'],
1963 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1964 cwd=os.path.dirname(os.path.abspath(__file__)))
1965 out, err = sp.communicate()
1966 out = out.decode().strip()
1967 if re.match('[0-9a-f]+', out):
1968 self._write_string('[debug] Git HEAD: ' + out + '\n')
1974 self._write_string('[debug] Python version %s - %s\n' % (
1975 platform.python_version(), platform_name()))
1977 exe_versions = FFmpegPostProcessor.get_versions(self)
1978 exe_versions['rtmpdump'] = rtmpdump_version()
1979 exe_str = ', '.join(
1981 for exe, v in sorted(exe_versions.items())
1986 self._write_string('[debug] exe versions: %s\n' % exe_str)
1989 for handler in self._opener.handlers:
1990 if hasattr(handler, 'proxies'):
1991 proxy_map.update(handler.proxies)
1992 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# Optional phone-home: report public IP and warn on outdated version.
1994 if self.params.get('call_home', False):
1995 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1996 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1997 latest_version = self.urlopen(
1998 'https://yt-dl.org/latest/version').read().decode('utf-8')
1999 if version_tuple(latest_version) > version_tuple(__version__):
2000 self.report_warning(
2001 'You are using an outdated version (newest version: %s)! '
2002 'See https://yt-dl.org/update if you need help updating.' %
2005 def _setup_opener(self):
2006 timeout_val = self.params.get('socket_timeout')
2007 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2009 opts_cookiefile = self.params.get('cookiefile')
2010 opts_proxy = self.params.get('proxy')
2012 if opts_cookiefile is None:
2013 self.cookiejar = compat_cookiejar.CookieJar()
2015 self.cookiejar = compat_cookiejar.MozillaCookieJar(
2017 if os.access(opts_cookiefile, os.R_OK):
2018 self.cookiejar.load()
2020 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2021 if opts_proxy is not None:
2022 if opts_proxy == '':
2025 proxies = {'http': opts_proxy, 'https': opts_proxy}
2027 proxies = compat_urllib_request.getproxies()
2028 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2029 if 'http' in proxies and 'https' not in proxies:
2030 proxies['https'] = proxies['http']
2031 proxy_handler = PerRequestProxyHandler(proxies)
2033 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2034 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2035 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2036 data_handler = compat_urllib_request_DataHandler()
2038 # When passing our own FileHandler instance, build_opener won't add the
2039 # default FileHandler and allows us to disable the file protocol, which
2040 # can be used for malicious purposes (see
2041 # https://github.com/rg3/youtube-dl/issues/8227)
2042 file_handler = compat_urllib_request.FileHandler()
2044 def file_open(*args, **kwargs):
2045 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2046 file_handler.file_open = file_open
2048 opener = compat_urllib_request.build_opener(
2049 proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2051 # Delete the default user-agent header, which would otherwise apply in
2052 # cases where our custom HTTP handler doesn't come into play
2053 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2054 opener.addheaders = []
2055 self._opener = opener
2057 def encode(self, s):
2058 if isinstance(s, bytes):
2059 return s # Already encoded
2062 return s.encode(self.get_encoding())
2063 except UnicodeEncodeError as err:
2064 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2067 def get_encoding(self):
2068 encoding = self.params.get('encoding')
2069 if encoding is None:
2070 encoding = preferredencoding()
2073 def _write_thumbnails(self, info_dict, filename):
2074 if self.params.get('writethumbnail', False):
2075 thumbnails = info_dict.get('thumbnails')
2077 thumbnails = [thumbnails[-1]]
2078 elif self.params.get('write_all_thumbnails', False):
2079 thumbnails = info_dict.get('thumbnails')
2084 # No thumbnails present, so return immediately
2087 for t in thumbnails:
2088 thumb_ext = determine_ext(t['url'], 'jpg')
2089 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2090 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2091 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2093 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2094 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2095 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2097 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2098 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2100 uf = self.urlopen(t['url'])
2101 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2102 shutil.copyfileobj(uf, thumbf)
2103 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2104 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2105 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2106 self.report_warning('Unable to download thumbnail "%s": %s' %
2107 (t['url'], error_to_compat_str(err)))