2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
33 compat_urllib_request,
59 UnavailableVideoError,
67 from .cache import Cache
68 from .extractor import get_info_extractor, gen_extractors
69 from .downloader import get_suitable_downloader
70 from .downloader.rtmp import rtmpdump_version
71 from .postprocessor import (
76 from .version import __version__
79 class YoutubeDL(object):
82 YoutubeDL objects are the ones responsible for downloading the
83 actual video file and writing it to disk if the user has requested
84 it, among some other tasks. In most cases there should be one per
85 program. As, given a video URL, the downloader doesn't know how to
86 extract all the needed information, a task that InfoExtractors do, it
87 has to pass the URL to one of them.
89 For this, YoutubeDL objects have a method that allows
90 InfoExtractors to be registered in a given order. When it is passed
91 a URL, the YoutubeDL object hands it to the first InfoExtractor it
92 finds that reports being able to handle it. The InfoExtractor extracts
93 all the information about the video or videos the URL refers to, and
94 YoutubeDL processes the extracted information, possibly using a File
95 Downloader to download the video.
97 YoutubeDL objects accept a lot of parameters. In order not to saturate
98 the object constructor with arguments, it receives a dictionary of
99 options instead. These options are available through the params
100 attribute for the InfoExtractors to use. The YoutubeDL also
101 registers itself as the downloader in charge for the InfoExtractors
102 that are added to it, so this is a "mutual registration".
106 username: Username for authentication purposes.
107 password: Password for authentication purposes.
108 videopassword: Password for accessing a video.
109 usenetrc: Use netrc for authentication instead.
110 verbose: Print additional info to stdout.
111 quiet: Do not print messages to stdout.
112 no_warnings: Do not print out anything for warnings.
113 forceurl: Force printing final URL.
114 forcetitle: Force printing title.
115 forceid: Force printing ID.
116 forcethumbnail: Force printing thumbnail URL.
117 forcedescription: Force printing description.
118 forcefilename: Force printing final filename.
119 forceduration: Force printing duration.
120 forcejson: Force printing info_dict as JSON.
121 dump_single_json: Force printing the info_dict of the whole playlist
122 (or video) as a single JSON line.
123 simulate: Do not download the video files.
124 format: Video format code.
125 format_limit: Highest quality format to try.
126 outtmpl: Template for output names.
127 restrictfilenames: Do not allow "&" and spaces in file names
128 ignoreerrors: Do not stop on download errors.
129 nooverwrites: Prevent overwriting files.
130 playliststart: Playlist item to start at.
131 playlistend: Playlist item to end at.
132 playlistreverse: Download playlist items in reverse order.
133 matchtitle: Download only matching titles.
134 rejecttitle: Reject downloads for matching titles.
135 logger: Log messages to a logging.Logger instance.
136 logtostderr: Log messages to stderr instead of stdout.
137 writedescription: Write the video description to a .description file
138 writeinfojson: Write the video description to a .info.json file
139 writeannotations: Write the video annotations to a .annotations.xml file
140 writethumbnail: Write the thumbnail image to a file
141 writesubtitles: Write the video subtitles to a file
142 writeautomaticsub: Write the automatic subtitles to a file
143 allsubtitles: Downloads all the subtitles of the video
144 (requires writesubtitles or writeautomaticsub)
145 listsubtitles: Lists all available subtitles for the video
146 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
147 subtitleslangs: List of languages of the subtitles to download
148 keepvideo: Keep the video file after post-processing
149 daterange: A DateRange object, download only if the upload_date is in the range.
150 skip_download: Skip the actual download of the video file
151 cachedir: Location of the cache files in the filesystem.
152 False to disable filesystem cache.
153 noplaylist: Download single video instead of a playlist if in doubt.
154 age_limit: An integer representing the user's age in years.
155 Unsuitable videos for the given age are skipped.
156 min_views: An integer representing the minimum view count the video
157 must have in order to not be skipped.
158 Videos without view count information are always
159 downloaded. None for no limit.
160 max_views: An integer representing the maximum view count.
161 Videos that are more popular than that are not
163 Videos without view count information are always
164 downloaded. None for no limit.
165 download_archive: File name of a file where all downloads are recorded.
166 Videos already present in the file are not downloaded
168 cookiefile: File name where cookies should be read from and dumped to.
169 nocheckcertificate:Do not verify SSL certificates
170 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
171 At the moment, this is only supported by YouTube.
172 proxy: URL of the proxy server to use
173 socket_timeout: Time to wait for unresponsive hosts, in seconds
174 bidi_workaround: Work around buggy terminals without bidirectional text
175 support, using fribidi
176 debug_printtraffic:Print out sent and received HTTP traffic
177 include_ads: Download ads as well
178 default_search: Prepend this string if an input url is not valid.
179 'auto' for elaborate guessing
180 encoding: Use this encoding instead of the system-specified.
181 extract_flat: Do not resolve URLs, return the immediate result.
182 Pass in 'in_playlist' to only show this behavior for
184 postprocessors: A list of dictionaries, each with an entry
185 key: The name of the postprocessor. See
186 youtube_dl/postprocessor/__init__.py for a list.
187 as well as any further keyword arguments for the
190 The following parameters are not used by YoutubeDL itself, they are used by
192 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
193 noresizebuffer, retries, continuedl, noprogress, consoletitle
195 The following options are used by the post processors:
196 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
197 otherwise prefer avconv.
198 exec_cmd: Arbitrary command to run after downloading
204 _download_retcode = None
205 _num_downloads = None
208 def __init__(self, params=None, auto_init=True):
209 """Create a FileDownloader object with the given options."""
213 self._ies_instances = {}
215 self._progress_hooks = []
216 self._download_retcode = 0
217 self._num_downloads = 0
218 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
219 self._err_file = sys.stderr
221 self.cache = Cache(self)
223 if params.get('bidi_workaround', False):
226 master, slave = pty.openpty()
227 width = get_term_width()
231 width_args = ['-w', str(width)]
233 stdin=subprocess.PIPE,
235 stderr=self._err_file)
237 self._output_process = subprocess.Popen(
238 ['bidiv'] + width_args, **sp_kwargs
241 self._output_process = subprocess.Popen(
242 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
243 self._output_channel = os.fdopen(master, 'rb')
244 except OSError as ose:
246 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
250 if (sys.version_info >= (3,) and sys.platform != 'win32' and
251 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
252 and not params.get('restrictfilenames', False)):
253 # On Python 3, the Unicode filesystem API will throw errors (#1474)
255 'Assuming --restrict-filenames since file system encoding '
256 'cannot encode all characters. '
257 'Set the LC_ALL environment variable to fix this.')
258 self.params['restrictfilenames'] = True
260 if '%(stitle)s' in self.params.get('outtmpl', ''):
261 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
266 self.print_debug_header()
267 self.add_default_info_extractors()
269 for pp_def_raw in self.params.get('postprocessors', []):
270 pp_class = get_postprocessor(pp_def_raw['key'])
271 pp_def = dict(pp_def_raw)
273 pp = pp_class(self, **compat_kwargs(pp_def))
274 self.add_post_processor(pp)
276 def warn_if_short_id(self, argv):
277 # short YouTube ID starting with dash?
279 i for i, a in enumerate(argv)
280 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
284 [a for i, a in enumerate(argv) if i not in idxs] +
285 ['--'] + [argv[i] for i in idxs]
288 'Long argument string detected. '
289 'Use -- to separate parameters and URLs, like this:\n%s\n' %
290 args_to_str(correct_argv))
292 def add_info_extractor(self, ie):
293 """Add an InfoExtractor object to the end of the list."""
295 self._ies_instances[ie.ie_key()] = ie
296 ie.set_downloader(self)
298 def get_info_extractor(self, ie_key):
300 Get an instance of an IE with name ie_key, it will try to get one from
301 the _ies list, if there's no instance it will create a new one and add
302 it to the extractor list.
304 ie = self._ies_instances.get(ie_key)
306 ie = get_info_extractor(ie_key)()
307 self.add_info_extractor(ie)
310 def add_default_info_extractors(self):
312 Add the InfoExtractors returned by gen_extractors to the end of the list
314 for ie in gen_extractors():
315 self.add_info_extractor(ie)
317 def add_post_processor(self, pp):
318 """Add a PostProcessor object to the end of the chain."""
320 pp.set_downloader(self)
322 def add_progress_hook(self, ph):
323 """Add the progress hook (currently only for the file downloader)"""
324 self._progress_hooks.append(ph)
326 def _bidi_workaround(self, message):
327 if not hasattr(self, '_output_channel'):
330 assert hasattr(self, '_output_process')
331 assert isinstance(message, compat_str)
332 line_count = message.count('\n') + 1
333 self._output_process.stdin.write((message + '\n').encode('utf-8'))
334 self._output_process.stdin.flush()
335 res = ''.join(self._output_channel.readline().decode('utf-8')
336 for _ in range(line_count))
337 return res[:-len('\n')]
339 def to_screen(self, message, skip_eol=False):
340 """Print message to stdout if not in quiet mode."""
341 return self.to_stdout(message, skip_eol, check_quiet=True)
343 def _write_string(self, s, out=None):
344 write_string(s, out=out, encoding=self.params.get('encoding'))
346 def to_stdout(self, message, skip_eol=False, check_quiet=False):
347 """Print message to stdout if not in quiet mode."""
348 if self.params.get('logger'):
349 self.params['logger'].debug(message)
350 elif not check_quiet or not self.params.get('quiet', False):
351 message = self._bidi_workaround(message)
352 terminator = ['\n', ''][skip_eol]
353 output = message + terminator
355 self._write_string(output, self._screen_file)
357 def to_stderr(self, message):
358 """Print message to stderr."""
359 assert isinstance(message, compat_str)
360 if self.params.get('logger'):
361 self.params['logger'].error(message)
363 message = self._bidi_workaround(message)
364 output = message + '\n'
365 self._write_string(output, self._err_file)
367 def to_console_title(self, message):
368 if not self.params.get('consoletitle', False):
370 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
371 # c_wchar_p() might not be necessary if `message` is
372 # already of type unicode()
373 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
374 elif 'TERM' in os.environ:
375 self._write_string('\033]0;%s\007' % message, self._screen_file)
377 def save_console_title(self):
378 if not self.params.get('consoletitle', False):
380 if 'TERM' in os.environ:
381 # Save the title on stack
382 self._write_string('\033[22;0t', self._screen_file)
384 def restore_console_title(self):
385 if not self.params.get('consoletitle', False):
387 if 'TERM' in os.environ:
388 # Restore the title from stack
389 self._write_string('\033[23;0t', self._screen_file)
392 self.save_console_title()
395 def __exit__(self, *args):
396 self.restore_console_title()
398 if self.params.get('cookiefile') is not None:
399 self.cookiejar.save()
401 def trouble(self, message=None, tb=None):
402 """Determine action to take when a download problem appears.
404 Depending on if the downloader has been configured to ignore
405 download errors or not, this method may throw an exception or
406 not when errors are found, after printing the message.
408 tb, if given, is additional traceback information.
410 if message is not None:
411 self.to_stderr(message)
412 if self.params.get('verbose'):
414 if sys.exc_info()[0]: # if .trouble has been called from an except block
416 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
417 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
418 tb += compat_str(traceback.format_exc())
420 tb_data = traceback.format_list(traceback.extract_stack())
421 tb = ''.join(tb_data)
423 if not self.params.get('ignoreerrors', False):
424 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
425 exc_info = sys.exc_info()[1].exc_info
427 exc_info = sys.exc_info()
428 raise DownloadError(message, exc_info)
429 self._download_retcode = 1
431 def report_warning(self, message):
433 Print the message to stderr, it will be prefixed with 'WARNING:'
434 If stderr is a tty file the 'WARNING:' will be colored
436 if self.params.get('logger') is not None:
437 self.params['logger'].warning(message)
439 if self.params.get('no_warnings'):
441 if self._err_file.isatty() and os.name != 'nt':
442 _msg_header = '\033[0;33mWARNING:\033[0m'
444 _msg_header = 'WARNING:'
445 warning_message = '%s %s' % (_msg_header, message)
446 self.to_stderr(warning_message)
448 def report_error(self, message, tb=None):
450 Do the same as trouble, but prefixes the message with 'ERROR:', colored
451 in red if stderr is a tty file.
453 if self._err_file.isatty() and os.name != 'nt':
454 _msg_header = '\033[0;31mERROR:\033[0m'
456 _msg_header = 'ERROR:'
457 error_message = '%s %s' % (_msg_header, message)
458 self.trouble(error_message, tb)
460 def report_file_already_downloaded(self, file_name):
461 """Report file has already been fully downloaded."""
463 self.to_screen('[download] %s has already been downloaded' % file_name)
464 except UnicodeEncodeError:
465 self.to_screen('[download] The file has already been downloaded')
467 def prepare_filename(self, info_dict):
468 """Generate the output filename."""
470 template_dict = dict(info_dict)
472 template_dict['epoch'] = int(time.time())
473 autonumber_size = self.params.get('autonumber_size')
474 if autonumber_size is None:
476 autonumber_templ = '%0' + str(autonumber_size) + 'd'
477 template_dict['autonumber'] = autonumber_templ % self._num_downloads
478 if template_dict.get('playlist_index') is not None:
479 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
480 if template_dict.get('resolution') is None:
481 if template_dict.get('width') and template_dict.get('height'):
482 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
483 elif template_dict.get('height'):
484 template_dict['resolution'] = '%sp' % template_dict['height']
485 elif template_dict.get('width'):
486 template_dict['resolution'] = '?x%d' % template_dict['width']
488 sanitize = lambda k, v: sanitize_filename(
490 restricted=self.params.get('restrictfilenames'),
492 template_dict = dict((k, sanitize(k, v))
493 for k, v in template_dict.items()
495 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
497 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
498 tmpl = compat_expanduser(outtmpl)
499 filename = tmpl % template_dict
501 except ValueError as err:
502 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
505 def _match_entry(self, info_dict):
506 """ Returns None iff the file should be downloaded """
508 video_title = info_dict.get('title', info_dict.get('id', 'video'))
509 if 'title' in info_dict:
510 # This can happen when we're just evaluating the playlist
511 title = info_dict['title']
512 matchtitle = self.params.get('matchtitle', False)
514 if not re.search(matchtitle, title, re.IGNORECASE):
515 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
516 rejecttitle = self.params.get('rejecttitle', False)
518 if re.search(rejecttitle, title, re.IGNORECASE):
519 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
520 date = info_dict.get('upload_date', None)
522 dateRange = self.params.get('daterange', DateRange())
523 if date not in dateRange:
524 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
525 view_count = info_dict.get('view_count', None)
526 if view_count is not None:
527 min_views = self.params.get('min_views')
528 if min_views is not None and view_count < min_views:
529 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
530 max_views = self.params.get('max_views')
531 if max_views is not None and view_count > max_views:
532 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
533 age_limit = self.params.get('age_limit')
534 if age_limit is not None:
535 actual_age_limit = info_dict.get('age_limit')
536 if actual_age_limit is None:
538 if age_limit < actual_age_limit:
539 return 'Skipping "' + title + '" because it is age restricted'
540 if self.in_download_archive(info_dict):
541 return '%s has already been recorded in archive' % video_title
545 def add_extra_info(info_dict, extra_info):
546 '''Set the keys from extra_info in info dict if they are missing'''
547 for key, value in extra_info.items():
548 info_dict.setdefault(key, value)
550 def extract_info(self, url, download=True, ie_key=None, extra_info={},
553 Returns a list with a dictionary for each video we find.
554 If 'download', also downloads the videos.
555 extra_info is a dict containing the extra values to add to each result
559 ies = [self.get_info_extractor(ie_key)]
564 if not ie.suitable(url):
568 self.report_warning('The program functionality for this site has been marked as broken, '
569 'and will probably not work.')
572 ie_result = ie.extract(url)
573 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
575 if isinstance(ie_result, list):
576 # Backwards compatibility: old IE result format
578 '_type': 'compat_list',
579 'entries': ie_result,
581 self.add_default_extra_info(ie_result, ie, url)
583 return self.process_ie_result(ie_result, download, extra_info)
586 except ExtractorError as de: # An error we somewhat expected
587 self.report_error(compat_str(de), de.format_traceback())
589 except MaxDownloadsReached:
591 except Exception as e:
592 if self.params.get('ignoreerrors', False):
593 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
598 self.report_error('no suitable InfoExtractor for URL %s' % url)
600 def add_default_extra_info(self, ie_result, ie, url):
601 self.add_extra_info(ie_result, {
602 'extractor': ie.IE_NAME,
604 'webpage_url_basename': url_basename(url),
605 'extractor_key': ie.ie_key(),
608 def process_ie_result(self, ie_result, download=True, extra_info={}):
610 Take the result of the ie(may be modified) and resolve all unresolved
611 references (URLs, playlist items).
613 It will also download the videos if 'download'.
614 Returns the resolved ie_result.
617 result_type = ie_result.get('_type', 'video')
619 if result_type in ('url', 'url_transparent'):
620 extract_flat = self.params.get('extract_flat', False)
621 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
622 extract_flat is True):
623 if self.params.get('forcejson', False):
624 self.to_stdout(json.dumps(ie_result))
627 if result_type == 'video':
628 self.add_extra_info(ie_result, extra_info)
629 return self.process_video_result(ie_result, download=download)
630 elif result_type == 'url':
631 # We have to add extra_info to the results because it may be
632 # contained in a playlist
633 return self.extract_info(ie_result['url'],
635 ie_key=ie_result.get('ie_key'),
636 extra_info=extra_info)
637 elif result_type == 'url_transparent':
638 # Use the information from the embedding page
639 info = self.extract_info(
640 ie_result['url'], ie_key=ie_result.get('ie_key'),
641 extra_info=extra_info, download=False, process=False)
643 force_properties = dict(
644 (k, v) for k, v in ie_result.items() if v is not None)
645 for f in ('_type', 'url'):
646 if f in force_properties:
647 del force_properties[f]
648 new_result = info.copy()
649 new_result.update(force_properties)
651 assert new_result.get('_type') != 'url_transparent'
653 return self.process_ie_result(
654 new_result, download=download, extra_info=extra_info)
655 elif result_type == 'playlist' or result_type == 'multi_video':
656 # We process each entry in the playlist
657 playlist = ie_result.get('title', None) or ie_result.get('id', None)
658 self.to_screen('[download] Downloading playlist: %s' % playlist)
660 playlist_results = []
662 playliststart = self.params.get('playliststart', 1) - 1
663 playlistend = self.params.get('playlistend', None)
664 # For backwards compatibility, interpret -1 as whole list
665 if playlistend == -1:
668 ie_entries = ie_result['entries']
669 if isinstance(ie_entries, list):
670 n_all_entries = len(ie_entries)
671 entries = ie_entries[playliststart:playlistend]
672 n_entries = len(entries)
674 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
675 (ie_result['extractor'], playlist, n_all_entries, n_entries))
676 elif isinstance(ie_entries, PagedList):
677 entries = ie_entries.getslice(
678 playliststart, playlistend)
679 n_entries = len(entries)
681 "[%s] playlist %s: Downloading %d videos" %
682 (ie_result['extractor'], playlist, n_entries))
684 entries = list(itertools.islice(
685 ie_entries, playliststart, playlistend))
686 n_entries = len(entries)
688 "[%s] playlist %s: Downloading %d videos" %
689 (ie_result['extractor'], playlist, n_entries))
691 if self.params.get('playlistreverse', False):
692 entries = entries[::-1]
694 for i, entry in enumerate(entries, 1):
695 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
697 'n_entries': n_entries,
698 'playlist': playlist,
699 'playlist_id': ie_result.get('id'),
700 'playlist_title': ie_result.get('title'),
701 'playlist_index': i + playliststart,
702 'extractor': ie_result['extractor'],
703 'webpage_url': ie_result['webpage_url'],
704 'webpage_url_basename': url_basename(ie_result['webpage_url']),
705 'extractor_key': ie_result['extractor_key'],
708 reason = self._match_entry(entry)
709 if reason is not None:
710 self.to_screen('[download] ' + reason)
713 entry_result = self.process_ie_result(entry,
716 playlist_results.append(entry_result)
717 ie_result['entries'] = playlist_results
719 elif result_type == 'compat_list':
721 'Extractor %s returned a compat_list result. '
722 'It needs to be updated.' % ie_result.get('extractor'))
728 'extractor': ie_result['extractor'],
729 'webpage_url': ie_result['webpage_url'],
730 'webpage_url_basename': url_basename(ie_result['webpage_url']),
731 'extractor_key': ie_result['extractor_key'],
735 ie_result['entries'] = [
736 self.process_ie_result(_fixup(r), download, extra_info)
737 for r in ie_result['entries']
741 raise Exception('Invalid result type: %s' % result_type)
743 def select_format(self, format_spec, available_formats):
744 if format_spec == 'best' or format_spec is None:
745 return available_formats[-1]
746 elif format_spec == 'worst':
747 return available_formats[0]
748 elif format_spec == 'bestaudio':
750 f for f in available_formats
751 if f.get('vcodec') == 'none']
753 return audio_formats[-1]
754 elif format_spec == 'worstaudio':
756 f for f in available_formats
757 if f.get('vcodec') == 'none']
759 return audio_formats[0]
760 elif format_spec == 'bestvideo':
762 f for f in available_formats
763 if f.get('acodec') == 'none']
765 return video_formats[-1]
766 elif format_spec == 'worstvideo':
768 f for f in available_formats
769 if f.get('acodec') == 'none']
771 return video_formats[0]
773 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
774 if format_spec in extensions:
775 filter_f = lambda f: f['ext'] == format_spec
777 filter_f = lambda f: f['format_id'] == format_spec
778 matches = list(filter(filter_f, available_formats))
783 def process_video_result(self, info_dict, download=True):
784 assert info_dict.get('_type', 'video') == 'video'
786 if 'id' not in info_dict:
787 raise ExtractorError('Missing "id" field in extractor result')
788 if 'title' not in info_dict:
789 raise ExtractorError('Missing "title" field in extractor result')
791 if 'playlist' not in info_dict:
792 # It isn't part of a playlist
793 info_dict['playlist'] = None
794 info_dict['playlist_index'] = None
796 thumbnails = info_dict.get('thumbnails')
798 thumbnails.sort(key=lambda t: (
799 t.get('width'), t.get('height'), t.get('url')))
801 if 'width' in t and 'height' in t:
802 t['resolution'] = '%dx%d' % (t['width'], t['height'])
804 if thumbnails and 'thumbnail' not in info_dict:
805 info_dict['thumbnail'] = thumbnails[-1]['url']
807 if 'display_id' not in info_dict and 'id' in info_dict:
808 info_dict['display_id'] = info_dict['id']
810 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
811 # Working around negative timestamps in Windows
812 # (see http://bugs.python.org/issue1646728)
813 if info_dict['timestamp'] < 0 and os.name == 'nt':
814 info_dict['timestamp'] = 0
815 upload_date = datetime.datetime.utcfromtimestamp(
816 info_dict['timestamp'])
817 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
819 # This extractors handle format selection themselves
820 if info_dict['extractor'] in ['Youku']:
822 self.process_info(info_dict)
825 # We now pick which formats have to be downloaded
826 if info_dict.get('formats') is None:
827 # There's only one format available
828 formats = [info_dict]
830 formats = info_dict['formats']
833 raise ExtractorError('No video formats found!')
835 # We check that all the formats have the format and format_id fields
836 for i, format in enumerate(formats):
837 if 'url' not in format:
838 raise ExtractorError('Missing "url" key in result (index %d)' % i)
840 if format.get('format_id') is None:
841 format['format_id'] = compat_str(i)
842 if format.get('format') is None:
843 format['format'] = '{id} - {res}{note}'.format(
844 id=format['format_id'],
845 res=self.format_resolution(format),
846 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
848 # Automatically determine file extension if missing
849 if 'ext' not in format:
850 format['ext'] = determine_ext(format['url']).lower()
852 format_limit = self.params.get('format_limit', None)
854 formats = list(takewhile_inclusive(
855 lambda f: f['format_id'] != format_limit, formats
858 # TODO Central sorting goes here
860 if formats[0] is not info_dict:
861 # only set the 'formats' fields if the original info_dict list them
862 # otherwise we end up with a circular reference, the first (and unique)
863 # element in the 'formats' field in info_dict is info_dict itself,
864 # wich can't be exported to json
865 info_dict['formats'] = formats
866 if self.params.get('listformats', None):
867 self.list_formats(info_dict)
870 req_format = self.params.get('format')
871 if req_format is None:
873 formats_to_download = []
874 # The -1 is for supporting YoutubeIE
875 if req_format in ('-1', 'all'):
876 formats_to_download = formats
878 for rfstr in req_format.split(','):
879 # We can accept formats requested in the format: 34/5/best, we pick
880 # the first that is available, starting from left
881 req_formats = rfstr.split('/')
882 for rf in req_formats:
883 if re.match(r'.+?\+.+?', rf) is not None:
884 # Two formats have been requested like '137+139'
885 format_1, format_2 = rf.split('+')
886 formats_info = (self.select_format(format_1, formats),
887 self.select_format(format_2, formats))
888 if all(formats_info):
889 # The first format must contain the video and the
891 if formats_info[0].get('vcodec') == 'none':
892 self.report_error('The first format must '
893 'contain the video, try using '
894 '"-f %s+%s"' % (format_2, format_1))
897 'requested_formats': formats_info,
899 'ext': formats_info[0]['ext'],
902 selected_format = None
904 selected_format = self.select_format(rf, formats)
905 if selected_format is not None:
906 formats_to_download.append(selected_format)
908 if not formats_to_download:
909 raise ExtractorError('requested format not available',
913 if len(formats_to_download) > 1:
914 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
915 for format in formats_to_download:
916 new_info = dict(info_dict)
917 new_info.update(format)
918 self.process_info(new_info)
919 # We update the info dict with the best quality format (backwards compatibility)
920 info_dict.update(formats_to_download[-1])
923 def process_info(self, info_dict):
924 """Process a single resolved IE result."""
926 assert info_dict.get('_type', 'video') == 'video'
928 max_downloads = self.params.get('max_downloads')
929 if max_downloads is not None:
930 if self._num_downloads >= int(max_downloads):
931 raise MaxDownloadsReached()
933 info_dict['fulltitle'] = info_dict['title']
934 if len(info_dict['title']) > 200:
935 info_dict['title'] = info_dict['title'][:197] + '...'
937 # Keep for backwards compatibility
938 info_dict['stitle'] = info_dict['title']
940 if 'format' not in info_dict:
941 info_dict['format'] = info_dict['ext']
943 reason = self._match_entry(info_dict)
944 if reason is not None:
945 self.to_screen('[download] ' + reason)
948 self._num_downloads += 1
950 filename = self.prepare_filename(info_dict)
953 if self.params.get('forcetitle', False):
954 self.to_stdout(info_dict['fulltitle'])
955 if self.params.get('forceid', False):
956 self.to_stdout(info_dict['id'])
957 if self.params.get('forceurl', False):
958 if info_dict.get('requested_formats') is not None:
959 for f in info_dict['requested_formats']:
960 self.to_stdout(f['url'] + f.get('play_path', ''))
962 # For RTMP URLs, also include the playpath
963 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
964 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
965 self.to_stdout(info_dict['thumbnail'])
966 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
967 self.to_stdout(info_dict['description'])
968 if self.params.get('forcefilename', False) and filename is not None:
969 self.to_stdout(filename)
970 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
971 self.to_stdout(formatSeconds(info_dict['duration']))
972 if self.params.get('forceformat', False):
973 self.to_stdout(info_dict['format'])
974 if self.params.get('forcejson', False):
975 info_dict['_filename'] = filename
976 self.to_stdout(json.dumps(info_dict))
977 if self.params.get('dump_single_json', False):
978 info_dict['_filename'] = filename
980 # Do nothing else if in simulate mode
981 if self.params.get('simulate', False):
988 dn = os.path.dirname(encodeFilename(filename))
989 if dn and not os.path.exists(dn):
991 except (OSError, IOError) as err:
992 self.report_error('unable to create directory ' + compat_str(err))
995 if self.params.get('writedescription', False):
996 descfn = filename + '.description'
997 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
998 self.to_screen('[info] Video description is already present')
1001 self.to_screen('[info] Writing video description to: ' + descfn)
1002 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1003 descfile.write(info_dict['description'])
1004 except (KeyError, TypeError):
1005 self.report_warning('There\'s no description to write.')
1006 except (OSError, IOError):
1007 self.report_error('Cannot write description file ' + descfn)
1010 if self.params.get('writeannotations', False):
1011 annofn = filename + '.annotations.xml'
1012 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1013 self.to_screen('[info] Video annotations are already present')
1016 self.to_screen('[info] Writing video annotations to: ' + annofn)
1017 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1018 annofile.write(info_dict['annotations'])
1019 except (KeyError, TypeError):
1020 self.report_warning('There are no annotations to write.')
1021 except (OSError, IOError):
1022 self.report_error('Cannot write annotations file: ' + annofn)
1025 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1026 self.params.get('writeautomaticsub')])
1028 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1029 # subtitles download errors are already managed as troubles in relevant IE
1030 # that way it will silently go on when used with unsupporting IE
1031 subtitles = info_dict['subtitles']
1032 sub_format = self.params.get('subtitlesformat', 'srt')
1033 for sub_lang in subtitles.keys():
1034 sub = subtitles[sub_lang]
1038 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1039 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1040 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1042 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1043 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1045 except (OSError, IOError):
1046 self.report_error('Cannot write subtitles file ' + sub_filename)
1049 if self.params.get('writeinfojson', False):
1050 infofn = os.path.splitext(filename)[0] + '.info.json'
1051 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1052 self.to_screen('[info] Video description metadata is already present')
1054 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1056 write_json_file(info_dict, infofn)
1057 except (OSError, IOError):
1058 self.report_error('Cannot write metadata to JSON file ' + infofn)
1061 if self.params.get('writethumbnail', False):
1062 if info_dict.get('thumbnail') is not None:
1063 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1064 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1065 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1066 self.to_screen('[%s] %s: Thumbnail is already present' %
1067 (info_dict['extractor'], info_dict['id']))
1069 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1070 (info_dict['extractor'], info_dict['id']))
1072 uf = self.urlopen(info_dict['thumbnail'])
1073 with open(thumb_filename, 'wb') as thumbf:
1074 shutil.copyfileobj(uf, thumbf)
1075 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1076 (info_dict['extractor'], info_dict['id'], thumb_filename))
1077 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1078 self.report_warning('Unable to download thumbnail "%s": %s' %
1079 (info_dict['thumbnail'], compat_str(err)))
1081 if not self.params.get('skip_download', False):
1082 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1087 fd = get_suitable_downloader(info)(self, self.params)
1088 for ph in self._progress_hooks:
1089 fd.add_progress_hook(ph)
1090 if self.params.get('verbose'):
1091 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1092 return fd.download(name, info)
1093 if info_dict.get('requested_formats') is not None:
1096 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1097 if not merger._executable:
1099 self.report_warning('You have requested multiple '
1100 'formats but ffmpeg or avconv are not installed.'
1101 ' The formats won\'t be merged')
1103 postprocessors = [merger]
1104 for f in info_dict['requested_formats']:
1105 new_info = dict(info_dict)
1107 fname = self.prepare_filename(new_info)
1108 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1109 downloaded.append(fname)
1110 partial_success = dl(fname, new_info)
1111 success = success and partial_success
1112 info_dict['__postprocessors'] = postprocessors
1113 info_dict['__files_to_merge'] = downloaded
1115 # Just a single file
1116 success = dl(filename, info_dict)
1117 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1118 self.report_error('unable to download video data: %s' % str(err))
1120 except (OSError, IOError) as err:
1121 raise UnavailableVideoError(err)
1122 except (ContentTooShortError, ) as err:
1123 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1128 self.post_process(filename, info_dict)
1129 except (PostProcessingError) as err:
1130 self.report_error('postprocessing: %s' % str(err))
1133 self.record_download_archive(info_dict)
1135 def download(self, url_list):
1136 """Download a given list of URLs."""
1137 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1138 if (len(url_list) > 1 and
1140 and self.params.get('max_downloads') != 1):
1141 raise SameFileError(outtmpl)
1143 for url in url_list:
1145 # It also downloads the videos
1146 res = self.extract_info(url)
1147 except UnavailableVideoError:
1148 self.report_error('unable to download video')
1149 except MaxDownloadsReached:
1150 self.to_screen('[info] Maximum number of downloaded files reached.')
1153 if self.params.get('dump_single_json', False):
1154 self.to_stdout(json.dumps(res))
1156 return self._download_retcode
1158 def download_with_info_file(self, info_filename):
1159 with io.open(info_filename, 'r', encoding='utf-8') as f:
1162 self.process_ie_result(info, download=True)
1163 except DownloadError:
1164 webpage_url = info.get('webpage_url')
1165 if webpage_url is not None:
1166 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1167 return self.download([webpage_url])
1170 return self._download_retcode
1172 def post_process(self, filename, ie_info):
1173 """Run all the postprocessors on the given file."""
1174 info = dict(ie_info)
1175 info['filepath'] = filename
1178 if ie_info.get('__postprocessors') is not None:
1179 pps_chain.extend(ie_info['__postprocessors'])
1180 pps_chain.extend(self._pps)
1181 for pp in pps_chain:
1183 keep_video_wish, new_info = pp.run(info)
1184 if keep_video_wish is not None:
1186 keep_video = keep_video_wish
1187 elif keep_video is None:
1188 # No clear decision yet, let IE decide
1189 keep_video = keep_video_wish
1190 except PostProcessingError as e:
1191 self.report_error(e.msg)
1192 if keep_video is False and not self.params.get('keepvideo', False):
1194 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1195 os.remove(encodeFilename(filename))
1196 except (IOError, OSError):
1197 self.report_warning('Unable to remove downloaded video file')
1199 def _make_archive_id(self, info_dict):
1200 # Future-proof against any change in case
1201 # and backwards compatibility with prior versions
1202 extractor = info_dict.get('extractor_key')
1203 if extractor is None:
1204 if 'id' in info_dict:
1205 extractor = info_dict.get('ie_key') # key in a playlist
1206 if extractor is None:
1207 return None # Incomplete video information
1208 return extractor.lower() + ' ' + info_dict['id']
1210 def in_download_archive(self, info_dict):
1211 fn = self.params.get('download_archive')
1215 vid_id = self._make_archive_id(info_dict)
1217 return False # Incomplete video information
1220 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1221 for line in archive_file:
1222 if line.strip() == vid_id:
1224 except IOError as ioe:
1225 if ioe.errno != errno.ENOENT:
1229 def record_download_archive(self, info_dict):
1230 fn = self.params.get('download_archive')
1233 vid_id = self._make_archive_id(info_dict)
1235 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1236 archive_file.write(vid_id + '\n')
1239 def format_resolution(format, default='unknown'):
1240 if format.get('vcodec') == 'none':
1242 if format.get('resolution') is not None:
1243 return format['resolution']
1244 if format.get('height') is not None:
1245 if format.get('width') is not None:
1246 res = '%sx%s' % (format['width'], format['height'])
1248 res = '%sp' % format['height']
1249 elif format.get('width') is not None:
1250 res = '?x%d' % format['width']
1255 def _format_note(self, fdict):
1257 if fdict.get('ext') in ['f4f', 'f4m']:
1258 res += '(unsupported) '
1259 if fdict.get('format_note') is not None:
1260 res += fdict['format_note'] + ' '
1261 if fdict.get('tbr') is not None:
1262 res += '%4dk ' % fdict['tbr']
1263 if fdict.get('container') is not None:
1266 res += '%s container' % fdict['container']
1267 if (fdict.get('vcodec') is not None and
1268 fdict.get('vcodec') != 'none'):
1271 res += fdict['vcodec']
1272 if fdict.get('vbr') is not None:
1274 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1276 if fdict.get('vbr') is not None:
1277 res += '%4dk' % fdict['vbr']
1278 if fdict.get('fps') is not None:
1279 res += ', %sfps' % fdict['fps']
1280 if fdict.get('acodec') is not None:
1283 if fdict['acodec'] == 'none':
1286 res += '%-5s' % fdict['acodec']
1287 elif fdict.get('abr') is not None:
1291 if fdict.get('abr') is not None:
1292 res += '@%3dk' % fdict['abr']
1293 if fdict.get('asr') is not None:
1294 res += ' (%5dHz)' % fdict['asr']
1295 if fdict.get('filesize') is not None:
1298 res += format_bytes(fdict['filesize'])
1299 elif fdict.get('filesize_approx') is not None:
1302 res += '~' + format_bytes(fdict['filesize_approx'])
1305 def list_formats(self, info_dict):
1306 def line(format, idlen=20):
1307 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1308 format['format_id'],
1310 self.format_resolution(format),
1311 self._format_note(format),
1314 formats = info_dict.get('formats', [info_dict])
1315 idlen = max(len('format code'),
1316 max(len(f['format_id']) for f in formats))
1317 formats_s = [line(f, idlen) for f in formats]
1318 if len(formats) > 1:
1319 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1320 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1322 header_line = line({
1323 'format_id': 'format code', 'ext': 'extension',
1324 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1325 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1326 (info_dict['id'], header_line, '\n'.join(formats_s)))
1328 def urlopen(self, req):
1329 """ Start an HTTP download """
1331 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1332 # always respected by websites, some tend to give out URLs with non percent-encoded
1333 # non-ASCII characters (see telemb.py, ard.py [#3412])
1334 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1335 # To work around aforementioned issue we will replace request's original URL with
1336 # percent-encoded one
1337 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1338 url = req if req_is_string else req.get_full_url()
1339 url_escaped = escape_url(url)
1341 # Substitute URL if any change after escaping
1342 if url != url_escaped:
1346 req = compat_urllib_request.Request(
1347 url_escaped, data=req.data, headers=req.headers,
1348 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1350 return self._opener.open(req, timeout=self._socket_timeout)
1352 def print_debug_header(self):
1353 if not self.params.get('verbose'):
1356 if type('') is not compat_str:
1357 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1358 self.report_warning(
1359 'Your Python is broken! Update to a newer and supported version')
1361 stdout_encoding = getattr(
1362 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1364 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1365 locale.getpreferredencoding(),
1366 sys.getfilesystemencoding(),
1368 self.get_encoding()))
1369 write_string(encoding_str, encoding=None)
1371 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1373 sp = subprocess.Popen(
1374 ['git', 'rev-parse', '--short', 'HEAD'],
1375 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1376 cwd=os.path.dirname(os.path.abspath(__file__)))
1377 out, err = sp.communicate()
1378 out = out.decode().strip()
1379 if re.match('[0-9a-f]+', out):
1380 self._write_string('[debug] Git HEAD: ' + out + '\n')
1386 self._write_string('[debug] Python version %s - %s\n' % (
1387 platform.python_version(), platform_name()))
1389 exe_versions = FFmpegPostProcessor.get_versions()
1390 exe_versions['rtmpdump'] = rtmpdump_version()
1391 exe_str = ', '.join(
1393 for exe, v in sorted(exe_versions.items())
1398 self._write_string('[debug] exe versions: %s\n' % exe_str)
1401 for handler in self._opener.handlers:
1402 if hasattr(handler, 'proxies'):
1403 proxy_map.update(handler.proxies)
1404 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1406 def _setup_opener(self):
1407 timeout_val = self.params.get('socket_timeout')
1408 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1410 opts_cookiefile = self.params.get('cookiefile')
1411 opts_proxy = self.params.get('proxy')
1413 if opts_cookiefile is None:
1414 self.cookiejar = compat_cookiejar.CookieJar()
1416 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1418 if os.access(opts_cookiefile, os.R_OK):
1419 self.cookiejar.load()
1421 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1423 if opts_proxy is not None:
1424 if opts_proxy == '':
1427 proxies = {'http': opts_proxy, 'https': opts_proxy}
1429 proxies = compat_urllib_request.getproxies()
1430 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1431 if 'http' in proxies and 'https' not in proxies:
1432 proxies['https'] = proxies['http']
1433 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1435 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1436 https_handler = make_HTTPS_handler(
1437 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1438 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1439 opener = compat_urllib_request.build_opener(
1440 https_handler, proxy_handler, cookie_processor, ydlh)
1441 # Delete the default user-agent header, which would otherwise apply in
1442 # cases where our custom HTTP handler doesn't come into play
1443 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1444 opener.addheaders = []
1445 self._opener = opener
1447 def encode(self, s):
1448 if isinstance(s, bytes):
1449 return s # Already encoded
1452 return s.encode(self.get_encoding())
1453 except UnicodeEncodeError as err:
1454 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1457 def get_encoding(self):
1458 encoding = self.params.get('encoding')
1459 if encoding is None:
1460 encoding = preferredencoding()