2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
53 UnavailableVideoError,
60 from .extractor import get_info_extractor, gen_extractors
61 from .downloader import get_suitable_downloader
62 from .postprocessor import FFmpegMergerPP
63 from .version import __version__
66 class YoutubeDL(object):
69 YoutubeDL objects are the ones responsible of downloading the
70 actual video file and writing it to disk if the user has requested
71 it, among some other tasks. In most cases there should be one per
72 program. As, given a video URL, the downloader doesn't know how to
73 extract all the needed information, task that InfoExtractors do, it
74 has to pass the URL to one of them.
76 For this, YoutubeDL objects have a method that allows
77 InfoExtractors to be registered in a given order. When it is passed
78 a URL, the YoutubeDL object handles it to the first InfoExtractor it
79 finds that reports being able to handle it. The InfoExtractor extracts
80 all the information about the video or videos the URL refers to, and
81 YoutubeDL processes the extracted information, possibly using a File
82 Downloader to download the video.
84 YoutubeDL objects accept a lot of parameters. In order not to saturate
85 the object constructor with arguments, it receives a dictionary of
86 options instead. These options are available through the params
87 attribute for the InfoExtractors to use. The YoutubeDL also
88 registers itself as the downloader in charge for the InfoExtractors
89 that are added to it, so this is a "mutual registration".
93 username: Username for authentication purposes.
94 password: Password for authentication purposes.
95 videopassword: Password for access a video.
96 usenetrc: Use netrc for authentication instead.
97 verbose: Print additional info to stdout.
98 quiet: Do not print messages to stdout.
99 no_warnings: Do not print out anything for warnings.
100 forceurl: Force printing final URL.
101 forcetitle: Force printing title.
102 forceid: Force printing ID.
103 forcethumbnail: Force printing thumbnail URL.
104 forcedescription: Force printing description.
105 forcefilename: Force printing final filename.
106 forceduration: Force printing duration.
107 forcejson: Force printing info_dict as JSON.
108 simulate: Do not download the video files.
109 format: Video format code.
110 format_limit: Highest quality format to try.
111 outtmpl: Template for output names.
112 restrictfilenames: Do not allow "&" and spaces in file names
113 ignoreerrors: Do not stop on download errors.
114 nooverwrites: Prevent overwriting files.
115 playliststart: Playlist item to start at.
116 playlistend: Playlist item to end at.
117 matchtitle: Download only matching titles.
118 rejecttitle: Reject downloads for matching titles.
119 logger: Log messages to a logging.Logger instance.
120 logtostderr: Log messages to stderr instead of stdout.
121 writedescription: Write the video description to a .description file
122 writeinfojson: Write the video description to a .info.json file
123 writeannotations: Write the video annotations to a .annotations.xml file
124 writethumbnail: Write the thumbnail image to a file
125 writesubtitles: Write the video subtitles to a file
126 writeautomaticsub: Write the automatic subtitles to a file
127 allsubtitles: Downloads all the subtitles of the video
128 (requires writesubtitles or writeautomaticsub)
129 listsubtitles: Lists all available subtitles for the video
130 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
131 subtitleslangs: List of languages of the subtitles to download
132 keepvideo: Keep the video file after post-processing
133 daterange: A DateRange object, download only if the upload_date is in the range.
134 skip_download: Skip the actual download of the video file
135 cachedir: Location of the cache files in the filesystem.
136 None to disable filesystem cache.
137 noplaylist: Download single video instead of a playlist if in doubt.
138 age_limit: An integer representing the user's age in years.
139 Unsuitable videos for the given age are skipped.
140 min_views: An integer representing the minimum view count the video
141 must have in order to not be skipped.
142 Videos without view count information are always
143 downloaded. None for no limit.
144 max_views: An integer representing the maximum view count.
145 Videos that are more popular than that are not
147 Videos without view count information are always
148 downloaded. None for no limit.
149 download_archive: File name of a file where all downloads are recorded.
150 Videos already present in the file are not downloaded
152 cookiefile: File name where cookies should be read from and dumped to.
153 nocheckcertificate:Do not verify SSL certificates
154 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
155 At the moment, this is only supported by YouTube.
156 proxy: URL of the proxy server to use
157 socket_timeout: Time to wait for unresponsive hosts, in seconds
158 bidi_workaround: Work around buggy terminals without bidirectional text
159 support, using fribidi
160 debug_printtraffic:Print out sent and received HTTP traffic
161 include_ads: Download ads as well
162 default_search: Prepend this string if an input url is not valid.
163 'auto' for elaborate guessing
164 encoding: Use this encoding instead of the system-specified.
166 The following parameters are not used by YoutubeDL itself, they are used by
168 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
169 noresizebuffer, retries, continuedl, noprogress, consoletitle
171 The following options are used by the post processors:
172 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
173 otherwise prefer avconv.
179 _download_retcode = None
180 _num_downloads = None
183 def __init__(self, params=None):
184 """Create a FileDownloader object with the given options."""
188 self._ies_instances = {}
190 self._progress_hooks = []
191 self._download_retcode = 0
192 self._num_downloads = 0
193 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
194 self._err_file = sys.stderr
197 if params.get('bidi_workaround', False):
200 master, slave = pty.openpty()
201 width = get_term_width()
205 width_args = ['-w', str(width)]
207 stdin=subprocess.PIPE,
209 stderr=self._err_file)
211 self._output_process = subprocess.Popen(
212 ['bidiv'] + width_args, **sp_kwargs
215 self._output_process = subprocess.Popen(
216 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
217 self._output_channel = os.fdopen(master, 'rb')
218 except OSError as ose:
220 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
224 if (sys.version_info >= (3,) and sys.platform != 'win32' and
225 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
226 and not params['restrictfilenames']):
227 # On Python 3, the Unicode filesystem API will throw errors (#1474)
229 'Assuming --restrict-filenames since file system encoding '
230 'cannot encode all charactes. '
231 'Set the LC_ALL environment variable to fix this.')
232 self.params['restrictfilenames'] = True
234 if '%(stitle)s' in self.params.get('outtmpl', ''):
235 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
239 def add_info_extractor(self, ie):
240 """Add an InfoExtractor object to the end of the list."""
242 self._ies_instances[ie.ie_key()] = ie
243 ie.set_downloader(self)
245 def get_info_extractor(self, ie_key):
247 Get an instance of an IE with name ie_key, it will try to get one from
248 the _ies list, if there's no instance it will create a new one and add
249 it to the extractor list.
251 ie = self._ies_instances.get(ie_key)
253 ie = get_info_extractor(ie_key)()
254 self.add_info_extractor(ie)
257 def add_default_info_extractors(self):
259 Add the InfoExtractors returned by gen_extractors to the end of the list
261 for ie in gen_extractors():
262 self.add_info_extractor(ie)
264 def add_post_processor(self, pp):
265 """Add a PostProcessor object to the end of the chain."""
267 pp.set_downloader(self)
269 def add_progress_hook(self, ph):
270 """Add the progress hook (currently only for the file downloader)"""
271 self._progress_hooks.append(ph)
273 def _bidi_workaround(self, message):
274 if not hasattr(self, '_output_channel'):
277 assert hasattr(self, '_output_process')
278 assert type(message) == type('')
279 line_count = message.count('\n') + 1
280 self._output_process.stdin.write((message + '\n').encode('utf-8'))
281 self._output_process.stdin.flush()
282 res = ''.join(self._output_channel.readline().decode('utf-8')
283 for _ in range(line_count))
284 return res[:-len('\n')]
286 def to_screen(self, message, skip_eol=False):
287 """Print message to stdout if not in quiet mode."""
288 return self.to_stdout(message, skip_eol, check_quiet=True)
290 def _write_string(self, s, out=None):
291 write_string(s, out=out, encoding=self.params.get('encoding'))
293 def to_stdout(self, message, skip_eol=False, check_quiet=False):
294 """Print message to stdout if not in quiet mode."""
295 if self.params.get('logger'):
296 self.params['logger'].debug(message)
297 elif not check_quiet or not self.params.get('quiet', False):
298 message = self._bidi_workaround(message)
299 terminator = ['\n', ''][skip_eol]
300 output = message + terminator
302 self._write_string(output, self._screen_file)
304 def to_stderr(self, message):
305 """Print message to stderr."""
306 assert type(message) == type('')
307 if self.params.get('logger'):
308 self.params['logger'].error(message)
310 message = self._bidi_workaround(message)
311 output = message + '\n'
312 self._write_string(output, self._err_file)
314 def to_console_title(self, message):
315 if not self.params.get('consoletitle', False):
317 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
318 # c_wchar_p() might not be necessary if `message` is
319 # already of type unicode()
320 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
321 elif 'TERM' in os.environ:
322 self._write_string('\033]0;%s\007' % message, self._screen_file)
324 def save_console_title(self):
325 if not self.params.get('consoletitle', False):
327 if 'TERM' in os.environ:
328 # Save the title on stack
329 self._write_string('\033[22;0t', self._screen_file)
331 def restore_console_title(self):
332 if not self.params.get('consoletitle', False):
334 if 'TERM' in os.environ:
335 # Restore the title from stack
336 self._write_string('\033[23;0t', self._screen_file)
339 self.save_console_title()
342 def __exit__(self, *args):
343 self.restore_console_title()
345 if self.params.get('cookiefile') is not None:
346 self.cookiejar.save()
348 def trouble(self, message=None, tb=None):
349 """Determine action to take when a download problem appears.
351 Depending on if the downloader has been configured to ignore
352 download errors or not, this method may throw an exception or
353 not when errors are found, after printing the message.
355 tb, if given, is additional traceback information.
357 if message is not None:
358 self.to_stderr(message)
359 if self.params.get('verbose'):
361 if sys.exc_info()[0]: # if .trouble has been called from an except block
363 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
364 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
365 tb += compat_str(traceback.format_exc())
367 tb_data = traceback.format_list(traceback.extract_stack())
368 tb = ''.join(tb_data)
370 if not self.params.get('ignoreerrors', False):
371 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
372 exc_info = sys.exc_info()[1].exc_info
374 exc_info = sys.exc_info()
375 raise DownloadError(message, exc_info)
376 self._download_retcode = 1
378 def report_warning(self, message):
380 Print the message to stderr, it will be prefixed with 'WARNING:'
381 If stderr is a tty file the 'WARNING:' will be colored
383 if self.params.get('logger') is not None:
384 self.params['logger'].warning(message)
386 if self.params.get('no_warnings'):
388 if self._err_file.isatty() and os.name != 'nt':
389 _msg_header = '\033[0;33mWARNING:\033[0m'
391 _msg_header = 'WARNING:'
392 warning_message = '%s %s' % (_msg_header, message)
393 self.to_stderr(warning_message)
395 def report_error(self, message, tb=None):
397 Do the same as trouble, but prefixes the message with 'ERROR:', colored
398 in red if stderr is a tty file.
400 if self._err_file.isatty() and os.name != 'nt':
401 _msg_header = '\033[0;31mERROR:\033[0m'
403 _msg_header = 'ERROR:'
404 error_message = '%s %s' % (_msg_header, message)
405 self.trouble(error_message, tb)
407 def report_file_already_downloaded(self, file_name):
408 """Report file has already been fully downloaded."""
410 self.to_screen('[download] %s has already been downloaded' % file_name)
411 except UnicodeEncodeError:
412 self.to_screen('[download] The file has already been downloaded')
414 def prepare_filename(self, info_dict):
415 """Generate the output filename."""
417 template_dict = dict(info_dict)
419 template_dict['epoch'] = int(time.time())
420 autonumber_size = self.params.get('autonumber_size')
421 if autonumber_size is None:
423 autonumber_templ = '%0' + str(autonumber_size) + 'd'
424 template_dict['autonumber'] = autonumber_templ % self._num_downloads
425 if template_dict.get('playlist_index') is not None:
426 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
427 if template_dict.get('resolution') is None:
428 if template_dict.get('width') and template_dict.get('height'):
429 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
430 elif template_dict.get('height'):
431 template_dict['resolution'] = '%sp' % template_dict['height']
432 elif template_dict.get('width'):
433 template_dict['resolution'] = '?x%d' % template_dict['width']
435 sanitize = lambda k, v: sanitize_filename(
437 restricted=self.params.get('restrictfilenames'),
439 template_dict = dict((k, sanitize(k, v))
440 for k, v in template_dict.items()
442 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
444 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
445 tmpl = os.path.expanduser(outtmpl)
446 filename = tmpl % template_dict
448 except ValueError as err:
449 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
452 def _match_entry(self, info_dict):
453 """ Returns None iff the file should be downloaded """
455 video_title = info_dict.get('title', info_dict.get('id', 'video'))
456 if 'title' in info_dict:
457 # This can happen when we're just evaluating the playlist
458 title = info_dict['title']
459 matchtitle = self.params.get('matchtitle', False)
461 if not re.search(matchtitle, title, re.IGNORECASE):
462 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
463 rejecttitle = self.params.get('rejecttitle', False)
465 if re.search(rejecttitle, title, re.IGNORECASE):
466 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
467 date = info_dict.get('upload_date', None)
469 dateRange = self.params.get('daterange', DateRange())
470 if date not in dateRange:
471 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
472 view_count = info_dict.get('view_count', None)
473 if view_count is not None:
474 min_views = self.params.get('min_views')
475 if min_views is not None and view_count < min_views:
476 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
477 max_views = self.params.get('max_views')
478 if max_views is not None and view_count > max_views:
479 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
480 age_limit = self.params.get('age_limit')
481 if age_limit is not None:
482 if age_limit < info_dict.get('age_limit', 0):
483 return 'Skipping "' + title + '" because it is age restricted'
484 if self.in_download_archive(info_dict):
485 return '%s has already been recorded in archive' % video_title
489 def add_extra_info(info_dict, extra_info):
490 '''Set the keys from extra_info in info dict if they are missing'''
491 for key, value in extra_info.items():
492 info_dict.setdefault(key, value)
494 def extract_info(self, url, download=True, ie_key=None, extra_info={},
497 Returns a list with a dictionary for each video we find.
498 If 'download', also downloads the videos.
499 extra_info is a dict containing the extra values to add to each result
503 ies = [self.get_info_extractor(ie_key)]
508 if not ie.suitable(url):
512 self.report_warning('The program functionality for this site has been marked as broken, '
513 'and will probably not work.')
516 ie_result = ie.extract(url)
517 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
519 if isinstance(ie_result, list):
520 # Backwards compatibility: old IE result format
522 '_type': 'compat_list',
523 'entries': ie_result,
525 self.add_default_extra_info(ie_result, ie, url)
527 return self.process_ie_result(ie_result, download, extra_info)
530 except ExtractorError as de: # An error we somewhat expected
531 self.report_error(compat_str(de), de.format_traceback())
533 except MaxDownloadsReached:
535 except Exception as e:
536 if self.params.get('ignoreerrors', False):
537 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
542 self.report_error('no suitable InfoExtractor for URL %s' % url)
544 def add_default_extra_info(self, ie_result, ie, url):
545 self.add_extra_info(ie_result, {
546 'extractor': ie.IE_NAME,
548 'webpage_url_basename': url_basename(url),
549 'extractor_key': ie.ie_key(),
552 def process_ie_result(self, ie_result, download=True, extra_info={}):
554 Take the result of the ie(may be modified) and resolve all unresolved
555 references (URLs, playlist items).
557 It will also download the videos if 'download'.
558 Returns the resolved ie_result.
561 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
562 if result_type == 'video':
563 self.add_extra_info(ie_result, extra_info)
564 return self.process_video_result(ie_result, download=download)
565 elif result_type == 'url':
566 # We have to add extra_info to the results because it may be
567 # contained in a playlist
568 return self.extract_info(ie_result['url'],
570 ie_key=ie_result.get('ie_key'),
571 extra_info=extra_info)
572 elif result_type == 'url_transparent':
573 # Use the information from the embedding page
574 info = self.extract_info(
575 ie_result['url'], ie_key=ie_result.get('ie_key'),
576 extra_info=extra_info, download=False, process=False)
578 def make_result(embedded_info):
579 new_result = ie_result.copy()
580 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
581 'entries', 'ie_key', 'duration',
582 'subtitles', 'annotations', 'format',
583 'thumbnail', 'thumbnails'):
586 if f in embedded_info:
587 new_result[f] = embedded_info[f]
589 new_result = make_result(info)
591 assert new_result.get('_type') != 'url_transparent'
592 if new_result.get('_type') == 'compat_list':
593 new_result['entries'] = [
594 make_result(e) for e in new_result['entries']]
596 return self.process_ie_result(
597 new_result, download=download, extra_info=extra_info)
598 elif result_type == 'playlist':
599 # We process each entry in the playlist
600 playlist = ie_result.get('title', None) or ie_result.get('id', None)
601 self.to_screen('[download] Downloading playlist: %s' % playlist)
603 playlist_results = []
605 playliststart = self.params.get('playliststart', 1) - 1
606 playlistend = self.params.get('playlistend', None)
607 # For backwards compatibility, interpret -1 as whole list
608 if playlistend == -1:
611 if isinstance(ie_result['entries'], list):
612 n_all_entries = len(ie_result['entries'])
613 entries = ie_result['entries'][playliststart:playlistend]
614 n_entries = len(entries)
616 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
617 (ie_result['extractor'], playlist, n_all_entries, n_entries))
619 assert isinstance(ie_result['entries'], PagedList)
620 entries = ie_result['entries'].getslice(
621 playliststart, playlistend)
622 n_entries = len(entries)
624 "[%s] playlist %s: Downloading %d videos" %
625 (ie_result['extractor'], playlist, n_entries))
627 for i, entry in enumerate(entries, 1):
628 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
630 'playlist': playlist,
631 'playlist_index': i + playliststart,
632 'extractor': ie_result['extractor'],
633 'webpage_url': ie_result['webpage_url'],
634 'webpage_url_basename': url_basename(ie_result['webpage_url']),
635 'extractor_key': ie_result['extractor_key'],
638 reason = self._match_entry(entry)
639 if reason is not None:
640 self.to_screen('[download] ' + reason)
643 entry_result = self.process_ie_result(entry,
646 playlist_results.append(entry_result)
647 ie_result['entries'] = playlist_results
649 elif result_type == 'compat_list':
651 self.add_extra_info(r,
653 'extractor': ie_result['extractor'],
654 'webpage_url': ie_result['webpage_url'],
655 'webpage_url_basename': url_basename(ie_result['webpage_url']),
656 'extractor_key': ie_result['extractor_key'],
659 ie_result['entries'] = [
660 self.process_ie_result(_fixup(r), download, extra_info)
661 for r in ie_result['entries']
665 raise Exception('Invalid result type: %s' % result_type)
667 def select_format(self, format_spec, available_formats):
668 if format_spec == 'best' or format_spec is None:
669 return available_formats[-1]
670 elif format_spec == 'worst':
671 return available_formats[0]
672 elif format_spec == 'bestaudio':
674 f for f in available_formats
675 if f.get('vcodec') == 'none']
677 return audio_formats[-1]
678 elif format_spec == 'worstaudio':
680 f for f in available_formats
681 if f.get('vcodec') == 'none']
683 return audio_formats[0]
684 elif format_spec == 'bestvideo':
686 f for f in available_formats
687 if f.get('acodec') == 'none']
689 return video_formats[-1]
690 elif format_spec == 'worstvideo':
692 f for f in available_formats
693 if f.get('acodec') == 'none']
695 return video_formats[0]
697 extensions = ['mp4', 'flv', 'webm', '3gp']
698 if format_spec in extensions:
699 filter_f = lambda f: f['ext'] == format_spec
701 filter_f = lambda f: f['format_id'] == format_spec
702 matches = list(filter(filter_f, available_formats))
707 def process_video_result(self, info_dict, download=True):
708 assert info_dict.get('_type', 'video') == 'video'
710 if 'id' not in info_dict:
711 raise ExtractorError('Missing "id" field in extractor result')
712 if 'title' not in info_dict:
713 raise ExtractorError('Missing "title" field in extractor result')
715 if 'playlist' not in info_dict:
716 # It isn't part of a playlist
717 info_dict['playlist'] = None
718 info_dict['playlist_index'] = None
720 thumbnails = info_dict.get('thumbnails')
723 if 'width' in t and 'height' in t:
724 t['resolution'] = '%dx%d' % (t['width'], t['height'])
726 if thumbnails and 'thumbnail' not in info_dict:
727 info_dict['thumbnail'] = thumbnails[-1]['url']
729 if 'display_id' not in info_dict and 'id' in info_dict:
730 info_dict['display_id'] = info_dict['id']
732 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
733 upload_date = datetime.datetime.utcfromtimestamp(
734 info_dict['timestamp'])
735 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
737 # This extractors handle format selection themselves
738 if info_dict['extractor'] in ['Youku']:
740 self.process_info(info_dict)
743 # We now pick which formats have to be downloaded
744 if info_dict.get('formats') is None:
745 # There's only one format available
746 formats = [info_dict]
748 formats = info_dict['formats']
751 raise ExtractorError('No video formats found!')
753 # We check that all the formats have the format and format_id fields
754 for i, format in enumerate(formats):
755 if 'url' not in format:
756 raise ExtractorError('Missing "url" key in result (index %d)' % i)
758 if format.get('format_id') is None:
759 format['format_id'] = compat_str(i)
760 if format.get('format') is None:
761 format['format'] = '{id} - {res}{note}'.format(
762 id=format['format_id'],
763 res=self.format_resolution(format),
764 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
766 # Automatically determine file extension if missing
767 if 'ext' not in format:
768 format['ext'] = determine_ext(format['url']).lower()
770 format_limit = self.params.get('format_limit', None)
772 formats = list(takewhile_inclusive(
773 lambda f: f['format_id'] != format_limit, formats
776 # TODO Central sorting goes here
778 if formats[0] is not info_dict:
779 # only set the 'formats' fields if the original info_dict list them
780 # otherwise we end up with a circular reference, the first (and unique)
781 # element in the 'formats' field in info_dict is info_dict itself,
782 # wich can't be exported to json
783 info_dict['formats'] = formats
784 if self.params.get('listformats', None):
785 self.list_formats(info_dict)
788 req_format = self.params.get('format')
789 if req_format is None:
791 formats_to_download = []
792 # The -1 is for supporting YoutubeIE
793 if req_format in ('-1', 'all'):
794 formats_to_download = formats
796 # We can accept formats requested in the format: 34/5/best, we pick
797 # the first that is available, starting from left
798 req_formats = req_format.split('/')
799 for rf in req_formats:
800 if re.match(r'.+?\+.+?', rf) is not None:
801 # Two formats have been requested like '137+139'
802 format_1, format_2 = rf.split('+')
803 formats_info = (self.select_format(format_1, formats),
804 self.select_format(format_2, formats))
805 if all(formats_info):
807 'requested_formats': formats_info,
809 'ext': formats_info[0]['ext'],
812 selected_format = None
814 selected_format = self.select_format(rf, formats)
815 if selected_format is not None:
816 formats_to_download = [selected_format]
818 if not formats_to_download:
819 raise ExtractorError('requested format not available',
823 if len(formats_to_download) > 1:
824 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
825 for format in formats_to_download:
826 new_info = dict(info_dict)
827 new_info.update(format)
828 self.process_info(new_info)
829 # We update the info dict with the best quality format (backwards compatibility)
830 info_dict.update(formats_to_download[-1])
833 def process_info(self, info_dict):
834 """Process a single resolved IE result."""
836 assert info_dict.get('_type', 'video') == 'video'
838 max_downloads = self.params.get('max_downloads')
839 if max_downloads is not None:
840 if self._num_downloads >= int(max_downloads):
841 raise MaxDownloadsReached()
843 info_dict['fulltitle'] = info_dict['title']
844 if len(info_dict['title']) > 200:
845 info_dict['title'] = info_dict['title'][:197] + '...'
847 # Keep for backwards compatibility
848 info_dict['stitle'] = info_dict['title']
850 if not 'format' in info_dict:
851 info_dict['format'] = info_dict['ext']
853 reason = self._match_entry(info_dict)
854 if reason is not None:
855 self.to_screen('[download] ' + reason)
858 self._num_downloads += 1
860 filename = self.prepare_filename(info_dict)
863 if self.params.get('forcetitle', False):
864 self.to_stdout(info_dict['fulltitle'])
865 if self.params.get('forceid', False):
866 self.to_stdout(info_dict['id'])
867 if self.params.get('forceurl', False):
868 # For RTMP URLs, also include the playpath
869 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
870 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
871 self.to_stdout(info_dict['thumbnail'])
872 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
873 self.to_stdout(info_dict['description'])
874 if self.params.get('forcefilename', False) and filename is not None:
875 self.to_stdout(filename)
876 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
877 self.to_stdout(formatSeconds(info_dict['duration']))
878 if self.params.get('forceformat', False):
879 self.to_stdout(info_dict['format'])
880 if self.params.get('forcejson', False):
881 info_dict['_filename'] = filename
882 self.to_stdout(json.dumps(info_dict))
884 # Do nothing else if in simulate mode
885 if self.params.get('simulate', False):
892 dn = os.path.dirname(encodeFilename(filename))
893 if dn and not os.path.exists(dn):
895 except (OSError, IOError) as err:
896 self.report_error('unable to create directory ' + compat_str(err))
899 if self.params.get('writedescription', False):
900 descfn = filename + '.description'
901 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
902 self.to_screen('[info] Video description is already present')
905 self.to_screen('[info] Writing video description to: ' + descfn)
906 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
907 descfile.write(info_dict['description'])
908 except (KeyError, TypeError):
909 self.report_warning('There\'s no description to write.')
910 except (OSError, IOError):
911 self.report_error('Cannot write description file ' + descfn)
914 if self.params.get('writeannotations', False):
915 annofn = filename + '.annotations.xml'
916 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
917 self.to_screen('[info] Video annotations are already present')
920 self.to_screen('[info] Writing video annotations to: ' + annofn)
921 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
922 annofile.write(info_dict['annotations'])
923 except (KeyError, TypeError):
924 self.report_warning('There are no annotations to write.')
925 except (OSError, IOError):
926 self.report_error('Cannot write annotations file: ' + annofn)
929 subtitles_are_requested = any([self.params.get('writesubtitles', False),
930 self.params.get('writeautomaticsub')])
932 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
933 # subtitles download errors are already managed as troubles in relevant IE
934 # that way it will silently go on when used with unsupporting IE
935 subtitles = info_dict['subtitles']
936 sub_format = self.params.get('subtitlesformat', 'srt')
937 for sub_lang in subtitles.keys():
938 sub = subtitles[sub_lang]
942 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
943 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
944 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
946 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
947 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
949 except (OSError, IOError):
950 self.report_error('Cannot write subtitles file ' + sub_filename)
953 if self.params.get('writeinfojson', False):
954 infofn = os.path.splitext(filename)[0] + '.info.json'
955 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
956 self.to_screen('[info] Video description metadata is already present')
958 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
960 write_json_file(info_dict, encodeFilename(infofn))
961 except (OSError, IOError):
962 self.report_error('Cannot write metadata to JSON file ' + infofn)
965 if self.params.get('writethumbnail', False):
966 if info_dict.get('thumbnail') is not None:
967 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
968 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
969 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
970 self.to_screen('[%s] %s: Thumbnail is already present' %
971 (info_dict['extractor'], info_dict['id']))
973 self.to_screen('[%s] %s: Downloading thumbnail ...' %
974 (info_dict['extractor'], info_dict['id']))
976 uf = self.urlopen(info_dict['thumbnail'])
977 with open(thumb_filename, 'wb') as thumbf:
978 shutil.copyfileobj(uf, thumbf)
979 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
980 (info_dict['extractor'], info_dict['id'], thumb_filename))
981 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
982 self.report_warning('Unable to download thumbnail "%s": %s' %
983 (info_dict['thumbnail'], compat_str(err)))
985 if not self.params.get('skip_download', False):
986 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
991 fd = get_suitable_downloader(info)(self, self.params)
992 for ph in self._progress_hooks:
993 fd.add_progress_hook(ph)
994 return fd.download(name, info)
995 if info_dict.get('requested_formats') is not None:
998 merger = FFmpegMergerPP(self)
999 if not merger._get_executable():
1001 self.report_warning('You have requested multiple '
1002 'formats but ffmpeg or avconv are not installed.'
1003 ' The formats won\'t be merged')
1005 postprocessors = [merger]
1006 for f in info_dict['requested_formats']:
1007 new_info = dict(info_dict)
1009 fname = self.prepare_filename(new_info)
1010 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1011 downloaded.append(fname)
1012 partial_success = dl(fname, new_info)
1013 success = success and partial_success
1014 info_dict['__postprocessors'] = postprocessors
1015 info_dict['__files_to_merge'] = downloaded
1017 # Just a single file
1018 success = dl(filename, info_dict)
1019 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1020 self.report_error('unable to download video data: %s' % str(err))
1022 except (OSError, IOError) as err:
1023 raise UnavailableVideoError(err)
1024 except (ContentTooShortError, ) as err:
1025 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1030 self.post_process(filename, info_dict)
1031 except (PostProcessingError) as err:
1032 self.report_error('postprocessing: %s' % str(err))
1035 self.record_download_archive(info_dict)
# Public entry point: extract info for, and download, every URL in url_list;
# returns the accumulated process return code.
# NOTE(review): this excerpt has gaps — original lines 1041 (presumably the
# "'%' not in outtmpl"-style check), 1044/1046 (a 'try:' opener for the loop
# body) and 1053-1054 are missing.  Confirm against the full file.
1037 def download(self, url_list):
1038 """Download a given list of URLs."""
1039 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Guard: refuse to write several videos to one fixed output template.
1040 if (len(url_list) > 1 and
1042 and self.params.get('max_downloads') != 1):
1043 raise SameFileError(outtmpl)
1045 for url in url_list:
1047 #It also downloads the videos
1048 self.extract_info(url)
# A failure on one URL is reported and the loop continues with the next.
1049 except UnavailableVideoError:
1050 self.report_error('unable to download video')
# --max-downloads limit hit; presumably re-raised/aborted on the missing
# line 1053 — TODO confirm.
1051 except MaxDownloadsReached:
1052 self.to_screen('[info] Maximum number of downloaded files reached.')
1055 return self._download_retcode
# Re-run a download from a previously written .info.json file instead of
# extracting the URL again.
# NOTE(review): lines 1059-1060 (the json.load of f and a 'try:') and
# 1067-1068 (presumably an 'else: raise' branch) are missing from this
# excerpt — confirm against the full file.
1057 def download_with_info_file(self, info_filename):
1058 with io.open(info_filename, 'r', encoding='utf-8') as f:
1061 self.process_ie_result(info, download=True)
1062 except DownloadError:
# Fallback: if processing the saved info fails, retry from the original
# webpage URL when the info dict recorded one.
1063 webpage_url = info.get('webpage_url')
1064 if webpage_url is not None:
1065 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1066 return self.download([webpage_url])
1069 return self._download_retcode
# NOTE(review): lines 1075-1076 (initialization of keep_video/pps_chain),
# 1081 ('try:') and 1092 ('try:') are missing from this excerpt — confirm
# against the full file before editing.
1071 def post_process(self, filename, ie_info):
1072 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated.
1073 info = dict(ie_info)
1074 info['filepath'] = filename
# Per-video postprocessors (e.g. the ffmpeg merger) run before the
# globally registered ones.
1077 if ie_info.get('__postprocessors') is not None:
1078 pps_chain.extend(ie_info['__postprocessors'])
1079 pps_chain.extend(self._pps)
1080 for pp in pps_chain:
1082 keep_video_wish, new_info = pp.run(info)
1083 if keep_video_wish is not None:
1085 keep_video = keep_video_wish
1086 elif keep_video is None:
1087 # No clear decision yet, let IE decide
1088 keep_video = keep_video_wish
1089 except PostProcessingError as e:
1090 self.report_error(e.msg)
# Delete the original download only when a PP asked for it and the user
# did not pass -k/--keepvideo.
1091 if keep_video is False and not self.params.get('keepvideo', False):
1093 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1094 os.remove(encodeFilename(filename))
1095 except (IOError, OSError):
# Best-effort removal: failure to delete is only a warning.
1096 self.report_warning('Unable to remove downloaded video file')
1098 def _make_archive_id(self, info_dict):
1099 # Future-proof against any change in case
1100 # and backwards compatibility with prior versions
1101 extractor = info_dict.get('extractor_key')
1102 if extractor is None:
1103 if 'id' in info_dict:
1104 extractor = info_dict.get('ie_key') # key in a playlist
1105 if extractor is None:
1106 return None # Incomplete video information
1107 return extractor.lower() + ' ' + info_dict['id']
# Return True when this video's archive id is already recorded in the
# --download-archive file (i.e. it was downloaded before).
# NOTE(review): lines 1111-1113 (early return when fn is None), 1115,
# 1117-1118 ('try:'), 1122 ('return True') and 1125-1127 (re-raise /
# 'return False') are missing from this excerpt — confirm against the
# full file.
1109 def in_download_archive(self, info_dict):
1110 fn = self.params.get('download_archive')
1114 vid_id = self._make_archive_id(info_dict)
1116 return False # Incomplete video information
# locked_file guards against concurrent youtube-dl processes touching
# the archive at the same time.
1119 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1120 for line in archive_file:
1121 if line.strip() == vid_id:
1123 except IOError as ioe:
# A missing archive file (ENOENT) is normal on first run; anything else
# is presumably re-raised on the missing line 1125 — TODO confirm.
1124 if ioe.errno != errno.ENOENT:
# Append this video's archive id to the --download-archive file so future
# runs can skip it.
# NOTE(review): lines 1130-1131 (early return when fn is None) and 1133
# (presumably an assertion that vid_id is not None) are missing from this
# excerpt — confirm against the full file.
1128 def record_download_archive(self, info_dict):
1129 fn = self.params.get('download_archive')
1132 vid_id = self._make_archive_id(info_dict)
# Append mode plus file locking: safe against concurrent writers.
1134 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1135 archive_file.write(vid_id + '\n')
# Render a format dict's resolution as a short display string
# ('WxH', 'Hp', '?xW', an explicit 'resolution' value, ...).
# NOTE(review): the decorator line (1137, presumably @staticmethod), line
# 1140 (the return for the vcodec == 'none' audio-only case), 1146
# ('else:') and 1150-1153 (final 'else: res = default' and 'return res')
# are missing from this excerpt — confirm against the full file.
1138 def format_resolution(format, default='unknown'):
1139 if format.get('vcodec') == 'none':
# An explicit 'resolution' field wins over width/height.
1141 if format.get('resolution') is not None:
1142 return format['resolution']
1143 if format.get('height') is not None:
1144 if format.get('width') is not None:
1145 res = '%sx%s' % (format['width'], format['height'])
1147 res = '%sp' % format['height']
1148 elif format.get('width') is not None:
1149 res = '?x%d' % format['width']
# Build the human-readable "note" column for a format: format_note, total
# bitrate, container, codecs, audio bitrate/sample rate and filesize.
# NOTE(review): many lines are missing from this excerpt (1155 'res = ""',
# 1163-1164, 1168-1169, 1172, 1174, 1178-1179, 1181-1182, 1185-1187,
# 1193-1194, 1196 'return res') — confirm against the full file before
# editing; the visible branches depend on the missing ones.
1154 def _format_note(self, fdict):
# f4f/f4m (Adobe HDS) downloads are not supported by this version.
1156 if fdict.get('ext') in ['f4f', 'f4m']:
1157 res += '(unsupported) '
1158 if fdict.get('format_note') is not None:
1159 res += fdict['format_note'] + ' '
1160 if fdict.get('tbr') is not None:
1161 res += '%4dk ' % fdict['tbr']
1162 if fdict.get('container') is not None:
1165 res += '%s container' % fdict['container']
1166 if (fdict.get('vcodec') is not None and
1167 fdict.get('vcodec') != 'none'):
1170 res += fdict['vcodec']
1171 if fdict.get('vbr') is not None:
1173 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1175 if fdict.get('vbr') is not None:
1176 res += '%4dk' % fdict['vbr']
1177 if fdict.get('acodec') is not None:
1180 if fdict['acodec'] == 'none':
1183 res += '%-5s' % fdict['acodec']
1184 elif fdict.get('abr') is not None:
1188 if fdict.get('abr') is not None:
1189 res += '@%3dk' % fdict['abr']
1190 if fdict.get('asr') is not None:
1191 res += ' (%5dHz)' % fdict['asr']
1192 if fdict.get('filesize') is not None:
1195 res += format_bytes(fdict['filesize'])
# Print a table of all available formats for a video (--list-formats).
# NOTE(review): lines 1202 (presumably the 'ext' column in line()),
# 1205-1206 and 1214 are missing from this excerpt — confirm against the
# full file.
1198 def list_formats(self, info_dict):
# Render a single table row; idlen sizes the format-code column.
1199 def line(format, idlen=20):
1200 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1201 format['format_id'],
1203 self.format_resolution(format),
1204 self._format_note(format),
# Videos without a 'formats' list are treated as a single format.
1207 formats = info_dict.get('formats', [info_dict])
1208 idlen = max(len('format code'),
1209 max(len(f['format_id']) for f in formats))
1210 formats_s = [line(f, idlen) for f in formats]
# Formats are assumed sorted worst-to-best: annotate the two extremes.
1211 if len(formats) > 1:
1212 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1213 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
# The header row is rendered through line() itself so columns line up.
1215 header_line = line({
1216 'format_id': 'format code', 'ext': 'extension',
1217 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1218 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1219 (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download """
    # Route every request through the shared opener so cookies, proxy
    # settings and the configured socket timeout apply uniformly.
    opener = self._opener
    timeout = self._socket_timeout
    return opener.open(req, timeout=timeout)
# Emit the [debug] header (encodings, version, git commit, Python/platform,
# proxy map) when --verbose is active; a no-op otherwise.
# NOTE(review): lines 1227-1229 (the opening of the encodings write call),
# 1235-1237, 1239 ('try:'), and 1248-1252 (the except around the git
# subprocess) are missing from this excerpt — confirm against the full
# file.  In particular the subprocess.Popen call at 1240 appears to be
# wrapped in a try/except that is not visible here.
1225 def print_debug_header(self):
1226 if not self.params.get('verbose'):
1230 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1231 locale.getpreferredencoding(),
1232 sys.getfilesystemencoding(),
1233 sys.stdout.encoding,
1234 self.get_encoding()),
1238 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git HEAD when running from a checkout.
1240 sp = subprocess.Popen(
1241 ['git', 'rev-parse', '--short', 'HEAD'],
1242 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1243 cwd=os.path.dirname(os.path.abspath(__file__)))
1244 out, err = sp.communicate()
1245 out = out.decode().strip()
1246 if re.match('[0-9a-f]+', out):
1247 self._write_string('[debug] Git HEAD: ' + out + '\n')
1253 self._write_string('[debug] Python version %s - %s' %
1254 (platform.python_version(), platform_name()) + '\n')
# Collect the effective proxy settings from every installed handler.
1257 for handler in self._opener.handlers:
1258 if hasattr(handler, 'proxies'):
1259 proxy_map.update(handler.proxies)
1260 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# Build self._opener: cookie jar, proxy handling, HTTPS certificate
# checking and the custom YoutubeDLHandler, driven by self.params.
# NOTE(review): lines 1265, 1268, 1271 ('else:'), 1273 (the cookie-file
# argument), 1276, 1278 (the cookiejar argument), 1281-1282 (the empty-
# proxy branch), 1284 ('else:'), 1290 and 1302 are missing from this
# excerpt — confirm against the full file.
1262 def _setup_opener(self):
1263 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds when --socket-timeout is unset.
1264 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1266 opts_cookiefile = self.params.get('cookiefile')
1267 opts_proxy = self.params.get('proxy')
# Without --cookies use an in-memory jar; with it, a Mozilla-format jar
# loaded from disk when the file is readable.
1269 if opts_cookiefile is None:
1270 self.cookiejar = compat_cookiejar.CookieJar()
1272 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1274 if os.access(opts_cookiefile, os.R_OK):
1275 self.cookiejar.load()
1277 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# An explicit --proxy overrides the environment's proxy settings.
1279 if opts_proxy is not None:
1280 if opts_proxy == '':
1283 proxies = {'http': opts_proxy, 'https': opts_proxy}
1285 proxies = compat_urllib_request.getproxies()
1286 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1287 if 'http' in proxies and 'https' not in proxies:
1288 proxies['https'] = proxies['http']
1289 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --dump-intermediate-pages style traffic debugging for both handlers.
1291 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1292 https_handler = make_HTTPS_handler(
1293 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1294 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1295 opener = compat_urllib_request.build_opener(
1296 https_handler, proxy_handler, cookie_processor, ydlh)
1297 # Delete the default user-agent header, which would otherwise apply in
1298 # cases where our custom HTTP handler doesn't come into play
1299 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1300 opener.addheaders = []
1301 self._opener = opener
# Encode str s to bytes using the configured output encoding; bytes pass
# through unchanged.
# NOTE(review): lines 1306-1307 (the 'try:' opener) and 1311-1312
# (presumably the 'raise' re-raising the annotated error) are missing
# from this excerpt — confirm against the full file.
1303 def encode(self, s):
1304 if isinstance(s, bytes):
1305 return s # Already encoded
1308 return s.encode(self.get_encoding())
1309 except UnicodeEncodeError as err:
# Augment the error with a hint before it propagates to the user.
1310 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1313 def get_encoding(self):
1314 encoding = self.params.get('encoding')
1315 if encoding is None:
1316 encoding = preferredencoding()