2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
54 UnavailableVideoError,
61 from .cache import Cache
62 from .extractor import get_info_extractor, gen_extractors
63 from .downloader import get_suitable_downloader
64 from .postprocessor import FFmpegMergerPP
65 from .version import __version__
68 class YoutubeDL(object):
71 YoutubeDL objects are the ones responsible of downloading the
72 actual video file and writing it to disk if the user has requested
73 it, among some other tasks. In most cases there should be one per
74 program. As, given a video URL, the downloader doesn't know how to
75 extract all the needed information, task that InfoExtractors do, it
76 has to pass the URL to one of them.
78 For this, YoutubeDL objects have a method that allows
79 InfoExtractors to be registered in a given order. When it is passed
80 a URL, the YoutubeDL object handles it to the first InfoExtractor it
81 finds that reports being able to handle it. The InfoExtractor extracts
82 all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
84 Downloader to download the video.
86 YoutubeDL objects accept a lot of parameters. In order not to saturate
87 the object constructor with arguments, it receives a dictionary of
88 options instead. These options are available through the params
89 attribute for the InfoExtractors to use. The YoutubeDL also
90 registers itself as the downloader in charge for the InfoExtractors
91 that are added to it, so this is a "mutual registration".
95 username: Username for authentication purposes.
96 password: Password for authentication purposes.
    videopassword: Password for accessing a video.
98 usenetrc: Use netrc for authentication instead.
99 verbose: Print additional info to stdout.
100 quiet: Do not print messages to stdout.
101 no_warnings: Do not print out anything for warnings.
102 forceurl: Force printing final URL.
103 forcetitle: Force printing title.
104 forceid: Force printing ID.
105 forcethumbnail: Force printing thumbnail URL.
106 forcedescription: Force printing description.
107 forcefilename: Force printing final filename.
108 forceduration: Force printing duration.
109 forcejson: Force printing info_dict as JSON.
110 simulate: Do not download the video files.
111 format: Video format code.
112 format_limit: Highest quality format to try.
113 outtmpl: Template for output names.
114 restrictfilenames: Do not allow "&" and spaces in file names
115 ignoreerrors: Do not stop on download errors.
116 nooverwrites: Prevent overwriting files.
117 playliststart: Playlist item to start at.
118 playlistend: Playlist item to end at.
119 matchtitle: Download only matching titles.
120 rejecttitle: Reject downloads for matching titles.
121 logger: Log messages to a logging.Logger instance.
122 logtostderr: Log messages to stderr instead of stdout.
123 writedescription: Write the video description to a .description file
124 writeinfojson: Write the video description to a .info.json file
125 writeannotations: Write the video annotations to a .annotations.xml file
126 writethumbnail: Write the thumbnail image to a file
127 writesubtitles: Write the video subtitles to a file
128 writeautomaticsub: Write the automatic subtitles to a file
129 allsubtitles: Downloads all the subtitles of the video
130 (requires writesubtitles or writeautomaticsub)
131 listsubtitles: Lists all available subtitles for the video
132 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
133 subtitleslangs: List of languages of the subtitles to download
134 keepvideo: Keep the video file after post-processing
135 daterange: A DateRange object, download only if the upload_date is in the range.
136 skip_download: Skip the actual download of the video file
137 cachedir: Location of the cache files in the filesystem.
138 False to disable filesystem cache.
139 noplaylist: Download single video instead of a playlist if in doubt.
140 age_limit: An integer representing the user's age in years.
141 Unsuitable videos for the given age are skipped.
142 min_views: An integer representing the minimum view count the video
143 must have in order to not be skipped.
144 Videos without view count information are always
145 downloaded. None for no limit.
146 max_views: An integer representing the maximum view count.
147 Videos that are more popular than that are not
149 Videos without view count information are always
150 downloaded. None for no limit.
151 download_archive: File name of a file where all downloads are recorded.
152 Videos already present in the file are not downloaded
154 cookiefile: File name where cookies should be read from and dumped to.
155 nocheckcertificate:Do not verify SSL certificates
156 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
157 At the moment, this is only supported by YouTube.
158 proxy: URL of the proxy server to use
159 socket_timeout: Time to wait for unresponsive hosts, in seconds
160 bidi_workaround: Work around buggy terminals without bidirectional text
    support, using fribidi
162 debug_printtraffic:Print out sent and received HTTP traffic
163 include_ads: Download ads as well
164 default_search: Prepend this string if an input url is not valid.
165 'auto' for elaborate guessing
166 encoding: Use this encoding instead of the system-specified.
167 extract_flat: Do not resolve URLs, return the immediate result.
168 Pass in 'in_playlist' to only show this behavior for
171 The following parameters are not used by YoutubeDL itself, they are used by
173 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
174 noresizebuffer, retries, continuedl, noprogress, consoletitle
176 The following options are used by the post processors:
177 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
178 otherwise prefer avconv.
179 exec_cmd: Arbitrary command to run after downloading
185 _download_retcode = None
186 _num_downloads = None
    def __init__(self, params=None):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): several lines of this method are not visible in this
        # excerpt (e.g. the self.params assignment and the try:/sp_kwargs setup
        # for the bidi subprocess); comments describe only the visible code.
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route screen output to stderr when 'logtostderr' is set (bool indexes
        # into the two-element list).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Pipe screen output through an external bidi filter via a pty so
            # right-to-left text renders on terminals lacking bidi support.
                master, slave = pty.openpty()
                width = get_term_width()
                    width_args = ['-w', str(width)]
                    stdin=subprocess.PIPE,
                    stderr=self._err_file)
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    # Fallback filter when bidiv is unavailable.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        # Auto-enable --restrict-filenames when the filesystem encoding cannot
        # represent arbitrary Unicode filenames (Python 3 only; see #1474).
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        # %(stitle)s was a sanitized-title template field; now deprecated.
        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # NOTE(review): the line appending ie to the ordered extractor list is
        # not visible in this excerpt.
        # Index by key so get_info_extractor() can reuse this instance.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: the extractor gets a back-reference to us.
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # NOTE(review): the 'if ie is None:' guard and the trailing 'return ie'
        # are not visible in this excerpt; the indented lines below lazily
        # build and register the extractor when no cached instance exists.
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
264 def add_default_info_extractors(self):
266 Add the InfoExtractors returned by gen_extractors to the end of the list
268 for ie in gen_extractors():
269 self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # NOTE(review): the line appending pp to the post-processor list is not
        # visible in this excerpt.
        # Mutual registration: the post-processor gets a back-reference to us.
        pp.set_downloader(self)
276 def add_progress_hook(self, ph):
277 """Add the progress hook (currently only for the file downloader)"""
278 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Filter *message* through the external bidi subprocess set up in
        # __init__ so RTL text renders correctly; returns the reordered text.
        if not hasattr(self, '_output_channel'):
        # NOTE(review): the early 'return message' for the workaround-disabled
        # case is not visible in this excerpt.

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        # Feed the message to the subprocess, then read back the same number
        # of (reordered) lines from the pty channel.
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Drop the trailing newline that was appended above.
        return res[:-len('\n')]
293 def to_screen(self, message, skip_eol=False):
294 """Print message to stdout if not in quiet mode."""
295 return self.to_stdout(message, skip_eol, check_quiet=True)
297 def _write_string(self, s, out=None):
298 write_string(s, out=out, encoding=self.params.get('encoding'))
300 def to_stdout(self, message, skip_eol=False, check_quiet=False):
301 """Print message to stdout if not in quiet mode."""
302 if self.params.get('logger'):
303 self.params['logger'].debug(message)
304 elif not check_quiet or not self.params.get('quiet', False):
305 message = self._bidi_workaround(message)
306 terminator = ['\n', ''][skip_eol]
307 output = message + terminator
309 self._write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        # Prefer a configured logger over direct stream writes.
        if self.params.get('logger'):
            self.params['logger'].error(message)
        # NOTE(review): an 'else:' header introducing the direct-write branch
        # is not visible in this excerpt.
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)
    def to_console_title(self, message):
        """Set the terminal/console window title to *message* when enabled."""
        if not self.params.get('consoletitle', False):
        # NOTE(review): the early 'return' is not visible in this excerpt.
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 escape: set icon name and window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        """Push the current terminal title onto the terminal's title stack."""
        if not self.params.get('consoletitle', False):
        # NOTE(review): the early 'return' is not visible in this excerpt.
        if 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)
    def restore_console_title(self):
        """Pop the previously saved terminal title from the title stack."""
        if not self.params.get('consoletitle', False):
        # NOTE(review): the early 'return' is not visible in this excerpt.
        if 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
346 self.save_console_title()
349 def __exit__(self, *args):
350 self.restore_console_title()
352 if self.params.get('cookiefile') is not None:
353 self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        # NOTE(review): a few lines (the 'if tb is None:' guard, the tb
        # initialisation, and two 'else:' headers) are not visible in this
        # excerpt.
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
                if sys.exc_info()[0]: # if .trouble has been called from an except block
                    # A wrapped extractor error may carry the original
                    # exception info in its 'exc_info' attribute.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            # Re-raise with the most specific exception info available.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors ignored: record failure via the process return code instead.
        self._download_retcode = 1
    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        # NOTE(review): the early 'return' lines (after the logger branch and
        # for 'no_warnings') and the non-tty 'else:' header are not visible in
        # this excerpt.
        if self.params.get('no_warnings'):
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI yellow prefix on colour-capable terminals.
            _msg_header = '\033[0;33mWARNING:\033[0m'
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI red prefix on colour-capable terminals.
            _msg_header = '\033[0;31mERROR:\033[0m'
        # NOTE(review): the plain-text 'else:' header is not visible in this
        # excerpt.
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        # Delegate exception raising / retcode handling to trouble().
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # NOTE(review): the 'try:' pairing with the except below is not
        # visible in this excerpt.
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a filename-free message when the name cannot be
            # encoded for the output stream.
            self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): several lines (the 'try:', the autonumber_size default,
        # parts of the sanitize lambda/dict-comprehension, and the 'return
        # filename') are not visible in this excerpt.
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the index to the width of the playlist length.
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            # Derive a human-readable 'resolution' field when absent.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    # NOTE(review): width is rendered after the 'x' here, which
                    # is inconsistent with the '%dx%d' (width x height) form
                    # above — looks like it should be '%dx?'; confirm upstream
                    # intent before changing.
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                restricted=self.params.get('restrictfilenames'),
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
            # Unknown template fields render as 'NA' instead of raising.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = os.path.expanduser(outtmpl)
            filename = tmpl % template_dict
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # A non-None return value is a human-readable skip reason.
        # NOTE(review): several guard headers ('if matchtitle:',
        # 'if rejecttitle:', 'if date is not None:', the actual_age_limit
        # default, and the final 'return None') are not visible in this excerpt.

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            actual_age_limit = info_dict.get('age_limit')
            if actual_age_limit is None:
            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
499 def add_extra_info(info_dict, extra_info):
500 '''Set the keys from extra_info in info dict if they are missing'''
501 for key, value in extra_info.items():
502 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): the rest of the signature ('process=True):'), the
        # extractor loop header, and several returns/continues are not visible
        # in this excerpt. Also note extra_info is a mutable default argument;
        # it appears to be only read here, but verify before relying on that.
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
            # Restrict to the explicitly requested extractor when given.
            ies = [self.get_info_extractor(ie_key)]
            if not ie.suitable(url):
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')
                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                        '_type': 'compat_list',
                        'entries': ie_result,
                self.add_default_extra_info(ie_result, ie, url)
                    return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
            except MaxDownloadsReached:
            except Exception as e:
                # Unexpected error: only swallow it when errors are ignored.
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        # No registered extractor accepted the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        """Fill in extractor/webpage bookkeeping fields on *ie_result*."""
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            # NOTE(review): the 'webpage_url' entry and the closing '})' are
            # not visible in this excerpt.
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): numerous lines (returns, 'else:'/'try:' headers, dict
        # braces, some call arguments) are not visible in this excerpt;
        # comments describe only the visible code. extra_info is a mutable
        # default argument; it appears read-only here — verify.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # Flat extraction: hand back the unresolved reference as-is.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge the embedded video's technical fields into a copy of
                # the embedding page's result.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based for slicing.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
                # Lazily paged playlist: slice without materialising it all.
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                    'n_entries': n_entries,
                    'playlist': playlist,
                    # 1-based index within the full (unsliced) playlist.
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                # Skip entries rejected by the user's filters.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            # Legacy list-of-dicts result: patch bookkeeping fields onto each
            # entry before processing it.
            self.add_extra_info(r,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        """Pick one format dict from *available_formats* according to
        *format_spec*; the list order supplies the quality ranking
        (worst first, best last — see the 'best'/'worst' branches).
        """
        # NOTE(review): the list-comprehension openers ('audio_formats = [',
        # 'video_formats = ['), their emptiness guards, the 'else:' before the
        # extension branch, and the final 'return' lines are not visible in
        # this excerpt.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
            if format_spec in extensions:
                # Bare extension spec: match on the container/extension.
                filter_f = lambda f: f['ext'] == format_spec
                # Otherwise treat the spec as an exact format_id.
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        """Validate a single resolved video result, fill in derived fields
        (thumbnails, display_id, upload_date), apply the user's format
        selection and hand each chosen format to process_info().
        """
        # NOTE(review): several lines ('else:'/'try:' headers, dict braces,
        # some returns and call arguments) are not visible in this excerpt.
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
            # Sort so the highest-resolution thumbnail ends up last.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD) from a Unix timestamp when absent.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
                self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']

            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        # --format-limit: truncate the list after the limit format.
        format_limit = self.params.get('format_limit', None)
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            'requested_formats': formats_info,
                            'ext': formats_info[0]['ext'],
                        selected_format = None
                            selected_format = self.select_format(rf, formats)
                        if selected_format is not None:
                            formats_to_download.append(selected_format)
        if not formats_to_download:
            raise ExtractorError('requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            # Each selected format is processed as its own merged info dict.
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
856 def process_info(self, info_dict):
857 """Process a single resolved IE result."""
859 assert info_dict.get('_type', 'video') == 'video'
861 max_downloads = self.params.get('max_downloads')
862 if max_downloads is not None:
863 if self._num_downloads >= int(max_downloads):
864 raise MaxDownloadsReached()
866 info_dict['fulltitle'] = info_dict['title']
867 if len(info_dict['title']) > 200:
868 info_dict['title'] = info_dict['title'][:197] + '...'
870 # Keep for backwards compatibility
871 info_dict['stitle'] = info_dict['title']
873 if 'format' not in info_dict:
874 info_dict['format'] = info_dict['ext']
876 reason = self._match_entry(info_dict)
877 if reason is not None:
878 self.to_screen('[download] ' + reason)
881 self._num_downloads += 1
883 filename = self.prepare_filename(info_dict)
886 if self.params.get('forcetitle', False):
887 self.to_stdout(info_dict['fulltitle'])
888 if self.params.get('forceid', False):
889 self.to_stdout(info_dict['id'])
890 if self.params.get('forceurl', False):
891 # For RTMP URLs, also include the playpath
892 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
893 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
894 self.to_stdout(info_dict['thumbnail'])
895 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
896 self.to_stdout(info_dict['description'])
897 if self.params.get('forcefilename', False) and filename is not None:
898 self.to_stdout(filename)
899 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
900 self.to_stdout(formatSeconds(info_dict['duration']))
901 if self.params.get('forceformat', False):
902 self.to_stdout(info_dict['format'])
903 if self.params.get('forcejson', False):
904 info_dict['_filename'] = filename
905 self.to_stdout(json.dumps(info_dict))
907 # Do nothing else if in simulate mode
908 if self.params.get('simulate', False):
915 dn = os.path.dirname(encodeFilename(filename))
916 if dn and not os.path.exists(dn):
918 except (OSError, IOError) as err:
919 self.report_error('unable to create directory ' + compat_str(err))
922 if self.params.get('writedescription', False):
923 descfn = filename + '.description'
924 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
925 self.to_screen('[info] Video description is already present')
928 self.to_screen('[info] Writing video description to: ' + descfn)
929 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
930 descfile.write(info_dict['description'])
931 except (KeyError, TypeError):
932 self.report_warning('There\'s no description to write.')
933 except (OSError, IOError):
934 self.report_error('Cannot write description file ' + descfn)
937 if self.params.get('writeannotations', False):
938 annofn = filename + '.annotations.xml'
939 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
940 self.to_screen('[info] Video annotations are already present')
943 self.to_screen('[info] Writing video annotations to: ' + annofn)
944 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
945 annofile.write(info_dict['annotations'])
946 except (KeyError, TypeError):
947 self.report_warning('There are no annotations to write.')
948 except (OSError, IOError):
949 self.report_error('Cannot write annotations file: ' + annofn)
952 subtitles_are_requested = any([self.params.get('writesubtitles', False),
953 self.params.get('writeautomaticsub')])
955 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
956 # subtitles download errors are already managed as troubles in relevant IE
957 # that way it will silently go on when used with unsupporting IE
958 subtitles = info_dict['subtitles']
959 sub_format = self.params.get('subtitlesformat', 'srt')
960 for sub_lang in subtitles.keys():
961 sub = subtitles[sub_lang]
965 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
966 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
967 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
969 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
970 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
972 except (OSError, IOError):
973 self.report_error('Cannot write subtitles file ' + sub_filename)
976 if self.params.get('writeinfojson', False):
977 infofn = os.path.splitext(filename)[0] + '.info.json'
978 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
979 self.to_screen('[info] Video description metadata is already present')
981 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
983 write_json_file(info_dict, encodeFilename(infofn))
984 except (OSError, IOError):
985 self.report_error('Cannot write metadata to JSON file ' + infofn)
988 if self.params.get('writethumbnail', False):
989 if info_dict.get('thumbnail') is not None:
990 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
991 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
992 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
993 self.to_screen('[%s] %s: Thumbnail is already present' %
994 (info_dict['extractor'], info_dict['id']))
996 self.to_screen('[%s] %s: Downloading thumbnail ...' %
997 (info_dict['extractor'], info_dict['id']))
999 uf = self.urlopen(info_dict['thumbnail'])
1000 with open(thumb_filename, 'wb') as thumbf:
1001 shutil.copyfileobj(uf, thumbf)
1002 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1003 (info_dict['extractor'], info_dict['id'], thumb_filename))
1004 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1005 self.report_warning('Unable to download thumbnail "%s": %s' %
1006 (info_dict['thumbnail'], compat_str(err)))
1008 if not self.params.get('skip_download', False):
1009 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1014 fd = get_suitable_downloader(info)(self, self.params)
1015 for ph in self._progress_hooks:
1016 fd.add_progress_hook(ph)
1017 if self.params.get('verbose'):
1018 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1019 return fd.download(name, info)
1020 if info_dict.get('requested_formats') is not None:
1023 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1024 if not merger._get_executable():
1026 self.report_warning('You have requested multiple '
1027 'formats but ffmpeg or avconv are not installed.'
1028 ' The formats won\'t be merged')
1030 postprocessors = [merger]
1031 for f in info_dict['requested_formats']:
1032 new_info = dict(info_dict)
1034 fname = self.prepare_filename(new_info)
1035 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1036 downloaded.append(fname)
1037 partial_success = dl(fname, new_info)
1038 success = success and partial_success
1039 info_dict['__postprocessors'] = postprocessors
1040 info_dict['__files_to_merge'] = downloaded
1042 # Just a single file
1043 success = dl(filename, info_dict)
1044 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1045 self.report_error('unable to download video data: %s' % str(err))
1047 except (OSError, IOError) as err:
1048 raise UnavailableVideoError(err)
1049 except (ContentTooShortError, ) as err:
1050 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1055 self.post_process(filename, info_dict)
1056 except (PostProcessingError) as err:
1057 self.report_error('postprocessing: %s' % str(err))
1060 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Feeds every URL to extract_info() (which also performs the actual
    download), reporting per-URL failures instead of aborting the whole
    batch.  Returns the accumulated download return code.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # Refuse to write several videos into one fixed output file name.
    # NOTE(review): one line of this condition (presumably the check on
    # the output template) appears to be elided in this excerpt --
    # confirm against VCS.
    if (len(url_list) > 1 and
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        # NOTE(review): the 'try:' opening this handler block appears to
        # be elided in this excerpt -- confirm against VCS.
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Re-run the download pipeline from a previously written .info.json.

    Loads the info dict from *info_filename* and hands it to
    process_ie_result(download=True).  On DownloadError, falls back to a
    fresh extraction from the recorded 'webpage_url' when available.
    Returns the accumulated download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        # NOTE(review): the 'try:' and the line loading the JSON into
        # `info` appear to be elided in this excerpt -- confirm against
        # VCS.
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            # Fall back to re-extracting from the original page URL.
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    # Work on a copy so the caller's info dict is not mutated.
    info = dict(ie_info)
    info['filepath'] = filename
    # NOTE(review): the initialisation of keep_video/pps_chain, the
    # 'try:' lines and one branch of the keep_video decision appear to
    # be elided in this excerpt -- confirm against VCS.
    # Per-video postprocessors (e.g. the format merger) run before the
    # user-configured ones.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    # Delete the intermediate file unless a postprocessor asked to keep
    # it or the user passed -k/--keepvideo.
    if keep_video is False and not self.params.get('keepvideo', False):
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1123 def _make_archive_id(self, info_dict):
1124 # Future-proof against any change in case
1125 # and backwards compatibility with prior versions
1126 extractor = info_dict.get('extractor_key')
1127 if extractor is None:
1128 if 'id' in info_dict:
1129 extractor = info_dict.get('ie_key') # key in a playlist
1130 if extractor is None:
1131 return None # Incomplete video information
1132 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    # True iff this video's archive id is already recorded in the
    # --download-archive file (used to skip already-downloaded videos).
    fn = self.params.get('download_archive')
    # NOTE(review): the early-return guards (fn is None / vid_id is
    # None), the 'try:', the 'return True' on a match and the final
    # 'return False' appear to be elided in this excerpt -- confirm
    # against VCS.
    vid_id = self._make_archive_id(info_dict)
        return False  # Incomplete video information
    with locked_file(fn, 'r', encoding='utf-8') as archive_file:
        for line in archive_file:
            if line.strip() == vid_id:
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet;
        # any other I/O failure is unexpected.
        if ioe.errno != errno.ENOENT:
def record_download_archive(self, info_dict):
    # Append this video's archive id to the --download-archive file so
    # future runs skip it (counterpart of in_download_archive).
    fn = self.params.get('download_archive')
    # NOTE(review): the guard returning early when fn is None (and an
    # assertion on vid_id) appear to be elided in this excerpt --
    # confirm against VCS.
    vid_id = self._make_archive_id(info_dict)
    # locked_file serialises concurrent appends to the archive.
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    # Render a human-readable resolution string for a format dict:
    # explicit 'resolution' wins, otherwise 'WxH', 'Hp' or '?xW' is
    # synthesised from the available dimensions.
    # NOTE(review): `format` shadows the builtin but is part of the
    # public signature; kept as-is.  Several lines (the audio-only
    # return for vcodec == 'none', 'else:' branches and the final
    # return) appear to be elided in this excerpt -- confirm against
    # VCS.
    if format.get('vcodec') == 'none':
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '?x%d' % format['width']
def _format_note(self, fdict):
    # Build the free-text "note" column for one format row in
    # list_formats(): codec/bitrate/sample-rate/filesize details,
    # accumulated into `res`.
    # NOTE(review): the initialisation of `res`, the comma-separator
    # guards, several branch bodies and the final return appear to be
    # elided in this excerpt -- confirm against VCS.
    if fdict.get('ext') in ['f4f', 'f4m']:
        # f4f/f4m (HDS) fragments cannot be downloaded directly.
        res += '(unsupported) '
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        # Total bitrate in kbit/s.
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None and
            fdict.get('vcodec') != 'none'):
        res += fdict['vcodec']
        if fdict.get('vbr') is not None:
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
    if fdict.get('vbr') is not None:
        # Video bitrate in kbit/s.
        res += '%4dk' % fdict['vbr']
    if fdict.get('acodec') is not None:
        if fdict['acodec'] == 'none':
        res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
    if fdict.get('abr') is not None:
        # Audio bitrate in kbit/s.
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        # Audio sampling rate in Hz.
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        # '~' marks an estimated size.
        res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    # Print a table of every available format for the given video
    # (format code, extension, resolution, note).
    def line(format, idlen=20):
        # Render one table row; idlen is the width of the id column.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            self.format_resolution(format),
            self._format_note(format),
    # NOTE(review): the format['ext'] field and the closing parentheses
    # of `line` appear to be elided in this excerpt -- confirm against
    # VCS.

    # A video without a 'formats' list is itself the single format.
    formats = info_dict.get('formats', [info_dict])
    # Widen the id column to the longest format_id (at least the header).
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Formats are sorted worst-to-best; annotate the two extremes.
        formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download """
    # *req* is either a URL string or a urllib Request object; the
    # request is issued through the opener built by _setup_opener().
    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
    url = req if req_is_string else req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        # NOTE(review): the branch handling the plain-string case (where
        # req is simply replaced by the escaped URL) appears to be
        # elided in this excerpt -- confirm against VCS.  The Request is
        # rebuilt because its URL cannot be mutated in place.
        req = compat_urllib_request.Request(
            url_escaped, data=req.data, headers=req.headers,
            origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    # Emit verbose diagnostic information (encodings, youtube-dl
    # version, git HEAD, Python/platform, proxy map) to help with bug
    # reports.  Only active with --verbose.
    if not self.params.get('verbose'):
    # NOTE(review): several lines appear to be elided in this excerpt
    # (the early return above, the assignment opening encoding_str, and
    # the try/except wrapping the git subprocess call) -- confirm
    # against VCS.
    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            sys.stdout.encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    # Best-effort: report the git revision when running from a checkout.
    sp = subprocess.Popen(
        ['git', 'rev-parse', '--short', 'HEAD'],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        cwd=os.path.dirname(os.path.abspath(__file__)))
    out, err = sp.communicate()
    out = out.decode().strip()
    if re.match('[0-9a-f]+', out):
        self._write_string('[debug] Git HEAD: ' + out + '\n')
    self._write_string('[debug] Python version %s - %s' %
                       (platform.python_version(), platform_name()) + '\n')

    # Collect the effective proxies from every handler that has any.
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    # Build the urllib opener used for all HTTP(S) requests: cookie
    # handling, proxy configuration, the HTTPS handler and the custom
    # YoutubeDLHandler.  Stores the result in self._opener.
    # NOTE(review): a few 'else:' lines and continuation arguments
    # appear to be elided in this excerpt -- confirm against VCS.
    timeout_val = self.params.get('socket_timeout')
    # Default network timeout: 600 seconds.
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        # In-memory cookies only.
        self.cookiejar = compat_cookiejar.CookieJar()
        # Persistent, Mozilla-format cookie file; only loaded when
        # readable.
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
    if opts_proxy is not None:
        if opts_proxy == '':
            proxies = {'http': opts_proxy, 'https': opts_proxy}
        # No explicit --proxy: fall back to environment proxies.
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    # Encode text *s* to bytes using the configured output encoding
    # (see get_encoding()); bytes input is passed through untouched.
    if isinstance(s, bytes):
        return s  # Already encoded

    # NOTE(review): the 'try:' opening this handler (and the re-raise
    # after annotating the error) appear to be elided in this excerpt
    # -- confirm against VCS.
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        # Enrich the error with a user-actionable hint before it
        # propagates.
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1366 def get_encoding(self):
1367 encoding = self.params.get('encoding')
1368 if encoding is None:
1369 encoding = preferredencoding()