2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
57 UnavailableVideoError,
64 from .cache import Cache
65 from .extractor import get_info_extractor, gen_extractors
66 from .downloader import get_suitable_downloader
67 from .downloader.rtmp import rtmpdump_version
68 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
69 from .version import __version__
72 class YoutubeDL(object):
75 YoutubeDL objects are the ones responsible of downloading the
76 actual video file and writing it to disk if the user has requested
77 it, among some other tasks. In most cases there should be one per
78 program. As, given a video URL, the downloader doesn't know how to
79 extract all the needed information, task that InfoExtractors do, it
80 has to pass the URL to one of them.
82 For this, YoutubeDL objects have a method that allows
83 InfoExtractors to be registered in a given order. When it is passed
a URL, the YoutubeDL object hands it to the first InfoExtractor it
85 finds that reports being able to handle it. The InfoExtractor extracts
86 all the information about the video or videos the URL refers to, and
YoutubeDL processes the extracted information, possibly using a File
88 Downloader to download the video.
90 YoutubeDL objects accept a lot of parameters. In order not to saturate
91 the object constructor with arguments, it receives a dictionary of
92 options instead. These options are available through the params
93 attribute for the InfoExtractors to use. The YoutubeDL also
94 registers itself as the downloader in charge for the InfoExtractors
95 that are added to it, so this is a "mutual registration".
99 username: Username for authentication purposes.
100 password: Password for authentication purposes.
videopassword:     Password for accessing a video.
102 usenetrc: Use netrc for authentication instead.
103 verbose: Print additional info to stdout.
104 quiet: Do not print messages to stdout.
105 no_warnings: Do not print out anything for warnings.
106 forceurl: Force printing final URL.
107 forcetitle: Force printing title.
108 forceid: Force printing ID.
109 forcethumbnail: Force printing thumbnail URL.
110 forcedescription: Force printing description.
111 forcefilename: Force printing final filename.
112 forceduration: Force printing duration.
113 forcejson: Force printing info_dict as JSON.
114 dump_single_json: Force printing the info_dict of the whole playlist
115 (or video) as a single JSON line.
116 simulate: Do not download the video files.
117 format: Video format code.
118 format_limit: Highest quality format to try.
119 outtmpl: Template for output names.
120 restrictfilenames: Do not allow "&" and spaces in file names
121 ignoreerrors: Do not stop on download errors.
122 nooverwrites: Prevent overwriting files.
123 playliststart: Playlist item to start at.
124 playlistend: Playlist item to end at.
125 matchtitle: Download only matching titles.
126 rejecttitle: Reject downloads for matching titles.
127 logger: Log messages to a logging.Logger instance.
128 logtostderr: Log messages to stderr instead of stdout.
129 writedescription: Write the video description to a .description file
130 writeinfojson: Write the video description to a .info.json file
131 writeannotations: Write the video annotations to a .annotations.xml file
132 writethumbnail: Write the thumbnail image to a file
133 writesubtitles: Write the video subtitles to a file
134 writeautomaticsub: Write the automatic subtitles to a file
135 allsubtitles: Downloads all the subtitles of the video
136 (requires writesubtitles or writeautomaticsub)
137 listsubtitles: Lists all available subtitles for the video
138 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
139 subtitleslangs: List of languages of the subtitles to download
140 keepvideo: Keep the video file after post-processing
141 daterange: A DateRange object, download only if the upload_date is in the range.
142 skip_download: Skip the actual download of the video file
143 cachedir: Location of the cache files in the filesystem.
144 False to disable filesystem cache.
145 noplaylist: Download single video instead of a playlist if in doubt.
146 age_limit: An integer representing the user's age in years.
147 Unsuitable videos for the given age are skipped.
148 min_views: An integer representing the minimum view count the video
149 must have in order to not be skipped.
150 Videos without view count information are always
151 downloaded. None for no limit.
152 max_views: An integer representing the maximum view count.
153 Videos that are more popular than that are not
155 Videos without view count information are always
156 downloaded. None for no limit.
157 download_archive: File name of a file where all downloads are recorded.
158 Videos already present in the file are not downloaded
160 cookiefile: File name where cookies should be read from and dumped to.
161 nocheckcertificate:Do not verify SSL certificates
162 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
163 At the moment, this is only supported by YouTube.
164 proxy: URL of the proxy server to use
165 socket_timeout: Time to wait for unresponsive hosts, in seconds
166 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
168 debug_printtraffic:Print out sent and received HTTP traffic
169 include_ads: Download ads as well
170 default_search: Prepend this string if an input url is not valid.
171 'auto' for elaborate guessing
172 encoding: Use this encoding instead of the system-specified.
173 extract_flat: Do not resolve URLs, return the immediate result.
174 Pass in 'in_playlist' to only show this behavior for
177 The following parameters are not used by YoutubeDL itself, they are used by
179 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
180 noresizebuffer, retries, continuedl, noprogress, consoletitle
182 The following options are used by the post processors:
183 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
184 otherwise prefer avconv.
185 exec_cmd: Arbitrary command to run after downloading
191 _download_retcode = None
192 _num_downloads = None
195 def __init__(self, params=None, auto_init=True):
196 """Create a FileDownloader object with the given options."""
200 self._ies_instances = {}
202 self._progress_hooks = []
203 self._download_retcode = 0
204 self._num_downloads = 0
205 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
206 self._err_file = sys.stderr
208 self.cache = Cache(self)
210 if params.get('bidi_workaround', False):
213 master, slave = pty.openpty()
214 width = get_term_width()
218 width_args = ['-w', str(width)]
220 stdin=subprocess.PIPE,
222 stderr=self._err_file)
224 self._output_process = subprocess.Popen(
225 ['bidiv'] + width_args, **sp_kwargs
228 self._output_process = subprocess.Popen(
229 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
230 self._output_channel = os.fdopen(master, 'rb')
231 except OSError as ose:
233 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
237 if (sys.version_info >= (3,) and sys.platform != 'win32' and
238 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
239 and not params.get('restrictfilenames', False)):
240 # On Python 3, the Unicode filesystem API will throw errors (#1474)
242 'Assuming --restrict-filenames since file system encoding '
243 'cannot encode all characters. '
244 'Set the LC_ALL environment variable to fix this.')
245 self.params['restrictfilenames'] = True
247 if '%(stitle)s' in self.params.get('outtmpl', ''):
248 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
253 self.print_debug_header()
254 self.add_default_info_extractors()
256 def add_info_extractor(self, ie):
257 """Add an InfoExtractor object to the end of the list."""
259 self._ies_instances[ie.ie_key()] = ie
260 ie.set_downloader(self)
262 def get_info_extractor(self, ie_key):
264 Get an instance of an IE with name ie_key, it will try to get one from
265 the _ies list, if there's no instance it will create a new one and add
266 it to the extractor list.
268 ie = self._ies_instances.get(ie_key)
270 ie = get_info_extractor(ie_key)()
271 self.add_info_extractor(ie)
274 def add_default_info_extractors(self):
276 Add the InfoExtractors returned by gen_extractors to the end of the list
278 for ie in gen_extractors():
279 self.add_info_extractor(ie)
281 def add_post_processor(self, pp):
282 """Add a PostProcessor object to the end of the chain."""
284 pp.set_downloader(self)
286 def add_progress_hook(self, ph):
287 """Add the progress hook (currently only for the file downloader)"""
288 self._progress_hooks.append(ph)
290 def _bidi_workaround(self, message):
291 if not hasattr(self, '_output_channel'):
294 assert hasattr(self, '_output_process')
295 assert isinstance(message, compat_str)
296 line_count = message.count('\n') + 1
297 self._output_process.stdin.write((message + '\n').encode('utf-8'))
298 self._output_process.stdin.flush()
299 res = ''.join(self._output_channel.readline().decode('utf-8')
300 for _ in range(line_count))
301 return res[:-len('\n')]
303 def to_screen(self, message, skip_eol=False):
304 """Print message to stdout if not in quiet mode."""
305 return self.to_stdout(message, skip_eol, check_quiet=True)
307 def _write_string(self, s, out=None):
308 write_string(s, out=out, encoding=self.params.get('encoding'))
310 def to_stdout(self, message, skip_eol=False, check_quiet=False):
311 """Print message to stdout if not in quiet mode."""
312 if self.params.get('logger'):
313 self.params['logger'].debug(message)
314 elif not check_quiet or not self.params.get('quiet', False):
315 message = self._bidi_workaround(message)
316 terminator = ['\n', ''][skip_eol]
317 output = message + terminator
319 self._write_string(output, self._screen_file)
321 def to_stderr(self, message):
322 """Print message to stderr."""
323 assert isinstance(message, compat_str)
324 if self.params.get('logger'):
325 self.params['logger'].error(message)
327 message = self._bidi_workaround(message)
328 output = message + '\n'
329 self._write_string(output, self._err_file)
331 def to_console_title(self, message):
332 if not self.params.get('consoletitle', False):
334 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
335 # c_wchar_p() might not be necessary if `message` is
336 # already of type unicode()
337 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
338 elif 'TERM' in os.environ:
339 self._write_string('\033]0;%s\007' % message, self._screen_file)
341 def save_console_title(self):
342 if not self.params.get('consoletitle', False):
344 if 'TERM' in os.environ:
345 # Save the title on stack
346 self._write_string('\033[22;0t', self._screen_file)
348 def restore_console_title(self):
349 if not self.params.get('consoletitle', False):
351 if 'TERM' in os.environ:
352 # Restore the title from stack
353 self._write_string('\033[23;0t', self._screen_file)
356 self.save_console_title()
359 def __exit__(self, *args):
360 self.restore_console_title()
362 if self.params.get('cookiefile') is not None:
363 self.cookiejar.save()
365 def trouble(self, message=None, tb=None):
366 """Determine action to take when a download problem appears.
368 Depending on if the downloader has been configured to ignore
369 download errors or not, this method may throw an exception or
370 not when errors are found, after printing the message.
372 tb, if given, is additional traceback information.
374 if message is not None:
375 self.to_stderr(message)
376 if self.params.get('verbose'):
378 if sys.exc_info()[0]: # if .trouble has been called from an except block
380 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
381 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
382 tb += compat_str(traceback.format_exc())
384 tb_data = traceback.format_list(traceback.extract_stack())
385 tb = ''.join(tb_data)
387 if not self.params.get('ignoreerrors', False):
388 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
389 exc_info = sys.exc_info()[1].exc_info
391 exc_info = sys.exc_info()
392 raise DownloadError(message, exc_info)
393 self._download_retcode = 1
395 def report_warning(self, message):
397 Print the message to stderr, it will be prefixed with 'WARNING:'
398 If stderr is a tty file the 'WARNING:' will be colored
400 if self.params.get('logger') is not None:
401 self.params['logger'].warning(message)
403 if self.params.get('no_warnings'):
405 if self._err_file.isatty() and os.name != 'nt':
406 _msg_header = '\033[0;33mWARNING:\033[0m'
408 _msg_header = 'WARNING:'
409 warning_message = '%s %s' % (_msg_header, message)
410 self.to_stderr(warning_message)
412 def report_error(self, message, tb=None):
414 Do the same as trouble, but prefixes the message with 'ERROR:', colored
415 in red if stderr is a tty file.
417 if self._err_file.isatty() and os.name != 'nt':
418 _msg_header = '\033[0;31mERROR:\033[0m'
420 _msg_header = 'ERROR:'
421 error_message = '%s %s' % (_msg_header, message)
422 self.trouble(error_message, tb)
424 def report_file_already_downloaded(self, file_name):
425 """Report file has already been fully downloaded."""
427 self.to_screen('[download] %s has already been downloaded' % file_name)
428 except UnicodeEncodeError:
429 self.to_screen('[download] The file has already been downloaded')
431 def prepare_filename(self, info_dict):
432 """Generate the output filename."""
434 template_dict = dict(info_dict)
436 template_dict['epoch'] = int(time.time())
437 autonumber_size = self.params.get('autonumber_size')
438 if autonumber_size is None:
440 autonumber_templ = '%0' + str(autonumber_size) + 'd'
441 template_dict['autonumber'] = autonumber_templ % self._num_downloads
442 if template_dict.get('playlist_index') is not None:
443 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
444 if template_dict.get('resolution') is None:
445 if template_dict.get('width') and template_dict.get('height'):
446 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
447 elif template_dict.get('height'):
448 template_dict['resolution'] = '%sp' % template_dict['height']
449 elif template_dict.get('width'):
450 template_dict['resolution'] = '?x%d' % template_dict['width']
452 sanitize = lambda k, v: sanitize_filename(
454 restricted=self.params.get('restrictfilenames'),
456 template_dict = dict((k, sanitize(k, v))
457 for k, v in template_dict.items()
459 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
461 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
462 tmpl = compat_expanduser(outtmpl)
463 filename = tmpl % template_dict
465 except ValueError as err:
466 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
469 def _match_entry(self, info_dict):
470 """ Returns None iff the file should be downloaded """
472 video_title = info_dict.get('title', info_dict.get('id', 'video'))
473 if 'title' in info_dict:
474 # This can happen when we're just evaluating the playlist
475 title = info_dict['title']
476 matchtitle = self.params.get('matchtitle', False)
478 if not re.search(matchtitle, title, re.IGNORECASE):
479 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
480 rejecttitle = self.params.get('rejecttitle', False)
482 if re.search(rejecttitle, title, re.IGNORECASE):
483 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
484 date = info_dict.get('upload_date', None)
486 dateRange = self.params.get('daterange', DateRange())
487 if date not in dateRange:
488 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
489 view_count = info_dict.get('view_count', None)
490 if view_count is not None:
491 min_views = self.params.get('min_views')
492 if min_views is not None and view_count < min_views:
493 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
494 max_views = self.params.get('max_views')
495 if max_views is not None and view_count > max_views:
496 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
497 age_limit = self.params.get('age_limit')
498 if age_limit is not None:
499 actual_age_limit = info_dict.get('age_limit')
500 if actual_age_limit is None:
502 if age_limit < actual_age_limit:
503 return 'Skipping "' + title + '" because it is age restricted'
504 if self.in_download_archive(info_dict):
505 return '%s has already been recorded in archive' % video_title
509 def add_extra_info(info_dict, extra_info):
510 '''Set the keys from extra_info in info dict if they are missing'''
511 for key, value in extra_info.items():
512 info_dict.setdefault(key, value)
514 def extract_info(self, url, download=True, ie_key=None, extra_info={},
517 Returns a list with a dictionary for each video we find.
518 If 'download', also downloads the videos.
519 extra_info is a dict containing the extra values to add to each result
523 ies = [self.get_info_extractor(ie_key)]
528 if not ie.suitable(url):
532 self.report_warning('The program functionality for this site has been marked as broken, '
533 'and will probably not work.')
536 ie_result = ie.extract(url)
537 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
539 if isinstance(ie_result, list):
540 # Backwards compatibility: old IE result format
542 '_type': 'compat_list',
543 'entries': ie_result,
545 self.add_default_extra_info(ie_result, ie, url)
547 return self.process_ie_result(ie_result, download, extra_info)
550 except ExtractorError as de: # An error we somewhat expected
551 self.report_error(compat_str(de), de.format_traceback())
553 except MaxDownloadsReached:
555 except Exception as e:
556 if self.params.get('ignoreerrors', False):
557 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
562 self.report_error('no suitable InfoExtractor for URL %s' % url)
564 def add_default_extra_info(self, ie_result, ie, url):
565 self.add_extra_info(ie_result, {
566 'extractor': ie.IE_NAME,
568 'webpage_url_basename': url_basename(url),
569 'extractor_key': ie.ie_key(),
572 def process_ie_result(self, ie_result, download=True, extra_info={}):
574 Take the result of the ie(may be modified) and resolve all unresolved
575 references (URLs, playlist items).
577 It will also download the videos if 'download'.
578 Returns the resolved ie_result.
581 result_type = ie_result.get('_type', 'video')
583 if result_type in ('url', 'url_transparent'):
584 extract_flat = self.params.get('extract_flat', False)
585 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
586 extract_flat is True):
587 if self.params.get('forcejson', False):
588 self.to_stdout(json.dumps(ie_result))
591 if result_type == 'video':
592 self.add_extra_info(ie_result, extra_info)
593 return self.process_video_result(ie_result, download=download)
594 elif result_type == 'url':
595 # We have to add extra_info to the results because it may be
596 # contained in a playlist
597 return self.extract_info(ie_result['url'],
599 ie_key=ie_result.get('ie_key'),
600 extra_info=extra_info)
601 elif result_type == 'url_transparent':
602 # Use the information from the embedding page
603 info = self.extract_info(
604 ie_result['url'], ie_key=ie_result.get('ie_key'),
605 extra_info=extra_info, download=False, process=False)
607 def make_result(embedded_info):
608 new_result = ie_result.copy()
609 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
610 'entries', 'ie_key', 'duration',
611 'subtitles', 'annotations', 'format',
612 'thumbnail', 'thumbnails'):
615 if f in embedded_info:
616 new_result[f] = embedded_info[f]
618 new_result = make_result(info)
620 assert new_result.get('_type') != 'url_transparent'
621 if new_result.get('_type') == 'compat_list':
622 new_result['entries'] = [
623 make_result(e) for e in new_result['entries']]
625 return self.process_ie_result(
626 new_result, download=download, extra_info=extra_info)
627 elif result_type == 'playlist':
628 # We process each entry in the playlist
629 playlist = ie_result.get('title', None) or ie_result.get('id', None)
630 self.to_screen('[download] Downloading playlist: %s' % playlist)
632 playlist_results = []
634 playliststart = self.params.get('playliststart', 1) - 1
635 playlistend = self.params.get('playlistend', None)
636 # For backwards compatibility, interpret -1 as whole list
637 if playlistend == -1:
640 if isinstance(ie_result['entries'], list):
641 n_all_entries = len(ie_result['entries'])
642 entries = ie_result['entries'][playliststart:playlistend]
643 n_entries = len(entries)
645 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
646 (ie_result['extractor'], playlist, n_all_entries, n_entries))
648 assert isinstance(ie_result['entries'], PagedList)
649 entries = ie_result['entries'].getslice(
650 playliststart, playlistend)
651 n_entries = len(entries)
653 "[%s] playlist %s: Downloading %d videos" %
654 (ie_result['extractor'], playlist, n_entries))
656 for i, entry in enumerate(entries, 1):
657 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
659 'n_entries': n_entries,
660 'playlist': playlist,
661 'playlist_index': i + playliststart,
662 'extractor': ie_result['extractor'],
663 'webpage_url': ie_result['webpage_url'],
664 'webpage_url_basename': url_basename(ie_result['webpage_url']),
665 'extractor_key': ie_result['extractor_key'],
668 reason = self._match_entry(entry)
669 if reason is not None:
670 self.to_screen('[download] ' + reason)
673 entry_result = self.process_ie_result(entry,
676 playlist_results.append(entry_result)
677 ie_result['entries'] = playlist_results
679 elif result_type == 'compat_list':
681 self.add_extra_info(r,
683 'extractor': ie_result['extractor'],
684 'webpage_url': ie_result['webpage_url'],
685 'webpage_url_basename': url_basename(ie_result['webpage_url']),
686 'extractor_key': ie_result['extractor_key'],
689 ie_result['entries'] = [
690 self.process_ie_result(_fixup(r), download, extra_info)
691 for r in ie_result['entries']
695 raise Exception('Invalid result type: %s' % result_type)
697 def select_format(self, format_spec, available_formats):
698 if format_spec == 'best' or format_spec is None:
699 return available_formats[-1]
700 elif format_spec == 'worst':
701 return available_formats[0]
702 elif format_spec == 'bestaudio':
704 f for f in available_formats
705 if f.get('vcodec') == 'none']
707 return audio_formats[-1]
708 elif format_spec == 'worstaudio':
710 f for f in available_formats
711 if f.get('vcodec') == 'none']
713 return audio_formats[0]
714 elif format_spec == 'bestvideo':
716 f for f in available_formats
717 if f.get('acodec') == 'none']
719 return video_formats[-1]
720 elif format_spec == 'worstvideo':
722 f for f in available_formats
723 if f.get('acodec') == 'none']
725 return video_formats[0]
727 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
728 if format_spec in extensions:
729 filter_f = lambda f: f['ext'] == format_spec
731 filter_f = lambda f: f['format_id'] == format_spec
732 matches = list(filter(filter_f, available_formats))
737 def process_video_result(self, info_dict, download=True):
738 assert info_dict.get('_type', 'video') == 'video'
740 if 'id' not in info_dict:
741 raise ExtractorError('Missing "id" field in extractor result')
742 if 'title' not in info_dict:
743 raise ExtractorError('Missing "title" field in extractor result')
745 if 'playlist' not in info_dict:
746 # It isn't part of a playlist
747 info_dict['playlist'] = None
748 info_dict['playlist_index'] = None
750 thumbnails = info_dict.get('thumbnails')
752 thumbnails.sort(key=lambda t: (
753 t.get('width'), t.get('height'), t.get('url')))
755 if 'width' in t and 'height' in t:
756 t['resolution'] = '%dx%d' % (t['width'], t['height'])
758 if thumbnails and 'thumbnail' not in info_dict:
759 info_dict['thumbnail'] = thumbnails[-1]['url']
761 if 'display_id' not in info_dict and 'id' in info_dict:
762 info_dict['display_id'] = info_dict['id']
764 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
765 upload_date = datetime.datetime.utcfromtimestamp(
766 info_dict['timestamp'])
767 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
769 # This extractors handle format selection themselves
770 if info_dict['extractor'] in ['Youku']:
772 self.process_info(info_dict)
775 # We now pick which formats have to be downloaded
776 if info_dict.get('formats') is None:
777 # There's only one format available
778 formats = [info_dict]
780 formats = info_dict['formats']
783 raise ExtractorError('No video formats found!')
785 # We check that all the formats have the format and format_id fields
786 for i, format in enumerate(formats):
787 if 'url' not in format:
788 raise ExtractorError('Missing "url" key in result (index %d)' % i)
790 if format.get('format_id') is None:
791 format['format_id'] = compat_str(i)
792 if format.get('format') is None:
793 format['format'] = '{id} - {res}{note}'.format(
794 id=format['format_id'],
795 res=self.format_resolution(format),
796 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
798 # Automatically determine file extension if missing
799 if 'ext' not in format:
800 format['ext'] = determine_ext(format['url']).lower()
802 format_limit = self.params.get('format_limit', None)
804 formats = list(takewhile_inclusive(
805 lambda f: f['format_id'] != format_limit, formats
808 # TODO Central sorting goes here
810 if formats[0] is not info_dict:
811 # only set the 'formats' fields if the original info_dict list them
812 # otherwise we end up with a circular reference, the first (and unique)
813 # element in the 'formats' field in info_dict is info_dict itself,
814 # wich can't be exported to json
815 info_dict['formats'] = formats
816 if self.params.get('listformats', None):
817 self.list_formats(info_dict)
820 req_format = self.params.get('format')
821 if req_format is None:
823 formats_to_download = []
824 # The -1 is for supporting YoutubeIE
825 if req_format in ('-1', 'all'):
826 formats_to_download = formats
828 for rfstr in req_format.split(','):
829 # We can accept formats requested in the format: 34/5/best, we pick
830 # the first that is available, starting from left
831 req_formats = rfstr.split('/')
832 for rf in req_formats:
833 if re.match(r'.+?\+.+?', rf) is not None:
834 # Two formats have been requested like '137+139'
835 format_1, format_2 = rf.split('+')
836 formats_info = (self.select_format(format_1, formats),
837 self.select_format(format_2, formats))
838 if all(formats_info):
840 'requested_formats': formats_info,
842 'ext': formats_info[0]['ext'],
845 selected_format = None
847 selected_format = self.select_format(rf, formats)
848 if selected_format is not None:
849 formats_to_download.append(selected_format)
851 if not formats_to_download:
852 raise ExtractorError('requested format not available',
856 if len(formats_to_download) > 1:
857 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
858 for format in formats_to_download:
859 new_info = dict(info_dict)
860 new_info.update(format)
861 self.process_info(new_info)
862 # We update the info dict with the best quality format (backwards compatibility)
863 info_dict.update(formats_to_download[-1])
866 def process_info(self, info_dict):
867 """Process a single resolved IE result."""
869 assert info_dict.get('_type', 'video') == 'video'
871 max_downloads = self.params.get('max_downloads')
872 if max_downloads is not None:
873 if self._num_downloads >= int(max_downloads):
874 raise MaxDownloadsReached()
876 info_dict['fulltitle'] = info_dict['title']
877 if len(info_dict['title']) > 200:
878 info_dict['title'] = info_dict['title'][:197] + '...'
880 # Keep for backwards compatibility
881 info_dict['stitle'] = info_dict['title']
883 if 'format' not in info_dict:
884 info_dict['format'] = info_dict['ext']
886 reason = self._match_entry(info_dict)
887 if reason is not None:
888 self.to_screen('[download] ' + reason)
891 self._num_downloads += 1
893 filename = self.prepare_filename(info_dict)
896 if self.params.get('forcetitle', False):
897 self.to_stdout(info_dict['fulltitle'])
898 if self.params.get('forceid', False):
899 self.to_stdout(info_dict['id'])
900 if self.params.get('forceurl', False):
901 # For RTMP URLs, also include the playpath
902 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
903 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
904 self.to_stdout(info_dict['thumbnail'])
905 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
906 self.to_stdout(info_dict['description'])
907 if self.params.get('forcefilename', False) and filename is not None:
908 self.to_stdout(filename)
909 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
910 self.to_stdout(formatSeconds(info_dict['duration']))
911 if self.params.get('forceformat', False):
912 self.to_stdout(info_dict['format'])
913 if self.params.get('forcejson', False):
914 info_dict['_filename'] = filename
915 self.to_stdout(json.dumps(info_dict))
916 if self.params.get('dump_single_json', False):
917 info_dict['_filename'] = filename
919 # Do nothing else if in simulate mode
920 if self.params.get('simulate', False):
927 dn = os.path.dirname(encodeFilename(filename))
928 if dn and not os.path.exists(dn):
930 except (OSError, IOError) as err:
931 self.report_error('unable to create directory ' + compat_str(err))
934 if self.params.get('writedescription', False):
935 descfn = filename + '.description'
936 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
937 self.to_screen('[info] Video description is already present')
940 self.to_screen('[info] Writing video description to: ' + descfn)
941 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
942 descfile.write(info_dict['description'])
943 except (KeyError, TypeError):
944 self.report_warning('There\'s no description to write.')
945 except (OSError, IOError):
946 self.report_error('Cannot write description file ' + descfn)
949 if self.params.get('writeannotations', False):
950 annofn = filename + '.annotations.xml'
951 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
952 self.to_screen('[info] Video annotations are already present')
955 self.to_screen('[info] Writing video annotations to: ' + annofn)
956 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
957 annofile.write(info_dict['annotations'])
958 except (KeyError, TypeError):
959 self.report_warning('There are no annotations to write.')
960 except (OSError, IOError):
961 self.report_error('Cannot write annotations file: ' + annofn)
964 subtitles_are_requested = any([self.params.get('writesubtitles', False),
965 self.params.get('writeautomaticsub')])
967 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
968 # subtitles download errors are already managed as troubles in relevant IE
969 # that way it will silently go on when used with unsupporting IE
970 subtitles = info_dict['subtitles']
971 sub_format = self.params.get('subtitlesformat', 'srt')
972 for sub_lang in subtitles.keys():
973 sub = subtitles[sub_lang]
977 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
978 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
979 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
981 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
982 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
984 except (OSError, IOError):
985 self.report_error('Cannot write subtitles file ' + sub_filename)
988 if self.params.get('writeinfojson', False):
989 infofn = os.path.splitext(filename)[0] + '.info.json'
990 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
991 self.to_screen('[info] Video description metadata is already present')
993 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
995 write_json_file(info_dict, encodeFilename(infofn))
996 except (OSError, IOError):
997 self.report_error('Cannot write metadata to JSON file ' + infofn)
1000 if self.params.get('writethumbnail', False):
1001 if info_dict.get('thumbnail') is not None:
1002 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1003 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1004 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1005 self.to_screen('[%s] %s: Thumbnail is already present' %
1006 (info_dict['extractor'], info_dict['id']))
1008 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1009 (info_dict['extractor'], info_dict['id']))
1011 uf = self.urlopen(info_dict['thumbnail'])
1012 with open(thumb_filename, 'wb') as thumbf:
1013 shutil.copyfileobj(uf, thumbf)
1014 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1015 (info_dict['extractor'], info_dict['id'], thumb_filename))
1016 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1017 self.report_warning('Unable to download thumbnail "%s": %s' %
1018 (info_dict['thumbnail'], compat_str(err)))
1020 if not self.params.get('skip_download', False):
1021 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1026 fd = get_suitable_downloader(info)(self, self.params)
1027 for ph in self._progress_hooks:
1028 fd.add_progress_hook(ph)
1029 if self.params.get('verbose'):
1030 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1031 return fd.download(name, info)
1032 if info_dict.get('requested_formats') is not None:
1035 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1036 if not merger._executable:
1038 self.report_warning('You have requested multiple '
1039 'formats but ffmpeg or avconv are not installed.'
1040 ' The formats won\'t be merged')
1042 postprocessors = [merger]
1043 for f in info_dict['requested_formats']:
1044 new_info = dict(info_dict)
1046 fname = self.prepare_filename(new_info)
1047 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1048 downloaded.append(fname)
1049 partial_success = dl(fname, new_info)
1050 success = success and partial_success
1051 info_dict['__postprocessors'] = postprocessors
1052 info_dict['__files_to_merge'] = downloaded
1054 # Just a single file
1055 success = dl(filename, info_dict)
1056 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1057 self.report_error('unable to download video data: %s' % str(err))
1059 except (OSError, IOError) as err:
1060 raise UnavailableVideoError(err)
1061 except (ContentTooShortError, ) as err:
1062 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1067 self.post_process(filename, info_dict)
1068 except (PostProcessingError) as err:
1069 self.report_error('postprocessing: %s' % str(err))
1072 self.record_download_archive(info_dict)
1074 def download(self, url_list):
1075 """Download a given list of URLs."""
1076 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1077 if (len(url_list) > 1 and
1079 and self.params.get('max_downloads') != 1):
1080 raise SameFileError(outtmpl)
1082 for url in url_list:
1084 #It also downloads the videos
1085 res = self.extract_info(url)
1086 except UnavailableVideoError:
1087 self.report_error('unable to download video')
1088 except MaxDownloadsReached:
1089 self.to_screen('[info] Maximum number of downloaded files reached.')
1092 if self.params.get('dump_single_json', False):
1093 self.to_stdout(json.dumps(res))
1095 return self._download_retcode
1097 def download_with_info_file(self, info_filename):
1098 with io.open(info_filename, 'r', encoding='utf-8') as f:
1101 self.process_ie_result(info, download=True)
1102 except DownloadError:
1103 webpage_url = info.get('webpage_url')
1104 if webpage_url is not None:
1105 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1106 return self.download([webpage_url])
1109 return self._download_retcode
1111 def post_process(self, filename, ie_info):
1112 """Run all the postprocessors on the given file."""
1113 info = dict(ie_info)
1114 info['filepath'] = filename
1117 if ie_info.get('__postprocessors') is not None:
1118 pps_chain.extend(ie_info['__postprocessors'])
1119 pps_chain.extend(self._pps)
1120 for pp in pps_chain:
1122 keep_video_wish, new_info = pp.run(info)
1123 if keep_video_wish is not None:
1125 keep_video = keep_video_wish
1126 elif keep_video is None:
1127 # No clear decision yet, let IE decide
1128 keep_video = keep_video_wish
1129 except PostProcessingError as e:
1130 self.report_error(e.msg)
1131 if keep_video is False and not self.params.get('keepvideo', False):
1133 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1134 os.remove(encodeFilename(filename))
1135 except (IOError, OSError):
1136 self.report_warning('Unable to remove downloaded video file')
1138 def _make_archive_id(self, info_dict):
1139 # Future-proof against any change in case
1140 # and backwards compatibility with prior versions
1141 extractor = info_dict.get('extractor_key')
1142 if extractor is None:
1143 if 'id' in info_dict:
1144 extractor = info_dict.get('ie_key') # key in a playlist
1145 if extractor is None:
1146 return None # Incomplete video information
1147 return extractor.lower() + ' ' + info_dict['id']
1149 def in_download_archive(self, info_dict):
1150 fn = self.params.get('download_archive')
1154 vid_id = self._make_archive_id(info_dict)
1156 return False # Incomplete video information
1159 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1160 for line in archive_file:
1161 if line.strip() == vid_id:
1163 except IOError as ioe:
1164 if ioe.errno != errno.ENOENT:
1168 def record_download_archive(self, info_dict):
1169 fn = self.params.get('download_archive')
1172 vid_id = self._make_archive_id(info_dict)
1174 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1175 archive_file.write(vid_id + '\n')
1178 def format_resolution(format, default='unknown'):
1179 if format.get('vcodec') == 'none':
1181 if format.get('resolution') is not None:
1182 return format['resolution']
1183 if format.get('height') is not None:
1184 if format.get('width') is not None:
1185 res = '%sx%s' % (format['width'], format['height'])
1187 res = '%sp' % format['height']
1188 elif format.get('width') is not None:
1189 res = '?x%d' % format['width']
1194 def _format_note(self, fdict):
1196 if fdict.get('ext') in ['f4f', 'f4m']:
1197 res += '(unsupported) '
1198 if fdict.get('format_note') is not None:
1199 res += fdict['format_note'] + ' '
1200 if fdict.get('tbr') is not None:
1201 res += '%4dk ' % fdict['tbr']
1202 if fdict.get('container') is not None:
1205 res += '%s container' % fdict['container']
1206 if (fdict.get('vcodec') is not None and
1207 fdict.get('vcodec') != 'none'):
1210 res += fdict['vcodec']
1211 if fdict.get('vbr') is not None:
1213 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1215 if fdict.get('vbr') is not None:
1216 res += '%4dk' % fdict['vbr']
1217 if fdict.get('fps') is not None:
1218 res += ', %sfps' % fdict['fps']
1219 if fdict.get('acodec') is not None:
1222 if fdict['acodec'] == 'none':
1225 res += '%-5s' % fdict['acodec']
1226 elif fdict.get('abr') is not None:
1230 if fdict.get('abr') is not None:
1231 res += '@%3dk' % fdict['abr']
1232 if fdict.get('asr') is not None:
1233 res += ' (%5dHz)' % fdict['asr']
1234 if fdict.get('filesize') is not None:
1237 res += format_bytes(fdict['filesize'])
1238 elif fdict.get('filesize_approx') is not None:
1241 res += '~' + format_bytes(fdict['filesize_approx'])
1244 def list_formats(self, info_dict):
1245 def line(format, idlen=20):
1246 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1247 format['format_id'],
1249 self.format_resolution(format),
1250 self._format_note(format),
1253 formats = info_dict.get('formats', [info_dict])
1254 idlen = max(len('format code'),
1255 max(len(f['format_id']) for f in formats))
1256 formats_s = [line(f, idlen) for f in formats]
1257 if len(formats) > 1:
1258 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1259 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1261 header_line = line({
1262 'format_id': 'format code', 'ext': 'extension',
1263 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1264 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1265 (info_dict['id'], header_line, '\n'.join(formats_s)))
1267 def urlopen(self, req):
1268 """ Start an HTTP download """
1270 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1271 # always respected by websites, some tend to give out URLs with non percent-encoded
1272 # non-ASCII characters (see telemb.py, ard.py [#3412])
1273 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1274 # To work around aforementioned issue we will replace request's original URL with
1275 # percent-encoded one
1276 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1277 url = req if req_is_string else req.get_full_url()
1278 url_escaped = escape_url(url)
1280 # Substitute URL if any change after escaping
1281 if url != url_escaped:
1285 req = compat_urllib_request.Request(
1286 url_escaped, data=req.data, headers=req.headers,
1287 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1289 return self._opener.open(req, timeout=self._socket_timeout)
1291 def print_debug_header(self):
1292 if not self.params.get('verbose'):
1295 if type('') is not compat_str:
1296 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1297 self.report_warning(
1298 'Your Python is broken! Update to a newer and supported version')
1301 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1302 locale.getpreferredencoding(),
1303 sys.getfilesystemencoding(),
1304 sys.stdout.encoding,
1305 self.get_encoding()))
1306 write_string(encoding_str, encoding=None)
1308 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1310 sp = subprocess.Popen(
1311 ['git', 'rev-parse', '--short', 'HEAD'],
1312 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1313 cwd=os.path.dirname(os.path.abspath(__file__)))
1314 out, err = sp.communicate()
1315 out = out.decode().strip()
1316 if re.match('[0-9a-f]+', out):
1317 self._write_string('[debug] Git HEAD: ' + out + '\n')
1323 self._write_string('[debug] Python version %s - %s\n' % (
1324 platform.python_version(), platform_name()))
1326 exe_versions = FFmpegPostProcessor.get_versions()
1327 exe_versions['rtmpdump'] = rtmpdump_version()
1328 exe_str = ', '.join(
1330 for exe, v in sorted(exe_versions.items())
1335 self._write_string('[debug] exe versions: %s\n' % exe_str)
1338 for handler in self._opener.handlers:
1339 if hasattr(handler, 'proxies'):
1340 proxy_map.update(handler.proxies)
1341 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1343 def _setup_opener(self):
1344 timeout_val = self.params.get('socket_timeout')
1345 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1347 opts_cookiefile = self.params.get('cookiefile')
1348 opts_proxy = self.params.get('proxy')
1350 if opts_cookiefile is None:
1351 self.cookiejar = compat_cookiejar.CookieJar()
1353 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1355 if os.access(opts_cookiefile, os.R_OK):
1356 self.cookiejar.load()
1358 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1360 if opts_proxy is not None:
1361 if opts_proxy == '':
1364 proxies = {'http': opts_proxy, 'https': opts_proxy}
1366 proxies = compat_urllib_request.getproxies()
1367 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1368 if 'http' in proxies and 'https' not in proxies:
1369 proxies['https'] = proxies['http']
1370 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1372 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1373 https_handler = make_HTTPS_handler(
1374 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1375 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1376 opener = compat_urllib_request.build_opener(
1377 https_handler, proxy_handler, cookie_processor, ydlh)
1378 # Delete the default user-agent header, which would otherwise apply in
1379 # cases where our custom HTTP handler doesn't come into play
1380 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1381 opener.addheaders = []
1382 self._opener = opener
1384 def encode(self, s):
1385 if isinstance(s, bytes):
1386 return s # Already encoded
1389 return s.encode(self.get_encoding())
1390 except UnicodeEncodeError as err:
1391 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1394 def get_encoding(self):
1395 encoding = self.params.get('encoding')
1396 if encoding is None:
1397 encoding = preferredencoding()