2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
57 UnavailableVideoError,
64 from .cache import Cache
65 from .extractor import get_info_extractor, gen_extractors
66 from .downloader import get_suitable_downloader
67 from .downloader.rtmp import rtmpdump_version
68 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
69 from .version import __version__
72 class YoutubeDL(object):
75 YoutubeDL objects are the ones responsible of downloading the
76 actual video file and writing it to disk if the user has requested
77 it, among some other tasks. In most cases there should be one per
78 program. As, given a video URL, the downloader doesn't know how to
79 extract all the needed information, a task that InfoExtractors do, it
80 has to pass the URL to one of them.
82 For this, YoutubeDL objects have a method that allows
83 InfoExtractors to be registered in a given order. When it is passed
84 a URL, the YoutubeDL object hands it to the first InfoExtractor it
85 finds that reports being able to handle it. The InfoExtractor extracts
86 all the information about the video or videos the URL refers to, and
87 YoutubeDL processes the extracted information, possibly using a File
88 Downloader to download the video.
90 YoutubeDL objects accept a lot of parameters. In order not to saturate
91 the object constructor with arguments, it receives a dictionary of
92 options instead. These options are available through the params
93 attribute for the InfoExtractors to use. The YoutubeDL also
94 registers itself as the downloader in charge for the InfoExtractors
95 that are added to it, so this is a "mutual registration".
99 username: Username for authentication purposes.
100 password: Password for authentication purposes.
101 videopassword:   Password for accessing a video.
102 usenetrc: Use netrc for authentication instead.
103 verbose: Print additional info to stdout.
104 quiet: Do not print messages to stdout.
105 no_warnings: Do not print out anything for warnings.
106 forceurl: Force printing final URL.
107 forcetitle: Force printing title.
108 forceid: Force printing ID.
109 forcethumbnail: Force printing thumbnail URL.
110 forcedescription: Force printing description.
111 forcefilename: Force printing final filename.
112 forceduration: Force printing duration.
113 forcejson: Force printing info_dict as JSON.
114 dump_single_json: Force printing the info_dict of the whole playlist
115 (or video) as a single JSON line.
116 simulate: Do not download the video files.
117 format: Video format code.
118 format_limit: Highest quality format to try.
119 outtmpl: Template for output names.
120 restrictfilenames: Do not allow "&" and spaces in file names
121 ignoreerrors: Do not stop on download errors.
122 nooverwrites: Prevent overwriting files.
123 playliststart: Playlist item to start at.
124 playlistend: Playlist item to end at.
125 matchtitle: Download only matching titles.
126 rejecttitle: Reject downloads for matching titles.
127 logger: Log messages to a logging.Logger instance.
128 logtostderr: Log messages to stderr instead of stdout.
129 writedescription: Write the video description to a .description file
130 writeinfojson: Write the video description to a .info.json file
131 writeannotations: Write the video annotations to a .annotations.xml file
132 writethumbnail: Write the thumbnail image to a file
133 writesubtitles: Write the video subtitles to a file
134 writeautomaticsub: Write the automatic subtitles to a file
135 allsubtitles: Downloads all the subtitles of the video
136 (requires writesubtitles or writeautomaticsub)
137 listsubtitles: Lists all available subtitles for the video
138 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
139 subtitleslangs: List of languages of the subtitles to download
140 keepvideo: Keep the video file after post-processing
141 daterange: A DateRange object, download only if the upload_date is in the range.
142 skip_download: Skip the actual download of the video file
143 cachedir: Location of the cache files in the filesystem.
144 False to disable filesystem cache.
145 noplaylist: Download single video instead of a playlist if in doubt.
146 age_limit: An integer representing the user's age in years.
147 Unsuitable videos for the given age are skipped.
148 min_views: An integer representing the minimum view count the video
149 must have in order to not be skipped.
150 Videos without view count information are always
151 downloaded. None for no limit.
152 max_views: An integer representing the maximum view count.
153 Videos that are more popular than that are not
155 Videos without view count information are always
156 downloaded. None for no limit.
157 download_archive: File name of a file where all downloads are recorded.
158 Videos already present in the file are not downloaded
160 cookiefile: File name where cookies should be read from and dumped to.
161 nocheckcertificate:Do not verify SSL certificates
162 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
163 At the moment, this is only supported by YouTube.
164 proxy: URL of the proxy server to use
165 socket_timeout: Time to wait for unresponsive hosts, in seconds
166 bidi_workaround: Work around buggy terminals without bidirectional text
167 support, using fribidi
168 debug_printtraffic:Print out sent and received HTTP traffic
169 include_ads: Download ads as well
170 default_search: Prepend this string if an input url is not valid.
171 'auto' for elaborate guessing
172 encoding: Use this encoding instead of the system-specified.
173 extract_flat: Do not resolve URLs, return the immediate result.
174 Pass in 'in_playlist' to only show this behavior for
177 The following parameters are not used by YoutubeDL itself, they are used by
179 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
180 noresizebuffer, retries, continuedl, noprogress, consoletitle
182 The following options are used by the post processors:
183 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
184 otherwise prefer avconv.
185 exec_cmd: Arbitrary command to run after downloading
    # Class-level defaults; __init__ resets both per instance.
    # Return code of the most recent run (trouble() sets it to 1 on errors
    # when ignoreerrors is enabled).
    _download_retcode = None
    # Count of files downloaded so far (drives %(autonumber)s in outtmpl).
    _num_downloads = None
195 def __init__(self, params=None, auto_init=True):
196 """Create a FileDownloader object with the given options."""
200 self._ies_instances = {}
202 self._progress_hooks = []
203 self._download_retcode = 0
204 self._num_downloads = 0
205 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
206 self._err_file = sys.stderr
208 self.cache = Cache(self)
210 if params.get('bidi_workaround', False):
213 master, slave = pty.openpty()
214 width = get_term_width()
218 width_args = ['-w', str(width)]
220 stdin=subprocess.PIPE,
222 stderr=self._err_file)
224 self._output_process = subprocess.Popen(
225 ['bidiv'] + width_args, **sp_kwargs
228 self._output_process = subprocess.Popen(
229 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
230 self._output_channel = os.fdopen(master, 'rb')
231 except OSError as ose:
233 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
237 if (sys.version_info >= (3,) and sys.platform != 'win32' and
238 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
239 and not params.get('restrictfilenames', False)):
240 # On Python 3, the Unicode filesystem API will throw errors (#1474)
242 'Assuming --restrict-filenames since file system encoding '
243 'cannot encode all characters. '
244 'Set the LC_ALL environment variable to fix this.')
245 self.params['restrictfilenames'] = True
247 if '%(stitle)s' in self.params.get('outtmpl', ''):
248 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
253 self.print_debug_header()
254 self.add_default_info_extractors()
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # (The append to the ordered self._ies list is elided in this excerpt.)
        # Cache the instance by its unique key for get_info_extractor() ...
        self._ies_instances[ie.ie_key()] = ie
        # ... and give the extractor a back-reference to this downloader
        # ("mutual registration", see the class docstring).
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # Cache miss: instantiate lazily and register.  NOTE(review): the
        # 'if ie is None:' guard and the final 'return ie' are elided here.
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        # gen_extractors() yields every known extractor in priority order.
        for ie in gen_extractors():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # (The append to the post-processor chain itself is elided here.)
        # Post-processors also receive a back-reference to this downloader.
        pp.set_downloader(self)
286 def add_progress_hook(self, ph):
287 """Add the progress hook (currently only for the file downloader)"""
288 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Filter `message` through the external bidi helper started in
        # __init__; only active when the bidi_workaround option was set.
        # NOTE(review): the body of this guard (presumably 'return message')
        # is elided in this excerpt.
        if not hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as were written ...
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # ... and strip the trailing newline appended above.
        return res[:-len('\n')]
303 def to_screen(self, message, skip_eol=False):
304 """Print message to stdout if not in quiet mode."""
305 return self.to_stdout(message, skip_eol, check_quiet=True)
    def _write_string(self, s, out=None):
        # Thin wrapper over utils.write_string that applies the
        # user-configured output encoding ('encoding' option).
        write_string(s, out=out, encoding=self.params.get('encoding'))
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            # A user-supplied logger takes precedence over screen output.
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            # skip_eol=True suppresses the trailing newline.
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

            # _screen_file may actually be stderr when 'logtostderr' is set.
            self._write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        # NOTE(review): the 'else:' introducing this branch is elided; direct
        # stderr output only happens when no custom logger is configured.
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)
    def to_console_title(self, message):
        # Set the terminal/console window title, when enabled.
        # (The early 'return' for the disabled case is elided here.)
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm-compatible escape sequence for setting the window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        # Push the current terminal title so restore_console_title() can
        # bring it back.  (The early 'return' for the disabled case is
        # elided here.)
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)
    def restore_console_title(self):
        # Counterpart of save_console_title().  (The early 'return' for the
        # disabled case is elided here.)
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
356 self.save_console_title()
359 def __exit__(self, *args):
360 self.restore_console_title()
362 if self.params.get('cookiefile') is not None:
363 self.cookiejar.save()
365 def trouble(self, message=None, tb=None):
366 """Determine action to take when a download problem appears.
368 Depending on if the downloader has been configured to ignore
369 download errors or not, this method may throw an exception or
370 not when errors are found, after printing the message.
372 tb, if given, is additional traceback information.
374 if message is not None:
375 self.to_stderr(message)
376 if self.params.get('verbose'):
378 if sys.exc_info()[0]: # if .trouble has been called from an except block
380 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
381 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
382 tb += compat_str(traceback.format_exc())
384 tb_data = traceback.format_list(traceback.extract_stack())
385 tb = ''.join(tb_data)
387 if not self.params.get('ignoreerrors', False):
388 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
389 exc_info = sys.exc_info()[1].exc_info
391 exc_info = sys.exc_info()
392 raise DownloadError(message, exc_info)
393 self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        # A custom logger handles the message itself.  NOTE(review): the
        # early 'return' statements after these two guards are elided.
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        if self.params.get('no_warnings'):
        # ANSI yellow on ttys; Windows consoles do not interpret ANSI codes.
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;33mWARNING:\033[0m'
        # (The 'else:' introducing the plain header is elided here.)
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        # ANSI red on ttys; Windows consoles do not interpret ANSI codes.
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        # (The 'else:' introducing the plain header is elided here.)
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        # trouble() decides whether to raise DownloadError or just record
        # the failure, depending on the ignoreerrors option.
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # (The 'try:' opening this block is elided in this excerpt.)
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # The filename may not be representable in the console encoding;
            # fall back to a message without it.
            self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): several lines are elided in this excerpt (the
        # enclosing try:, the autonumber default, parts of the sanitize
        # lambda/dict-comprehension, and the final 'return filename').
        # Work on a copy so the caller's info_dict is not mutated.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist length.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            # Synthesize a human-readable resolution from width/height.
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '?x%d' % template_dict['width']

        # Make every value filesystem-safe before substitution.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Missing template fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        except ValueError as err:
            # Bad % directive in the user's template (or encoding trouble).
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # Each filter below returns a human-readable skip reason string when
        # the entry must be skipped.  NOTE(review): several guard lines are
        # elided in this excerpt; hedged comments flag the gaps.

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            # (An 'if matchtitle:' guard is elided here.)
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            # (An 'if rejecttitle:' guard is elided here.)
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        # (presumably an 'if date is not None:' guard is elided here)
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            actual_age_limit = info_dict.get('age_limit')
            # (The default assignment for a missing age_limit is elided.)
            if actual_age_limit is None:
            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
        # (The final 'return None' is elided in this excerpt.)
509 def add_extra_info(info_dict, extra_info):
510 '''Set the keys from extra_info in info dict if they are missing'''
511 for key, value in extra_info.items():
512 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): the rest of the signature and many body lines (the
        # 'else' fallback to all extractors, the 'for ie in ies:' loop
        # header, the try:, and several continue/raise statements) are
        # elided in this excerpt.
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
            # A specific extractor was requested: only consider that one.
            ies = [self.get_info_extractor(ie_key)]
            if not ie.suitable(url):
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                        '_type': 'compat_list',
                        'entries': ie_result,
                    self.add_default_extra_info(ie_result, ie, url)
                    return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
            except MaxDownloadsReached:
                # presumably re-raised so the caller stops the whole run
            except Exception as e:
                # Unexpected failure: report (and continue) only when
                # ignoreerrors is set; otherwise re-raised (elided).
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        # No extractor accepted the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        # Attach bookkeeping fields identifying the producing extractor and
        # the source URL.  NOTE(review): the 'webpage_url': url entry and
        # the closing '})' of this call are elided in this excerpt.
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): numerous lines are elided in this excerpt; hedged
        # comments below flag the gaps.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist the URL reference is returned as-is
            # instead of being resolved.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                # (A 'return ie_result' short-circuit is elided here.)

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            # (The 'download=download,' argument line is elided here.)
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge selected fields of the embedded result over a copy
                # of the embedding-page result.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                # (The 'return new_result' of this helper is elided here.)
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based on the CLI; convert to 0-based slice.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                # (A 'playlistend = None' assignment is elided here.)

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                # (The 'self.to_screen(' opening of this call is elided.)
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            # ('else:' elided -- lazily paged playlists take this path.)
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                # (The 'self.to_screen(' opening of this call is elided.)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                # (The 'extra = {' opening of this dict literal is elided.)
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    # (A 'continue' is elided here.)

                # (The remaining arguments of this call are elided.)
                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            # (A 'return ie_result' is elided here.)
        elif result_type == 'compat_list':
            # (presumably a deprecation warning and the '_fixup(r)' helper
            # definition are elided here)
                self.add_extra_info(r,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            # (A 'return ie_result' and the final 'else:' are elided here.)
            raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        # Pick one format dict from available_formats (ordered worst->best)
        # according to format_spec.  NOTE(review): the list-comprehension
        # openings ('audio_formats = [' / 'video_formats = ['), the
        # 'if audio_formats:'-style guards, and the fallback returns are
        # elided in this excerpt.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
            # Audio-only formats report no video codec.
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
            # Video-only formats report no audio codec.
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
        # (The trailing 'else:' is elided here.)
            # A bare extension selects by ext; anything else by format_id.
            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
            # ('else:' elided)
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
            # (presumably 'if matches: return matches[-1]' follows -- elided)
    def process_video_result(self, info_dict, download=True):
        # Validate a single-video info dict, fill in derived fields, select
        # the requested format(s) and pass each selection to process_info().
        # NOTE(review): numerous lines are elided in this excerpt; hedged
        # comments below flag the gaps.
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        # (An 'if thumbnails:' guard is elided here.)
            # Sort worst-to-best so the best thumbnail ends up last.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
            # (A 'for t in thumbnails:' loop header is elided here.)
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            # Default to the best (last after sorting) thumbnail.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Derive upload_date (YYYYMMDD) from the UNIX timestamp.
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # This extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            # (presumably an 'if download:' guard and a 'return info_dict'
            # surround this call -- elided)
            self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        # ('else:' elided)
            formats = info_dict['formats']

        # (An 'if not formats:' guard is elided here.)
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                # Fall back to the list index as a stable identifier.
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                # (The closing ')' of this .format() call is elided.)
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        format_limit = self.params.get('format_limit', None)
        # (An 'if format_limit:' guard and the closing '))' are elided.)
            # Drop everything better than the limit (inclusive cut).
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # wich can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            # (A 'return' is elided here.)

        req_format = self.params.get('format')
        if req_format is None:
            # (The default format-spec assignment is elided here.)
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        # ('else:' elided)
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        # The first format must contain the video and the
                        # (continuation of this comment is elided)
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must contain '
                                              'the video, try using "-f %s+%s"' %
                                              (format_2, format_1))
                            # (A 'return' is elided here.)
                        if all(formats_info):
                            # (The 'selected_format = {' opening and several
                            # merged-format fields are elided here.)
                                'requested_formats': formats_info,
                                'ext': formats_info[0]['ext'],
                        # ('else:' elided)
                            selected_format = None
                    # ('else:' elided -- single-format spec path)
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        # (A 'break' out of the '/' alternatives is elided.)
        if not formats_to_download:
            # (The 'expected=True' argument and ')' are elided.)
            raise ExtractorError('requested format not available',

        # (An 'if download:' guard is elided here.)
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each selected format is processed as its own download.
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        # (A 'return info_dict' is elided here.)
873 def process_info(self, info_dict):
874 """Process a single resolved IE result."""
876 assert info_dict.get('_type', 'video') == 'video'
878 max_downloads = self.params.get('max_downloads')
879 if max_downloads is not None:
880 if self._num_downloads >= int(max_downloads):
881 raise MaxDownloadsReached()
883 info_dict['fulltitle'] = info_dict['title']
884 if len(info_dict['title']) > 200:
885 info_dict['title'] = info_dict['title'][:197] + '...'
887 # Keep for backwards compatibility
888 info_dict['stitle'] = info_dict['title']
890 if 'format' not in info_dict:
891 info_dict['format'] = info_dict['ext']
893 reason = self._match_entry(info_dict)
894 if reason is not None:
895 self.to_screen('[download] ' + reason)
898 self._num_downloads += 1
900 filename = self.prepare_filename(info_dict)
903 if self.params.get('forcetitle', False):
904 self.to_stdout(info_dict['fulltitle'])
905 if self.params.get('forceid', False):
906 self.to_stdout(info_dict['id'])
907 if self.params.get('forceurl', False):
908 # For RTMP URLs, also include the playpath
909 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
910 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
911 self.to_stdout(info_dict['thumbnail'])
912 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
913 self.to_stdout(info_dict['description'])
914 if self.params.get('forcefilename', False) and filename is not None:
915 self.to_stdout(filename)
916 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
917 self.to_stdout(formatSeconds(info_dict['duration']))
918 if self.params.get('forceformat', False):
919 self.to_stdout(info_dict['format'])
920 if self.params.get('forcejson', False):
921 info_dict['_filename'] = filename
922 self.to_stdout(json.dumps(info_dict))
923 if self.params.get('dump_single_json', False):
924 info_dict['_filename'] = filename
926 # Do nothing else if in simulate mode
927 if self.params.get('simulate', False):
934 dn = os.path.dirname(encodeFilename(filename))
935 if dn and not os.path.exists(dn):
937 except (OSError, IOError) as err:
938 self.report_error('unable to create directory ' + compat_str(err))
941 if self.params.get('writedescription', False):
942 descfn = filename + '.description'
943 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
944 self.to_screen('[info] Video description is already present')
947 self.to_screen('[info] Writing video description to: ' + descfn)
948 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
949 descfile.write(info_dict['description'])
950 except (KeyError, TypeError):
951 self.report_warning('There\'s no description to write.')
952 except (OSError, IOError):
953 self.report_error('Cannot write description file ' + descfn)
956 if self.params.get('writeannotations', False):
957 annofn = filename + '.annotations.xml'
958 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
959 self.to_screen('[info] Video annotations are already present')
962 self.to_screen('[info] Writing video annotations to: ' + annofn)
963 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
964 annofile.write(info_dict['annotations'])
965 except (KeyError, TypeError):
966 self.report_warning('There are no annotations to write.')
967 except (OSError, IOError):
968 self.report_error('Cannot write annotations file: ' + annofn)
971 subtitles_are_requested = any([self.params.get('writesubtitles', False),
972 self.params.get('writeautomaticsub')])
974 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
975 # subtitles download errors are already managed as troubles in relevant IE
976 # that way it will silently go on when used with unsupporting IE
977 subtitles = info_dict['subtitles']
978 sub_format = self.params.get('subtitlesformat', 'srt')
979 for sub_lang in subtitles.keys():
980 sub = subtitles[sub_lang]
984 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
985 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
986 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
988 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
989 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
991 except (OSError, IOError):
992 self.report_error('Cannot write subtitles file ' + sub_filename)
995 if self.params.get('writeinfojson', False):
996 infofn = os.path.splitext(filename)[0] + '.info.json'
997 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
998 self.to_screen('[info] Video description metadata is already present')
1000 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1002 write_json_file(info_dict, encodeFilename(infofn))
1003 except (OSError, IOError):
1004 self.report_error('Cannot write metadata to JSON file ' + infofn)
1007 if self.params.get('writethumbnail', False):
1008 if info_dict.get('thumbnail') is not None:
1009 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1010 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1011 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1012 self.to_screen('[%s] %s: Thumbnail is already present' %
1013 (info_dict['extractor'], info_dict['id']))
1015 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1016 (info_dict['extractor'], info_dict['id']))
1018 uf = self.urlopen(info_dict['thumbnail'])
1019 with open(thumb_filename, 'wb') as thumbf:
1020 shutil.copyfileobj(uf, thumbf)
1021 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1022 (info_dict['extractor'], info_dict['id'], thumb_filename))
1023 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1024 self.report_warning('Unable to download thumbnail "%s": %s' %
1025 (info_dict['thumbnail'], compat_str(err)))
1027 if not self.params.get('skip_download', False):
1028 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1033 fd = get_suitable_downloader(info)(self, self.params)
1034 for ph in self._progress_hooks:
1035 fd.add_progress_hook(ph)
1036 if self.params.get('verbose'):
1037 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1038 return fd.download(name, info)
1039 if info_dict.get('requested_formats') is not None:
1042 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1043 if not merger._executable:
1045 self.report_warning('You have requested multiple '
1046 'formats but ffmpeg or avconv are not installed.'
1047 ' The formats won\'t be merged')
1049 postprocessors = [merger]
1050 for f in info_dict['requested_formats']:
1051 new_info = dict(info_dict)
1053 fname = self.prepare_filename(new_info)
1054 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1055 downloaded.append(fname)
1056 partial_success = dl(fname, new_info)
1057 success = success and partial_success
1058 info_dict['__postprocessors'] = postprocessors
1059 info_dict['__files_to_merge'] = downloaded
1061 # Just a single file
1062 success = dl(filename, info_dict)
1063 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1064 self.report_error('unable to download video data: %s' % str(err))
1066 except (OSError, IOError) as err:
1067 raise UnavailableVideoError(err)
1068 except (ContentTooShortError, ) as err:
1069 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1074 self.post_process(filename, info_dict)
1075 except (PostProcessingError) as err:
1076 self.report_error('postprocessing: %s' % str(err))
1079 self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Per-URL failures are reported via report_error() rather than
        raised; the accumulated return code is returned at the end.
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        # With a fixed output name, several downloads would clobber the
        # same file -- refuse early.
        # NOTE(review): one condition line of this 'if' appears elided here.
        if (len(url_list) > 1 and
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            # NOTE(review): the opening 'try:' of this handler appears elided.
                # extract_info() performs the actual download as a side effect.
                res = self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    # --dump-single-json: emit the whole info dict on stdout.
                    self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download a video from an already-extracted .info.json file."""
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            # NOTE(review): the loading of `info` from f appears elided here.
            self.process_ie_result(info, download=True)
        except DownloadError:
            # The stored info dict may have gone stale; retry the whole
            # extraction from the original page URL when we know it.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # Work on a copy so the caller's info dict is not mutated.
        info = dict(ie_info)
        info['filepath'] = filename
        # NOTE(review): initialisation of keep_video / pps_chain appears
        # elided in this view.
        if ie_info.get('__postprocessors') is not None:
            # Per-download postprocessors (e.g. format merging) run first.
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            # A postprocessor asked for deletion and -k was not given.
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1145 def _make_archive_id(self, info_dict):
1146 # Future-proof against any change in case
1147 # and backwards compatibility with prior versions
1148 extractor = info_dict.get('extractor_key')
1149 if extractor is None:
1150 if 'id' in info_dict:
1151 extractor = info_dict.get('ie_key') # key in a playlist
1152 if extractor is None:
1153 return None # Incomplete video information
1154 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Check whether this video is already recorded in the archive file."""
        fn = self.params.get('download_archive')
        # NOTE(review): the early-return guards for a missing archive file
        # and a missing archive id appear elided in this view.
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                # One archive id per line; an exact match means "already seen".
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet;
            # any other I/O error is unexpected.
            if ioe.errno != errno.ENOENT:
1175 def record_download_archive(self, info_dict):
1176 fn = self.params.get('download_archive')
1179 vid_id = self._make_archive_id(info_dict)
1181 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1182 archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        """Return a human-readable resolution string for a format dict."""
        # Takes no `self` -- presumably a @staticmethod upstream (decorator
        # not visible in this view).
        if format.get('vcodec') == 'none':
        # An explicit 'resolution' field wins over width/height.
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                # Height only: use the common '<height>p' notation.
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build a short human-readable note (codecs, bitrates, size) for a
        format dict; used by list_formats()."""
        # NOTE(review): the `res = ''` initialisation and several
        # comma-separator lines appear elided in this view.
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # Total bitrate, shown with a 'k' suffix.
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            # Video bitrate, 'k' suffix.
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            # Audio bitrate, 'k' suffix.
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            # Audio sample rate in Hz.
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks the size as approximate.
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print a table of the video's available formats to the screen."""
        def line(format, idlen=20):
            # One table row: format id, extension, resolution, note.
            # NOTE(review): the format['ext'] column and the closing parens
            # of this expression appear elided in this view.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                self._format_note(format),

        formats = info_dict.get('formats', [info_dict])
        # Pad the id column to the widest format_id (or the header label).
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # First entry is tagged worst, last entry best.
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
    def urlopen(self, req):
        """ Start an HTTP download """
        # `req` may be a URL string or a Request object.

        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            # Rebuild the Request around the escaped URL, preserving the
            # original data, headers and origin information.
            # NOTE(review): the plain-string branch (req = url_escaped)
            # appears elided here.
                req = compat_urllib_request.Request(
                    url_escaped, data=req.data, headers=req.headers,
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write verbose-mode debug info (versions, encodings, proxy map)."""
        # NOTE(review): several scaffolding lines (early return, the
        # try/except around the git call, exe_str pieces, proxy_map init)
        # appear elided in this view.
        if not self.params.get('verbose'):

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                sys.stdout.encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best-effort: report the git revision when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions()
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect proxies from every opener handler that declares any.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS handling) used
        for all requests and store it as self._opener."""
        # NOTE(review): a few branch lines (`else:`, `proxies = {}`, the
        # MozillaCookieJar filename argument) appear elided in this view.
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file configured: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # Only load the cookie file if it is actually readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # No --proxy option: fall back to the environment's proxies.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text `s` with the configured output encoding; bytes pass
        through unchanged."""
        if isinstance(s, bytes):
            return s  # Already encoded

            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            # Enrich the error so the user learns about --encoding.
            # NOTE(review): the enclosing 'try:' and the re-raise after this
            # line appear elided in this view.
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1401 def get_encoding(self):
1402 encoding = self.params.get('encoding')
1403 if encoding is None:
1404 encoding = preferredencoding()