2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
55 UnavailableVideoError,
62 from .cache import Cache
63 from .extractor import get_info_extractor, gen_extractors
64 from .downloader import get_suitable_downloader
65 from .downloader.rtmp import rtmpdump_version
66 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
67 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    username: Username for authentication purposes.
    password: Password for authentication purposes.
    videopassword: Password for accessing a video.
    usenetrc: Use netrc for authentication instead.
    verbose: Print additional info to stdout.
    quiet: Do not print messages to stdout.
    no_warnings: Do not print out anything for warnings.
    forceurl: Force printing final URL.
    forcetitle: Force printing title.
    forceid: Force printing ID.
    forcethumbnail: Force printing thumbnail URL.
    forcedescription: Force printing description.
    forcefilename: Force printing final filename.
    forceduration: Force printing duration.
    forcejson: Force printing info_dict as JSON.
    dump_single_json: Force printing the info_dict of the whole playlist
                      (or video) as a single JSON line.
    simulate: Do not download the video files.
    format: Video format code.
    format_limit: Highest quality format to try.
    outtmpl: Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors: Do not stop on download errors.
    nooverwrites: Prevent overwriting files.
    playliststart: Playlist item to start at.
    playlistend: Playlist item to end at.
    matchtitle: Download only matching titles.
    rejecttitle: Reject downloads for matching titles.
    logger: Log messages to a logging.Logger instance.
    logtostderr: Log messages to stderr instead of stdout.
    writedescription: Write the video description to a .description file
    writeinfojson: Write the video description to a .info.json file
    writeannotations: Write the video annotations to a .annotations.xml file
    writethumbnail: Write the thumbnail image to a file
    writesubtitles: Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles: Downloads all the subtitles of the video
                  (requires writesubtitles or writeautomaticsub)
    listsubtitles: Lists all available subtitles for the video
    subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs: List of languages of the subtitles to download
    keepvideo: Keep the video file after post-processing
    daterange: A DateRange object, download only if the upload_date is in the range.
    skip_download: Skip the actual download of the video file
    cachedir: Location of the cache files in the filesystem.
              False to disable filesystem cache.
    noplaylist: Download single video instead of a playlist if in doubt.
    age_limit: An integer representing the user's age in years.
               Unsuitable videos for the given age are skipped.
    min_views: An integer representing the minimum view count the video
               must have in order to not be skipped.
               Videos without view count information are always
               downloaded. None for no limit.
    max_views: An integer representing the maximum view count.
               Videos that are more popular than that are not
               Videos without view count information are always
               downloaded. None for no limit.
    download_archive: File name of a file where all downloads are recorded.
                      Videos already present in the file are not downloaded
    cookiefile: File name where cookies should be read from and dumped to.
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
                     At the moment, this is only supported by YouTube.
    proxy: URL of the proxy server to use
    socket_timeout: Time to wait for unresponsive hosts, in seconds
    bidi_workaround: Work around buggy terminals without bidirectional text
                     support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads: Download ads as well
    default_search: Prepend this string if an input url is not valid.
                    'auto' for elaborate guessing
    encoding: Use this encoding instead of the system-specified.
    extract_flat: Do not resolve URLs, return the immediate result.
                  Pass in 'in_playlist' to only show this behavior for

    The following parameters are not used by YoutubeDL itself, they are used by
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
                   otherwise prefer avconv.
    exec_cmd: Arbitrary command to run after downloading
    """

    # Class-level placeholders; __init__ resets both to 0 per instance.
    _download_retcode = None
    _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params    -- dict of options (see the class docstring for keys).
        auto_init -- presumably gates the print_debug_header /
                     add_default_info_extractors calls at the end —
                     TODO confirm (the guarding line is outside this view).
        """
        # Per-key cache of InfoExtractor instances (see get_info_extractor).
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route screen output to stderr when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Pipe output through an external bidi filter; the slave end of a
            # pty receives the filter's output, the master end is read back.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            # Fall back to fribidi when bidiv is unavailable.
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self.print_debug_header()
        self.add_default_info_extractors()
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # Cache the instance by key so get_info_extractor can reuse it.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: the IE gets a back-reference to this downloader.
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # Instantiate on cache miss and register it.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
272 def add_default_info_extractors(self):
274 Add the InfoExtractors returned by gen_extractors to the end of the list
276 for ie in gen_extractors():
277 self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # Give the PP a back-reference to this downloader ("mutual registration").
        pp.set_downloader(self)
284 def add_progress_hook(self, ph):
285 """Add the progress hook (currently only for the file downloader)"""
286 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        """Pass *message* through the external bidi filter process started in
        __init__ (bidiv/fribidi) and return the transformed text."""
        if not hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as were written.
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Strip the trailing newline appended above.
        return res[:-len('\n')]
301 def to_screen(self, message, skip_eol=False):
302 """Print message to stdout if not in quiet mode."""
303 return self.to_stdout(message, skip_eol, check_quiet=True)
305 def _write_string(self, s, out=None):
306 write_string(s, out=out, encoding=self.params.get('encoding'))
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode.

        A configured logger takes precedence over direct screen output;
        check_quiet makes the 'quiet' option suppress the message.
        """
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator
            self._write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr (or route it to the configured logger)."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        message = self._bidi_workaround(message)
        output = message + '\n'
        self._write_string(output, self._err_file)
    def to_console_title(self, message):
        """Set the terminal/console window title to *message* (no-op unless
        the 'consoletitle' option is enabled)."""
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm escape sequence: set window/icon title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        """Push the current terminal title onto the terminal's title stack."""
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)
    def restore_console_title(self):
        """Pop the terminal title saved by save_console_title."""
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
        self.save_console_title()

    def __exit__(self, *args):
        """Context-manager exit: restore the console title and persist cookies."""
        self.restore_console_title()
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the wrapped exception's own traceback when available.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record failure via the exit code instead of raising.
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        if self.params.get('no_warnings'):
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI yellow prefix on capable terminals.
            _msg_header = '\033[0;33mWARNING:\033[0m'
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI red prefix on capable terminals.
            _msg_header = '\033[0;31mERROR:\033[0m'
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a filename-free message when the name can't be encoded.
            self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename by expanding the 'outtmpl' template
        with a sanitized copy of info_dict."""
        template_dict = dict(info_dict)
        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist length.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # NOTE(review): width-only case renders as '?xWIDTH';
                # '%dx?' looks like the intended format — confirm.
                template_dict['resolution'] = '?x%d' % template_dict['width']
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Missing template keys render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # Otherwise returns a human-readable string explaining the skip.
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            actual_age_limit = info_dict.get('age_limit')
            if actual_age_limit is None:
            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
507 def add_extra_info(info_dict, extra_info):
508 '''Set the keys from extra_info in info dict if they are missing'''
509 for key, value in extra_info.items():
510 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        # NOTE(review): mutable default for extra_info is shared across calls;
        # safe only as long as it is never mutated — confirm.
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
        self.report_warning('The program functionality for this site has been marked as broken, '
                            'and will probably not work.')
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            '_type': 'compat_list',
            'entries': ie_result,
        self.add_default_extra_info(ie_result, ie, url)
        return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except MaxDownloadsReached:
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        """Attach extractor identity and URL-derived defaults to ie_result
        (only keys not already present are set — see add_extra_info)."""
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # extract_flat: return the unresolved result as-is.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Start from the embedding-page result and override it with
                # selected fields from the embedded info.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
                # Lazily-paged playlists are sliced via PagedList.getslice.
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            self.add_extra_info(r,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
        raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        """Pick a single format dict out of available_formats for format_spec.

        available_formats is ordered worst-to-best (the 'best' branch takes
        the last element); specs may also be an extension or a format_id.
        """
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
            # Otherwise match by extension, then by exact format_id.
            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        """Fill in defaults for a single video result, resolve the requested
        format(s) and hand each selected format to process_info."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        # Sort so that the largest thumbnail ends up last.
        thumbnails.sort(key=lambda t: (
            t.get('width'), t.get('height'), t.get('url')))
        if 'width' in t and 'height' in t:
            t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            # Default to the largest thumbnail (last after the sort above).
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Derive upload_date (YYYYMMDD, UTC) from the numeric timestamp.
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)
            if format.get('format_id') is None:
                # Sanitize format_id from a naughty format
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        # 'format_limit' caps quality: keep formats up to and including it.
        format_limit = self.params.get('format_limit', None)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            'requested_formats': formats_info,
                            'ext': formats_info[0]['ext'],
                        selected_format = None
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
        if not formats_to_download:
            raise ExtractorError('requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
864 def process_info(self, info_dict):
865 """Process a single resolved IE result."""
867 assert info_dict.get('_type', 'video') == 'video'
869 max_downloads = self.params.get('max_downloads')
870 if max_downloads is not None:
871 if self._num_downloads >= int(max_downloads):
872 raise MaxDownloadsReached()
874 info_dict['fulltitle'] = info_dict['title']
875 if len(info_dict['title']) > 200:
876 info_dict['title'] = info_dict['title'][:197] + '...'
878 # Keep for backwards compatibility
879 info_dict['stitle'] = info_dict['title']
881 if 'format' not in info_dict:
882 info_dict['format'] = info_dict['ext']
884 reason = self._match_entry(info_dict)
885 if reason is not None:
886 self.to_screen('[download] ' + reason)
889 self._num_downloads += 1
891 filename = self.prepare_filename(info_dict)
894 if self.params.get('forcetitle', False):
895 self.to_stdout(info_dict['fulltitle'])
896 if self.params.get('forceid', False):
897 self.to_stdout(info_dict['id'])
898 if self.params.get('forceurl', False):
899 # For RTMP URLs, also include the playpath
900 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
901 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
902 self.to_stdout(info_dict['thumbnail'])
903 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
904 self.to_stdout(info_dict['description'])
905 if self.params.get('forcefilename', False) and filename is not None:
906 self.to_stdout(filename)
907 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
908 self.to_stdout(formatSeconds(info_dict['duration']))
909 if self.params.get('forceformat', False):
910 self.to_stdout(info_dict['format'])
911 if self.params.get('forcejson', False):
912 info_dict['_filename'] = filename
913 self.to_stdout(json.dumps(info_dict))
914 if self.params.get('dump_single_json', False):
915 info_dict['_filename'] = filename
917 # Do nothing else if in simulate mode
918 if self.params.get('simulate', False):
925 dn = os.path.dirname(encodeFilename(filename))
926 if dn and not os.path.exists(dn):
928 except (OSError, IOError) as err:
929 self.report_error('unable to create directory ' + compat_str(err))
932 if self.params.get('writedescription', False):
933 descfn = filename + '.description'
934 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
935 self.to_screen('[info] Video description is already present')
938 self.to_screen('[info] Writing video description to: ' + descfn)
939 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
940 descfile.write(info_dict['description'])
941 except (KeyError, TypeError):
942 self.report_warning('There\'s no description to write.')
943 except (OSError, IOError):
944 self.report_error('Cannot write description file ' + descfn)
947 if self.params.get('writeannotations', False):
948 annofn = filename + '.annotations.xml'
949 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
950 self.to_screen('[info] Video annotations are already present')
953 self.to_screen('[info] Writing video annotations to: ' + annofn)
954 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
955 annofile.write(info_dict['annotations'])
956 except (KeyError, TypeError):
957 self.report_warning('There are no annotations to write.')
958 except (OSError, IOError):
959 self.report_error('Cannot write annotations file: ' + annofn)
962 subtitles_are_requested = any([self.params.get('writesubtitles', False),
963 self.params.get('writeautomaticsub')])
965 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
966 # subtitles download errors are already managed as troubles in relevant IE
967 # that way it will silently go on when used with unsupporting IE
968 subtitles = info_dict['subtitles']
969 sub_format = self.params.get('subtitlesformat', 'srt')
970 for sub_lang in subtitles.keys():
971 sub = subtitles[sub_lang]
975 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
976 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
977 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
979 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
980 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
982 except (OSError, IOError):
983 self.report_error('Cannot write subtitles file ' + sub_filename)
986 if self.params.get('writeinfojson', False):
987 infofn = os.path.splitext(filename)[0] + '.info.json'
988 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
989 self.to_screen('[info] Video description metadata is already present')
991 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
993 write_json_file(info_dict, encodeFilename(infofn))
994 except (OSError, IOError):
995 self.report_error('Cannot write metadata to JSON file ' + infofn)
998 if self.params.get('writethumbnail', False):
999 if info_dict.get('thumbnail') is not None:
1000 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1001 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1002 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1003 self.to_screen('[%s] %s: Thumbnail is already present' %
1004 (info_dict['extractor'], info_dict['id']))
1006 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1007 (info_dict['extractor'], info_dict['id']))
1009 uf = self.urlopen(info_dict['thumbnail'])
1010 with open(thumb_filename, 'wb') as thumbf:
1011 shutil.copyfileobj(uf, thumbf)
1012 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1013 (info_dict['extractor'], info_dict['id'], thumb_filename))
1014 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1015 self.report_warning('Unable to download thumbnail "%s": %s' %
1016 (info_dict['thumbnail'], compat_str(err)))
1018 if not self.params.get('skip_download', False):
1019 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1024 fd = get_suitable_downloader(info)(self, self.params)
1025 for ph in self._progress_hooks:
1026 fd.add_progress_hook(ph)
1027 if self.params.get('verbose'):
1028 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1029 return fd.download(name, info)
1030 if info_dict.get('requested_formats') is not None:
1033 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1034 if not merger._executable:
1036 self.report_warning('You have requested multiple '
1037 'formats but ffmpeg or avconv are not installed.'
1038 ' The formats won\'t be merged')
1040 postprocessors = [merger]
1041 for f in info_dict['requested_formats']:
1042 new_info = dict(info_dict)
1044 fname = self.prepare_filename(new_info)
1045 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1046 downloaded.append(fname)
1047 partial_success = dl(fname, new_info)
1048 success = success and partial_success
1049 info_dict['__postprocessors'] = postprocessors
1050 info_dict['__files_to_merge'] = downloaded
1052 # Just a single file
1053 success = dl(filename, info_dict)
1054 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1055 self.report_error('unable to download video data: %s' % str(err))
1057 except (OSError, IOError) as err:
1058 raise UnavailableVideoError(err)
1059 except (ContentTooShortError, ) as err:
1060 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1065 self.post_process(filename, info_dict)
1066 except (PostProcessingError) as err:
1067 self.report_error('postprocessing: %s' % str(err))
1070 self.record_download_archive(info_dict)
1072 def download(self, url_list):
1073 """Download a given list of URLs."""
# Output template decides whether several URLs would collide on one file.
1074 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Refuse to write multiple videos to a single fixed filename.
# NOTE(review): the condition looks truncated in this view — a clause
# (presumably testing outtmpl for template placeholders) appears to be
# missing between the next two lines; confirm against upstream.
1075 if (len(url_list) > 1 and
1077 and self.params.get('max_downloads') != 1):
1078 raise SameFileError(outtmpl)
1080 for url in url_list:
# NOTE(review): the enclosing "try:" line is not visible here.
1082 #It also downloads the videos
1083 res = self.extract_info(url)
1084 except UnavailableVideoError:
1085 self.report_error('unable to download video')
1086 except MaxDownloadsReached:
# --max-downloads limit hit: stop iterating over the remaining URLs.
1087 self.to_screen('[info] Maximum number of downloaded files reached.')
# --dump-single-json: emit the extracted info as one JSON document.
1090 if self.params.get('dump_single_json', False):
1091 self.to_stdout(json.dumps(res))
# Process-wide return code accumulated by report_error() et al.
1093 return self._download_retcode
1095 def download_with_info_file(self, info_filename):
# Re-run download/processing from a previously dumped .info.json file
# (--load-info), bypassing fresh extraction.
1096 with io.open(info_filename, 'r', encoding='utf-8') as f:
# NOTE(review): the "try:" and the json.load() producing `info`
# appear elided from this view.
1099 self.process_ie_result(info, download=True)
1100 except DownloadError:
# The stored info dict may be stale; fall back to re-extracting
# from the recorded webpage URL when one is available.
1101 webpage_url = info.get('webpage_url')
1102 if webpage_url is not None:
1103 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1104 return self.download([webpage_url])
1107 return self._download_retcode
1109 def post_process(self, filename, ie_info):
1110 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated.
1111 info = dict(ie_info)
1112 info['filepath'] = filename
# Per-video postprocessors (e.g. FFmpegMergerPP stored under
# '__postprocessors') run before the globally registered ones.
# NOTE(review): the initialisation of pps_chain (and keep_video)
# appears elided from this view.
1115 if ie_info.get('__postprocessors') is not None:
1116 pps_chain.extend(ie_info['__postprocessors'])
1117 pps_chain.extend(self._pps)
1118 for pp in pps_chain:
# Each postprocessor reports whether the original file should be kept.
1120 keep_video_wish, new_info = pp.run(info)
1121 if keep_video_wish is not None:
# An explicit wish from a postprocessor wins.
1123 keep_video = keep_video_wish
1124 elif keep_video is None:
1125 # No clear decision yet, let IE decide
1126 keep_video = keep_video_wish
1127 except PostProcessingError as e:
1128 self.report_error(e.msg)
# Delete the source file unless a PP asked to keep it or -k was given.
1129 if keep_video is False and not self.params.get('keepvideo', False):
1131 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1132 os.remove(encodeFilename(filename))
1133 except (IOError, OSError):
# Best-effort removal: failing to delete is only worth a warning.
1134 self.report_warning('Unable to remove downloaded video file')
1136 def _make_archive_id(self, info_dict):
1137 # Future-proof against any change in case
1138 # and backwards compatibility with prior versions
1139 extractor = info_dict.get('extractor_key')
1140 if extractor is None:
1141 if 'id' in info_dict:
1142 extractor = info_dict.get('ie_key') # key in a playlist
1143 if extractor is None:
1144 return None # Incomplete video information
1145 return extractor.lower() + ' ' + info_dict['id']
1147 def in_download_archive(self, info_dict):
# Return whether this video is already recorded in the
# --download-archive file (one '<extractor> <id>' entry per line).
1148 fn = self.params.get('download_archive')
# NOTE(review): the early "fn is None -> False" guard and the "try:"
# appear elided from this view.
1152 vid_id = self._make_archive_id(info_dict)
1154 return False # Incomplete video information
# locked_file serialises access with concurrent writers of the archive.
1157 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1158 for line in archive_file:
1159 if line.strip() == vid_id:
1161 except IOError as ioe:
# A missing archive file just means nothing has been archived yet;
# any other I/O error should propagate.
1162 if ioe.errno != errno.ENOENT:
1166 def record_download_archive(self, info_dict):
# Append this video's archive id to the --download-archive file so it
# is skipped on subsequent runs.
1167 fn = self.params.get('download_archive')
# NOTE(review): the "fn is None -> return" guard appears elided here.
1170 vid_id = self._make_archive_id(info_dict)
# Append under a file lock so concurrent youtube-dl processes do not
# interleave writes.
1172 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1173 archive_file.write(vid_id + '\n')
1176 def format_resolution(format, default='unknown'):
# Return a human-readable resolution label for a format dict,
# e.g. '1280x720', '720p' or '?x640'; `default` when nothing is known.
# NOTE(review): several branch bodies and the final return appear
# elided from this view.
1177 if format.get('vcodec') == 'none':
# Prefer an explicit 'resolution' value supplied by the extractor.
1179 if format.get('resolution') is not None:
1180 return format['resolution']
1181 if format.get('height') is not None:
1182 if format.get('width') is not None:
1183 res = '%sx%s' % (format['width'], format['height'])
# Height only: conventional 'Np' notation.
1185 res = '%sp' % format['height']
1186 elif format.get('width') is not None:
# Width only: height is unknown.
1187 res = '?x%d' % format['width']
1192 def _format_note(self, fdict):
# Build a short human-readable annotation for one format dict:
# container, codecs, bitrates, fps, sample rate and (approximate) size.
# NOTE(review): some lines (e.g. the initial accumulator assignment and
# the final return) appear elided from this view.
1194 if fdict.get('ext') in ['f4f', 'f4m']:
1195 res += '(unsupported) '
1196 if fdict.get('format_note') is not None:
1197 res += fdict['format_note'] + ' '
1198 if fdict.get('tbr') is not None:
# Total bitrate (kbit/s), right-aligned to 4 columns.
1199 res += '%4dk ' % fdict['tbr']
1200 if fdict.get('container') is not None:
1203 res += '%s container' % fdict['container']
# Video codec info, only for formats that actually carry video.
1204 if (fdict.get('vcodec') is not None and
1205 fdict.get('vcodec') != 'none'):
1208 res += fdict['vcodec']
1209 if fdict.get('vbr') is not None:
1211 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1213 if fdict.get('vbr') is not None:
# Video bitrate (kbit/s).
1214 res += '%4dk' % fdict['vbr']
1215 if fdict.get('fps') is not None:
1216 res += ', %sfps' % fdict['fps']
1217 if fdict.get('acodec') is not None:
1220 if fdict['acodec'] == 'none':
1223 res += '%-5s' % fdict['acodec']
1224 elif fdict.get('abr') is not None:
# Audio bitrate (kbit/s).
1228 if fdict.get('abr') is not None:
1229 res += '@%3dk' % fdict['abr']
1230 if fdict.get('asr') is not None:
# Audio sample rate in Hz.
1231 res += ' (%5dHz)' % fdict['asr']
1232 if fdict.get('filesize') is not None:
1235 res += format_bytes(fdict['filesize'])
1236 elif fdict.get('filesize_approx') is not None:
# Approximate sizes are marked with a leading '~'.
1239 res += '~' + format_bytes(fdict['filesize_approx'])
1242 def list_formats(self, info_dict):
# Print the --list-formats table for one video.
1243 def line(format, idlen=20):
# One fixed-width table row: format id, extension, resolution, note.
# NOTE(review): an argument (presumably format['ext']) and the row's
# closing lines appear elided from this view.
1244 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1245 format['format_id'],
1247 self.format_resolution(format),
1248 self._format_note(format),
# An info dict without a 'formats' list is treated as its own single format.
1251 formats = info_dict.get('formats', [info_dict])
# First column width: widest format_id, never narrower than the header.
1252 idlen = max(len('format code'),
1253 max(len(f['format_id']) for f in formats))
1254 formats_s = [line(f, idlen) for f in formats]
# Formats are sorted worst-first, so annotate the two extremes.
1255 if len(formats) > 1:
1256 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1257 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
# The header row reuses line() with literal column titles.
1259 header_line = line({
1260 'format_id': 'format code', 'ext': 'extension',
1261 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1262 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1263 (info_dict['id'], header_line, '\n'.join(formats_s)))
1265 def urlopen(self, req):
1266 """ Start an HTTP download """
1268 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1269 # always respected by websites, some tend to give out URLs with non percent-encoded
1270 # non-ASCII characters (see telemb.py, ard.py [#3412])
1271 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1272 # To work around aforementioned issue we will replace request's original URL with
1273 # percent-encoded one
# `req` may be either a plain URL string or a Request object;
# basestring exists only on Python 2, hence the version check.
1274 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1275 url = req if req_is_string else req.get_full_url()
1276 url_escaped = escape_url(url)
1278 # Substitute URL if any change after escaping
1279 if url != url_escaped:
# NOTE(review): the string-request branch appears elided here; the
# visible code rebuilds a Request, copying data/headers/origin flags.
1283 req = compat_urllib_request.Request(
1284 url_escaped, data=req.data, headers=req.headers,
1285 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
# All network traffic goes through the opener built by _setup_opener().
1287 return self._opener.open(req, timeout=self._socket_timeout)
1289 def print_debug_header(self):
# Emit the '[debug] ...' banner (versions, encodings, proxies) shown
# at startup when --verbose is active.
1290 if not self.params.get('verbose'):
# Detect unicode-broken Python builds (str literal is not compat_str).
1293 if type('') is not compat_str:
1294 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1295 self.report_warning(
1296 'Your Python is broken! Update to a newer and supported version')
# NOTE(review): the assignment of encoding_str appears elided here.
1299 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1300 locale.getpreferredencoding(),
1301 sys.getfilesystemencoding(),
1302 sys.stdout.encoding,
1303 self.get_encoding()))
1304 write_string(encoding_str, encoding=None)
1306 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git revision when running from a checkout.
# NOTE(review): the surrounding try/except appears elided here.
1308 sp = subprocess.Popen(
1309 ['git', 'rev-parse', '--short', 'HEAD'],
1310 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1311 cwd=os.path.dirname(os.path.abspath(__file__)))
1312 out, err = sp.communicate()
1313 out = out.decode().strip()
1314 if re.match('[0-9a-f]+', out):
1315 self._write_string('[debug] Git HEAD: ' + out + '\n')
1321 self._write_string('[debug] Python version %s - %s\n' % (
1322 platform.python_version(), platform_name()))
# Versions of external helper binaries (ffmpeg/avconv/rtmpdump).
1324 exe_versions = FFmpegPostProcessor.get_versions()
1325 exe_versions['rtmpdump'] = rtmpdump_version()
1326 exe_str = ', '.join(
1328 for exe, v in sorted(exe_versions.items())
1333 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the effective proxy configuration from the opener's handlers.
# NOTE(review): the proxy_map initialisation appears elided here.
1336 for handler in self._opener.handlers:
1337 if hasattr(handler, 'proxies'):
1338 proxy_map.update(handler.proxies)
1339 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1341 def _setup_opener(self):
# Build the urllib opener (cookies, proxies, HTTPS handling) used by
# urlopen() for all network requests, and store it on self._opener.
1342 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds when none was requested.
1343 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1345 opts_cookiefile = self.params.get('cookiefile')
1346 opts_proxy = self.params.get('proxy')
# In-memory cookies by default; a Mozilla-format jar when --cookies
# was given (loaded only if the file is readable).
1348 if opts_cookiefile is None:
1349 self.cookiejar = compat_cookiejar.CookieJar()
1351 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1353 if os.access(opts_cookiefile, os.R_OK):
1354 self.cookiejar.load()
1356 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# Explicit --proxy wins over environment proxies; an empty string
# disables proxying entirely.
1358 if opts_proxy is not None:
1359 if opts_proxy == '':
1362 proxies = {'http': opts_proxy, 'https': opts_proxy}
1364 proxies = compat_urllib_request.getproxies()
1365 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1366 if 'http' in proxies and 'https' not in proxies:
1367 proxies['https'] = proxies['http']
1368 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --debug-printtraffic turns on urllib's wire-level logging.
1370 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1371 https_handler = make_HTTPS_handler(
1372 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1373 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1374 opener = compat_urllib_request.build_opener(
1375 https_handler, proxy_handler, cookie_processor, ydlh)
1376 # Delete the default user-agent header, which would otherwise apply in
1377 # cases where our custom HTTP handler doesn't come into play
1378 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1379 opener.addheaders = []
1380 self._opener = opener
1382 def encode(self, s):
# Encode a text string to bytes using the configured output encoding
# (see get_encoding()); bytes input is passed through untouched.
1383 if isinstance(s, bytes):
1384 return s # Already encoded
# NOTE(review): the surrounding "try:" and the re-raise after the
# reason is amended appear elided from this view.
1387 return s.encode(self.get_encoding())
1388 except UnicodeEncodeError as err:
# Enrich the exception with a user-facing hint before it propagates.
1389 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1392 def get_encoding(self):
1393 encoding = self.params.get('encoding')
1394 if encoding is None:
1395 encoding = preferredencoding()