2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
57 UnavailableVideoError,
65 from .cache import Cache
66 from .extractor import get_info_extractor, gen_extractors
67 from .downloader import get_suitable_downloader
68 from .downloader.rtmp import rtmpdump_version
69 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
70 from .version import __version__
73 class YoutubeDL(object):
76 YoutubeDL objects are the ones responsible of downloading the
77 actual video file and writing it to disk if the user has requested
78 it, among some other tasks. In most cases there should be one per
79 program. As, given a video URL, the downloader doesn't know how to
80 extract all the needed information, task that InfoExtractors do, it
81 has to pass the URL to one of them.
83 For this, YoutubeDL objects have a method that allows
84 InfoExtractors to be registered in a given order. When it is passed
85 a URL, the YoutubeDL object handles it to the first InfoExtractor it
86 finds that reports being able to handle it. The InfoExtractor extracts
87 all the information about the video or videos the URL refers to, and
88 YoutubeDL processes the extracted information, possibly using a File
89 Downloader to download the video.
91 YoutubeDL objects accept a lot of parameters. In order not to saturate
92 the object constructor with arguments, it receives a dictionary of
93 options instead. These options are available through the params
94 attribute for the InfoExtractors to use. The YoutubeDL also
95 registers itself as the downloader in charge for the InfoExtractors
96 that are added to it, so this is a "mutual registration".
100 username: Username for authentication purposes.
101 password: Password for authentication purposes.
102 videopassword: Password for accessing a video.
103 usenetrc: Use netrc for authentication instead.
104 verbose: Print additional info to stdout.
105 quiet: Do not print messages to stdout.
106 no_warnings: Do not print out anything for warnings.
107 forceurl: Force printing final URL.
108 forcetitle: Force printing title.
109 forceid: Force printing ID.
110 forcethumbnail: Force printing thumbnail URL.
111 forcedescription: Force printing description.
112 forcefilename: Force printing final filename.
113 forceduration: Force printing duration.
114 forcejson: Force printing info_dict as JSON.
115 dump_single_json: Force printing the info_dict of the whole playlist
116 (or video) as a single JSON line.
117 simulate: Do not download the video files.
118 format: Video format code.
119 format_limit: Highest quality format to try.
120 outtmpl: Template for output names.
121 restrictfilenames: Do not allow "&" and spaces in file names
122 ignoreerrors: Do not stop on download errors.
123 nooverwrites: Prevent overwriting files.
124 playliststart: Playlist item to start at.
125 playlistend: Playlist item to end at.
126 matchtitle: Download only matching titles.
127 rejecttitle: Reject downloads for matching titles.
128 logger: Log messages to a logging.Logger instance.
129 logtostderr: Log messages to stderr instead of stdout.
130 writedescription: Write the video description to a .description file
131 writeinfojson: Write the video description to a .info.json file
132 writeannotations: Write the video annotations to a .annotations.xml file
133 writethumbnail: Write the thumbnail image to a file
134 writesubtitles: Write the video subtitles to a file
135 writeautomaticsub: Write the automatic subtitles to a file
136 allsubtitles: Downloads all the subtitles of the video
137 (requires writesubtitles or writeautomaticsub)
138 listsubtitles: Lists all available subtitles for the video
139 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
140 subtitleslangs: List of languages of the subtitles to download
141 keepvideo: Keep the video file after post-processing
142 daterange: A DateRange object, download only if the upload_date is in the range.
143 skip_download: Skip the actual download of the video file
144 cachedir: Location of the cache files in the filesystem.
145 False to disable filesystem cache.
146 noplaylist: Download single video instead of a playlist if in doubt.
147 age_limit: An integer representing the user's age in years.
148 Unsuitable videos for the given age are skipped.
149 min_views: An integer representing the minimum view count the video
150 must have in order to not be skipped.
151 Videos without view count information are always
152 downloaded. None for no limit.
153 max_views: An integer representing the maximum view count.
154 Videos that are more popular than that are not downloaded.
156 Videos without view count information are always
157 downloaded. None for no limit.
158 download_archive: File name of a file where all downloads are recorded.
159 Videos already present in the file are not downloaded
161 cookiefile: File name where cookies should be read from and dumped to.
162 nocheckcertificate:Do not verify SSL certificates
163 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
164 At the moment, this is only supported by YouTube.
165 proxy: URL of the proxy server to use
166 socket_timeout: Time to wait for unresponsive hosts, in seconds
167 bidi_workaround: Work around buggy terminals without bidirectional text
168 support, using fribidi
169 debug_printtraffic:Print out sent and received HTTP traffic
170 include_ads: Download ads as well
171 default_search: Prepend this string if an input url is not valid.
172 'auto' for elaborate guessing
173 encoding: Use this encoding instead of the system-specified.
174 extract_flat: Do not resolve URLs, return the immediate result.
175 Pass in 'in_playlist' to only show this behavior for
177 no_playlist: If the URL contains both a playlist and a video ID,
178 download the video, not the playlist.
180 The following parameters are not used by YoutubeDL itself, they are used by
182 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
183 noresizebuffer, retries, continuedl, noprogress, consoletitle
185 The following options are used by the post processors:
186 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
187 otherwise prefer avconv.
188 exec_cmd: Arbitrary command to run after downloading
194 _download_retcode = None
195 _num_downloads = None
198 def __init__(self, params=None, auto_init=True):
199 """Create a FileDownloader object with the given options."""
# NOTE(review): the interleaved numbers are line-number artifacts from the
# extraction, and many intermediate lines are missing from this view; the
# fragments below are kept verbatim rather than reconstructed.
203 self._ies_instances = {}
205 self._progress_hooks = []
206 self._download_retcode = 0
207 self._num_downloads = 0
# Index trick: logtostderr False -> sys.stdout, True -> sys.stderr.
208 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
209 self._err_file = sys.stderr
211 self.cache = Cache(self)
# Bidirectional-text workaround: pipe screen output through an external
# filter ('bidiv', presumably falling back to 'fribidi' -- the fallback
# control flow is not fully visible here).
213 if params.get('bidi_workaround', False):
216 master, slave = pty.openpty()
217 width = get_term_width()
221 width_args = ['-w', str(width)]
223 stdin=subprocess.PIPE,
225 stderr=self._err_file)
227 self._output_process = subprocess.Popen(
228 ['bidiv'] + width_args, **sp_kwargs
231 self._output_process = subprocess.Popen(
232 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
233 self._output_channel = os.fdopen(master, 'rb')
234 except OSError as ose:
236 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Auto-enable restricted filenames when the filesystem encoding cannot
# represent arbitrary characters (Python 3, non-Windows only).
240 if (sys.version_info >= (3,) and sys.platform != 'win32' and
241 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
242 and not params.get('restrictfilenames', False)):
243 # On Python 3, the Unicode filesystem API will throw errors (#1474)
245 'Assuming --restrict-filenames since file system encoding '
246 'cannot encode all characters. '
247 'Set the LC_ALL environment variable to fix this.')
248 self.params['restrictfilenames'] = True
# Warn about the deprecated %(stitle)s output-template placeholder.
250 if '%(stitle)s' in self.params.get('outtmpl', ''):
251 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
256 self.print_debug_header()
257 self.add_default_info_extractors()
def warn_if_short_id(self, argv):
    """Warn when argv mixes options with a dash-prefixed 11-character
    YouTube video ID, which option parsers mistake for a flag; suggest
    the ``--`` separator instead."""
    # short YouTube ID starting with dash?
    idxs = [
        i for i, a in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
    if idxs:
        # Rebuild the command line with the suspect IDs moved after '--'.
        correct_argv = (
            ['youtube-dl'] +
            [a for i, a in enumerate(argv) if i not in idxs] +
            ['--'] + [argv[i] for i in idxs]
        )
        self.report_warning(
            'Long argument string detected. '
            'Use -- to separate parameters and URLs, like this:\n%s\n' %
            args_to_str(correct_argv))
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    self._ies.append(ie)
    # Index by key for O(1) lookup in get_info_extractor().
    self._ies_instances[ie.ie_key()] = ie
    # Mutual registration: the extractor keeps a reference back to us.
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        # The global get_info_extractor (from .extractor) returns the
        # extractor class; instantiate and register it lazily.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    self._pps.append(pp)
    # Mutual registration, mirroring add_info_extractor().
    pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader)"""
    self._progress_hooks.append(ph)
def _bidi_workaround(self, message):
    """Pass `message` through the external bidi subprocess started in
    __init__; returns it unchanged when the workaround is disabled."""
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    line_count = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    # Read back exactly as many lines as were written; the subprocess
    # echoes the reordered text on our _output_channel.
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    # Drop the trailing newline appended above.
    return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode."""
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    # Centralized write helper honouring the user-selected output encoding.
    write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode."""
    if self.params.get('logger'):
        # A user-supplied logger takes over all screen output.
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        terminator = ['\n', ''][skip_eol]
        output = message + terminator

        self._write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        # A user-supplied logger takes over all error output.
        self.params['logger'].error(message)
    else:
        message = self._bidi_workaround(message)
        output = message + '\n'
        self._write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the terminal/console window title to `message`; no-op unless
    the 'consoletitle' option is enabled."""
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm OSC 0: set both icon name and window title.
        self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title onto the xterm title stack."""
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the previously saved terminal title off the xterm title stack."""
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
375 self.save_console_title()
def __exit__(self, *args):
    """Context-manager exit: restore the console title and persist cookies."""
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                # Nested exc_info (e.g. from DownloadError) takes priority.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
    else:
        if self.params.get('no_warnings'):
            return
        # ANSI colors are skipped on Windows consoles.
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    # ANSI colors are skipped on Windows consoles.
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;31mERROR:\033[0m'
    else:
        _msg_header = 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The filename may not be representable in the console encoding.
        self.to_screen('[download] The file has already been downloaded')
def prepare_filename(self, info_dict):
    """Generate the output filename."""
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist length.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '?x%d' % template_dict['width']

        # Sanitize every value so it is safe to embed in a filename;
        # 'id' values get special treatment inside sanitize_filename().
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        # Missing template fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        actual_age_limit = info_dict.get('age_limit')
        if actual_age_limit is None:
            # Videos without an age limit are treated as unrestricted.
            actual_age_limit = 0
        if age_limit < actual_age_limit:
            return 'Skipping "' + title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
533 def extract_info(self, url, download=True, ie_key=None, extra_info={},
# NOTE(review): leading numbers are line-number artifacts from the extraction
# and several original lines (docstring delimiters, loop/try structure) are
# missing from this view; the statements below are kept verbatim as fragments.
536 Returns a list with a dictionary for each video we find.
537 If 'download', also downloads the videos.
538 extra_info is a dict containing the extra values to add to each result
# When ie_key is given, only that single extractor is tried.
542 ies = [self.get_info_extractor(ie_key)]
547 if not ie.suitable(url):
551 self.report_warning('The program functionality for this site has been marked as broken, '
552 'and will probably not work.')
555 ie_result = ie.extract(url)
556 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
# Old extractors returned a bare list of videos; wrap it in the newer
# 'compat_list' result shape before further processing.
558 if isinstance(ie_result, list):
559 # Backwards compatibility: old IE result format
561 '_type': 'compat_list',
562 'entries': ie_result,
564 self.add_default_extra_info(ie_result, ie, url)
566 return self.process_ie_result(ie_result, download, extra_info)
569 except ExtractorError as de: # An error we somewhat expected
570 self.report_error(compat_str(de), de.format_traceback())
572 except MaxDownloadsReached:
# Unexpected errors are reported (and swallowed) only with 'ignoreerrors';
# otherwise they propagate -- the re-raise branch is not visible here.
574 except Exception as e:
575 if self.params.get('ignoreerrors', False):
576 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
581 self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
    """Attach default bookkeeping fields (extractor name/key and the page
    URL) to an extraction result without overwriting existing keys."""
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
591 def process_ie_result(self, ie_result, download=True, extra_info={}):
593 Take the result of the ie(may be modified) and resolve all unresolved
594 references (URLs, playlist items).
596 It will also download the videos if 'download'.
597 Returns the resolved ie_result.
600 result_type = ie_result.get('_type', 'video')
602 if result_type in ('url', 'url_transparent'):
603 extract_flat = self.params.get('extract_flat', False)
604 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
605 extract_flat is True):
606 if self.params.get('forcejson', False):
607 self.to_stdout(json.dumps(ie_result))
610 if result_type == 'video':
611 self.add_extra_info(ie_result, extra_info)
612 return self.process_video_result(ie_result, download=download)
613 elif result_type == 'url':
614 # We have to add extra_info to the results because it may be
615 # contained in a playlist
616 return self.extract_info(ie_result['url'],
618 ie_key=ie_result.get('ie_key'),
619 extra_info=extra_info)
620 elif result_type == 'url_transparent':
621 # Use the information from the embedding page
622 info = self.extract_info(
623 ie_result['url'], ie_key=ie_result.get('ie_key'),
624 extra_info=extra_info, download=False, process=False)
626 def make_result(embedded_info):
627 new_result = ie_result.copy()
628 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
629 'entries', 'ie_key', 'duration',
630 'subtitles', 'annotations', 'format',
631 'thumbnail', 'thumbnails'):
634 if f in embedded_info:
635 new_result[f] = embedded_info[f]
637 new_result = make_result(info)
639 assert new_result.get('_type') != 'url_transparent'
640 if new_result.get('_type') == 'compat_list':
641 new_result['entries'] = [
642 make_result(e) for e in new_result['entries']]
644 return self.process_ie_result(
645 new_result, download=download, extra_info=extra_info)
646 elif result_type == 'playlist' or result_type == 'multi_video':
647 # We process each entry in the playlist
648 playlist = ie_result.get('title', None) or ie_result.get('id', None)
649 self.to_screen('[download] Downloading playlist: %s' % playlist)
651 playlist_results = []
653 playliststart = self.params.get('playliststart', 1) - 1
654 playlistend = self.params.get('playlistend', None)
655 # For backwards compatibility, interpret -1 as whole list
656 if playlistend == -1:
659 if isinstance(ie_result['entries'], list):
660 n_all_entries = len(ie_result['entries'])
661 entries = ie_result['entries'][playliststart:playlistend]
662 n_entries = len(entries)
664 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
665 (ie_result['extractor'], playlist, n_all_entries, n_entries))
667 assert isinstance(ie_result['entries'], PagedList)
668 entries = ie_result['entries'].getslice(
669 playliststart, playlistend)
670 n_entries = len(entries)
672 "[%s] playlist %s: Downloading %d videos" %
673 (ie_result['extractor'], playlist, n_entries))
675 for i, entry in enumerate(entries, 1):
676 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
678 'n_entries': n_entries,
679 'playlist': playlist,
680 'playlist_id': ie_result.get('id'),
681 'playlist_title': ie_result.get('title'),
682 'playlist_index': i + playliststart,
683 'extractor': ie_result['extractor'],
684 'webpage_url': ie_result['webpage_url'],
685 'webpage_url_basename': url_basename(ie_result['webpage_url']),
686 'extractor_key': ie_result['extractor_key'],
689 reason = self._match_entry(entry)
690 if reason is not None:
691 self.to_screen('[download] ' + reason)
694 entry_result = self.process_ie_result(entry,
697 playlist_results.append(entry_result)
698 ie_result['entries'] = playlist_results
700 elif result_type == 'compat_list':
702 'Extractor %s returned a compat_list result. '
703 'It needs to be updated.' % ie_result.get('extractor'))
709 'extractor': ie_result['extractor'],
710 'webpage_url': ie_result['webpage_url'],
711 'webpage_url_basename': url_basename(ie_result['webpage_url']),
712 'extractor_key': ie_result['extractor_key'],
716 ie_result['entries'] = [
717 self.process_ie_result(_fixup(r), download, extra_info)
718 for r in ie_result['entries']
722 raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick one format dict from `available_formats` according to
    `format_spec`; returns None when nothing matches.

    The 'best'/'worst' branches rely on `available_formats` being ordered
    from worst to best quality.
    """
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    elif format_spec == 'bestaudio':
        audio_formats = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
        if audio_formats:
            return audio_formats[-1]
    elif format_spec == 'worstaudio':
        audio_formats = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
        if audio_formats:
            return audio_formats[0]
    elif format_spec == 'bestvideo':
        video_formats = [
            f for f in available_formats
            if f.get('acodec') == 'none']
        if video_formats:
            return video_formats[-1]
    elif format_spec == 'worstvideo':
        video_formats = [
            f for f in available_formats
            if f.get('acodec') == 'none']
        if video_formats:
            return video_formats[0]
    else:
        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
        if format_spec in extensions:
            # The spec looks like a file extension.
            filter_f = lambda f: f['ext'] == format_spec
        else:
            # Otherwise treat the spec as an exact format_id.
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
        if matches:
            return matches[-1]
    return None
764 def process_video_result(self, info_dict, download=True):
765 assert info_dict.get('_type', 'video') == 'video'
767 if 'id' not in info_dict:
768 raise ExtractorError('Missing "id" field in extractor result')
769 if 'title' not in info_dict:
770 raise ExtractorError('Missing "title" field in extractor result')
772 if 'playlist' not in info_dict:
773 # It isn't part of a playlist
774 info_dict['playlist'] = None
775 info_dict['playlist_index'] = None
777 thumbnails = info_dict.get('thumbnails')
779 thumbnails.sort(key=lambda t: (
780 t.get('width'), t.get('height'), t.get('url')))
782 if 'width' in t and 'height' in t:
783 t['resolution'] = '%dx%d' % (t['width'], t['height'])
785 if thumbnails and 'thumbnail' not in info_dict:
786 info_dict['thumbnail'] = thumbnails[-1]['url']
788 if 'display_id' not in info_dict and 'id' in info_dict:
789 info_dict['display_id'] = info_dict['id']
791 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
792 upload_date = datetime.datetime.utcfromtimestamp(
793 info_dict['timestamp'])
794 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
796 # This extractors handle format selection themselves
797 if info_dict['extractor'] in ['Youku']:
799 self.process_info(info_dict)
802 # We now pick which formats have to be downloaded
803 if info_dict.get('formats') is None:
804 # There's only one format available
805 formats = [info_dict]
807 formats = info_dict['formats']
810 raise ExtractorError('No video formats found!')
812 # We check that all the formats have the format and format_id fields
813 for i, format in enumerate(formats):
814 if 'url' not in format:
815 raise ExtractorError('Missing "url" key in result (index %d)' % i)
817 if format.get('format_id') is None:
818 format['format_id'] = compat_str(i)
819 if format.get('format') is None:
820 format['format'] = '{id} - {res}{note}'.format(
821 id=format['format_id'],
822 res=self.format_resolution(format),
823 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
825 # Automatically determine file extension if missing
826 if 'ext' not in format:
827 format['ext'] = determine_ext(format['url']).lower()
829 format_limit = self.params.get('format_limit', None)
831 formats = list(takewhile_inclusive(
832 lambda f: f['format_id'] != format_limit, formats
835 # TODO Central sorting goes here
837 if formats[0] is not info_dict:
838 # only set the 'formats' fields if the original info_dict list them
839 # otherwise we end up with a circular reference, the first (and unique)
840 # element in the 'formats' field in info_dict is info_dict itself,
841 # wich can't be exported to json
842 info_dict['formats'] = formats
843 if self.params.get('listformats', None):
844 self.list_formats(info_dict)
847 req_format = self.params.get('format')
848 if req_format is None:
850 formats_to_download = []
851 # The -1 is for supporting YoutubeIE
852 if req_format in ('-1', 'all'):
853 formats_to_download = formats
855 for rfstr in req_format.split(','):
856 # We can accept formats requested in the format: 34/5/best, we pick
857 # the first that is available, starting from left
858 req_formats = rfstr.split('/')
859 for rf in req_formats:
860 if re.match(r'.+?\+.+?', rf) is not None:
861 # Two formats have been requested like '137+139'
862 format_1, format_2 = rf.split('+')
863 formats_info = (self.select_format(format_1, formats),
864 self.select_format(format_2, formats))
865 if all(formats_info):
866 # The first format must contain the video and the
868 if formats_info[0].get('vcodec') == 'none':
869 self.report_error('The first format must '
870 'contain the video, try using '
871 '"-f %s+%s"' % (format_2, format_1))
874 'requested_formats': formats_info,
876 'ext': formats_info[0]['ext'],
879 selected_format = None
881 selected_format = self.select_format(rf, formats)
882 if selected_format is not None:
883 formats_to_download.append(selected_format)
885 if not formats_to_download:
886 raise ExtractorError('requested format not available',
890 if len(formats_to_download) > 1:
891 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
892 for format in formats_to_download:
893 new_info = dict(info_dict)
894 new_info.update(format)
895 self.process_info(new_info)
896 # We update the info dict with the best quality format (backwards compatibility)
897 info_dict.update(formats_to_download[-1])
900 def process_info(self, info_dict):
901 """Process a single resolved IE result."""
903 assert info_dict.get('_type', 'video') == 'video'
905 max_downloads = self.params.get('max_downloads')
906 if max_downloads is not None:
907 if self._num_downloads >= int(max_downloads):
908 raise MaxDownloadsReached()
910 info_dict['fulltitle'] = info_dict['title']
911 if len(info_dict['title']) > 200:
912 info_dict['title'] = info_dict['title'][:197] + '...'
914 # Keep for backwards compatibility
915 info_dict['stitle'] = info_dict['title']
917 if 'format' not in info_dict:
918 info_dict['format'] = info_dict['ext']
920 reason = self._match_entry(info_dict)
921 if reason is not None:
922 self.to_screen('[download] ' + reason)
925 self._num_downloads += 1
927 filename = self.prepare_filename(info_dict)
930 if self.params.get('forcetitle', False):
931 self.to_stdout(info_dict['fulltitle'])
932 if self.params.get('forceid', False):
933 self.to_stdout(info_dict['id'])
934 if self.params.get('forceurl', False):
935 # For RTMP URLs, also include the playpath
936 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
937 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
938 self.to_stdout(info_dict['thumbnail'])
939 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
940 self.to_stdout(info_dict['description'])
941 if self.params.get('forcefilename', False) and filename is not None:
942 self.to_stdout(filename)
943 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
944 self.to_stdout(formatSeconds(info_dict['duration']))
945 if self.params.get('forceformat', False):
946 self.to_stdout(info_dict['format'])
947 if self.params.get('forcejson', False):
948 info_dict['_filename'] = filename
949 self.to_stdout(json.dumps(info_dict))
950 if self.params.get('dump_single_json', False):
951 info_dict['_filename'] = filename
953 # Do nothing else if in simulate mode
954 if self.params.get('simulate', False):
961 dn = os.path.dirname(encodeFilename(filename))
962 if dn and not os.path.exists(dn):
964 except (OSError, IOError) as err:
965 self.report_error('unable to create directory ' + compat_str(err))
968 if self.params.get('writedescription', False):
969 descfn = filename + '.description'
970 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
971 self.to_screen('[info] Video description is already present')
974 self.to_screen('[info] Writing video description to: ' + descfn)
975 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
976 descfile.write(info_dict['description'])
977 except (KeyError, TypeError):
978 self.report_warning('There\'s no description to write.')
979 except (OSError, IOError):
980 self.report_error('Cannot write description file ' + descfn)
983 if self.params.get('writeannotations', False):
984 annofn = filename + '.annotations.xml'
985 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
986 self.to_screen('[info] Video annotations are already present')
989 self.to_screen('[info] Writing video annotations to: ' + annofn)
990 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
991 annofile.write(info_dict['annotations'])
992 except (KeyError, TypeError):
993 self.report_warning('There are no annotations to write.')
994 except (OSError, IOError):
995 self.report_error('Cannot write annotations file: ' + annofn)
998 subtitles_are_requested = any([self.params.get('writesubtitles', False),
999 self.params.get('writeautomaticsub')])
1001 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1002 # subtitles download errors are already managed as troubles in relevant IE
1003 # that way it will silently go on when used with unsupporting IE
1004 subtitles = info_dict['subtitles']
1005 sub_format = self.params.get('subtitlesformat', 'srt')
1006 for sub_lang in subtitles.keys():
1007 sub = subtitles[sub_lang]
1011 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1012 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1013 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1015 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1016 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1018 except (OSError, IOError):
1019 self.report_error('Cannot write subtitles file ' + sub_filename)
1022 if self.params.get('writeinfojson', False):
1023 infofn = os.path.splitext(filename)[0] + '.info.json'
1024 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1025 self.to_screen('[info] Video description metadata is already present')
1027 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1029 write_json_file(info_dict, infofn)
1030 except (OSError, IOError):
1031 self.report_error('Cannot write metadata to JSON file ' + infofn)
1034 if self.params.get('writethumbnail', False):
1035 if info_dict.get('thumbnail') is not None:
1036 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1037 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1038 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1039 self.to_screen('[%s] %s: Thumbnail is already present' %
1040 (info_dict['extractor'], info_dict['id']))
1042 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1043 (info_dict['extractor'], info_dict['id']))
1045 uf = self.urlopen(info_dict['thumbnail'])
1046 with open(thumb_filename, 'wb') as thumbf:
1047 shutil.copyfileobj(uf, thumbf)
1048 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1049 (info_dict['extractor'], info_dict['id'], thumb_filename))
1050 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1051 self.report_warning('Unable to download thumbnail "%s": %s' %
1052 (info_dict['thumbnail'], compat_str(err)))
1054 if not self.params.get('skip_download', False):
1055 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1060 fd = get_suitable_downloader(info)(self, self.params)
1061 for ph in self._progress_hooks:
1062 fd.add_progress_hook(ph)
1063 if self.params.get('verbose'):
1064 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1065 return fd.download(name, info)
1066 if info_dict.get('requested_formats') is not None:
1069 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1070 if not merger._executable:
1072 self.report_warning('You have requested multiple '
1073 'formats but ffmpeg or avconv are not installed.'
1074 ' The formats won\'t be merged')
1076 postprocessors = [merger]
1077 for f in info_dict['requested_formats']:
1078 new_info = dict(info_dict)
1080 fname = self.prepare_filename(new_info)
1081 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1082 downloaded.append(fname)
1083 partial_success = dl(fname, new_info)
1084 success = success and partial_success
1085 info_dict['__postprocessors'] = postprocessors
1086 info_dict['__files_to_merge'] = downloaded
1088 # Just a single file
1089 success = dl(filename, info_dict)
1090 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1091 self.report_error('unable to download video data: %s' % str(err))
1093 except (OSError, IOError) as err:
1094 raise UnavailableVideoError(err)
1095 except (ContentTooShortError, ) as err:
1096 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1101 self.post_process(filename, info_dict)
1102 except (PostProcessingError) as err:
1103 self.report_error('postprocessing: %s' % str(err))
1106 self.record_download_archive(info_dict)
1108 def download(self, url_list):
1109 """Download a given list of URLs."""
# Each URL is extracted (and, as extract_info's side effect, downloaded);
# the accumulated process return code is returned at the end.
1110 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Guard: with several URLs and a fixed output template every video would
# overwrite the same file, so refuse up front with SameFileError.
# NOTE(review): the middle of this condition (original line 1112) is not
# visible in this view -- presumably it checks that outtmpl contains no
# per-video fields; confirm against the full file.
1111 if (len(url_list) > 1 and
1113 and self.params.get('max_downloads') != 1):
1114 raise SameFileError(outtmpl)
1116 for url in url_list:
1118 # It also downloads the videos
1119 res = self.extract_info(url)
# A single unavailable video is reported but does not abort the rest
# of the list; hitting --max-downloads stops with an informative message.
1120 except UnavailableVideoError:
1121 self.report_error('unable to download video')
1122 except MaxDownloadsReached:
1123 self.to_screen('[info] Maximum number of downloaded files reached.')
# --dump-single-json: emit the whole extraction result as one JSON blob.
1126 if self.params.get('dump_single_json', False):
1127 self.to_stdout(json.dumps(res))
1129 return self._download_retcode
1131 def download_with_info_file(self, info_filename):
# Re-run a download from a previously written .info.json file
# (--load-info). Returns the accumulated process return code.
1132 with io.open(info_filename, 'r', encoding='utf-8') as f:
# NOTE(review): the lines that parse the file into `info` (original
# 1133-1134, presumably json.load plus a try:) are not visible here.
1135 self.process_ie_result(info, download=True)
1136 except DownloadError:
# The saved info dict may have gone stale (expired media URLs); fall
# back to a fresh extraction from the original page URL if we have one.
1137 webpage_url = info.get('webpage_url')
1138 if webpage_url is not None:
1139 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1140 return self.download([webpage_url])
1143 return self._download_retcode
1145 def post_process(self, filename, ie_info):
1146 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated directly;
# the postprocessors receive the downloaded file's path via 'filepath'.
1147 info = dict(ie_info)
1148 info['filepath'] = filename
# Per-download postprocessors (e.g. the FFmpeg merger attached under
# '__postprocessors') run before the globally registered self._pps.
1151 if ie_info.get('__postprocessors') is not None:
1152 pps_chain.extend(ie_info['__postprocessors'])
1153 pps_chain.extend(self._pps)
1154 for pp in pps_chain:
1156 keep_video_wish, new_info = pp.run(info)
# Each PP may vote on whether to keep the original file. A non-None
# wish is taken into account; while no decision has been made yet
# (keep_video is None) the latest wish becomes the decision.
1157 if keep_video_wish is not None:
1159 keep_video = keep_video_wish
1160 elif keep_video is None:
1161 # No clear decision yet, let IE decide
1162 keep_video = keep_video_wish
1163 except PostProcessingError as e:
1164 self.report_error(e.msg)
# Delete the source file only when a PP explicitly asked for it AND the
# user did not request --keep-video. Failure to delete is non-fatal.
1165 if keep_video is False and not self.params.get('keepvideo', False):
1167 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1168 os.remove(encodeFilename(filename))
1169 except (IOError, OSError):
1170 self.report_warning('Unable to remove downloaded video file')
1172 def _make_archive_id(self, info_dict):
# Build the "<extractor> <video id>" key used as one line of the
# --download-archive file, or None if the info dict is too incomplete.
1173 # Future-proof against any change in case
1174 # and backwards compatibility with prior versions
1175 extractor = info_dict.get('extractor_key')
# Playlist entries may carry 'ie_key' instead of 'extractor_key'.
1176 if extractor is None:
1177 if 'id' in info_dict:
1178 extractor = info_dict.get('ie_key') # key in a playlist
1179 if extractor is None:
1180 return None # Incomplete video information
# Lower-cased extractor name keeps archive entries case-stable.
1181 return extractor.lower() + ' ' + info_dict['id']
1183 def in_download_archive(self, info_dict):
# Return True when this video's archive id is already recorded in the
# --download-archive file; False otherwise (including when no archive
# is configured or the id cannot be built).
1184 fn = self.params.get('download_archive')
1188 vid_id = self._make_archive_id(info_dict)
1190 return False # Incomplete video information
# locked_file guards against concurrent youtube-dl instances touching
# the archive at the same time.
1193 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1194 for line in archive_file:
1195 if line.strip() == vid_id:
# NOTE(review): the 'return True' for a match (original line 1196)
# is not visible in this view.
1197 except IOError as ioe:
# A missing archive file simply means nothing was downloaded yet;
# any other I/O error is not swallowed.
1198 if ioe.errno != errno.ENOENT:
1202 def record_download_archive(self, info_dict):
# Append this video's "<extractor> <id>" line to the --download-archive
# file (if one is configured) so it is skipped on future runs.
1203 fn = self.params.get('download_archive')
1206 vid_id = self._make_archive_id(info_dict)
# Append under a file lock so concurrent instances don't interleave.
1208 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1209 archive_file.write(vid_id + '\n')
1212 def format_resolution(format, default='unknown'):
# Render a human-readable resolution for one format dict. Preference
# order: an explicit 'resolution' string, then "WxH", then "Hp" when
# only the height is known, then "?xW" when only the width is known.
1213 if format.get('vcodec') == 'none':
# NOTE(review): the early return for audio-only formats (original
# line 1214) is not visible in this view.
1215 if format.get('resolution') is not None:
1216 return format['resolution']
1217 if format.get('height') is not None:
1218 if format.get('width') is not None:
1219 res = '%sx%s' % (format['width'], format['height'])
1221 res = '%sp' % format['height']
1222 elif format.get('width') is not None:
1223 res = '?x%d' % format['width']
1228 def _format_note(self, fdict):
# Assemble the free-form "note" column for one format dict, as shown by
# list_formats(): unsupported-container flag, extractor note, total/
# video/audio bitrates, container, codecs, fps, sample rate, filesize.
# NOTE(review): several lines of this method (the `res` initialization,
# separator handling and the final return, e.g. originals 1229, 1237-38,
# 1242-43, 1246, 1248, 1254-58, 1261-63, 1269-70, 1273-77) are not
# visible in this view.
1230 if fdict.get('ext') in ['f4f', 'f4m']:
# f4f/f4m (Adobe HDS) downloads are flagged as unsupported.
1231 res += '(unsupported) '
1232 if fdict.get('format_note') is not None:
1233 res += fdict['format_note'] + ' '
# Total bitrate, right-aligned in 4 columns, in kbit/s.
1234 if fdict.get('tbr') is not None:
1235 res += '%4dk ' % fdict['tbr']
1236 if fdict.get('container') is not None:
1239 res += '%s container' % fdict['container']
# Video codec info is only printed for formats that actually have video.
1240 if (fdict.get('vcodec') is not None and
1241 fdict.get('vcodec') != 'none'):
1244 res += fdict['vcodec']
1245 if fdict.get('vbr') is not None:
1247 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1249 if fdict.get('vbr') is not None:
1250 res += '%4dk' % fdict['vbr']
1251 if fdict.get('fps') is not None:
1252 res += ', %sfps' % fdict['fps']
# Audio codec ('none' marks video-only formats) and audio bitrate.
1253 if fdict.get('acodec') is not None:
1256 if fdict['acodec'] == 'none':
1259 res += '%-5s' % fdict['acodec']
1260 elif fdict.get('abr') is not None:
1264 if fdict.get('abr') is not None:
1265 res += '@%3dk' % fdict['abr']
1266 if fdict.get('asr') is not None:
1267 res += ' (%5dHz)' % fdict['asr']
# Exact filesize when known, otherwise an approximation prefixed '~'.
1268 if fdict.get('filesize') is not None:
1271 res += format_bytes(fdict['filesize'])
1272 elif fdict.get('filesize_approx') is not None:
1275 res += '~' + format_bytes(fdict['filesize_approx'])
1278 def list_formats(self, info_dict):
# Print the --list-formats table for one video: a header row plus one
# aligned row per available format.
1279 def line(format, idlen=20):
# One table row: format id, extension, resolution, note -- padded so
# the columns line up; idlen sizes the format-id column.
1280 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1281 format['format_id'],
1283 self.format_resolution(format),
1284 self._format_note(format),
# If there is no 'formats' list, the info dict itself describes the
# single available format.
1287 formats = info_dict.get('formats', [info_dict])
# Make the first column wide enough for the longest id (or the header).
1288 idlen = max(len('format code'),
1289 max(len(f['format_id']) for f in formats))
1290 formats_s = [line(f, idlen) for f in formats]
# Tag the first and last rows -- assumes formats are ordered from worst
# to best quality, as elsewhere in this project.
1291 if len(formats) > 1:
1292 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1293 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
# The header is rendered through the same row formatter so it aligns.
1295 header_line = line({
1296 'format_id': 'format code', 'ext': 'extension',
1297 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1298 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1299 (info_dict['id'], header_line, '\n'.join(formats_s)))
1301 def urlopen(self, req):
1302 """ Start an HTTP download """
# `req` may be either a plain URL string or a urllib Request object.
1304 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1305 # always respected by websites, some tend to give out URLs with non percent-encoded
1306 # non-ASCII characters (see telemb.py, ard.py [#3412])
1307 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1308 # To work around aforementioned issue we will replace request's original URL with
1309 # percent-encoded one
1310 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1311 url = req if req_is_string else req.get_full_url()
1312 url_escaped = escape_url(url)
1314 # Substitute URL if any change after escaping
1315 if url != url_escaped:
# NOTE(review): the branch that substitutes a plain string URL
# (originals 1316-1318) is not visible in this view. For Request
# objects, rebuild around the escaped URL keeping the original data,
# headers and origin information intact.
1319 req = compat_urllib_request.Request(
1320 url_escaped, data=req.data, headers=req.headers,
1321 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
# All requests go through the opener built in _setup_opener(), with the
# socket timeout configured there.
1323 return self._opener.open(req, timeout=self._socket_timeout)
1325 def print_debug_header(self):
# Emit the '[debug] ...' header block: encodings, youtube-dl version,
# git HEAD (best effort), Python/platform, external tool versions and
# the effective proxy map. Does nothing unless --verbose is set.
1326 if not self.params.get('verbose'):
# A native str that is not compat_str indicates a broken unicode
# build of Python 2 (seen on Python 2.6 on SLES11 SP1).
1329 if type('') is not compat_str:
1330 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1331 self.report_warning(
1332 'Your Python is broken! Update to a newer and supported version')
1334 stdout_encoding = getattr(
1335 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1337 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1338 locale.getpreferredencoding(),
1339 sys.getfilesystemencoding(),
1341 self.get_encoding()))
# encoding=None: the encoding line itself must not be re-encoded.
1342 write_string(encoding_str, encoding=None)
1344 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best effort: report the git commit when running from a checkout.
# NOTE(review): the try/except wrapping this git probe (originals
# around 1345 and 1354-1358) is not visible in this view.
1346 sp = subprocess.Popen(
1347 ['git', 'rev-parse', '--short', 'HEAD'],
1348 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1349 cwd=os.path.dirname(os.path.abspath(__file__)))
1350 out, err = sp.communicate()
1351 out = out.decode().strip()
# Only print when the output actually looks like a commit hash.
1352 if re.match('[0-9a-f]+', out):
1353 self._write_string('[debug] Git HEAD: ' + out + '\n')
1359 self._write_string('[debug] Python version %s - %s\n' % (
1360 platform.python_version(), platform_name()))
# Versions of the external tools we may invoke (ffmpeg/avconv family
# via FFmpegPostProcessor, plus rtmpdump).
1362 exe_versions = FFmpegPostProcessor.get_versions()
1363 exe_versions['rtmpdump'] = rtmpdump_version()
1364 exe_str = ', '.join(
1366 for exe, v in sorted(exe_versions.items())
1371 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the effective proxy mapping from every opener handler that
# carries one (ProxyHandler exposes .proxies).
1374 for handler in self._opener.handlers:
1375 if hasattr(handler, 'proxies'):
1376 proxy_map.update(handler.proxies)
1377 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1379 def _setup_opener(self):
# Build the urllib opener used by self.urlopen(): cookie handling,
# proxy configuration, HTTPS certificate checking and the project's
# own YoutubeDLHandler. Stores the result in self._opener.
1380 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds when none was given.
1381 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1383 opts_cookiefile = self.params.get('cookiefile')
1384 opts_proxy = self.params.get('proxy')
# In-memory cookies unless --cookies was passed; an existing readable
# cookie file is loaded into a Mozilla-format jar.
1386 if opts_cookiefile is None:
1387 self.cookiejar = compat_cookiejar.CookieJar()
1389 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1391 if os.access(opts_cookiefile, os.R_OK):
1392 self.cookiejar.load()
1394 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# --proxy overrides the environment proxies; otherwise fall back to
# getproxies(). NOTE(review): the empty-string branch body (original
# ~1398-1399, presumably proxies = {}) is not visible in this view.
1396 if opts_proxy is not None:
1397 if opts_proxy == '':
1400 proxies = {'http': opts_proxy, 'https': opts_proxy}
1402 proxies = compat_urllib_request.getproxies()
1403 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1404 if 'http' in proxies and 'https' not in proxies:
1405 proxies['https'] = proxies['http']
1406 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --debug-printtraffic turns on http.client-level wire logging in both
# the HTTPS handler and the project handler.
1408 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1409 https_handler = make_HTTPS_handler(
1410 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1411 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1412 opener = compat_urllib_request.build_opener(
1413 https_handler, proxy_handler, cookie_processor, ydlh)
1414 # Delete the default user-agent header, which would otherwise apply in
1415 # cases where our custom HTTP handler doesn't come into play
1416 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1417 opener.addheaders = []
1418 self._opener = opener
1420 def encode(self, s):
# Encode text to bytes using the configured output encoding; values
# that are already bytes pass through untouched.
1421 if isinstance(s, bytes):
1422 return s # Already encoded
1425 return s.encode(self.get_encoding())
1426 except UnicodeEncodeError as err:
# Enrich the exception with a user-facing hint before it propagates.
# NOTE(review): the re-raise after this line (original ~1428) is not
# visible in this view.
1427 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1430 def get_encoding(self):
1431 encoding = self.params.get('encoding')
1432 if encoding is None:
1433 encoding = preferredencoding()