2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
54 UnavailableVideoError,
61 from .cache import Cache
62 from .extractor import get_info_extractor, gen_extractors
63 from .downloader import get_suitable_downloader
64 from .postprocessor import FFmpegMergerPP
65 from .version import __version__
68 class YoutubeDL(object):
71 YoutubeDL objects are the ones responsible of downloading the
72 actual video file and writing it to disk if the user has requested
73 it, among some other tasks. In most cases there should be one per
74 program. As, given a video URL, the downloader doesn't know how to
75 extract all the needed information, task that InfoExtractors do, it
76 has to pass the URL to one of them.
78 For this, YoutubeDL objects have a method that allows
79 InfoExtractors to be registered in a given order. When it is passed
a URL, the YoutubeDL object hands it to the first InfoExtractor it
81 finds that reports being able to handle it. The InfoExtractor extracts
82 all the information about the video or videos the URL refers to, and
YoutubeDL processes the extracted information, possibly using a File
84 Downloader to download the video.
86 YoutubeDL objects accept a lot of parameters. In order not to saturate
87 the object constructor with arguments, it receives a dictionary of
88 options instead. These options are available through the params
89 attribute for the InfoExtractors to use. The YoutubeDL also
90 registers itself as the downloader in charge for the InfoExtractors
91 that are added to it, so this is a "mutual registration".
95 username: Username for authentication purposes.
96 password: Password for authentication purposes.
videopassword:     Password for accessing a video.
98 usenetrc: Use netrc for authentication instead.
99 verbose: Print additional info to stdout.
100 quiet: Do not print messages to stdout.
101 no_warnings: Do not print out anything for warnings.
102 forceurl: Force printing final URL.
103 forcetitle: Force printing title.
104 forceid: Force printing ID.
105 forcethumbnail: Force printing thumbnail URL.
106 forcedescription: Force printing description.
107 forcefilename: Force printing final filename.
108 forceduration: Force printing duration.
109 forcejson: Force printing info_dict as JSON.
110 dump_single_json: Force printing the info_dict of the whole playlist
111 (or video) as a single JSON line.
112 simulate: Do not download the video files.
113 format: Video format code.
114 format_limit: Highest quality format to try.
115 outtmpl: Template for output names.
116 restrictfilenames: Do not allow "&" and spaces in file names
117 ignoreerrors: Do not stop on download errors.
118 nooverwrites: Prevent overwriting files.
119 playliststart: Playlist item to start at.
120 playlistend: Playlist item to end at.
121 matchtitle: Download only matching titles.
122 rejecttitle: Reject downloads for matching titles.
123 logger: Log messages to a logging.Logger instance.
124 logtostderr: Log messages to stderr instead of stdout.
125 writedescription: Write the video description to a .description file
126 writeinfojson: Write the video description to a .info.json file
127 writeannotations: Write the video annotations to a .annotations.xml file
128 writethumbnail: Write the thumbnail image to a file
129 writesubtitles: Write the video subtitles to a file
130 writeautomaticsub: Write the automatic subtitles to a file
131 allsubtitles: Downloads all the subtitles of the video
132 (requires writesubtitles or writeautomaticsub)
133 listsubtitles: Lists all available subtitles for the video
134 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
135 subtitleslangs: List of languages of the subtitles to download
136 keepvideo: Keep the video file after post-processing
137 daterange: A DateRange object, download only if the upload_date is in the range.
138 skip_download: Skip the actual download of the video file
139 cachedir: Location of the cache files in the filesystem.
140 False to disable filesystem cache.
141 noplaylist: Download single video instead of a playlist if in doubt.
142 age_limit: An integer representing the user's age in years.
143 Unsuitable videos for the given age are skipped.
144 min_views: An integer representing the minimum view count the video
145 must have in order to not be skipped.
146 Videos without view count information are always
147 downloaded. None for no limit.
148 max_views: An integer representing the maximum view count.
149 Videos that are more popular than that are not
151 Videos without view count information are always
152 downloaded. None for no limit.
153 download_archive: File name of a file where all downloads are recorded.
154 Videos already present in the file are not downloaded
156 cookiefile: File name where cookies should be read from and dumped to.
157 nocheckcertificate:Do not verify SSL certificates
158 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
159 At the moment, this is only supported by YouTube.
160 proxy: URL of the proxy server to use
161 socket_timeout: Time to wait for unresponsive hosts, in seconds
162 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
164 debug_printtraffic:Print out sent and received HTTP traffic
165 include_ads: Download ads as well
166 default_search: Prepend this string if an input url is not valid.
167 'auto' for elaborate guessing
168 encoding: Use this encoding instead of the system-specified.
169 extract_flat: Do not resolve URLs, return the immediate result.
170 Pass in 'in_playlist' to only show this behavior for
173 The following parameters are not used by YoutubeDL itself, they are used by
175 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
176 noresizebuffer, retries, continuedl, noprogress, consoletitle
178 The following options are used by the post processors:
179 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
180 otherwise prefer avconv.
181 exec_cmd: Arbitrary command to run after downloading
187 _download_retcode = None
188 _num_downloads = None
191 def __init__(self, params=None):
192 """Create a FileDownloader object with the given options."""
196 self._ies_instances = {}
198 self._progress_hooks = []
199 self._download_retcode = 0
200 self._num_downloads = 0
201 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
202 self._err_file = sys.stderr
204 self.cache = Cache(self)
206 if params.get('bidi_workaround', False):
209 master, slave = pty.openpty()
210 width = get_term_width()
214 width_args = ['-w', str(width)]
216 stdin=subprocess.PIPE,
218 stderr=self._err_file)
220 self._output_process = subprocess.Popen(
221 ['bidiv'] + width_args, **sp_kwargs
224 self._output_process = subprocess.Popen(
225 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
226 self._output_channel = os.fdopen(master, 'rb')
227 except OSError as ose:
229 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
233 if (sys.version_info >= (3,) and sys.platform != 'win32' and
234 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
235 and not params.get('restrictfilenames', False)):
236 # On Python 3, the Unicode filesystem API will throw errors (#1474)
238 'Assuming --restrict-filenames since file system encoding '
239 'cannot encode all characters. '
240 'Set the LC_ALL environment variable to fix this.')
241 self.params['restrictfilenames'] = True
243 if '%(stitle)s' in self.params.get('outtmpl', ''):
244 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
248 def add_info_extractor(self, ie):
249 """Add an InfoExtractor object to the end of the list."""
251 self._ies_instances[ie.ie_key()] = ie
252 ie.set_downloader(self)
254 def get_info_extractor(self, ie_key):
256 Get an instance of an IE with name ie_key, it will try to get one from
257 the _ies list, if there's no instance it will create a new one and add
258 it to the extractor list.
260 ie = self._ies_instances.get(ie_key)
262 ie = get_info_extractor(ie_key)()
263 self.add_info_extractor(ie)
266 def add_default_info_extractors(self):
268 Add the InfoExtractors returned by gen_extractors to the end of the list
270 for ie in gen_extractors():
271 self.add_info_extractor(ie)
273 def add_post_processor(self, pp):
274 """Add a PostProcessor object to the end of the chain."""
276 pp.set_downloader(self)
278 def add_progress_hook(self, ph):
279 """Add the progress hook (currently only for the file downloader)"""
280 self._progress_hooks.append(ph)
282 def _bidi_workaround(self, message):
283 if not hasattr(self, '_output_channel'):
286 assert hasattr(self, '_output_process')
287 assert isinstance(message, compat_str)
288 line_count = message.count('\n') + 1
289 self._output_process.stdin.write((message + '\n').encode('utf-8'))
290 self._output_process.stdin.flush()
291 res = ''.join(self._output_channel.readline().decode('utf-8')
292 for _ in range(line_count))
293 return res[:-len('\n')]
295 def to_screen(self, message, skip_eol=False):
296 """Print message to stdout if not in quiet mode."""
297 return self.to_stdout(message, skip_eol, check_quiet=True)
299 def _write_string(self, s, out=None):
300 write_string(s, out=out, encoding=self.params.get('encoding'))
302 def to_stdout(self, message, skip_eol=False, check_quiet=False):
303 """Print message to stdout if not in quiet mode."""
304 if self.params.get('logger'):
305 self.params['logger'].debug(message)
306 elif not check_quiet or not self.params.get('quiet', False):
307 message = self._bidi_workaround(message)
308 terminator = ['\n', ''][skip_eol]
309 output = message + terminator
311 self._write_string(output, self._screen_file)
313 def to_stderr(self, message):
314 """Print message to stderr."""
315 assert isinstance(message, compat_str)
316 if self.params.get('logger'):
317 self.params['logger'].error(message)
319 message = self._bidi_workaround(message)
320 output = message + '\n'
321 self._write_string(output, self._err_file)
323 def to_console_title(self, message):
324 if not self.params.get('consoletitle', False):
326 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
327 # c_wchar_p() might not be necessary if `message` is
328 # already of type unicode()
329 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
330 elif 'TERM' in os.environ:
331 self._write_string('\033]0;%s\007' % message, self._screen_file)
333 def save_console_title(self):
334 if not self.params.get('consoletitle', False):
336 if 'TERM' in os.environ:
337 # Save the title on stack
338 self._write_string('\033[22;0t', self._screen_file)
340 def restore_console_title(self):
341 if not self.params.get('consoletitle', False):
343 if 'TERM' in os.environ:
344 # Restore the title from stack
345 self._write_string('\033[23;0t', self._screen_file)
348 self.save_console_title()
351 def __exit__(self, *args):
352 self.restore_console_title()
354 if self.params.get('cookiefile') is not None:
355 self.cookiejar.save()
357 def trouble(self, message=None, tb=None):
358 """Determine action to take when a download problem appears.
360 Depending on if the downloader has been configured to ignore
361 download errors or not, this method may throw an exception or
362 not when errors are found, after printing the message.
364 tb, if given, is additional traceback information.
366 if message is not None:
367 self.to_stderr(message)
368 if self.params.get('verbose'):
370 if sys.exc_info()[0]: # if .trouble has been called from an except block
372 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
373 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
374 tb += compat_str(traceback.format_exc())
376 tb_data = traceback.format_list(traceback.extract_stack())
377 tb = ''.join(tb_data)
379 if not self.params.get('ignoreerrors', False):
380 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
381 exc_info = sys.exc_info()[1].exc_info
383 exc_info = sys.exc_info()
384 raise DownloadError(message, exc_info)
385 self._download_retcode = 1
387 def report_warning(self, message):
389 Print the message to stderr, it will be prefixed with 'WARNING:'
390 If stderr is a tty file the 'WARNING:' will be colored
392 if self.params.get('logger') is not None:
393 self.params['logger'].warning(message)
395 if self.params.get('no_warnings'):
397 if self._err_file.isatty() and os.name != 'nt':
398 _msg_header = '\033[0;33mWARNING:\033[0m'
400 _msg_header = 'WARNING:'
401 warning_message = '%s %s' % (_msg_header, message)
402 self.to_stderr(warning_message)
404 def report_error(self, message, tb=None):
406 Do the same as trouble, but prefixes the message with 'ERROR:', colored
407 in red if stderr is a tty file.
409 if self._err_file.isatty() and os.name != 'nt':
410 _msg_header = '\033[0;31mERROR:\033[0m'
412 _msg_header = 'ERROR:'
413 error_message = '%s %s' % (_msg_header, message)
414 self.trouble(error_message, tb)
416 def report_file_already_downloaded(self, file_name):
417 """Report file has already been fully downloaded."""
419 self.to_screen('[download] %s has already been downloaded' % file_name)
420 except UnicodeEncodeError:
421 self.to_screen('[download] The file has already been downloaded')
423 def prepare_filename(self, info_dict):
424 """Generate the output filename."""
426 template_dict = dict(info_dict)
428 template_dict['epoch'] = int(time.time())
429 autonumber_size = self.params.get('autonumber_size')
430 if autonumber_size is None:
432 autonumber_templ = '%0' + str(autonumber_size) + 'd'
433 template_dict['autonumber'] = autonumber_templ % self._num_downloads
434 if template_dict.get('playlist_index') is not None:
435 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
436 if template_dict.get('resolution') is None:
437 if template_dict.get('width') and template_dict.get('height'):
438 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
439 elif template_dict.get('height'):
440 template_dict['resolution'] = '%sp' % template_dict['height']
441 elif template_dict.get('width'):
442 template_dict['resolution'] = '?x%d' % template_dict['width']
444 sanitize = lambda k, v: sanitize_filename(
446 restricted=self.params.get('restrictfilenames'),
448 template_dict = dict((k, sanitize(k, v))
449 for k, v in template_dict.items()
451 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
453 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
454 tmpl = os.path.expanduser(outtmpl)
455 filename = tmpl % template_dict
457 except ValueError as err:
458 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
461 def _match_entry(self, info_dict):
462 """ Returns None iff the file should be downloaded """
464 video_title = info_dict.get('title', info_dict.get('id', 'video'))
465 if 'title' in info_dict:
466 # This can happen when we're just evaluating the playlist
467 title = info_dict['title']
468 matchtitle = self.params.get('matchtitle', False)
470 if not re.search(matchtitle, title, re.IGNORECASE):
471 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
472 rejecttitle = self.params.get('rejecttitle', False)
474 if re.search(rejecttitle, title, re.IGNORECASE):
475 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
476 date = info_dict.get('upload_date', None)
478 dateRange = self.params.get('daterange', DateRange())
479 if date not in dateRange:
480 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
481 view_count = info_dict.get('view_count', None)
482 if view_count is not None:
483 min_views = self.params.get('min_views')
484 if min_views is not None and view_count < min_views:
485 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
486 max_views = self.params.get('max_views')
487 if max_views is not None and view_count > max_views:
488 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
489 age_limit = self.params.get('age_limit')
490 if age_limit is not None:
491 actual_age_limit = info_dict.get('age_limit')
492 if actual_age_limit is None:
494 if age_limit < actual_age_limit:
495 return 'Skipping "' + title + '" because it is age restricted'
496 if self.in_download_archive(info_dict):
497 return '%s has already been recorded in archive' % video_title
501 def add_extra_info(info_dict, extra_info):
502 '''Set the keys from extra_info in info dict if they are missing'''
503 for key, value in extra_info.items():
504 info_dict.setdefault(key, value)
506 def extract_info(self, url, download=True, ie_key=None, extra_info={},
509 Returns a list with a dictionary for each video we find.
510 If 'download', also downloads the videos.
511 extra_info is a dict containing the extra values to add to each result
515 ies = [self.get_info_extractor(ie_key)]
520 if not ie.suitable(url):
524 self.report_warning('The program functionality for this site has been marked as broken, '
525 'and will probably not work.')
528 ie_result = ie.extract(url)
529 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
531 if isinstance(ie_result, list):
532 # Backwards compatibility: old IE result format
534 '_type': 'compat_list',
535 'entries': ie_result,
537 self.add_default_extra_info(ie_result, ie, url)
539 return self.process_ie_result(ie_result, download, extra_info)
542 except ExtractorError as de: # An error we somewhat expected
543 self.report_error(compat_str(de), de.format_traceback())
545 except MaxDownloadsReached:
547 except Exception as e:
548 if self.params.get('ignoreerrors', False):
549 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
554 self.report_error('no suitable InfoExtractor for URL %s' % url)
556 def add_default_extra_info(self, ie_result, ie, url):
557 self.add_extra_info(ie_result, {
558 'extractor': ie.IE_NAME,
560 'webpage_url_basename': url_basename(url),
561 'extractor_key': ie.ie_key(),
564 def process_ie_result(self, ie_result, download=True, extra_info={}):
566 Take the result of the ie(may be modified) and resolve all unresolved
567 references (URLs, playlist items).
569 It will also download the videos if 'download'.
570 Returns the resolved ie_result.
573 result_type = ie_result.get('_type', 'video')
575 if result_type in ('url', 'url_transparent'):
576 extract_flat = self.params.get('extract_flat', False)
577 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
578 extract_flat is True):
579 if self.params.get('forcejson', False):
580 self.to_stdout(json.dumps(ie_result))
583 if result_type == 'video':
584 self.add_extra_info(ie_result, extra_info)
585 return self.process_video_result(ie_result, download=download)
586 elif result_type == 'url':
587 # We have to add extra_info to the results because it may be
588 # contained in a playlist
589 return self.extract_info(ie_result['url'],
591 ie_key=ie_result.get('ie_key'),
592 extra_info=extra_info)
593 elif result_type == 'url_transparent':
594 # Use the information from the embedding page
595 info = self.extract_info(
596 ie_result['url'], ie_key=ie_result.get('ie_key'),
597 extra_info=extra_info, download=False, process=False)
599 def make_result(embedded_info):
600 new_result = ie_result.copy()
601 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
602 'entries', 'ie_key', 'duration',
603 'subtitles', 'annotations', 'format',
604 'thumbnail', 'thumbnails'):
607 if f in embedded_info:
608 new_result[f] = embedded_info[f]
610 new_result = make_result(info)
612 assert new_result.get('_type') != 'url_transparent'
613 if new_result.get('_type') == 'compat_list':
614 new_result['entries'] = [
615 make_result(e) for e in new_result['entries']]
617 return self.process_ie_result(
618 new_result, download=download, extra_info=extra_info)
619 elif result_type == 'playlist':
620 # We process each entry in the playlist
621 playlist = ie_result.get('title', None) or ie_result.get('id', None)
622 self.to_screen('[download] Downloading playlist: %s' % playlist)
624 playlist_results = []
626 playliststart = self.params.get('playliststart', 1) - 1
627 playlistend = self.params.get('playlistend', None)
628 # For backwards compatibility, interpret -1 as whole list
629 if playlistend == -1:
632 if isinstance(ie_result['entries'], list):
633 n_all_entries = len(ie_result['entries'])
634 entries = ie_result['entries'][playliststart:playlistend]
635 n_entries = len(entries)
637 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
638 (ie_result['extractor'], playlist, n_all_entries, n_entries))
640 assert isinstance(ie_result['entries'], PagedList)
641 entries = ie_result['entries'].getslice(
642 playliststart, playlistend)
643 n_entries = len(entries)
645 "[%s] playlist %s: Downloading %d videos" %
646 (ie_result['extractor'], playlist, n_entries))
648 for i, entry in enumerate(entries, 1):
649 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
651 'n_entries': n_entries,
652 'playlist': playlist,
653 'playlist_index': i + playliststart,
654 'extractor': ie_result['extractor'],
655 'webpage_url': ie_result['webpage_url'],
656 'webpage_url_basename': url_basename(ie_result['webpage_url']),
657 'extractor_key': ie_result['extractor_key'],
660 reason = self._match_entry(entry)
661 if reason is not None:
662 self.to_screen('[download] ' + reason)
665 entry_result = self.process_ie_result(entry,
668 playlist_results.append(entry_result)
669 ie_result['entries'] = playlist_results
671 elif result_type == 'compat_list':
673 self.add_extra_info(r,
675 'extractor': ie_result['extractor'],
676 'webpage_url': ie_result['webpage_url'],
677 'webpage_url_basename': url_basename(ie_result['webpage_url']),
678 'extractor_key': ie_result['extractor_key'],
681 ie_result['entries'] = [
682 self.process_ie_result(_fixup(r), download, extra_info)
683 for r in ie_result['entries']
687 raise Exception('Invalid result type: %s' % result_type)
689 def select_format(self, format_spec, available_formats):
690 if format_spec == 'best' or format_spec is None:
691 return available_formats[-1]
692 elif format_spec == 'worst':
693 return available_formats[0]
694 elif format_spec == 'bestaudio':
696 f for f in available_formats
697 if f.get('vcodec') == 'none']
699 return audio_formats[-1]
700 elif format_spec == 'worstaudio':
702 f for f in available_formats
703 if f.get('vcodec') == 'none']
705 return audio_formats[0]
706 elif format_spec == 'bestvideo':
708 f for f in available_formats
709 if f.get('acodec') == 'none']
711 return video_formats[-1]
712 elif format_spec == 'worstvideo':
714 f for f in available_formats
715 if f.get('acodec') == 'none']
717 return video_formats[0]
719 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
720 if format_spec in extensions:
721 filter_f = lambda f: f['ext'] == format_spec
723 filter_f = lambda f: f['format_id'] == format_spec
724 matches = list(filter(filter_f, available_formats))
729 def process_video_result(self, info_dict, download=True):
730 assert info_dict.get('_type', 'video') == 'video'
732 if 'id' not in info_dict:
733 raise ExtractorError('Missing "id" field in extractor result')
734 if 'title' not in info_dict:
735 raise ExtractorError('Missing "title" field in extractor result')
737 if 'playlist' not in info_dict:
738 # It isn't part of a playlist
739 info_dict['playlist'] = None
740 info_dict['playlist_index'] = None
742 thumbnails = info_dict.get('thumbnails')
744 thumbnails.sort(key=lambda t: (
745 t.get('width'), t.get('height'), t.get('url')))
747 if 'width' in t and 'height' in t:
748 t['resolution'] = '%dx%d' % (t['width'], t['height'])
750 if thumbnails and 'thumbnail' not in info_dict:
751 info_dict['thumbnail'] = thumbnails[-1]['url']
753 if 'display_id' not in info_dict and 'id' in info_dict:
754 info_dict['display_id'] = info_dict['id']
756 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
757 upload_date = datetime.datetime.utcfromtimestamp(
758 info_dict['timestamp'])
759 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
761 # This extractors handle format selection themselves
762 if info_dict['extractor'] in ['Youku']:
764 self.process_info(info_dict)
767 # We now pick which formats have to be downloaded
768 if info_dict.get('formats') is None:
769 # There's only one format available
770 formats = [info_dict]
772 formats = info_dict['formats']
775 raise ExtractorError('No video formats found!')
777 # We check that all the formats have the format and format_id fields
778 for i, format in enumerate(formats):
779 if 'url' not in format:
780 raise ExtractorError('Missing "url" key in result (index %d)' % i)
782 if format.get('format_id') is None:
783 format['format_id'] = compat_str(i)
784 if format.get('format') is None:
785 format['format'] = '{id} - {res}{note}'.format(
786 id=format['format_id'],
787 res=self.format_resolution(format),
788 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
790 # Automatically determine file extension if missing
791 if 'ext' not in format:
792 format['ext'] = determine_ext(format['url']).lower()
794 format_limit = self.params.get('format_limit', None)
796 formats = list(takewhile_inclusive(
797 lambda f: f['format_id'] != format_limit, formats
800 # TODO Central sorting goes here
802 if formats[0] is not info_dict:
803 # only set the 'formats' fields if the original info_dict list them
804 # otherwise we end up with a circular reference, the first (and unique)
805 # element in the 'formats' field in info_dict is info_dict itself,
806 # wich can't be exported to json
807 info_dict['formats'] = formats
808 if self.params.get('listformats', None):
809 self.list_formats(info_dict)
812 req_format = self.params.get('format')
813 if req_format is None:
815 formats_to_download = []
816 # The -1 is for supporting YoutubeIE
817 if req_format in ('-1', 'all'):
818 formats_to_download = formats
820 for rfstr in req_format.split(','):
821 # We can accept formats requested in the format: 34/5/best, we pick
822 # the first that is available, starting from left
823 req_formats = rfstr.split('/')
824 for rf in req_formats:
825 if re.match(r'.+?\+.+?', rf) is not None:
826 # Two formats have been requested like '137+139'
827 format_1, format_2 = rf.split('+')
828 formats_info = (self.select_format(format_1, formats),
829 self.select_format(format_2, formats))
830 if all(formats_info):
832 'requested_formats': formats_info,
834 'ext': formats_info[0]['ext'],
837 selected_format = None
839 selected_format = self.select_format(rf, formats)
840 if selected_format is not None:
841 formats_to_download.append(selected_format)
843 if not formats_to_download:
844 raise ExtractorError('requested format not available',
848 if len(formats_to_download) > 1:
849 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
850 for format in formats_to_download:
851 new_info = dict(info_dict)
852 new_info.update(format)
853 self.process_info(new_info)
854 # We update the info dict with the best quality format (backwards compatibility)
855 info_dict.update(formats_to_download[-1])
858 def process_info(self, info_dict):
859 """Process a single resolved IE result."""
861 assert info_dict.get('_type', 'video') == 'video'
863 max_downloads = self.params.get('max_downloads')
864 if max_downloads is not None:
865 if self._num_downloads >= int(max_downloads):
866 raise MaxDownloadsReached()
868 info_dict['fulltitle'] = info_dict['title']
869 if len(info_dict['title']) > 200:
870 info_dict['title'] = info_dict['title'][:197] + '...'
872 # Keep for backwards compatibility
873 info_dict['stitle'] = info_dict['title']
875 if 'format' not in info_dict:
876 info_dict['format'] = info_dict['ext']
878 reason = self._match_entry(info_dict)
879 if reason is not None:
880 self.to_screen('[download] ' + reason)
883 self._num_downloads += 1
885 filename = self.prepare_filename(info_dict)
888 if self.params.get('forcetitle', False):
889 self.to_stdout(info_dict['fulltitle'])
890 if self.params.get('forceid', False):
891 self.to_stdout(info_dict['id'])
892 if self.params.get('forceurl', False):
893 # For RTMP URLs, also include the playpath
894 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
895 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
896 self.to_stdout(info_dict['thumbnail'])
897 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
898 self.to_stdout(info_dict['description'])
899 if self.params.get('forcefilename', False) and filename is not None:
900 self.to_stdout(filename)
901 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
902 self.to_stdout(formatSeconds(info_dict['duration']))
903 if self.params.get('forceformat', False):
904 self.to_stdout(info_dict['format'])
905 if self.params.get('forcejson', False):
906 info_dict['_filename'] = filename
907 self.to_stdout(json.dumps(info_dict))
908 if self.params.get('dump_single_json', False):
909 info_dict['_filename'] = filename
911 # Do nothing else if in simulate mode
912 if self.params.get('simulate', False):
919 dn = os.path.dirname(encodeFilename(filename))
920 if dn and not os.path.exists(dn):
922 except (OSError, IOError) as err:
923 self.report_error('unable to create directory ' + compat_str(err))
926 if self.params.get('writedescription', False):
927 descfn = filename + '.description'
928 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
929 self.to_screen('[info] Video description is already present')
932 self.to_screen('[info] Writing video description to: ' + descfn)
933 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
934 descfile.write(info_dict['description'])
935 except (KeyError, TypeError):
936 self.report_warning('There\'s no description to write.')
937 except (OSError, IOError):
938 self.report_error('Cannot write description file ' + descfn)
941 if self.params.get('writeannotations', False):
942 annofn = filename + '.annotations.xml'
943 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
944 self.to_screen('[info] Video annotations are already present')
947 self.to_screen('[info] Writing video annotations to: ' + annofn)
948 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
949 annofile.write(info_dict['annotations'])
950 except (KeyError, TypeError):
951 self.report_warning('There are no annotations to write.')
952 except (OSError, IOError):
953 self.report_error('Cannot write annotations file: ' + annofn)
956 subtitles_are_requested = any([self.params.get('writesubtitles', False),
957 self.params.get('writeautomaticsub')])
959 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
960 # subtitles download errors are already managed as troubles in relevant IE
961 # that way it will silently go on when used with unsupporting IE
962 subtitles = info_dict['subtitles']
963 sub_format = self.params.get('subtitlesformat', 'srt')
964 for sub_lang in subtitles.keys():
965 sub = subtitles[sub_lang]
969 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
970 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
971 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
973 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
974 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
976 except (OSError, IOError):
977 self.report_error('Cannot write subtitles file ' + sub_filename)
980 if self.params.get('writeinfojson', False):
981 infofn = os.path.splitext(filename)[0] + '.info.json'
982 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
983 self.to_screen('[info] Video description metadata is already present')
985 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
987 write_json_file(info_dict, encodeFilename(infofn))
988 except (OSError, IOError):
989 self.report_error('Cannot write metadata to JSON file ' + infofn)
992 if self.params.get('writethumbnail', False):
993 if info_dict.get('thumbnail') is not None:
994 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
995 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
996 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
997 self.to_screen('[%s] %s: Thumbnail is already present' %
998 (info_dict['extractor'], info_dict['id']))
1000 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1001 (info_dict['extractor'], info_dict['id']))
1003 uf = self.urlopen(info_dict['thumbnail'])
1004 with open(thumb_filename, 'wb') as thumbf:
1005 shutil.copyfileobj(uf, thumbf)
1006 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1007 (info_dict['extractor'], info_dict['id'], thumb_filename))
1008 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1009 self.report_warning('Unable to download thumbnail "%s": %s' %
1010 (info_dict['thumbnail'], compat_str(err)))
1012 if not self.params.get('skip_download', False):
1013 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1018 fd = get_suitable_downloader(info)(self, self.params)
1019 for ph in self._progress_hooks:
1020 fd.add_progress_hook(ph)
1021 if self.params.get('verbose'):
1022 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1023 return fd.download(name, info)
1024 if info_dict.get('requested_formats') is not None:
1027 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1028 if not merger._get_executable():
1030 self.report_warning('You have requested multiple '
1031 'formats but ffmpeg or avconv are not installed.'
1032 ' The formats won\'t be merged')
1034 postprocessors = [merger]
1035 for f in info_dict['requested_formats']:
1036 new_info = dict(info_dict)
1038 fname = self.prepare_filename(new_info)
1039 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1040 downloaded.append(fname)
1041 partial_success = dl(fname, new_info)
1042 success = success and partial_success
1043 info_dict['__postprocessors'] = postprocessors
1044 info_dict['__files_to_merge'] = downloaded
1046 # Just a single file
1047 success = dl(filename, info_dict)
1048 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1049 self.report_error('unable to download video data: %s' % str(err))
1051 except (OSError, IOError) as err:
1052 raise UnavailableVideoError(err)
1053 except (ContentTooShortError, ) as err:
1054 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1059 self.post_process(filename, info_dict)
1060 except (PostProcessingError) as err:
1061 self.report_error('postprocessing: %s' % str(err))
1064 self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Returns self._download_retcode, the accumulated process return
        code (0 when every URL was handled without a reported error).
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        # Several URLs with one literal output name would make each download
        # overwrite the previous one, hence SameFileError below.
        # NOTE(review): this chunk elides the middle of the condition
        # (presumably a check that outtmpl contains no template fields) —
        # confirm against the full file.
        if (len(url_list) > 1 and
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            # NOTE(review): the try: introducing the except clauses below
            # is elided from this chunk.
            # extract_info() also performs the actual download.
            res = self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
            # With --dump-single-json, emit the whole info dict on stdout.
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Process/download a video from a previously written .info.json.

        NOTE(review): this chunk elides the json load that binds `info`
        and the try: the except below belongs to — confirm against the
        full file.
        """
        with io.open(info_filename, 'r', encoding='utf-8') as f:
        self.process_ie_result(info, download=True)
        except DownloadError:
            # The saved info may be stale (e.g. expired media URLs);
            # retry from the original webpage URL when it is available.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Each postprocessor's run() returns (keep_video_wish, new_info);
        the wishes are combined to decide whether the original file is
        deleted afterwards.
        NOTE(review): this chunk elides the initialisation of pps_chain
        and keep_video, and the try: statements the except clauses below
        belong to — confirm against the full file.
        """
        # Work on a copy so the caller's info dict is not mutated.
        info = dict(ie_info)
        info['filepath'] = filename
        # Per-video postprocessors (e.g. the format merger) run before the
        # globally registered ones (self._pps).
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        # Remove the source file unless a PP or --keep-video asked to keep it.
        if keep_video is False and not self.params.get('keepvideo', False):
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1130 def _make_archive_id(self, info_dict):
1131 # Future-proof against any change in case
1132 # and backwards compatibility with prior versions
1133 extractor = info_dict.get('extractor_key')
1134 if extractor is None:
1135 if 'id' in info_dict:
1136 extractor = info_dict.get('ie_key') # key in a playlist
1137 if extractor is None:
1138 return None # Incomplete video information
1139 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return whether this video's archive id is already recorded in
        the --download-archive file.

        NOTE(review): this chunk elides the guards for a missing archive
        filename / archive id, the try: for the IOError handler, and the
        `return True` / re-raise / trailing `return False` lines —
        confirm against the full file.
        """
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        return False  # Incomplete video information
        # locked_file guards against concurrently running youtube-dl
        # processes appending to the same archive.
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet.
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file.

        NOTE(review): this chunk elides the guard lines (presumably a
        no-op when no archive file is configured, and a sanity check on
        the id) — confirm against the full file.
        """
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        # 'a' + locked_file: safe append even with concurrent processes.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        """Return a human-readable resolution string for a format dict.

        Prefers an explicit 'resolution' value, then "WxH", then "Hp" or
        "?xW" depending on which of height/width is known.
        NOTE(review): this chunk elides the audio-only / default return
        lines, an else: between the WxH and Hp branches, and the final
        return of res — confirm against the full file. The decorator
        (presumably @staticmethod) is also not visible here.
        """
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build the free-text 'note' column for --list-formats.

        Concatenates whatever metadata the format dict carries: format
        note, total bitrate, container, video/audio codecs and bitrates,
        sample rate, and (approximate) filesize.
        NOTE(review): this chunk elides the initialisation of res, the
        separator / else: glue lines between the fields, and the final
        return — confirm against the full file.
        """
        if fdict.get('ext') in ['f4f', 'f4m']:
            # f4f/f4m (Adobe HDS) downloads are flagged as unsupported.
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks an estimated rather than exact size.
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print the --list-formats table for a single video.

        NOTE(review): this chunk elides part of the row builder (the
        'ext' column and the closing parentheses of line()) — confirm
        against the full file.
        """
        def line(format, idlen=20):
            # One fixed-width table row: id, extension, resolution, note.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                self._format_note(format),

        # A single-format video has no 'formats' list; fall back to the
        # info dict itself as the only row.
        formats = info_dict.get('formats', [info_dict])
        # Widen the id column to fit the longest format id.
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        # Formats are ordered worst-to-best; annotate the two extremes.
        if len(formats) > 1:
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
    def urlopen(self, req):
        """ Start an HTTP download.

        `req` may be either a plain URL string or a urllib Request
        object; the configured opener and socket timeout are applied.
        """

        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            # NOTE(review): this chunk elides the string-URL branch (and
            # the else: this Request rebuild presumably sits under) —
            # confirm against the full file.
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write the [debug] header lines shown with --verbose.

        Reports encodings, youtube-dl version, git revision (best
        effort), Python/platform versions and the effective proxy map.
        NOTE(review): this chunk elides the early return when not
        verbose, the assignment binding encoding_str, the try:/except
        around the git subprocess, and the proxy_map initialisation —
        confirm against the full file.
        """
        if not self.params.get('verbose'):
        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                sys.stdout.encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: report the git revision when run from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s' %
                           (platform.python_version(), platform_name()) + '\n')

        # Collect the effective proxies from every handler exposing any.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        """Build self._opener: cookie handling, proxies, HTTPS handler.

        NOTE(review): this chunk elides several lines — the else:
        branches for the cookie-file and proxy settings, the filename
        argument to MozillaCookieJar, and the cookiejar argument to
        HTTPCookieProcessor — confirm against the full file.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # Without --cookies, keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # Only load the cookie file when it exists and is readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()
        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            # --proxy "" presumably disables proxying — elided branch.
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # No --proxy: fall back to the environment (http_proxy etc.).
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        # --print-traffic turns on urllib's HTTP debug output.
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text `s` to bytes using the configured output encoding.

        Byte strings pass through unchanged.
        NOTE(review): this chunk elides the try: the except below belongs
        to, and the re-raise after the reason is annotated — confirm
        against the full file.
        """
        if isinstance(s, bytes):
            return s  # Already encoded

        return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1373 def get_encoding(self):
1374 encoding = self.params.get('encoding')
1375 if encoding is None:
1376 encoding = preferredencoding()