2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
50 UnavailableVideoError,
57 from .extractor import get_info_extractor, gen_extractors
58 from .downloader import get_suitable_downloader
59 from .postprocessor import FFmpegMergerPP
60 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    """

    # Exit code reported by download(); reset to 0 in __init__ and set to 1
    # by trouble() when an error was ignored.
    _download_retcode = None
    # Number of files downloaded so far; used by prepare_filename() for the
    # %(autonumber)s output-template field.
    _num_downloads = None
173 def __init__(self, params=None):
174 """Create a FileDownloader object with the given options."""
178 self._ies_instances = {}
180 self._progress_hooks = []
181 self._download_retcode = 0
182 self._num_downloads = 0
183 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
184 self._err_file = sys.stderr
187 if params.get('bidi_workaround', False):
190 master, slave = pty.openpty()
191 width = get_term_width()
195 width_args = ['-w', str(width)]
197 stdin=subprocess.PIPE,
199 stderr=self._err_file)
201 self._output_process = subprocess.Popen(
202 ['bidiv'] + width_args, **sp_kwargs
205 self._output_process = subprocess.Popen(
206 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
207 self._output_channel = os.fdopen(master, 'rb')
208 except OSError as ose:
210 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
214 if (sys.version_info >= (3,) and sys.platform != 'win32' and
215 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
216 and not params['restrictfilenames']):
217 # On Python 3, the Unicode filesystem API will throw errors (#1474)
219 'Assuming --restrict-filenames since file system encoding '
220 'cannot encode all charactes. '
221 'Set the LC_ALL environment variable to fix this.')
222 self.params['restrictfilenames'] = True
224 if '%(stitle)s' in self.params.get('outtmpl', ''):
225 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # Cache the instance by its IE key so get_info_extractor() can
        # hand back the same object later.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: give the IE a reference back to this downloader.
        ie.set_downloader(self)
        # NOTE(review): despite the docstring, nothing is appended to a list
        # here — a `self._ies.append(ie)` step may have been lost; confirm.
235 def get_info_extractor(self, ie_key):
237 Get an instance of an IE with name ie_key, it will try to get one from
238 the _ies list, if there's no instance it will create a new one and add
239 it to the extractor list.
241 ie = self._ies_instances.get(ie_key)
243 ie = get_info_extractor(ie_key)()
244 self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
254 def add_post_processor(self, pp):
255 """Add a PostProcessor object to the end of the chain."""
257 pp.set_downloader(self)
259 def add_progress_hook(self, ph):
260 """Add the progress hook (currently only for the file downloader)"""
261 self._progress_hooks.append(ph)
263 def _bidi_workaround(self, message):
264 if not hasattr(self, '_output_channel'):
267 assert hasattr(self, '_output_process')
268 assert type(message) == type('')
269 line_count = message.count('\n') + 1
270 self._output_process.stdin.write((message + '\n').encode('utf-8'))
271 self._output_process.stdin.flush()
272 res = ''.join(self._output_channel.readline().decode('utf-8')
273 for _ in range(line_count))
274 return res[:-len('\n')]
276 def to_screen(self, message, skip_eol=False):
277 """Print message to stdout if not in quiet mode."""
278 return self.to_stdout(message, skip_eol, check_quiet=True)
280 def to_stdout(self, message, skip_eol=False, check_quiet=False):
281 """Print message to stdout if not in quiet mode."""
282 if self.params.get('logger'):
283 self.params['logger'].debug(message)
284 elif not check_quiet or not self.params.get('quiet', False):
285 message = self._bidi_workaround(message)
286 terminator = ['\n', ''][skip_eol]
287 output = message + terminator
289 write_string(output, self._screen_file)
291 def to_stderr(self, message):
292 """Print message to stderr."""
293 assert type(message) == type('')
294 if self.params.get('logger'):
295 self.params['logger'].error(message)
297 message = self._bidi_workaround(message)
298 output = message + '\n'
299 write_string(output, self._err_file)
301 def to_console_title(self, message):
302 if not self.params.get('consoletitle', False):
304 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
305 # c_wchar_p() might not be necessary if `message` is
306 # already of type unicode()
307 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
308 elif 'TERM' in os.environ:
309 write_string('\033]0;%s\007' % message, self._screen_file)
311 def save_console_title(self):
312 if not self.params.get('consoletitle', False):
314 if 'TERM' in os.environ:
315 # Save the title on stack
316 write_string('\033[22;0t', self._screen_file)
318 def restore_console_title(self):
319 if not self.params.get('consoletitle', False):
321 if 'TERM' in os.environ:
322 # Restore the title from stack
323 write_string('\033[23;0t', self._screen_file)
326 self.save_console_title()
329 def __exit__(self, *args):
330 self.restore_console_title()
332 if self.params.get('cookiefile') is not None:
333 self.cookiejar.save()
335 def trouble(self, message=None, tb=None):
336 """Determine action to take when a download problem appears.
338 Depending on if the downloader has been configured to ignore
339 download errors or not, this method may throw an exception or
340 not when errors are found, after printing the message.
342 tb, if given, is additional traceback information.
344 if message is not None:
345 self.to_stderr(message)
346 if self.params.get('verbose'):
348 if sys.exc_info()[0]: # if .trouble has been called from an except block
350 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
351 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
352 tb += compat_str(traceback.format_exc())
354 tb_data = traceback.format_list(traceback.extract_stack())
355 tb = ''.join(tb_data)
357 if not self.params.get('ignoreerrors', False):
358 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
359 exc_info = sys.exc_info()[1].exc_info
361 exc_info = sys.exc_info()
362 raise DownloadError(message, exc_info)
363 self._download_retcode = 1
365 def report_warning(self, message):
367 Print the message to stderr, it will be prefixed with 'WARNING:'
368 If stderr is a tty file the 'WARNING:' will be colored
370 if self._err_file.isatty() and os.name != 'nt':
371 _msg_header = '\033[0;33mWARNING:\033[0m'
373 _msg_header = 'WARNING:'
374 warning_message = '%s %s' % (_msg_header, message)
375 self.to_stderr(warning_message)
377 def report_error(self, message, tb=None):
379 Do the same as trouble, but prefixes the message with 'ERROR:', colored
380 in red if stderr is a tty file.
382 if self._err_file.isatty() and os.name != 'nt':
383 _msg_header = '\033[0;31mERROR:\033[0m'
385 _msg_header = 'ERROR:'
386 error_message = '%s %s' % (_msg_header, message)
387 self.trouble(error_message, tb)
389 def report_file_already_downloaded(self, file_name):
390 """Report file has already been fully downloaded."""
392 self.to_screen('[download] %s has already been downloaded' % file_name)
393 except UnicodeEncodeError:
394 self.to_screen('[download] The file has already been downloaded')
396 def increment_downloads(self):
397 """Increment the ordinal that assigns a number to each file."""
398 self._num_downloads += 1
400 def prepare_filename(self, info_dict):
401 """Generate the output filename."""
403 template_dict = dict(info_dict)
405 template_dict['epoch'] = int(time.time())
406 autonumber_size = self.params.get('autonumber_size')
407 if autonumber_size is None:
409 autonumber_templ = '%0' + str(autonumber_size) + 'd'
410 template_dict['autonumber'] = autonumber_templ % self._num_downloads
411 if template_dict.get('playlist_index') is not None:
412 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
414 sanitize = lambda k, v: sanitize_filename(
416 restricted=self.params.get('restrictfilenames'),
418 template_dict = dict((k, sanitize(k, v))
419 for k, v in template_dict.items()
421 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
423 tmpl = os.path.expanduser(self.params['outtmpl'])
424 filename = tmpl % template_dict
426 except ValueError as err:
427 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
430 def _match_entry(self, info_dict):
431 """ Returns None iff the file should be downloaded """
433 video_title = info_dict.get('title', info_dict.get('id', 'video'))
434 if 'title' in info_dict:
435 # This can happen when we're just evaluating the playlist
436 title = info_dict['title']
437 matchtitle = self.params.get('matchtitle', False)
439 if not re.search(matchtitle, title, re.IGNORECASE):
440 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
441 rejecttitle = self.params.get('rejecttitle', False)
443 if re.search(rejecttitle, title, re.IGNORECASE):
444 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
445 date = info_dict.get('upload_date', None)
447 dateRange = self.params.get('daterange', DateRange())
448 if date not in dateRange:
449 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
450 view_count = info_dict.get('view_count', None)
451 if view_count is not None:
452 min_views = self.params.get('min_views')
453 if min_views is not None and view_count < min_views:
454 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
455 max_views = self.params.get('max_views')
456 if max_views is not None and view_count > max_views:
457 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
458 age_limit = self.params.get('age_limit')
459 if age_limit is not None:
460 if age_limit < info_dict.get('age_limit', 0):
461 return 'Skipping "' + title + '" because it is age restricted'
462 if self.in_download_archive(info_dict):
463 return '%s has already been recorded in archive' % video_title
467 def add_extra_info(info_dict, extra_info):
468 '''Set the keys from extra_info in info dict if they are missing'''
469 for key, value in extra_info.items():
470 info_dict.setdefault(key, value)
    # NOTE(review): this excerpt appears to be missing several lines
    # (the signature continuation, the loop over the extractors, try/else
    # branches and break/raise statements); statements are kept verbatim.
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
            # A specific extractor was requested: use only that one.
            ies = [self.get_info_extractor(ie_key)]
            if not ie.suitable(url):
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')
                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                        '_type': 'compat_list',
                        'entries': ie_result,
                # Stamp provenance data onto the result.
                self.add_extra_info(ie_result,
                        'extractor': ie.IE_NAME,
                        'webpage_url_basename': url_basename(url),
                        'extractor_key': ie.ie_key(),
                    return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            self.report_error('no suitable InfoExtractor: %s' % url)
    # NOTE(review): this excerpt appears to be missing several lines (dict
    # literal delimiters, else: branches, continue/return statements);
    # statements are kept verbatim.
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        '''
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        '''
        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Merge selected fields of the embedded result over a copy of
            # the embedding one.
            def make_result(embedded_info):
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    # NOTE(review): a trailing `return new_result` appears
                    # to be missing from this helper.
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
                # Lazily paged playlist (e.g. very long playlists).
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
                self.add_extra_info(r,
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            raise Exception('Invalid result type: %s' % result_type)
641 def select_format(self, format_spec, available_formats):
642 if format_spec == 'best' or format_spec is None:
643 return available_formats[-1]
644 elif format_spec == 'worst':
645 return available_formats[0]
647 extensions = ['mp4', 'flv', 'webm', '3gp']
648 if format_spec in extensions:
649 filter_f = lambda f: f['ext'] == format_spec
651 filter_f = lambda f: f['format_id'] == format_spec
652 matches = list(filter(filter_f, available_formats))
    # NOTE(review): this excerpt appears to be missing several lines
    # (if download:, else: branches, raise/return statements); statements
    # are kept verbatim.
    def process_video_result(self, info_dict, download=True):
        """Select which format(s) of a single resolved video to download
        and hand each one to process_info()."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # This extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
                self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        format_limit = self.params.get('format_limit', None)
            # Drop everything above the requested quality ceiling.
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # wich can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format', 'best')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                            'requested_formats': formats_info,
                            'ext': formats_info[0]['ext'],
                        selected_format = None
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
        if not formats_to_download:
            raise ExtractorError('requested format not available',

            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    # NOTE(review): this excerpt appears to be missing many lines
    # (try:/else: branches, return statements, some dict/def lines);
    # statements are kept verbatim.
    def process_info(self, info_dict):
        """Process a single resolved IE result."""

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if not 'format' in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings (for use by wrapper scripts).
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

            # Ensure the target directory exists before writing anything.
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (KeyError, TypeError):
                    self.report_warning('There\'s no description to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + descfn)

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(info_dict, encodeFilename(infofn))
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                        uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                        # Helper running the actual (possibly partial) download.
                        fd = get_suitable_downloader(info)(self, self.params)
                        for ph in self._progress_hooks:
                            fd.add_progress_hook(ph)
                        return fd.download(name, info)
                    if info_dict.get('requested_formats') is not None:
                        merger = FFmpegMergerPP(self)
                        if not merger._get_executable():
                            self.report_warning('You have requested multiple '
                                'formats but ffmpeg or avconv are not installed.'
                                ' The formats won\'t be merged')
                            postprocessors = [merger]
                        # Download every requested format into its own file.
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                        success = dl(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error('unable to download video data: %s' % str(err))
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))

            self.record_download_archive(info_dict)
959 def download(self, url_list):
960 """Download a given list of URLs."""
961 if (len(url_list) > 1 and
962 '%' not in self.params['outtmpl']
963 and self.params.get('max_downloads') != 1):
964 raise SameFileError(self.params['outtmpl'])
968 #It also downloads the videos
969 self.extract_info(url)
970 except UnavailableVideoError:
971 self.report_error('unable to download video')
972 except MaxDownloadsReached:
973 self.to_screen('[info] Maximum number of downloaded files reached.')
976 return self._download_retcode
978 def download_with_info_file(self, info_filename):
979 with io.open(info_filename, 'r', encoding='utf-8') as f:
982 self.process_ie_result(info, download=True)
983 except DownloadError:
984 webpage_url = info.get('webpage_url')
985 if webpage_url is not None:
986 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
987 return self.download([webpage_url])
990 return self._download_retcode
992 def post_process(self, filename, ie_info):
993 """Run all the postprocessors on the given file."""
995 info['filepath'] = filename
998 if ie_info.get('__postprocessors') is not None:
999 pps_chain.extend(ie_info['__postprocessors'])
1000 pps_chain.extend(self._pps)
1001 for pp in pps_chain:
1003 keep_video_wish, new_info = pp.run(info)
1004 if keep_video_wish is not None:
1006 keep_video = keep_video_wish
1007 elif keep_video is None:
1008 # No clear decision yet, let IE decide
1009 keep_video = keep_video_wish
1010 except PostProcessingError as e:
1011 self.report_error(e.msg)
1012 if keep_video is False and not self.params.get('keepvideo', False):
1014 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1015 os.remove(encodeFilename(filename))
1016 except (IOError, OSError):
1017 self.report_warning('Unable to remove downloaded video file')
1019 def _make_archive_id(self, info_dict):
1020 # Future-proof against any change in case
1021 # and backwards compatibility with prior versions
1022 extractor = info_dict.get('extractor_key')
1023 if extractor is None:
1024 if 'id' in info_dict:
1025 extractor = info_dict.get('ie_key') # key in a playlist
1026 if extractor is None:
1027 return None # Incomplete video information
1028 return extractor.lower() + ' ' + info_dict['id']
1030 def in_download_archive(self, info_dict):
1031 fn = self.params.get('download_archive')
1035 vid_id = self._make_archive_id(info_dict)
1037 return False # Incomplete video information
1040 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1041 for line in archive_file:
1042 if line.strip() == vid_id:
1044 except IOError as ioe:
1045 if ioe.errno != errno.ENOENT:
1049 def record_download_archive(self, info_dict):
1050 fn = self.params.get('download_archive')
1053 vid_id = self._make_archive_id(info_dict)
1055 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1056 archive_file.write(vid_id + '\n')
1059 def format_resolution(format, default='unknown'):
1060 if format.get('vcodec') == 'none':
1062 if format.get('resolution') is not None:
1063 return format['resolution']
1064 if format.get('height') is not None:
1065 if format.get('width') is not None:
1066 res = '%sx%s' % (format['width'], format['height'])
1068 res = '%sp' % format['height']
1069 elif format.get('width') is not None:
1070 res = '?x%d' % format['width']
1075 def list_formats(self, info_dict):
1076 def format_note(fdict):
1078 if fdict.get('ext') in ['f4f', 'f4m']:
1079 res += '(unsupported) '
1080 if fdict.get('format_note') is not None:
1081 res += fdict['format_note'] + ' '
1082 if fdict.get('tbr') is not None:
1083 res += '%4dk ' % fdict['tbr']
1084 if (fdict.get('vcodec') is not None and
1085 fdict.get('vcodec') != 'none'):
1086 res += '%-5s' % fdict['vcodec']
1087 if fdict.get('vbr') is not None:
1089 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1091 if fdict.get('vbr') is not None:
1092 res += '%4dk' % fdict['vbr']
1093 if fdict.get('acodec') is not None:
1096 res += '%-5s' % fdict['acodec']
1097 elif fdict.get('abr') is not None:
1101 if fdict.get('abr') is not None:
1102 res += '@%3dk' % fdict['abr']
1103 if fdict.get('filesize') is not None:
1106 res += format_bytes(fdict['filesize'])
1109 def line(format, idlen=20):
1110 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1111 format['format_id'],
1113 self.format_resolution(format),
1114 format_note(format),
1117 formats = info_dict.get('formats', [info_dict])
1118 idlen = max(len('format code'),
1119 max(len(f['format_id']) for f in formats))
1120 formats_s = [line(f, idlen) for f in formats]
1121 if len(formats) > 1:
1122 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1123 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1125 header_line = line({
1126 'format_id': 'format code', 'ext': 'extension',
1127 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1128 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1129 (info_dict['id'], header_line, '\n'.join(formats_s)))
1131 def urlopen(self, req):
1132 """ Start an HTTP download """
1133 return self._opener.open(req)
1135 def print_debug_header(self):
1136 if not self.params.get('verbose'):
1138 write_string('[debug] youtube-dl version ' + __version__ + '\n')
1140 sp = subprocess.Popen(
1141 ['git', 'rev-parse', '--short', 'HEAD'],
1142 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1143 cwd=os.path.dirname(os.path.abspath(__file__)))
1144 out, err = sp.communicate()
1145 out = out.decode().strip()
1146 if re.match('[0-9a-f]+', out):
1147 write_string('[debug] Git HEAD: ' + out + '\n')
1153 write_string('[debug] Python version %s - %s' %
1154 (platform.python_version(), platform_name()) + '\n')
1157 for handler in self._opener.handlers:
1158 if hasattr(handler, 'proxies'):
1159 proxy_map.update(handler.proxies)
1160 write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1162 def _setup_opener(self):
1163 timeout_val = self.params.get('socket_timeout')
1164 timeout = 600 if timeout_val is None else float(timeout_val)
1166 opts_cookiefile = self.params.get('cookiefile')
1167 opts_proxy = self.params.get('proxy')
1169 if opts_cookiefile is None:
1170 self.cookiejar = compat_cookiejar.CookieJar()
1172 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1174 if os.access(opts_cookiefile, os.R_OK):
1175 self.cookiejar.load()
1177 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1179 if opts_proxy is not None:
1180 if opts_proxy == '':
1183 proxies = {'http': opts_proxy, 'https': opts_proxy}
1185 proxies = compat_urllib_request.getproxies()
1186 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1187 if 'http' in proxies and 'https' not in proxies:
1188 proxies['https'] = proxies['http']
1189 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1191 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1192 https_handler = make_HTTPS_handler(
1193 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1194 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1195 opener = compat_urllib_request.build_opener(
1196 https_handler, proxy_handler, cookie_processor, ydlh)
1197 # Delete the default user-agent header, which would otherwise apply in
1198 # cases where our custom HTTP handler doesn't come into play
1199 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1200 opener.addheaders = []
1201 self._opener = opener
1203 # TODO remove this global modification
1204 compat_urllib_request.install_opener(opener)
1205 socket.setdefaulttimeout(timeout)