2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
49 UnavailableVideoError,
56 from .extractor import get_info_extractor, gen_extractors
57 from .downloader import get_suitable_downloader
58 from .postprocessor import FFmpegMergerPP
59 from .version import __version__
62 class YoutubeDL(object):
65 YoutubeDL objects are the ones responsible of downloading the
66 actual video file and writing it to disk if the user has requested
67 it, among some other tasks. In most cases there should be one per
68 program. As, given a video URL, the downloader doesn't know how to
69 extract all the needed information, task that InfoExtractors do, it
70 has to pass the URL to one of them.
72 For this, YoutubeDL objects have a method that allows
73 InfoExtractors to be registered in a given order. When it is passed
74 a URL, the YoutubeDL object handles it to the first InfoExtractor it
75 finds that reports being able to handle it. The InfoExtractor extracts
76 all the information about the video or videos the URL refers to, and
77 YoutubeDL processes the extracted information, possibly using a File
78 Downloader to download the video.
80 YoutubeDL objects accept a lot of parameters. In order not to saturate
81 the object constructor with arguments, it receives a dictionary of
82 options instead. These options are available through the params
83 attribute for the InfoExtractors to use. The YoutubeDL also
84 registers itself as the downloader in charge for the InfoExtractors
85 that are added to it, so this is a "mutual registration".
89 username: Username for authentication purposes.
90 password: Password for authentication purposes.
91 videopassword: Password for accessing a video.
92 usenetrc: Use netrc for authentication instead.
93 verbose: Print additional info to stdout.
94 quiet: Do not print messages to stdout.
95 forceurl: Force printing final URL.
96 forcetitle: Force printing title.
97 forceid: Force printing ID.
98 forcethumbnail: Force printing thumbnail URL.
99 forcedescription: Force printing description.
100 forcefilename: Force printing final filename.
101 forceduration: Force printing duration.
102 forcejson: Force printing info_dict as JSON.
103 simulate: Do not download the video files.
104 format: Video format code.
105 format_limit: Highest quality format to try.
106 outtmpl: Template for output names.
107 restrictfilenames: Do not allow "&" and spaces in file names
108 ignoreerrors: Do not stop on download errors.
109 nooverwrites: Prevent overwriting files.
110 playliststart: Playlist item to start at.
111 playlistend: Playlist item to end at.
112 matchtitle: Download only matching titles.
113 rejecttitle: Reject downloads for matching titles.
114 logger: Log messages to a logging.Logger instance.
115 logtostderr: Log messages to stderr instead of stdout.
116 writedescription: Write the video description to a .description file
117 writeinfojson: Write the video description to a .info.json file
118 writeannotations: Write the video annotations to a .annotations.xml file
119 writethumbnail: Write the thumbnail image to a file
120 writesubtitles: Write the video subtitles to a file
121 writeautomaticsub: Write the automatic subtitles to a file
122 allsubtitles: Downloads all the subtitles of the video
123 (requires writesubtitles or writeautomaticsub)
124 listsubtitles: Lists all available subtitles for the video
125 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
126 subtitleslangs: List of languages of the subtitles to download
127 keepvideo: Keep the video file after post-processing
128 daterange: A DateRange object, download only if the upload_date is in the range.
129 skip_download: Skip the actual download of the video file
130 cachedir: Location of the cache files in the filesystem.
131 None to disable filesystem cache.
132 noplaylist: Download single video instead of a playlist if in doubt.
133 age_limit: An integer representing the user's age in years.
134 Unsuitable videos for the given age are skipped.
135 min_views: An integer representing the minimum view count the video
136 must have in order to not be skipped.
137 Videos without view count information are always
138 downloaded. None for no limit.
139 max_views: An integer representing the maximum view count.
140 Videos that are more popular than that are not
142 Videos without view count information are always
143 downloaded. None for no limit.
144 download_archive: File name of a file where all downloads are recorded.
145 Videos already present in the file are not downloaded
147 cookiefile: File name where cookies should be read from and dumped to.
148 nocheckcertificate:Do not verify SSL certificates
149 proxy: URL of the proxy server to use
150 socket_timeout: Time to wait for unresponsive hosts, in seconds
151 bidi_workaround: Work around buggy terminals without bidirectional text
152 support, using fribidi
153 debug_printtraffic:Print out sent and received HTTP traffic
154 include_ads: Download ads as well
155 default_search: Prepend this string if an input url is not valid.
156 'auto' for elaborate guessing
158 The following parameters are not used by YoutubeDL itself, they are used by
160 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
161 noresizebuffer, retries, continuedl, noprogress, consoletitle
163 The following options are used by the post processors:
164 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
165 otherwise prefer avconv.
171 _download_retcode = None
172 _num_downloads = None
175 def __init__(self, params=None):
176 """Create a FileDownloader object with the given options."""
180 self._ies_instances = {}
182 self._progress_hooks = []
183 self._download_retcode = 0
184 self._num_downloads = 0
185 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
186 self._err_file = sys.stderr
189 if params.get('bidi_workaround', False):
192 master, slave = pty.openpty()
193 width = get_term_width()
197 width_args = ['-w', str(width)]
199 stdin=subprocess.PIPE,
201 stderr=self._err_file)
203 self._output_process = subprocess.Popen(
204 ['bidiv'] + width_args, **sp_kwargs
207 self._output_process = subprocess.Popen(
208 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
209 self._output_channel = os.fdopen(master, 'rb')
210 except OSError as ose:
212 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
216 if (sys.version_info >= (3,) and sys.platform != 'win32' and
217 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
218 and not params['restrictfilenames']):
219 # On Python 3, the Unicode filesystem API will throw errors (#1474)
221 'Assuming --restrict-filenames since file system encoding '
222 'cannot encode all charactes. '
223 'Set the LC_ALL environment variable to fix this.')
224 self.params['restrictfilenames'] = True
226 if '%(stitle)s' in self.params.get('outtmpl', ''):
227 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
231 def add_info_extractor(self, ie):
232 """Add an InfoExtractor object to the end of the list."""
234 self._ies_instances[ie.ie_key()] = ie
235 ie.set_downloader(self)
237 def get_info_extractor(self, ie_key):
239 Get an instance of an IE with name ie_key, it will try to get one from
240 the _ies list, if there's no instance it will create a new one and add
241 it to the extractor list.
243 ie = self._ies_instances.get(ie_key)
245 ie = get_info_extractor(ie_key)()
246 self.add_info_extractor(ie)
249 def add_default_info_extractors(self):
251 Add the InfoExtractors returned by gen_extractors to the end of the list
253 for ie in gen_extractors():
254 self.add_info_extractor(ie)
256 def add_post_processor(self, pp):
257 """Add a PostProcessor object to the end of the chain."""
259 pp.set_downloader(self)
261 def add_progress_hook(self, ph):
262 """Add the progress hook (currently only for the file downloader)"""
263 self._progress_hooks.append(ph)
265 def _bidi_workaround(self, message):
266 if not hasattr(self, '_output_channel'):
269 assert hasattr(self, '_output_process')
270 assert type(message) == type('')
271 line_count = message.count('\n') + 1
272 self._output_process.stdin.write((message + '\n').encode('utf-8'))
273 self._output_process.stdin.flush()
274 res = ''.join(self._output_channel.readline().decode('utf-8')
275 for _ in range(line_count))
276 return res[:-len('\n')]
278 def to_screen(self, message, skip_eol=False):
279 """Print message to stdout if not in quiet mode."""
280 return self.to_stdout(message, skip_eol, check_quiet=True)
282 def to_stdout(self, message, skip_eol=False, check_quiet=False):
283 """Print message to stdout if not in quiet mode."""
284 if self.params.get('logger'):
285 self.params['logger'].debug(message)
286 elif not check_quiet or not self.params.get('quiet', False):
287 message = self._bidi_workaround(message)
288 terminator = ['\n', ''][skip_eol]
289 output = message + terminator
291 write_string(output, self._screen_file)
293 def to_stderr(self, message):
294 """Print message to stderr."""
295 assert type(message) == type('')
296 if self.params.get('logger'):
297 self.params['logger'].error(message)
299 message = self._bidi_workaround(message)
300 output = message + '\n'
301 write_string(output, self._err_file)
303 def to_console_title(self, message):
304 if not self.params.get('consoletitle', False):
306 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
307 # c_wchar_p() might not be necessary if `message` is
308 # already of type unicode()
309 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
310 elif 'TERM' in os.environ:
311 write_string('\033]0;%s\007' % message, self._screen_file)
313 def save_console_title(self):
314 if not self.params.get('consoletitle', False):
316 if 'TERM' in os.environ:
317 # Save the title on stack
318 write_string('\033[22;0t', self._screen_file)
320 def restore_console_title(self):
321 if not self.params.get('consoletitle', False):
323 if 'TERM' in os.environ:
324 # Restore the title from stack
325 write_string('\033[23;0t', self._screen_file)
328 self.save_console_title()
331 def __exit__(self, *args):
332 self.restore_console_title()
334 if self.params.get('cookiefile') is not None:
335 self.cookiejar.save()
337 def trouble(self, message=None, tb=None):
338 """Determine action to take when a download problem appears.
340 Depending on if the downloader has been configured to ignore
341 download errors or not, this method may throw an exception or
342 not when errors are found, after printing the message.
344 tb, if given, is additional traceback information.
346 if message is not None:
347 self.to_stderr(message)
348 if self.params.get('verbose'):
350 if sys.exc_info()[0]: # if .trouble has been called from an except block
352 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
353 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
354 tb += compat_str(traceback.format_exc())
356 tb_data = traceback.format_list(traceback.extract_stack())
357 tb = ''.join(tb_data)
359 if not self.params.get('ignoreerrors', False):
360 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
361 exc_info = sys.exc_info()[1].exc_info
363 exc_info = sys.exc_info()
364 raise DownloadError(message, exc_info)
365 self._download_retcode = 1
367 def report_warning(self, message):
369 Print the message to stderr, it will be prefixed with 'WARNING:'
370 If stderr is a tty file the 'WARNING:' will be colored
372 if self._err_file.isatty() and os.name != 'nt':
373 _msg_header = '\033[0;33mWARNING:\033[0m'
375 _msg_header = 'WARNING:'
376 warning_message = '%s %s' % (_msg_header, message)
377 self.to_stderr(warning_message)
379 def report_error(self, message, tb=None):
381 Do the same as trouble, but prefixes the message with 'ERROR:', colored
382 in red if stderr is a tty file.
384 if self._err_file.isatty() and os.name != 'nt':
385 _msg_header = '\033[0;31mERROR:\033[0m'
387 _msg_header = 'ERROR:'
388 error_message = '%s %s' % (_msg_header, message)
389 self.trouble(error_message, tb)
391 def report_file_already_downloaded(self, file_name):
392 """Report file has already been fully downloaded."""
394 self.to_screen('[download] %s has already been downloaded' % file_name)
395 except UnicodeEncodeError:
396 self.to_screen('[download] The file has already been downloaded')
398 def increment_downloads(self):
399 """Increment the ordinal that assigns a number to each file."""
400 self._num_downloads += 1
402 def prepare_filename(self, info_dict):
403 """Generate the output filename."""
405 template_dict = dict(info_dict)
407 template_dict['epoch'] = int(time.time())
408 autonumber_size = self.params.get('autonumber_size')
409 if autonumber_size is None:
411 autonumber_templ = '%0' + str(autonumber_size) + 'd'
412 template_dict['autonumber'] = autonumber_templ % self._num_downloads
413 if template_dict.get('playlist_index') is not None:
414 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
416 sanitize = lambda k, v: sanitize_filename(
418 restricted=self.params.get('restrictfilenames'),
420 template_dict = dict((k, sanitize(k, v))
421 for k, v in template_dict.items()
423 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
425 tmpl = os.path.expanduser(self.params['outtmpl'])
426 filename = tmpl % template_dict
428 except ValueError as err:
429 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
432 def _match_entry(self, info_dict):
433 """ Returns None iff the file should be downloaded """
435 video_title = info_dict.get('title', info_dict.get('id', 'video'))
436 if 'title' in info_dict:
437 # This can happen when we're just evaluating the playlist
438 title = info_dict['title']
439 matchtitle = self.params.get('matchtitle', False)
441 if not re.search(matchtitle, title, re.IGNORECASE):
442 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
443 rejecttitle = self.params.get('rejecttitle', False)
445 if re.search(rejecttitle, title, re.IGNORECASE):
446 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
447 date = info_dict.get('upload_date', None)
449 dateRange = self.params.get('daterange', DateRange())
450 if date not in dateRange:
451 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
452 view_count = info_dict.get('view_count', None)
453 if view_count is not None:
454 min_views = self.params.get('min_views')
455 if min_views is not None and view_count < min_views:
456 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
457 max_views = self.params.get('max_views')
458 if max_views is not None and view_count > max_views:
459 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
460 age_limit = self.params.get('age_limit')
461 if age_limit is not None:
462 if age_limit < info_dict.get('age_limit', 0):
463 return 'Skipping "' + title + '" because it is age restricted'
464 if self.in_download_archive(info_dict):
465 return '%s has already been recorded in archive' % video_title
469 def add_extra_info(info_dict, extra_info):
470 '''Set the keys from extra_info in info dict if they are missing'''
471 for key, value in extra_info.items():
472 info_dict.setdefault(key, value)
474 def extract_info(self, url, download=True, ie_key=None, extra_info={},
477 Returns a list with a dictionary for each video we find.
478 If 'download', also downloads the videos.
479 extra_info is a dict containing the extra values to add to each result
483 ies = [self.get_info_extractor(ie_key)]
488 if not ie.suitable(url):
492 self.report_warning('The program functionality for this site has been marked as broken, '
493 'and will probably not work.')
496 ie_result = ie.extract(url)
497 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
499 if isinstance(ie_result, list):
500 # Backwards compatibility: old IE result format
502 '_type': 'compat_list',
503 'entries': ie_result,
505 self.add_extra_info(ie_result,
507 'extractor': ie.IE_NAME,
509 'webpage_url_basename': url_basename(url),
510 'extractor_key': ie.ie_key(),
513 return self.process_ie_result(ie_result, download, extra_info)
516 except ExtractorError as de: # An error we somewhat expected
517 self.report_error(compat_str(de), de.format_traceback())
519 except Exception as e:
520 if self.params.get('ignoreerrors', False):
521 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
526 self.report_error('no suitable InfoExtractor: %s' % url)
528 def process_ie_result(self, ie_result, download=True, extra_info={}):
530 Take the result of the ie(may be modified) and resolve all unresolved
531 references (URLs, playlist items).
533 It will also download the videos if 'download'.
534 Returns the resolved ie_result.
537 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
538 if result_type == 'video':
539 self.add_extra_info(ie_result, extra_info)
540 return self.process_video_result(ie_result, download=download)
541 elif result_type == 'url':
542 # We have to add extra_info to the results because it may be
543 # contained in a playlist
544 return self.extract_info(ie_result['url'],
546 ie_key=ie_result.get('ie_key'),
547 extra_info=extra_info)
548 elif result_type == 'url_transparent':
549 # Use the information from the embedding page
550 info = self.extract_info(
551 ie_result['url'], ie_key=ie_result.get('ie_key'),
552 extra_info=extra_info, download=False, process=False)
554 def make_result(embedded_info):
555 new_result = ie_result.copy()
556 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
557 'entries', 'ie_key', 'duration',
558 'subtitles', 'annotations', 'format',
559 'thumbnail', 'thumbnails'):
562 if f in embedded_info:
563 new_result[f] = embedded_info[f]
565 new_result = make_result(info)
567 assert new_result.get('_type') != 'url_transparent'
568 if new_result.get('_type') == 'compat_list':
569 new_result['entries'] = [
570 make_result(e) for e in new_result['entries']]
572 return self.process_ie_result(
573 new_result, download=download, extra_info=extra_info)
574 elif result_type == 'playlist':
575 # We process each entry in the playlist
576 playlist = ie_result.get('title', None) or ie_result.get('id', None)
577 self.to_screen('[download] Downloading playlist: %s' % playlist)
579 playlist_results = []
581 n_all_entries = len(ie_result['entries'])
582 playliststart = self.params.get('playliststart', 1) - 1
583 playlistend = self.params.get('playlistend', None)
584 # For backwards compatibility, interpret -1 as whole list
585 if playlistend == -1:
588 entries = ie_result['entries'][playliststart:playlistend]
589 n_entries = len(entries)
592 "[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
593 (ie_result['extractor'], playlist, n_all_entries, n_entries))
595 for i, entry in enumerate(entries, 1):
596 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
598 'playlist': playlist,
599 'playlist_index': i + playliststart,
600 'extractor': ie_result['extractor'],
601 'webpage_url': ie_result['webpage_url'],
602 'webpage_url_basename': url_basename(ie_result['webpage_url']),
603 'extractor_key': ie_result['extractor_key'],
606 reason = self._match_entry(entry)
607 if reason is not None:
608 self.to_screen('[download] ' + reason)
611 entry_result = self.process_ie_result(entry,
614 playlist_results.append(entry_result)
615 ie_result['entries'] = playlist_results
617 elif result_type == 'compat_list':
619 self.add_extra_info(r,
621 'extractor': ie_result['extractor'],
622 'webpage_url': ie_result['webpage_url'],
623 'webpage_url_basename': url_basename(ie_result['webpage_url']),
624 'extractor_key': ie_result['extractor_key'],
627 ie_result['entries'] = [
628 self.process_ie_result(_fixup(r), download, extra_info)
629 for r in ie_result['entries']
633 raise Exception('Invalid result type: %s' % result_type)
635 def select_format(self, format_spec, available_formats):
636 if format_spec == 'best' or format_spec is None:
637 return available_formats[-1]
638 elif format_spec == 'worst':
639 return available_formats[0]
641 extensions = ['mp4', 'flv', 'webm', '3gp']
642 if format_spec in extensions:
643 filter_f = lambda f: f['ext'] == format_spec
645 filter_f = lambda f: f['format_id'] == format_spec
646 matches = list(filter(filter_f, available_formats))
651 def process_video_result(self, info_dict, download=True):
652 assert info_dict.get('_type', 'video') == 'video'
654 if 'playlist' not in info_dict:
655 # It isn't part of a playlist
656 info_dict['playlist'] = None
657 info_dict['playlist_index'] = None
659 # This extractors handle format selection themselves
660 if info_dict['extractor'] in ['Youku']:
662 self.process_info(info_dict)
665 # We now pick which formats have to be downloaded
666 if info_dict.get('formats') is None:
667 # There's only one format available
668 formats = [info_dict]
670 formats = info_dict['formats']
672 # We check that all the formats have the format and format_id fields
673 for (i, format) in enumerate(formats):
674 if format.get('format_id') is None:
675 format['format_id'] = compat_str(i)
676 if format.get('format') is None:
677 format['format'] = '{id} - {res}{note}'.format(
678 id=format['format_id'],
679 res=self.format_resolution(format),
680 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
682 # Automatically determine file extension if missing
683 if 'ext' not in format:
684 format['ext'] = determine_ext(format['url'])
686 format_limit = self.params.get('format_limit', None)
688 formats = list(takewhile_inclusive(
689 lambda f: f['format_id'] != format_limit, formats
692 # TODO Central sorting goes here
694 if formats[0] is not info_dict:
695 # only set the 'formats' fields if the original info_dict list them
696 # otherwise we end up with a circular reference, the first (and unique)
697 # element in the 'formats' field in info_dict is info_dict itself,
698 # wich can't be exported to json
699 info_dict['formats'] = formats
700 if self.params.get('listformats', None):
701 self.list_formats(info_dict)
704 req_format = self.params.get('format', 'best')
705 if req_format is None:
707 formats_to_download = []
708 # The -1 is for supporting YoutubeIE
709 if req_format in ('-1', 'all'):
710 formats_to_download = formats
712 # We can accept formats requested in the format: 34/5/best, we pick
713 # the first that is available, starting from left
714 req_formats = req_format.split('/')
715 for rf in req_formats:
716 if re.match(r'.+?\+.+?', rf) is not None:
717 # Two formats have been requested like '137+139'
718 format_1, format_2 = rf.split('+')
719 formats_info = (self.select_format(format_1, formats),
720 self.select_format(format_2, formats))
721 if all(formats_info):
723 'requested_formats': formats_info,
725 'ext': formats_info[0]['ext'],
728 selected_format = None
730 selected_format = self.select_format(rf, formats)
731 if selected_format is not None:
732 formats_to_download = [selected_format]
734 if not formats_to_download:
735 raise ExtractorError('requested format not available',
739 if len(formats_to_download) > 1:
740 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
741 for format in formats_to_download:
742 new_info = dict(info_dict)
743 new_info.update(format)
744 self.process_info(new_info)
745 # We update the info dict with the best quality format (backwards compatibility)
746 info_dict.update(formats_to_download[-1])
749 def process_info(self, info_dict):
750 """Process a single resolved IE result."""
752 assert info_dict.get('_type', 'video') == 'video'
753 #We increment the download the download count here to match the previous behaviour.
754 self.increment_downloads()
756 info_dict['fulltitle'] = info_dict['title']
757 if len(info_dict['title']) > 200:
758 info_dict['title'] = info_dict['title'][:197] + '...'
760 # Keep for backwards compatibility
761 info_dict['stitle'] = info_dict['title']
763 if not 'format' in info_dict:
764 info_dict['format'] = info_dict['ext']
766 reason = self._match_entry(info_dict)
767 if reason is not None:
768 self.to_screen('[download] ' + reason)
771 max_downloads = self.params.get('max_downloads')
772 if max_downloads is not None:
773 if self._num_downloads > int(max_downloads):
774 raise MaxDownloadsReached()
776 filename = self.prepare_filename(info_dict)
779 if self.params.get('forcetitle', False):
780 self.to_stdout(info_dict['fulltitle'])
781 if self.params.get('forceid', False):
782 self.to_stdout(info_dict['id'])
783 if self.params.get('forceurl', False):
784 # For RTMP URLs, also include the playpath
785 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
786 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
787 self.to_stdout(info_dict['thumbnail'])
788 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
789 self.to_stdout(info_dict['description'])
790 if self.params.get('forcefilename', False) and filename is not None:
791 self.to_stdout(filename)
792 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
793 self.to_stdout(formatSeconds(info_dict['duration']))
794 if self.params.get('forceformat', False):
795 self.to_stdout(info_dict['format'])
796 if self.params.get('forcejson', False):
797 info_dict['_filename'] = filename
798 self.to_stdout(json.dumps(info_dict))
800 # Do nothing else if in simulate mode
801 if self.params.get('simulate', False):
808 dn = os.path.dirname(encodeFilename(filename))
809 if dn != '' and not os.path.exists(dn):
811 except (OSError, IOError) as err:
812 self.report_error('unable to create directory ' + compat_str(err))
815 if self.params.get('writedescription', False):
816 descfn = filename + '.description'
817 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
818 self.to_screen('[info] Video description is already present')
821 self.to_screen('[info] Writing video description to: ' + descfn)
822 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
823 descfile.write(info_dict['description'])
824 except (KeyError, TypeError):
825 self.report_warning('There\'s no description to write.')
826 except (OSError, IOError):
827 self.report_error('Cannot write description file ' + descfn)
830 if self.params.get('writeannotations', False):
831 annofn = filename + '.annotations.xml'
832 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
833 self.to_screen('[info] Video annotations are already present')
836 self.to_screen('[info] Writing video annotations to: ' + annofn)
837 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
838 annofile.write(info_dict['annotations'])
839 except (KeyError, TypeError):
840 self.report_warning('There are no annotations to write.')
841 except (OSError, IOError):
842 self.report_error('Cannot write annotations file: ' + annofn)
845 subtitles_are_requested = any([self.params.get('writesubtitles', False),
846 self.params.get('writeautomaticsub')])
848 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
849 # subtitles download errors are already managed as troubles in relevant IE
850 # that way it will silently go on when used with unsupporting IE
851 subtitles = info_dict['subtitles']
852 sub_format = self.params.get('subtitlesformat', 'srt')
853 for sub_lang in subtitles.keys():
854 sub = subtitles[sub_lang]
858 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
859 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
860 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
862 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
863 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
865 except (OSError, IOError):
866 self.report_error('Cannot write subtitles file ' + descfn)
869 if self.params.get('writeinfojson', False):
870 infofn = os.path.splitext(filename)[0] + '.info.json'
871 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
872 self.to_screen('[info] Video description metadata is already present')
874 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
876 write_json_file(info_dict, encodeFilename(infofn))
877 except (OSError, IOError):
878 self.report_error('Cannot write metadata to JSON file ' + infofn)
881 if self.params.get('writethumbnail', False):
882 if info_dict.get('thumbnail') is not None:
883 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
884 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
885 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
886 self.to_screen('[%s] %s: Thumbnail is already present' %
887 (info_dict['extractor'], info_dict['id']))
889 self.to_screen('[%s] %s: Downloading thumbnail ...' %
890 (info_dict['extractor'], info_dict['id']))
892 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
893 with open(thumb_filename, 'wb') as thumbf:
894 shutil.copyfileobj(uf, thumbf)
895 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
896 (info_dict['extractor'], info_dict['id'], thumb_filename))
897 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
898 self.report_warning('Unable to download thumbnail "%s": %s' %
899 (info_dict['thumbnail'], compat_str(err)))
901 if not self.params.get('skip_download', False):
902 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
907 fd = get_suitable_downloader(info)(self, self.params)
908 for ph in self._progress_hooks:
909 fd.add_progress_hook(ph)
910 return fd.download(name, info)
911 if info_dict.get('requested_formats') is not None:
914 merger = FFmpegMergerPP(self)
915 if not merger._get_executable():
917 self.report_warning('You have requested multiple '
918 'formats but ffmpeg or avconv are not installed.'
919 ' The formats won\'t be merged')
921 postprocessors = [merger]
922 for f in info_dict['requested_formats']:
923 new_info = dict(info_dict)
925 fname = self.prepare_filename(new_info)
926 fname = prepend_extension(fname, 'f%s' % f['format_id'])
927 downloaded.append(fname)
928 partial_success = dl(fname, new_info)
929 success = success and partial_success
930 info_dict['__postprocessors'] = postprocessors
931 info_dict['__files_to_merge'] = downloaded
934 success = dl(filename, info_dict)
935 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
936 self.report_error('unable to download video data: %s' % str(err))
938 except (OSError, IOError) as err:
939 raise UnavailableVideoError(err)
940 except (ContentTooShortError, ) as err:
941 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
946 self.post_process(filename, info_dict)
947 except (PostProcessingError) as err:
948 self.report_error('postprocessing: %s' % str(err))
951 self.record_download_archive(info_dict)
953 def download(self, url_list):
954 """Download a given list of URLs."""
# Refuse to run when several URLs would all be written to one fixed
# output file: a template without '%' expands identically per video.
955 if (len(url_list) > 1 and
956 '%' not in self.params['outtmpl']
957 and self.params.get('max_downloads') != 1):
958 raise SameFileError(self.params['outtmpl'])
# NOTE(review): the per-URL loop header and its `try:` (original lines
# 959-961) are elided from this chunk -- presumably
# `for url in url_list:` / `try:`, which binds `url` used below.
962 #It also downloads the videos
963 self.extract_info(url)
964 except UnavailableVideoError:
965 self.report_error('unable to download video')
966 except MaxDownloadsReached:
# Not an error: --max-downloads limit hit, stop cleanly.
967 self.to_screen('[info] Maximum number of downloaded files reached.')
# Process exit code accumulated by report_error() across downloads.
970 return self._download_retcode
972 def download_with_info_file(self, info_filename):
# Re-run processing from a previously dumped .info.json file instead
# of extracting the URL again.
973 with io.open(info_filename, 'r', encoding='utf-8') as f:
# NOTE(review): original lines 974-975 (a `try:` and the JSON load of
# `f` into `info`) are elided from this chunk.
976 self.process_ie_result(info, download=True)
977 except DownloadError:
# Saved info can go stale (e.g. expired media URLs); fall back to a
# fresh extraction of the original page URL when available.
978 webpage_url = info.get('webpage_url')
979 if webpage_url is not None:
980 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
981 return self.download([webpage_url])
984 return self._download_retcode
986 def post_process(self, filename, ie_info):
987 """Run all the postprocessors on the given file."""
# NOTE(review): original line 988 is elided -- presumably
# `info = dict(ie_info)`, so postprocessors work on a copy.
989 info['filepath'] = filename
# Per-download postprocessors (e.g. the format merger installed by
# process_info) run before the user-configured ones in self._pps.
992 if ie_info.get('__postprocessors') is not None:
993 pps_chain.extend(ie_info['__postprocessors'])
994 pps_chain.extend(self._pps)
# NOTE(review): the loop header over pps_chain and its `try:`
# (original lines 995-996) are elided from this chunk.
997 keep_video_wish, new_info = pp.run(info)
998 if keep_video_wish is not None:
# An explicit wish from a postprocessor wins ...
1000 keep_video = keep_video_wish
1001 elif keep_video is None:
1002 # No clear decision yet, let IE decide
1003 keep_video = keep_video_wish
1004 except PostProcessingError as e:
1005 self.report_error(e.msg)
# Delete the original only when a PP said so and -k was not passed.
1006 if keep_video is False and not self.params.get('keepvideo', False):
1008 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1009 os.remove(encodeFilename(filename))
1010 except (IOError, OSError):
# Best effort: failing to delete is only worth a warning.
1011 self.report_warning('Unable to remove downloaded video file')
1013 def _make_archive_id(self, info_dict):
1014 # Future-proof against any change in case
1015 # and backwards compatibility with prior versions
1016 extractor = info_dict.get('extractor_key')
1017 if extractor is None:
1018 if 'id' in info_dict:
1019 extractor = info_dict.get('ie_key') # key in a playlist
1020 if extractor is None:
1021 return None # Incomplete video information
1022 return extractor.lower() + ' ' + info_dict['id']
1024 def in_download_archive(self, info_dict):
# Return True when this video is already recorded in the
# --download-archive file; False otherwise.
1025 fn = self.params.get('download_archive')
# NOTE(review): original lines 1026-1028 are elided -- presumably the
# early `return False` when no archive file is configured.
1029 vid_id = self._make_archive_id(info_dict)
1031 return False # Incomplete video information
# NOTE(review): the `try:` opening the archive read (original lines
# 1032-1033) is elided from this chunk.
1034 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1035 for line in archive_file:
1036 if line.strip() == vid_id:
# NOTE(review): the `return True` on a matching line (original line
# 1037) is elided.
1038 except IOError as ioe:
# A missing archive file just means nothing was downloaded yet; any
# other I/O error is presumably re-raised (elided lines).
1039 if ioe.errno != errno.ENOENT:
1043 def record_download_archive(self, info_dict):
# Append this video's archive id to the --download-archive file so it
# is skipped on later runs. No-op guards (original lines 1045-1046,
# 1048) are elided from this chunk.
1044 fn = self.params.get('download_archive')
1047 vid_id = self._make_archive_id(info_dict)
# locked_file serialises concurrent appends to the archive.
1049 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1050 archive_file.write(vid_id + '\n')
1053 def format_resolution(format, default='unknown'):
# Human-readable resolution string for a format dict, e.g. '640x480',
# '720p' or '?x640'.
# NOTE(review): in the full file a @staticmethod decorator (line 1052)
# precedes this def -- confirm; `format` shadows the builtin but is
# the historical parameter name.
1054 if format.get('vcodec') == 'none':
# NOTE(review): the audio-only early return (original line 1055) is
# elided from this chunk.
1056 if format.get('resolution') is not None:
# Explicit resolution string from the extractor wins.
1057 return format['resolution']
1058 if format.get('height') is not None:
1059 if format.get('width') is not None:
1060 res = '%sx%s' % (format['width'], format['height'])
# NOTE(review): the `else:` for the height-only '<h>p' form (original
# line 1061) is elided.
1062 res = '%sp' % format['height']
1063 elif format.get('width') is not None:
# Width known but height unknown.
1064 res = '?x%d' % format['width']
# NOTE(review): the trailing `else: res = default` and `return res`
# (original lines 1065-1068) are elided from this chunk.
1069 def list_formats(self, info_dict):
# Print a table of all available formats for a video (--list-formats).
1070 def format_note(fdict):
# Build the free-text "note" column for one format dict.
# NOTE(review): initialisation of `res` (original line 1071,
# presumably `res = ''`) is elided from this chunk.
1072 if fdict.get('ext') in ['f4f', 'f4m']:
1073 res += '(unsupported) '
1074 if fdict.get('format_note') is not None:
1075 res += fdict['format_note'] + ' '
1076 if fdict.get('tbr') is not None:
1077 res += '%4dk ' % fdict['tbr']
1078 if (fdict.get('vcodec') is not None and
1079 fdict.get('vcodec') != 'none'):
1080 res += '%-5s' % fdict['vcodec']
1081 if fdict.get('vbr') is not None:
# NOTE(review): several branches of the codec/bitrate formatting
# (original lines 1082, 1084, 1088-1089, 1092-1094, 1098-1099) and
# the final `return res` (1101-1102) are elided from this chunk.
1083 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1085 if fdict.get('vbr') is not None:
1086 res += '%4dk' % fdict['vbr']
1087 if fdict.get('acodec') is not None:
1090 res += '%-5s' % fdict['acodec']
1091 elif fdict.get('abr') is not None:
1095 if fdict.get('abr') is not None:
1096 res += '@%3dk' % fdict['abr']
1097 if fdict.get('filesize') is not None:
1100 res += format_bytes(fdict['filesize'])
1103 def line(format, idlen=20):
# One table row: format id, extension, resolution, note; `idlen`
# sizes the id column. The extension field (original line 1106) is
# elided from this chunk.
1104 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1105 format['format_id'],
1107 self.format_resolution(format),
1108 format_note(format),
# Single-format results have no 'formats' list; fall back to the
# info_dict itself as the only entry.
1111 formats = info_dict.get('formats', [info_dict])
1112 idlen = max(len('format code'),
1113 max(len(f['format_id']) for f in formats))
1114 formats_s = [line(f, idlen) for f in formats]
# Labels the first entry (worst) and last entry (best) -- assumes the
# formats list is ordered worst-to-best.
1115 if len(formats) > 1:
1116 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1117 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1119 header_line = line({
1120 'format_id': 'format code', 'ext': 'extension',
1121 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1122 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1123 (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """Start an HTTP download: open *req* through this object's opener.

    The opener (with proxy, cookie and HTTPS handlers) is the one
    installed on ``self._opener`` by ``_setup_opener``.
    """
    opener = self._opener
    return opener.open(req)
1129 def print_debug_header(self):
# Emit version / git-commit / platform / proxy diagnostics when
# --verbose is enabled.
1130 if not self.params.get('verbose'):
# NOTE(review): the early `return` (original line 1131) is elided.
1132 write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort probe for the git commit when running from a checkout;
# the enclosing `try:` (original line 1133) is elided from this chunk.
1134 sp = subprocess.Popen(
1135 ['git', 'rev-parse', '--short', 'HEAD'],
1136 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1137 cwd=os.path.dirname(os.path.abspath(__file__)))
1138 out, err = sp.communicate()
1139 out = out.decode().strip()
# Only print when the output actually looks like a commit hash.
1140 if re.match('[0-9a-f]+', out):
1141 write_string('[debug] Git HEAD: ' + out + '\n')
# NOTE(review): the exception handlers for the git probe (original
# lines 1142-1146) are elided from this chunk.
1147 write_string('[debug] Python version %s - %s' %
1148 (platform.python_version(), platform_name()) + '\n')
# Collect proxies from every opener handler that exposes a `proxies`
# mapping (initialisation of proxy_map is on an elided line).
1151 for handler in self._opener.handlers:
1152 if hasattr(handler, 'proxies'):
1153 proxy_map.update(handler.proxies)
1154 write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1156 def _setup_opener(self):
# Build the urllib opener (cookies, proxies, HTTPS handler) used by
# self.urlopen, and install it as the global default opener.
1157 timeout_val = self.params.get('socket_timeout')
# Default socket timeout: 600 seconds.
1158 timeout = 600 if timeout_val is None else float(timeout_val)
1160 opts_cookiefile = self.params.get('cookiefile')
1161 opts_proxy = self.params.get('proxy')
# In-memory cookie jar unless a cookie file was configured.
1163 if opts_cookiefile is None:
1164 self.cookiejar = compat_cookiejar.CookieJar()
# NOTE(review): the `else:` (original line 1165) and the
# MozillaCookieJar filename argument (line 1167) are elided.
1166 self.cookiejar = compat_cookiejar.MozillaCookieJar(
# Only load the cookie file when it exists and is readable.
1168 if os.access(opts_cookiefile, os.R_OK):
1169 self.cookiejar.load()
1170 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# NOTE(review): the cookiejar argument to HTTPCookieProcessor
# (original line 1172) is elided from this chunk.
1173 if opts_proxy is not None:
# --proxy '' presumably disables proxying entirely (branch on
# original lines 1175-1176 is elided); a non-empty value is used for
# both http and https.
1174 if opts_proxy == '':
1177 proxies = {'http': opts_proxy, 'https': opts_proxy}
# No explicit proxy: fall back to environment proxy settings.
1179 proxies = compat_urllib_request.getproxies()
1180 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1181 if 'http' in proxies and 'https' not in proxies:
1182 proxies['https'] = proxies['http']
1183 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --print-traffic turns on urllib's request/response debugging.
1185 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1186 https_handler = make_HTTPS_handler(
1187 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1188 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1189 opener = compat_urllib_request.build_opener(
1190 https_handler, proxy_handler, cookie_processor, ydlh)
1191 # Delete the default user-agent header, which would otherwise apply in
1192 # cases where our custom HTTP handler doesn't come into play
1193 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1194 opener.addheaders = []
1195 self._opener = opener
1197 # TODO remove this global modification
1198 compat_urllib_request.install_opener(opener)
1199 socket.setdefaulttimeout(timeout)