2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
50 UnavailableVideoError,
57 from .extractor import get_info_extractor, gen_extractors
58 from .downloader import get_suitable_downloader
59 from .postprocessor import FFmpegMergerPP
60 from .version import __version__
63 class YoutubeDL(object):
66 YoutubeDL objects are the ones responsible of downloading the
67 actual video file and writing it to disk if the user has requested
68 it, among some other tasks. In most cases there should be one per
69 program. As, given a video URL, the downloader doesn't know how to
70 extract all the needed information, task that InfoExtractors do, it
71 has to pass the URL to one of them.
73 For this, YoutubeDL objects have a method that allows
74 InfoExtractors to be registered in a given order. When it is passed
75 a URL, the YoutubeDL object handles it to the first InfoExtractor it
76 finds that reports being able to handle it. The InfoExtractor extracts
77 all the information about the video or videos the URL refers to, and
78 YoutubeDL processes the extracted information, possibly using a File
79 Downloader to download the video.
81 YoutubeDL objects accept a lot of parameters. In order not to saturate
82 the object constructor with arguments, it receives a dictionary of
83 options instead. These options are available through the params
84 attribute for the InfoExtractors to use. The YoutubeDL also
85 registers itself as the downloader in charge for the InfoExtractors
86 that are added to it, so this is a "mutual registration".
90 username: Username for authentication purposes.
91 password: Password for authentication purposes.
92 videopassword:   Password for accessing a video.
93 usenetrc: Use netrc for authentication instead.
94 verbose: Print additional info to stdout.
95 quiet: Do not print messages to stdout.
96 forceurl: Force printing final URL.
97 forcetitle: Force printing title.
98 forceid: Force printing ID.
99 forcethumbnail: Force printing thumbnail URL.
100 forcedescription: Force printing description.
101 forcefilename: Force printing final filename.
102 forceduration: Force printing duration.
103 forcejson: Force printing info_dict as JSON.
104 simulate: Do not download the video files.
105 format: Video format code.
106 format_limit: Highest quality format to try.
107 outtmpl: Template for output names.
108 restrictfilenames: Do not allow "&" and spaces in file names
109 ignoreerrors: Do not stop on download errors.
110 nooverwrites: Prevent overwriting files.
111 playliststart: Playlist item to start at.
112 playlistend: Playlist item to end at.
113 matchtitle: Download only matching titles.
114 rejecttitle: Reject downloads for matching titles.
115 logger: Log messages to a logging.Logger instance.
116 logtostderr: Log messages to stderr instead of stdout.
117 writedescription: Write the video description to a .description file
118 writeinfojson: Write the video description to a .info.json file
119 writeannotations: Write the video annotations to a .annotations.xml file
120 writethumbnail: Write the thumbnail image to a file
121 writesubtitles: Write the video subtitles to a file
122 writeautomaticsub: Write the automatic subtitles to a file
123 allsubtitles: Downloads all the subtitles of the video
124 (requires writesubtitles or writeautomaticsub)
125 listsubtitles: Lists all available subtitles for the video
126 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
127 subtitleslangs: List of languages of the subtitles to download
128 keepvideo: Keep the video file after post-processing
129 daterange: A DateRange object, download only if the upload_date is in the range.
130 skip_download: Skip the actual download of the video file
131 cachedir: Location of the cache files in the filesystem.
132 None to disable filesystem cache.
133 noplaylist: Download single video instead of a playlist if in doubt.
134 age_limit: An integer representing the user's age in years.
135 Unsuitable videos for the given age are skipped.
136 min_views: An integer representing the minimum view count the video
137 must have in order to not be skipped.
138 Videos without view count information are always
139 downloaded. None for no limit.
140 max_views: An integer representing the maximum view count.
141 Videos that are more popular than that are not
143 Videos without view count information are always
144 downloaded. None for no limit.
145 download_archive: File name of a file where all downloads are recorded.
146 Videos already present in the file are not downloaded
148 cookiefile: File name where cookies should be read from and dumped to.
149 nocheckcertificate:Do not verify SSL certificates
150 proxy: URL of the proxy server to use
151 socket_timeout: Time to wait for unresponsive hosts, in seconds
152 bidi_workaround: Work around buggy terminals without bidirectional text
153                        support, using fribidi
154 debug_printtraffic:Print out sent and received HTTP traffic
155 include_ads: Download ads as well
156 default_search: Prepend this string if an input url is not valid.
157 'auto' for elaborate guessing
159 The following parameters are not used by YoutubeDL itself, they are used by
161 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
162 noresizebuffer, retries, continuedl, noprogress, consoletitle
164 The following options are used by the post processors:
165 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
166 otherwise prefer avconv.
172 _download_retcode = None
173 _num_downloads = None
# Constructor: record options, pick output streams, optionally start a
# bidi helper subprocess, and auto-enable restrictfilenames on limited
# filesystem encodings.
# NOTE(review): inner line numbering is non-contiguous -- several statements
# (params default, self.params assignment, the try: around the bidi setup,
# sp_kwargs construction) appear elided from this copy; confirm against upstream.
176 def __init__(self, params=None):
177 """Create a FileDownloader object with the given options."""
181 self._ies_instances = {}
183 self._progress_hooks = []
184 self._download_retcode = 0
185 self._num_downloads = 0
# logtostderr selects stderr via boolean indexing into the two-element list.
186 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
187 self._err_file = sys.stderr
190 if params.get('bidi_workaround', False):
# A pty pair feeds output through an external bidi filter (bidiv or fribidi).
193 master, slave = pty.openpty()
194 width = get_term_width()
198 width_args = ['-w', str(width)]
200 stdin=subprocess.PIPE,
202 stderr=self._err_file)
204 self._output_process = subprocess.Popen(
205 ['bidiv'] + width_args, **sp_kwargs
# Fallback: fribidi in character-mode UTF-8 (presumably when bidiv is
# unavailable -- the intervening except/else lines are elided here).
208 self._output_process = subprocess.Popen(
209 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
210 self._output_channel = os.fdopen(master, 'rb')
211 except OSError as ose:
213 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# On Python 3 with an ASCII filesystem encoding, force restricted
# filenames to avoid Unicode filesystem API errors (#1474).
217 if (sys.version_info >= (3,) and sys.platform != 'win32' and
218 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
219 and not params['restrictfilenames']):
220 # On Python 3, the Unicode filesystem API will throw errors (#1474)
222 'Assuming --restrict-filenames since file system encoding '
223 'cannot encode all charactes. '
224 'Set the LC_ALL environment variable to fix this.')
225 self.params['restrictfilenames'] = True
# %(stitle)s in the output template is deprecated; warn the user.
227 if '%(stitle)s' in self.params.get('outtmpl', ''):
228 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
# Register an InfoExtractor instance with this downloader and hand it a
# back-reference via set_downloader (the "mutual registration" from the
# class docstring).
# NOTE(review): the line appending ie to the ordered extractor list appears
# elided from this copy (numbering jumps 233 -> 235).
232 def add_info_extractor(self, ie):
233 """Add an InfoExtractor object to the end of the list."""
# Keyed by ie_key() so get_info_extractor() can reuse the same instance.
235 self._ies_instances[ie.ie_key()] = ie
236 ie.set_downloader(self)
# Look up (or lazily create and register) the extractor instance for ie_key.
238 def get_info_extractor(self, ie_key):
240 Get an instance of an IE with name ie_key, it will try to get one from
241 the _ies list, if there's no instance it will create a new one and add
242 it to the extractor list.
244 ie = self._ies_instances.get(ie_key)
# Cache miss: instantiate the class returned by the module-level
# get_info_extractor factory and register it.
246 ie = get_info_extractor(ie_key)()
247 self.add_info_extractor(ie)
# Register every extractor produced by gen_extractors(), in order.
250 def add_default_info_extractors(self):
252 Add the InfoExtractors returned by gen_extractors to the end of the list
254 for ie in gen_extractors():
255 self.add_info_extractor(ie)
# Register a post-processor and give it a back-reference to this downloader.
# NOTE(review): the line appending pp to the post-processor chain appears
# elided from this copy (numbering jumps 258 -> 260).
257 def add_post_processor(self, pp):
258 """Add a PostProcessor object to the end of the chain."""
260 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register *ph* as a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
# Pipe *message* through the bidi helper subprocess started in __init__ and
# return the reordered text. No-op passthrough when the helper was never set
# up (NOTE(review): the early-return line for that branch is elided here).
266 def _bidi_workaround(self, message):
267 if not hasattr(self, '_output_channel'):
270 assert hasattr(self, '_output_process')
271 assert type(message) == type('')
# One readline() per input line, so count lines before writing.
272 line_count = message.count('\n') + 1
273 self._output_process.stdin.write((message + '\n').encode('utf-8'))
274 self._output_process.stdin.flush()
275 res = ''.join(self._output_channel.readline().decode('utf-8')
276 for _ in range(line_count))
# Strip the trailing newline that was appended before writing.
277 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print *message* to the screen file unless quiet mode is active."""
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
# Low-level screen output: route to the configured logger if present,
# otherwise write to self._screen_file (honouring quiet mode when
# check_quiet is set).
283 def to_stdout(self, message, skip_eol=False, check_quiet=False):
284 """Print message to stdout if not in quiet mode."""
285 if self.params.get('logger'):
286 self.params['logger'].debug(message)
287 elif not check_quiet or not self.params.get('quiet', False):
288 message = self._bidi_workaround(message)
# Boolean indexing: newline terminator unless skip_eol is truthy.
289 terminator = ['\n', ''][skip_eol]
290 output = message + terminator
292 write_string(output, self._screen_file)
# Error-stream output: route to the configured logger if present, otherwise
# write to self._err_file.
# NOTE(review): the else: introducing the fallback branch appears elided
# from this copy (numbering jumps 298 -> 300).
294 def to_stderr(self, message):
295 """Print message to stderr."""
296 assert type(message) == type('')
297 if self.params.get('logger'):
298 self.params['logger'].error(message)
300 message = self._bidi_workaround(message)
301 output = message + '\n'
302 write_string(output, self._err_file)
# Set the terminal/console window title (only when the consoletitle option
# is enabled; NOTE(review): the early return for the disabled case is elided).
304 def to_console_title(self, message):
305 if not self.params.get('consoletitle', False):
# Windows console: use the Win32 API directly.
307 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
308 # c_wchar_p() might not be necessary if `message` is
309 # already of type unicode()
310 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
# Unix terminals: use the xterm OSC 0 escape sequence.
311 elif 'TERM' in os.environ:
312 write_string('\033]0;%s\007' % message, self._screen_file)
# Push the current terminal title onto the terminal's title stack
# (xterm escape), so it can be restored on exit.
314 def save_console_title(self):
315 if not self.params.get('consoletitle', False):
317 if 'TERM' in os.environ:
318 # Save the title on stack
319 write_string('\033[22;0t', self._screen_file)
# Pop the previously saved terminal title from the terminal's title stack.
321 def restore_console_title(self):
322 if not self.params.get('consoletitle', False):
324 if 'TERM' in os.environ:
325 # Restore the title from stack
326 write_string('\033[23;0t', self._screen_file)
# Context-manager protocol: save the console title on entry, restore it and
# persist cookies on exit.
# NOTE(review): the def __enter__ line itself appears elided from this copy;
# line 329 below is presumably its body.
329 self.save_console_title()
332 def __exit__(self, *args):
333 self.restore_console_title()
# Persist the cookie jar only when a cookie file was configured.
335 if self.params.get('cookiefile') is not None:
336 self.cookiejar.save()
# Central error handler: print the message, optionally print a traceback in
# verbose mode, then either raise DownloadError or just record a nonzero
# retcode depending on the ignoreerrors option.
338 def trouble(self, message=None, tb=None):
339 """Determine action to take when a download problem appears.
341 Depending on if the downloader has been configured to ignore
342 download errors or not, this method may throw an exception or
343 not when errors are found, after printing the message.
345 tb, if given, is additional traceback information.
347 if message is not None:
348 self.to_stderr(message)
349 if self.params.get('verbose'):
351 if sys.exc_info()[0]: # if .trouble has been called from an except block
# Wrapped exceptions (e.g. ExtractorError) can carry their own exc_info;
# include that inner traceback first.
353 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
354 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
355 tb += compat_str(traceback.format_exc())
# Not inside an except block: fall back to the current call stack.
357 tb_data = traceback.format_list(traceback.extract_stack())
358 tb = ''.join(tb_data)
360 if not self.params.get('ignoreerrors', False):
361 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
362 exc_info = sys.exc_info()[1].exc_info
364 exc_info = sys.exc_info()
365 raise DownloadError(message, exc_info)
# ignoreerrors: swallow the error but remember failure for the exit code.
366 self._download_retcode = 1
# Emit a warning on stderr; the 'WARNING:' prefix is ANSI-coloured yellow
# when stderr is a tty on non-Windows platforms.
368 def report_warning(self, message):
370 Print the message to stderr, it will be prefixed with 'WARNING:'
371 If stderr is a tty file the 'WARNING:' will be colored
373 if self._err_file.isatty() and os.name != 'nt':
374 _msg_header = '\033[0;33mWARNING:\033[0m'
376 _msg_header = 'WARNING:'
377 warning_message = '%s %s' % (_msg_header, message)
378 self.to_stderr(warning_message)
# Emit an error via trouble(); the 'ERROR:' prefix is ANSI-coloured red when
# stderr is a tty on non-Windows platforms.
380 def report_error(self, message, tb=None):
382 Do the same as trouble, but prefixes the message with 'ERROR:', colored
383 in red if stderr is a tty file.
385 if self._err_file.isatty() and os.name != 'nt':
386 _msg_header = '\033[0;31mERROR:\033[0m'
388 _msg_header = 'ERROR:'
389 error_message = '%s %s' % (_msg_header, message)
# Delegates to trouble(), which decides whether to raise DownloadError.
390 self.trouble(error_message, tb)
# Tell the user the file already exists; fall back to a filename-free
# message when the filename cannot be encoded for the output stream.
# NOTE(review): the try: line before 395 appears elided from this copy.
392 def report_file_already_downloaded(self, file_name):
393 """Report file has already been fully downloaded."""
395 self.to_screen('[download] %s has already been downloaded' % file_name)
396 except UnicodeEncodeError:
397 self.to_screen('[download] The file has already been downloaded')
def increment_downloads(self):
    """Bump the ordinal counter that assigns a number to each downloaded file."""
    self._num_downloads = self._num_downloads + 1
# Build the output filename by %-formatting the outtmpl template with a
# sanitized copy of info_dict; missing template keys render as 'NA'.
# NOTE(review): several lines are elided from this copy (the try:, the
# autonumber_size default, the sanitize lambda's argument/body lines, and
# the return statements); code kept byte-identical pending upstream check.
403 def prepare_filename(self, info_dict):
404 """Generate the output filename."""
406 template_dict = dict(info_dict)
408 template_dict['epoch'] = int(time.time())
409 autonumber_size = self.params.get('autonumber_size')
410 if autonumber_size is None:
# Zero-padded %(autonumber)s based on how many files were downloaded so far.
412 autonumber_templ = '%0' + str(autonumber_size) + 'd'
413 template_dict['autonumber'] = autonumber_templ % self._num_downloads
414 if template_dict.get('playlist_index') is not None:
415 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
# Sanitize every value so the formatted result is filesystem-safe.
417 sanitize = lambda k, v: sanitize_filename(
419 restricted=self.params.get('restrictfilenames'),
421 template_dict = dict((k, sanitize(k, v))
422 for k, v in template_dict.items()
# defaultdict makes unknown %(...)s fields expand to 'NA' instead of raising.
424 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
426 tmpl = os.path.expanduser(self.params['outtmpl'])
427 filename = tmpl % template_dict
429 except ValueError as err:
430 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Apply the user's filters (title match/reject, date range, view counts,
# age limit, download archive) to one entry. Returns a human-readable skip
# reason string, or None when the entry should be downloaded.
# NOTE(review): the guards `if matchtitle:` / `if rejecttitle:` /
# `if date is not None:` and the final `return None` appear elided here.
433 def _match_entry(self, info_dict):
434 """ Returns None iff the file should be downloaded """
436 video_title = info_dict.get('title', info_dict.get('id', 'video'))
437 if 'title' in info_dict:
438 # This can happen when we're just evaluating the playlist
439 title = info_dict['title']
440 matchtitle = self.params.get('matchtitle', False)
442 if not re.search(matchtitle, title, re.IGNORECASE):
443 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
444 rejecttitle = self.params.get('rejecttitle', False)
446 if re.search(rejecttitle, title, re.IGNORECASE):
447 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
448 date = info_dict.get('upload_date', None)
450 dateRange = self.params.get('daterange', DateRange())
451 if date not in dateRange:
452 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count limits only apply when the extractor reported a count.
453 view_count = info_dict.get('view_count', None)
454 if view_count is not None:
455 min_views = self.params.get('min_views')
456 if min_views is not None and view_count < min_views:
457 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
458 max_views = self.params.get('max_views')
459 if max_views is not None and view_count > max_views:
460 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
461 age_limit = self.params.get('age_limit')
462 if age_limit is not None:
463 if age_limit < info_dict.get('age_limit', 0):
464 return 'Skipping "' + title + '" because it is age restricted'
465 if self.in_download_archive(info_dict):
466 return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    """Copy each key from extra_info into info_dict unless it is already set."""
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
# Run the URL through the (selected or all registered) InfoExtractors,
# normalize old-style list results to a 'compat_list' dict, attach
# provenance keys, and hand the result to process_ie_result.
# NOTE(review): heavily elided copy -- the signature's trailing parameters,
# the all-extractors branch, the try:, the continue/break statements and the
# non-ignoreerrors re-raise are missing from this view.
475 def extract_info(self, url, download=True, ie_key=None, extra_info={},
478 Returns a list with a dictionary for each video we find.
479 If 'download', also downloads the videos.
480 extra_info is a dict containing the extra values to add to each result
484 ies = [self.get_info_extractor(ie_key)]
489 if not ie.suitable(url):
# Warn (but continue) for extractors whose support is flagged broken.
493 self.report_warning('The program functionality for this site has been marked as broken, '
494 'and will probably not work.')
497 ie_result = ie.extract(url)
498 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
500 if isinstance(ie_result, list):
501 # Backwards compatibility: old IE result format
503 '_type': 'compat_list',
504 'entries': ie_result,
# Record which extractor produced the result and the source URL.
506 self.add_extra_info(ie_result,
508 'extractor': ie.IE_NAME,
510 'webpage_url_basename': url_basename(url),
511 'extractor_key': ie.ie_key(),
514 return self.process_ie_result(ie_result, download, extra_info)
517 except ExtractorError as de: # An error we somewhat expected
518 self.report_error(compat_str(de), de.format_traceback())
520 except Exception as e:
521 if self.params.get('ignoreerrors', False):
522 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
527 self.report_error('no suitable InfoExtractor: %s' % url)
# Dispatch on ie_result['_type'] ('video', 'url', 'url_transparent',
# 'playlist', 'compat_list') and resolve all nested references, recursing
# until concrete video results are processed/downloaded.
# NOTE(review): elided copy -- docstring delimiters, several dict-literal
# opening lines, the playlist slicing `else:` branches and the per-entry
# extra-info dict header are missing from this view.
529 def process_ie_result(self, ie_result, download=True, extra_info={}):
531 Take the result of the ie(may be modified) and resolve all unresolved
532 references (URLs, playlist items).
534 It will also download the videos if 'download'.
535 Returns the resolved ie_result.
538 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
539 if result_type == 'video':
540 self.add_extra_info(ie_result, extra_info)
541 return self.process_video_result(ie_result, download=download)
542 elif result_type == 'url':
543 # We have to add extra_info to the results because it may be
544 # contained in a playlist
545 return self.extract_info(ie_result['url'],
547 ie_key=ie_result.get('ie_key'),
548 extra_info=extra_info)
549 elif result_type == 'url_transparent':
550 # Use the information from the embedding page
551 info = self.extract_info(
552 ie_result['url'], ie_key=ie_result.get('ie_key'),
553 extra_info=extra_info, download=False, process=False)
# Merge selected fields from the embedded result into a copy of ie_result.
555 def make_result(embedded_info):
556 new_result = ie_result.copy()
557 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
558 'entries', 'ie_key', 'duration',
559 'subtitles', 'annotations', 'format',
560 'thumbnail', 'thumbnails'):
563 if f in embedded_info:
564 new_result[f] = embedded_info[f]
566 new_result = make_result(info)
# A url_transparent result must not resolve to another url_transparent,
# or this would recurse indefinitely.
568 assert new_result.get('_type') != 'url_transparent'
569 if new_result.get('_type') == 'compat_list':
570 new_result['entries'] = [
571 make_result(e) for e in new_result['entries']]
573 return self.process_ie_result(
574 new_result, download=download, extra_info=extra_info)
575 elif result_type == 'playlist':
576 # We process each entry in the playlist
577 playlist = ie_result.get('title', None) or ie_result.get('id', None)
578 self.to_screen('[download] Downloading playlist: %s' % playlist)
580 playlist_results = []
# playliststart is 1-based in params; convert to a 0-based slice index.
582 playliststart = self.params.get('playliststart', 1) - 1
583 playlistend = self.params.get('playlistend', None)
584 # For backwards compatibility, interpret -1 as whole list
585 if playlistend == -1:
# Plain lists can be sliced directly; PagedList needs getslice().
588 if isinstance(ie_result['entries'], list):
589 n_all_entries = len(ie_result['entries'])
590 entries = ie_result['entries'][playliststart:playlistend]
591 n_entries = len(entries)
593 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
594 (ie_result['extractor'], playlist, n_all_entries, n_entries))
596 assert isinstance(ie_result['entries'], PagedList)
597 entries = ie_result['entries'].getslice(
598 playliststart, playlistend)
599 n_entries = len(entries)
601 "[%s] playlist %s: Downloading %d videos" %
602 (ie_result['extractor'], playlist, n_entries))
604 for i, entry in enumerate(entries, 1):
605 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
# Per-entry extra info: playlist provenance and 1-based absolute index.
607 'playlist': playlist,
608 'playlist_index': i + playliststart,
609 'extractor': ie_result['extractor'],
610 'webpage_url': ie_result['webpage_url'],
611 'webpage_url_basename': url_basename(ie_result['webpage_url']),
612 'extractor_key': ie_result['extractor_key'],
# Honour the user's filters per entry before recursing.
615 reason = self._match_entry(entry)
616 if reason is not None:
617 self.to_screen('[download] ' + reason)
620 entry_result = self.process_ie_result(entry,
623 playlist_results.append(entry_result)
624 ie_result['entries'] = playlist_results
626 elif result_type == 'compat_list':
628 self.add_extra_info(r,
630 'extractor': ie_result['extractor'],
631 'webpage_url': ie_result['webpage_url'],
632 'webpage_url_basename': url_basename(ie_result['webpage_url']),
633 'extractor_key': ie_result['extractor_key'],
636 ie_result['entries'] = [
637 self.process_ie_result(_fixup(r), download, extra_info)
638 for r in ie_result['entries']
# Any other _type is a programming error in the extractor.
642 raise Exception('Invalid result type: %s' % result_type)
# Pick one format dict from available_formats according to format_spec:
# 'best'/'worst' take the last/first entry (presumably the list is ordered
# worst-to-best -- confirm with the sorting done upstream), 'bestaudio'/
# 'worstaudio' restrict to audio-only formats, a known extension matches by
# 'ext', and anything else matches by 'format_id'.
# NOTE(review): the audio_formats list-openers, the emptiness guards and the
# final return of the filtered match are elided from this view.
644 def select_format(self, format_spec, available_formats):
645 if format_spec == 'best' or format_spec is None:
646 return available_formats[-1]
647 elif format_spec == 'worst':
648 return available_formats[0]
649 elif format_spec == 'bestaudio':
651 f for f in available_formats
652 if f.get('vcodec') == 'none']
654 return audio_formats[-1]
655 elif format_spec == 'worstaudio':
657 f for f in available_formats
658 if f.get('vcodec') == 'none']
660 return audio_formats[0]
662 extensions = ['mp4', 'flv', 'webm', '3gp']
663 if format_spec in extensions:
664 filter_f = lambda f: f['ext'] == format_spec
666 filter_f = lambda f: f['format_id'] == format_spec
667 matches = list(filter(filter_f, available_formats))
# Normalize a concrete video result (fill in playlist keys, format_id,
# format description, ext), apply format_limit, select the requested
# format(s), and call process_info once per format to download.
# NOTE(review): elided copy -- several `else:`/`return` lines, closing
# parentheses, the merged-format dict header and the ExtractorError
# expected=True argument are missing from this view.
672 def process_video_result(self, info_dict, download=True):
673 assert info_dict.get('_type', 'video') == 'video'
675 if 'playlist' not in info_dict:
676 # It isn't part of a playlist
677 info_dict['playlist'] = None
678 info_dict['playlist_index'] = None
680 # This extractors handle format selection themselves
681 if info_dict['extractor'] in ['Youku']:
683 self.process_info(info_dict)
686 # We now pick which formats have to be downloaded
687 if info_dict.get('formats') is None:
688 # There's only one format available
689 formats = [info_dict]
691 formats = info_dict['formats']
693 # We check that all the formats have the format and format_id fields
694 for (i, format) in enumerate(formats):
695 if format.get('format_id') is None:
696 format['format_id'] = compat_str(i)
697 if format.get('format') is None:
698 format['format'] = '{id} - {res}{note}'.format(
699 id=format['format_id'],
700 res=self.format_resolution(format),
701 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
703 # Automatically determine file extension if missing
704 if 'ext' not in format:
705 format['ext'] = determine_ext(format['url'])
# Drop formats above the user-specified quality ceiling (inclusive).
707 format_limit = self.params.get('format_limit', None)
709 formats = list(takewhile_inclusive(
710 lambda f: f['format_id'] != format_limit, formats
713 # TODO Central sorting goes here
715 if formats[0] is not info_dict:
716 # only set the 'formats' fields if the original info_dict list them
717 # otherwise we end up with a circular reference, the first (and unique)
718 # element in the 'formats' field in info_dict is info_dict itself,
719 # wich can't be exported to json
720 info_dict['formats'] = formats
721 if self.params.get('listformats', None):
722 self.list_formats(info_dict)
725 req_format = self.params.get('format')
726 if req_format is None:
728 formats_to_download = []
729 # The -1 is for supporting YoutubeIE
730 if req_format in ('-1', 'all'):
731 formats_to_download = formats
733 # We can accept formats requested in the format: 34/5/best, we pick
734 # the first that is available, starting from left
735 req_formats = req_format.split('/')
736 for rf in req_formats:
737 if re.match(r'.+?\+.+?', rf) is not None:
738 # Two formats have been requested like '137+139'
739 format_1, format_2 = rf.split('+')
740 formats_info = (self.select_format(format_1, formats),
741 self.select_format(format_2, formats))
742 if all(formats_info):
# Both halves resolved: mark for merging (see FFmpegMergerPP usage
# in process_info).
744 'requested_formats': formats_info,
746 'ext': formats_info[0]['ext'],
749 selected_format = None
751 selected_format = self.select_format(rf, formats)
752 if selected_format is not None:
753 formats_to_download = [selected_format]
755 if not formats_to_download:
756 raise ExtractorError('requested format not available',
760 if len(formats_to_download) > 1:
761 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
762 for format in formats_to_download:
763 new_info = dict(info_dict)
764 new_info.update(format)
765 self.process_info(new_info)
766 # We update the info dict with the best quality format (backwards compatibility)
767 info_dict.update(formats_to_download[-1])
# Process one fully-resolved video result: enforce filters and
# max_downloads, honour the force* print options, then (unless simulating)
# create directories and write the description/annotations/subtitles/
# info-json/thumbnail side files, download the media (merging multi-format
# requests via FFmpegMergerPP), and run post-processing / archive recording.
# NOTE(review): elided copy -- many `return`/`else:`/`try:` lines, the
# makedirs call, the subtitle write, and parts of the download section are
# missing from this view; code kept byte-identical pending upstream check.
770 def process_info(self, info_dict):
771 """Process a single resolved IE result."""
773 assert info_dict.get('_type', 'video') == 'video'
774 #We increment the download the download count here to match the previous behaviour.
775 self.increment_downloads()
# Titles longer than 200 chars are truncated with an ellipsis.
777 info_dict['fulltitle'] = info_dict['title']
778 if len(info_dict['title']) > 200:
779 info_dict['title'] = info_dict['title'][:197] + '...'
781 # Keep for backwards compatibility
782 info_dict['stitle'] = info_dict['title']
784 if not 'format' in info_dict:
785 info_dict['format'] = info_dict['ext']
787 reason = self._match_entry(info_dict)
788 if reason is not None:
789 self.to_screen('[download] ' + reason)
792 max_downloads = self.params.get('max_downloads')
793 if max_downloads is not None:
794 if self._num_downloads > int(max_downloads):
795 raise MaxDownloadsReached()
797 filename = self.prepare_filename(info_dict)
# --force-* options: print the requested fields to stdout.
800 if self.params.get('forcetitle', False):
801 self.to_stdout(info_dict['fulltitle'])
802 if self.params.get('forceid', False):
803 self.to_stdout(info_dict['id'])
804 if self.params.get('forceurl', False):
805 # For RTMP URLs, also include the playpath
806 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
807 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
808 self.to_stdout(info_dict['thumbnail'])
809 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
810 self.to_stdout(info_dict['description'])
811 if self.params.get('forcefilename', False) and filename is not None:
812 self.to_stdout(filename)
813 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
814 self.to_stdout(formatSeconds(info_dict['duration']))
815 if self.params.get('forceformat', False):
816 self.to_stdout(info_dict['format'])
817 if self.params.get('forcejson', False):
818 info_dict['_filename'] = filename
819 self.to_stdout(json.dumps(info_dict))
821 # Do nothing else if in simulate mode
822 if self.params.get('simulate', False):
# Ensure the target directory exists before writing any files.
829 dn = os.path.dirname(encodeFilename(filename))
830 if dn != '' and not os.path.exists(dn):
832 except (OSError, IOError) as err:
833 self.report_error('unable to create directory ' + compat_str(err))
836 if self.params.get('writedescription', False):
837 descfn = filename + '.description'
838 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
839 self.to_screen('[info] Video description is already present')
842 self.to_screen('[info] Writing video description to: ' + descfn)
843 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
844 descfile.write(info_dict['description'])
# KeyError/TypeError: no usable 'description' value -- warn, don't fail.
845 except (KeyError, TypeError):
846 self.report_warning('There\'s no description to write.')
847 except (OSError, IOError):
848 self.report_error('Cannot write description file ' + descfn)
851 if self.params.get('writeannotations', False):
852 annofn = filename + '.annotations.xml'
853 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
854 self.to_screen('[info] Video annotations are already present')
857 self.to_screen('[info] Writing video annotations to: ' + annofn)
858 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
859 annofile.write(info_dict['annotations'])
860 except (KeyError, TypeError):
861 self.report_warning('There are no annotations to write.')
862 except (OSError, IOError):
863 self.report_error('Cannot write annotations file: ' + annofn)
866 subtitles_are_requested = any([self.params.get('writesubtitles', False),
867 self.params.get('writeautomaticsub')])
869 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
870 # subtitles download errors are already managed as troubles in relevant IE
871 # that way it will silently go on when used with unsupporting IE
872 subtitles = info_dict['subtitles']
873 sub_format = self.params.get('subtitlesformat', 'srt')
874 for sub_lang in subtitles.keys():
875 sub = subtitles[sub_lang]
879 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
880 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
881 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
883 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
884 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
886 except (OSError, IOError):
887 self.report_error('Cannot write subtitles file ' + descfn)
890 if self.params.get('writeinfojson', False):
891 infofn = os.path.splitext(filename)[0] + '.info.json'
892 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
893 self.to_screen('[info] Video description metadata is already present')
895 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
897 write_json_file(info_dict, encodeFilename(infofn))
898 except (OSError, IOError):
899 self.report_error('Cannot write metadata to JSON file ' + infofn)
902 if self.params.get('writethumbnail', False):
903 if info_dict.get('thumbnail') is not None:
904 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
905 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
906 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
907 self.to_screen('[%s] %s: Thumbnail is already present' %
908 (info_dict['extractor'], info_dict['id']))
910 self.to_screen('[%s] %s: Downloading thumbnail ...' %
911 (info_dict['extractor'], info_dict['id']))
913 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
914 with open(thumb_filename, 'wb') as thumbf:
915 shutil.copyfileobj(uf, thumbf)
916 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
917 (info_dict['extractor'], info_dict['id'], thumb_filename))
918 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
919 self.report_warning('Unable to download thumbnail "%s": %s' %
920 (info_dict['thumbnail'], compat_str(err)))
922 if not self.params.get('skip_download', False):
923 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
# Local helper: pick a suitable FD for this info dict and run it with
# the registered progress hooks attached.
928 fd = get_suitable_downloader(info)(self, self.params)
929 for ph in self._progress_hooks:
930 fd.add_progress_hook(ph)
931 return fd.download(name, info)
932 if info_dict.get('requested_formats') is not None:
# Multi-format request ('137+139'): download each part to its own
# f<format_id> file and queue FFmpegMergerPP to merge them.
935 merger = FFmpegMergerPP(self)
936 if not merger._get_executable():
938 self.report_warning('You have requested multiple '
939 'formats but ffmpeg or avconv are not installed.'
940 ' The formats won\'t be merged')
942 postprocessors = [merger]
943 for f in info_dict['requested_formats']:
944 new_info = dict(info_dict)
946 fname = self.prepare_filename(new_info)
947 fname = prepend_extension(fname, 'f%s' % f['format_id'])
948 downloaded.append(fname)
949 partial_success = dl(fname, new_info)
950 success = success and partial_success
951 info_dict['__postprocessors'] = postprocessors
952 info_dict['__files_to_merge'] = downloaded
955 success = dl(filename, info_dict)
956 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
957 self.report_error('unable to download video data: %s' % str(err))
959 except (OSError, IOError) as err:
960 raise UnavailableVideoError(err)
961 except (ContentTooShortError, ) as err:
962 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
967 self.post_process(filename, info_dict)
968 except (PostProcessingError) as err:
969 self.report_error('postprocessing: %s' % str(err))
972 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download every URL in url_list; return the process retcode."""
    outtmpl = self.params['outtmpl']
    # Refuse to write several distinct videos into one fixed filename.
    if (len(url_list) > 1
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # extract_info also performs the actual download
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download a video from a previously saved .info.json file.

    Falls back to the recorded webpage URL when processing the stored
    info dict fails; returns the process retcode.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors attached to the info dict (``__postprocessors``) run
    before the globally registered ones; afterwards the original file is
    deleted unless some PP (or --keep-video) asked to keep it.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    # An explicit "keep" always wins
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1034 def _make_archive_id(self, info_dict):
1035 # Future-proof against any change in case
1036 # and backwards compatibility with prior versions
1037 extractor = info_dict.get('extractor_key')
1038 if extractor is None:
1039 if 'id' in info_dict:
1040 extractor = info_dict.get('ie_key') # key in a playlist
1041 if extractor is None:
1042 return None # Incomplete video information
1043 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if the video is already listed in --download-archive."""
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the just-downloaded video to the --download-archive file."""
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Prefers an explicit 'resolution' value, then WxH, then Hp / ?xW,
    falling back to *default* when nothing is known.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        return '?x%d' % width
    return default
def list_formats(self, info_dict):
    """Print the table of available formats for the given video."""
    def format_note(fdict):
        # Assemble a short human-readable note (codecs, bitrates, size).
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += '%-5s' % fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # One table row: id, extension, resolution, note.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Formats are assumed sorted worst-first — TODO confirm upstream order.
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """Start an HTTP download; delegates to the configured opener."""
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write debugging information (version, git HEAD, Python version,
    proxy map) when --verbose is enabled; otherwise do nothing."""
    if not self.params.get('verbose'):
        return
    write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        # Best effort: report the git revision when running from a checkout.
        # Popen raises OSError when git is absent and decode() can fail on
        # odd output; the debug header must never break the program.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        pass
    write_string('[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the urllib opener (cookies, proxies, HTTPS handling) used
    for all HTTP requests, store it on self._opener and install it as
    the global default opener."""
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        # An explicit empty --proxy disables proxying entirely.
        proxies = {} if opts_proxy == '' else {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)