2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
49 UnavailableVideoError,
56 from .extractor import get_info_extractor, gen_extractors
57 from .downloader import get_suitable_downloader
58 from .postprocessor import FFmpegMergerPP
59 from .version import __version__
62 class YoutubeDL(object):
YoutubeDL objects are the ones responsible for downloading the
66 actual video file and writing it to disk if the user has requested
67 it, among some other tasks. In most cases there should be one per
program. As, given a video URL, the downloader doesn't know how to
extract all the needed information — a task the InfoExtractors handle —
it has to pass the URL to one of them.
72 For this, YoutubeDL objects have a method that allows
73 InfoExtractors to be registered in a given order. When it is passed
74 a URL, the YoutubeDL object handles it to the first InfoExtractor it
75 finds that reports being able to handle it. The InfoExtractor extracts
76 all the information about the video or videos the URL refers to, and
YoutubeDL processes the extracted information, possibly using a File
Downloader to download the video.
80 YoutubeDL objects accept a lot of parameters. In order not to saturate
81 the object constructor with arguments, it receives a dictionary of
82 options instead. These options are available through the params
83 attribute for the InfoExtractors to use. The YoutubeDL also
84 registers itself as the downloader in charge for the InfoExtractors
85 that are added to it, so this is a "mutual registration".
89 username: Username for authentication purposes.
90 password: Password for authentication purposes.
videopassword:     Password for accessing a video.
92 usenetrc: Use netrc for authentication instead.
93 verbose: Print additional info to stdout.
94 quiet: Do not print messages to stdout.
95 forceurl: Force printing final URL.
96 forcetitle: Force printing title.
97 forceid: Force printing ID.
98 forcethumbnail: Force printing thumbnail URL.
99 forcedescription: Force printing description.
100 forcefilename: Force printing final filename.
101 forceduration: Force printing duration.
102 forcejson: Force printing info_dict as JSON.
103 simulate: Do not download the video files.
104 format: Video format code.
105 format_limit: Highest quality format to try.
106 outtmpl: Template for output names.
107 restrictfilenames: Do not allow "&" and spaces in file names
108 ignoreerrors: Do not stop on download errors.
109 nooverwrites: Prevent overwriting files.
110 playliststart: Playlist item to start at.
111 playlistend: Playlist item to end at.
112 matchtitle: Download only matching titles.
113 rejecttitle: Reject downloads for matching titles.
114 logger: Log messages to a logging.Logger instance.
115 logtostderr: Log messages to stderr instead of stdout.
116 writedescription: Write the video description to a .description file
117 writeinfojson: Write the video description to a .info.json file
118 writeannotations: Write the video annotations to a .annotations.xml file
119 writethumbnail: Write the thumbnail image to a file
120 writesubtitles: Write the video subtitles to a file
121 writeautomaticsub: Write the automatic subtitles to a file
122 allsubtitles: Downloads all the subtitles of the video
123 (requires writesubtitles or writeautomaticsub)
124 listsubtitles: Lists all available subtitles for the video
125 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
126 subtitleslangs: List of languages of the subtitles to download
127 keepvideo: Keep the video file after post-processing
128 daterange: A DateRange object, download only if the upload_date is in the range.
129 skip_download: Skip the actual download of the video file
130 cachedir: Location of the cache files in the filesystem.
131 None to disable filesystem cache.
132 noplaylist: Download single video instead of a playlist if in doubt.
133 age_limit: An integer representing the user's age in years.
134 Unsuitable videos for the given age are skipped.
135 min_views: An integer representing the minimum view count the video
136 must have in order to not be skipped.
137 Videos without view count information are always
138 downloaded. None for no limit.
139 max_views: An integer representing the maximum view count.
140 Videos that are more popular than that are not
142 Videos without view count information are always
143 downloaded. None for no limit.
144 download_archive: File name of a file where all downloads are recorded.
145 Videos already present in the file are not downloaded
147 cookiefile: File name where cookies should be read from and dumped to.
148 nocheckcertificate:Do not verify SSL certificates
149 proxy: URL of the proxy server to use
150 socket_timeout: Time to wait for unresponsive hosts, in seconds
151 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
153 debug_printtraffic:Print out sent and received HTTP traffic
155 The following parameters are not used by YoutubeDL itself, they are used by
157 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
158 noresizebuffer, retries, continuedl, noprogress, consoletitle
    # Class-level defaults; per-instance values are assigned in __init__.
    _download_retcode = None  # process exit code, returned by download()
    _num_downloads = None     # ordinal of downloads so far, feeds %(autonumber)s in prepare_filename()
    def __init__(self, params=None):
        """Create a FileDownloader object with the given options."""
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stdout unless 'logtostderr' is set; the
        # boolean indexes the two-element list.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr

        if params.get('bidi_workaround', False):
            # Route screen output through an external bidi filter process
            # ('bidiv', falling back to 'fribidi') connected via a pty.
            # NOTE(review): this listing elides the try/sp_kwargs scaffolding
            # around the Popen calls — verify against the full file.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            # Read the filtered text back from the pty master side.
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable is tolerated with a warning; other errors
            # are presumably re-raised — TODO confirm against the full file.
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params['restrictfilenames']):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all charactes. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        # %(stitle)s was a sanitized-title template key; kept only as a warning.
        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # NOTE(review): despite the docstring, no ordered-list append is
        # visible in this listing — confirm against the full file.
        # Cache the instance by key for get_info_extractor() lookups, and
        # give the IE a back-reference to this downloader ("mutual registration").
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
230 def get_info_extractor(self, ie_key):
232 Get an instance of an IE with name ie_key, it will try to get one from
233 the _ies list, if there's no instance it will create a new one and add
234 it to the extractor list.
236 ie = self._ies_instances.get(ie_key)
238 ie = get_info_extractor(ie_key)()
239 self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
249 def add_post_processor(self, pp):
250 """Add a PostProcessor object to the end of the chain."""
252 pp.set_downloader(self)
254 def add_progress_hook(self, ph):
255 """Add the progress hook (currently only for the file downloader)"""
256 self._progress_hooks.append(ph)
258 def _bidi_workaround(self, message):
259 if not hasattr(self, '_output_channel'):
262 assert hasattr(self, '_output_process')
263 assert type(message) == type('')
264 line_count = message.count('\n') + 1
265 self._output_process.stdin.write((message + '\n').encode('utf-8'))
266 self._output_process.stdin.flush()
267 res = ''.join(self._output_channel.readline().decode('utf-8')
268 for _ in range(line_count))
269 return res[:-len('\n')]
271 def to_screen(self, message, skip_eol=False):
272 """Print message to stdout if not in quiet mode."""
273 return self.to_stdout(message, skip_eol, check_quiet=True)
275 def to_stdout(self, message, skip_eol=False, check_quiet=False):
276 """Print message to stdout if not in quiet mode."""
277 if self.params.get('logger'):
278 self.params['logger'].debug(message)
279 elif not check_quiet or not self.params.get('quiet', False):
280 message = self._bidi_workaround(message)
281 terminator = ['\n', ''][skip_eol]
282 output = message + terminator
284 write_string(output, self._screen_file)
286 def to_stderr(self, message):
287 """Print message to stderr."""
288 assert type(message) == type('')
289 if self.params.get('logger'):
290 self.params['logger'].error(message)
292 message = self._bidi_workaround(message)
293 output = message + '\n'
294 write_string(output, self._err_file)
296 def to_console_title(self, message):
297 if not self.params.get('consoletitle', False):
299 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
300 # c_wchar_p() might not be necessary if `message` is
301 # already of type unicode()
302 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
303 elif 'TERM' in os.environ:
304 write_string('\033]0;%s\007' % message, self._screen_file)
306 def save_console_title(self):
307 if not self.params.get('consoletitle', False):
309 if 'TERM' in os.environ:
310 # Save the title on stack
311 write_string('\033[22;0t', self._screen_file)
313 def restore_console_title(self):
314 if not self.params.get('consoletitle', False):
316 if 'TERM' in os.environ:
317 # Restore the title from stack
318 write_string('\033[23;0t', self._screen_file)
321 self.save_console_title()
324 def __exit__(self, *args):
325 self.restore_console_title()
327 if self.params.get('cookiefile') is not None:
328 self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # NOTE(review): this listing elides the `tb is None` guard and
            # else-branches around the traceback assembly — confirm against
            # the full file before relying on the flow below.
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # A wrapped extractor exception carries its own exc_info.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                # Called outside an except block: capture the current stack.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped extractor
            # exception's exc_info when one is present.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record failure in the retcode instead of raising.
        self._download_retcode = 1
360 def report_warning(self, message):
362 Print the message to stderr, it will be prefixed with 'WARNING:'
363 If stderr is a tty file the 'WARNING:' will be colored
365 if self._err_file.isatty() and os.name != 'nt':
366 _msg_header = '\033[0;33mWARNING:\033[0m'
368 _msg_header = 'WARNING:'
369 warning_message = '%s %s' % (_msg_header, message)
370 self.to_stderr(warning_message)
372 def report_error(self, message, tb=None):
374 Do the same as trouble, but prefixes the message with 'ERROR:', colored
375 in red if stderr is a tty file.
377 if self._err_file.isatty() and os.name != 'nt':
378 _msg_header = '\033[0;31mERROR:\033[0m'
380 _msg_header = 'ERROR:'
381 error_message = '%s %s' % (_msg_header, message)
382 self.trouble(error_message, tb)
384 def report_file_already_downloaded(self, file_name):
385 """Report file has already been fully downloaded."""
387 self.to_screen('[download] %s has already been downloaded' % file_name)
388 except UnicodeEncodeError:
389 self.to_screen('[download] The file has already been downloaded')
391 def increment_downloads(self):
392 """Increment the ordinal that assigns a number to each file."""
393 self._num_downloads += 1
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # Work on a copy so the caller's info_dict is not polluted with
        # template-only keys.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        # Zero-padded ordinal of this download, e.g. '%05d'.
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']

        # NOTE(review): the sanitize lambda's positional argument and the
        # surrounding try/return lines are elided in this listing — confirm
        # against the full file.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Any key missing from the template dict renders as 'NA'.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # Each filter below returns a human-readable skip reason string;
        # falling through all of them means the entry should be downloaded.
        # NOTE(review): the `if matchtitle:` / `if rejecttitle:` /
        # `if date is not None:` guards and the final `return None` appear to
        # be elided in this listing — confirm against the full file.
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
            # Default DateRange() accepts any date.
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            # Entries without view-count information bypass both bounds.
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            # Entry's own age_limit defaults to 0 (unrestricted).
            if age_limit < info_dict.get('age_limit', 0):
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
462 def add_extra_info(info_dict, extra_info):
463 '''Set the keys from extra_info in info dict if they are missing'''
464 for key, value in extra_info.items():
465 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        # NOTE(review): the signature continuation (a `process` flag is used
        # by process_ie_result below), the ies-iteration loop, `try:` lines
        # and several else/continue branches are elided in this listing —
        # confirm against the full file.
        # With an explicit ie_key, only that extractor is consulted.
            ies = [self.get_info_extractor(ie_key)]

            if not ie.suitable(url):

                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                        '_type': 'compat_list',
                        'entries': ie_result,
                # Record which extractor produced the result and where it
                # came from, for later template/archive use.
                self.add_extra_info(ie_result,
                    'extractor': ie.IE_NAME,
                    'webpage_url_basename': url_basename(url),
                    'extractor_key': ie.ie_key(),
                return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
            except Exception as e:
                # Unexpected errors abort extraction unless ignoreerrors is set.
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        # Reached when no registered extractor claimed the URL.
        self.report_error('no suitable InfoExtractor: %s' % url)
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # Dispatch on the result '_type'; several branches recurse back into
        # this method or into extract_info until a plain video is reached.
        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Overlay selected fields from the embedded page onto a copy
                # of the original result.
                # NOTE(review): the `return new_result` line appears to be
                # elided in this listing — confirm against the full file.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]

            new_result = make_result(info)

            # A url_transparent result must not resolve to another one.
            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # Apply the playliststart/playlistend window (1-based options).
            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)

                "[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                # Extra info propagated to every playlist entry.
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                # Skip entries rejected by the user's filters.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            # Old-style list results: tag each entry, then process it.
                self.add_extra_info(r,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            raise Exception('Invalid result type: %s' % result_type)
628 def select_format(self, format_spec, available_formats):
629 if format_spec == 'best' or format_spec is None:
630 return available_formats[-1]
631 elif format_spec == 'worst':
632 return available_formats[0]
634 extensions = ['mp4', 'flv', 'webm', '3gp']
635 if format_spec in extensions:
636 filter_f = lambda f: f['ext'] == format_spec
638 filter_f = lambda f: f['format_id'] == format_spec
639 matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        """Pick the format(s) to download for a resolved video result and
        hand each one to process_info()."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
                self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                # Fall back to the list position as an id.
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        # Truncate the list after the format named by 'format_limit'.
        format_limit = self.params.get('format_limit', None)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format', 'best')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # Merged download: both halves recorded for the
                        # FFmpeg merge step in process_info().
                            'requested_formats': formats_info,
                            'ext': formats_info[0]['ext'],
                        selected_format = None
                    selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download = [selected_format]
        if not formats_to_download:
            raise ExtractorError('requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            # Each selected format gets its own merged copy of the info dict.
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_info(self, info_dict):
        """Process a single resolved IE result."""

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        # Keep the untruncated title around; cap the working title at 200 chars.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if not 'format' in info_dict:
            info_dict['format'] = info_dict['ext']

        # User filters (title/date/views/age/archive) may veto the download.
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings: each force* option dumps one field to stdout.
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

        # Ensure the destination directory exists before writing anything.
        # NOTE(review): the surrounding try/makedirs lines are elided in
        # this listing — confirm against the full file.
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing or non-string description is non-fatal.
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    # NOTE(review): 'descfn' here looks like a copy/paste of the
                    # description branch — presumably should be sub_filename.
                    self.report_error('Cannot write subtitles file ' + descfn)

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A failed thumbnail download never aborts the video download.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                # Local helper: run the suitable FileDownloader with all
                # registered progress hooks attached.
                # NOTE(review): the `def dl(name, info):` header line is
                # elided in this listing — confirm against the full file.
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # '137+139'-style request: download every part into its
                    # own f<format_id> file, then merge via FFmpegMergerPP.
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = [FFmpegMergerPP(self)]
                    info_dict['__files_to_merge'] = downloaded
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                # Run the post-processing chain on the downloaded file.
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))

        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        # Multiple URLs with a literal (non-template) output name would all
        # land in the same file, unless max_downloads caps it at one.
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        # NOTE(review): the `for url in url_list:` / `try:` lines appear to
        # be elided in this listing — confirm against the full file.
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using a previously written .info.json file instead of
        re-extracting; falls back to the recorded webpage URL on failure."""
        # NOTE(review): the `info = json.load(f)` / `try:` lines appear to be
        # elided in this listing — confirm against the full file.
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    info['filepath'] = filename
    # Video-specific postprocessors (e.g. the merger attached in
    # process_info via '__postprocessors') run before the globally
    # registered ones in self._pps.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
            # NOTE(review): new_info is returned but not used in the
            # visible code — verify whether later code relies on it.
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                    # This PP explicitly asked to keep the original file.
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    # Delete the original only when some PP voted to drop it and the
    # user did not pass -k/--keep-video.
    if keep_video is False and not self.params.get('keepvideo', False):
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            # Best-effort cleanup: a failed delete is only a warning.
            self.report_warning('Unable to remove downloaded video file')
998 def _make_archive_id(self, info_dict):
999 # Future-proof against any change in case
1000 # and backwards compatibility with prior versions
1001 extractor = info_dict.get('extractor_key')
1002 if extractor is None:
1003 if 'id' in info_dict:
1004 extractor = info_dict.get('ie_key') # key in a playlist
1005 if extractor is None:
1006 return None # Incomplete video information
1007 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Check the download-archive file for this video's archive id."""
    fn = self.params.get('download_archive')
    vid_id = self._make_archive_id(info_dict)
        return False  # Incomplete video information
        # locked_file serializes access against other youtube-dl
        # processes sharing the same archive file.
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet;
        # any other I/O failure is propagated.
        if ioe.errno != errno.ENOENT:
def record_download_archive(self, info_dict):
    """Append this video's archive id to the download-archive file."""
    fn = self.params.get('download_archive')
    vid_id = self._make_archive_id(info_dict)
    # Append mode plus locked_file: safe against concurrent writers.
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Render a short human-readable resolution for a format dict
    ('WxH', 'Hp', '?xW', an explicit 'resolution', or *default*)."""
    if format.get('vcodec') == 'none':
    # An explicit 'resolution' string wins over width/height fields.
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
            res = '%sp' % format['height']  # height only, e.g. '720p'
    elif format.get('width') is not None:
        res = '?x%d' % format['width']  # width only; height unknown
def list_formats(self, info_dict):
    """Print a fixed-width table of the formats available for a video."""

    def format_note(fdict):
        # Short free-text summary of one format dict (codecs, bitrates,
        # filesize, extractor-provided note).
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += '%-5s@' % fdict['vcodec']
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])

    def line(format, idlen=20):
        # One table row; idlen sizes the format-id column.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            self.format_resolution(format),
            format_note(format),

    # Without a 'formats' list the info_dict itself describes the only
    # available format.
    formats = info_dict.get('formats', [info_dict])
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Formats are ordered worst-to-best; tag the two extremes.
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download """
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write diagnostic details (version, git commit, Python/platform,
    proxy map) when the 'verbose' option is enabled."""
    if not self.params.get('verbose'):
    write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best-effort: report the exact git commit when running from a
        # source checkout; failures here are non-fatal.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        # Only print when the output actually looks like a hex revision.
        if re.match('[0-9a-f]+', out):
            write_string('[debug] Git HEAD: ' + out + '\n')
    write_string('[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + '\n')
    # Collect the effective proxy settings from the installed opener's
    # handlers (see _setup_opener).
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the urllib opener (cookies, proxies, HTTPS handling) used
    for all HTTP requests and install it, plus the socket timeout."""
    timeout_val = self.params.get('socket_timeout')
    # Default to a 10-minute socket timeout when none was requested.
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        # No cookie file: keep cookies in memory only.
        self.cookiejar = compat_cookiejar.CookieJar()
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
        # Load existing cookies only when the file is readable.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
    if opts_proxy is not None:
        if opts_proxy == '':
            # An explicit --proxy applies to both http and https.
            proxies = {'http': opts_proxy, 'https': opts_proxy}
        # No --proxy: honor the environment's proxy settings.
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    # debug_printtraffic turns on http.client wire-level tracing.
    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)