2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
50 UnavailableVideoError,
57 from .extractor import get_info_extractor, gen_extractors
58 from .downloader import get_suitable_downloader
59 from .postprocessor import FFmpegMergerPP
60 from .version import __version__
63 class YoutubeDL(object):
    YoutubeDL objects are the ones responsible for downloading the
67 actual video file and writing it to disk if the user has requested
68 it, among some other tasks. In most cases there should be one per
69 program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
71 has to pass the URL to one of them.
73 For this, YoutubeDL objects have a method that allows
74 InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
76 finds that reports being able to handle it. The InfoExtractor extracts
77 all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
79 Downloader to download the video.
81 YoutubeDL objects accept a lot of parameters. In order not to saturate
82 the object constructor with arguments, it receives a dictionary of
83 options instead. These options are available through the params
84 attribute for the InfoExtractors to use. The YoutubeDL also
85 registers itself as the downloader in charge for the InfoExtractors
86 that are added to it, so this is a "mutual registration".
90 username: Username for authentication purposes.
91 password: Password for authentication purposes.
    videopassword:     Password for accessing a video.
93 usenetrc: Use netrc for authentication instead.
94 verbose: Print additional info to stdout.
95 quiet: Do not print messages to stdout.
96 forceurl: Force printing final URL.
97 forcetitle: Force printing title.
98 forceid: Force printing ID.
99 forcethumbnail: Force printing thumbnail URL.
100 forcedescription: Force printing description.
101 forcefilename: Force printing final filename.
102 forceduration: Force printing duration.
103 forcejson: Force printing info_dict as JSON.
104 simulate: Do not download the video files.
105 format: Video format code.
106 format_limit: Highest quality format to try.
107 outtmpl: Template for output names.
108 restrictfilenames: Do not allow "&" and spaces in file names
109 ignoreerrors: Do not stop on download errors.
110 nooverwrites: Prevent overwriting files.
111 playliststart: Playlist item to start at.
112 playlistend: Playlist item to end at.
113 matchtitle: Download only matching titles.
114 rejecttitle: Reject downloads for matching titles.
115 logger: Log messages to a logging.Logger instance.
116 logtostderr: Log messages to stderr instead of stdout.
117 writedescription: Write the video description to a .description file
118 writeinfojson: Write the video description to a .info.json file
119 writeannotations: Write the video annotations to a .annotations.xml file
120 writethumbnail: Write the thumbnail image to a file
121 writesubtitles: Write the video subtitles to a file
122 writeautomaticsub: Write the automatic subtitles to a file
123 allsubtitles: Downloads all the subtitles of the video
124 (requires writesubtitles or writeautomaticsub)
125 listsubtitles: Lists all available subtitles for the video
126 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
127 subtitleslangs: List of languages of the subtitles to download
128 keepvideo: Keep the video file after post-processing
129 daterange: A DateRange object, download only if the upload_date is in the range.
130 skip_download: Skip the actual download of the video file
131 cachedir: Location of the cache files in the filesystem.
132 None to disable filesystem cache.
133 noplaylist: Download single video instead of a playlist if in doubt.
134 age_limit: An integer representing the user's age in years.
135 Unsuitable videos for the given age are skipped.
136 min_views: An integer representing the minimum view count the video
137 must have in order to not be skipped.
138 Videos without view count information are always
139 downloaded. None for no limit.
140 max_views: An integer representing the maximum view count.
141 Videos that are more popular than that are not
143 Videos without view count information are always
144 downloaded. None for no limit.
145 download_archive: File name of a file where all downloads are recorded.
146 Videos already present in the file are not downloaded
148 cookiefile: File name where cookies should be read from and dumped to.
149 nocheckcertificate:Do not verify SSL certificates
150 proxy: URL of the proxy server to use
151 socket_timeout: Time to wait for unresponsive hosts, in seconds
152 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
154 debug_printtraffic:Print out sent and received HTTP traffic
155 include_ads: Download ads as well
156 default_search: Prepend this string if an input url is not valid.
157 'auto' for elaborate guessing
159 The following parameters are not used by YoutubeDL itself, they are used by
161 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
162 noresizebuffer, retries, continuedl, noprogress, consoletitle
164 The following options are used by the post processors:
165 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
166 otherwise prefer avconv.
172 _download_retcode = None
173 _num_downloads = None
176 def __init__(self, params=None):
177 """Create a FileDownloader object with the given options."""
181 self._ies_instances = {}
183 self._progress_hooks = []
184 self._download_retcode = 0
185 self._num_downloads = 0
186 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
187 self._err_file = sys.stderr
190 if params.get('bidi_workaround', False):
193 master, slave = pty.openpty()
194 width = get_term_width()
198 width_args = ['-w', str(width)]
200 stdin=subprocess.PIPE,
202 stderr=self._err_file)
204 self._output_process = subprocess.Popen(
205 ['bidiv'] + width_args, **sp_kwargs
208 self._output_process = subprocess.Popen(
209 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
210 self._output_channel = os.fdopen(master, 'rb')
211 except OSError as ose:
213 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
217 if (sys.version_info >= (3,) and sys.platform != 'win32' and
218 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
219 and not params['restrictfilenames']):
220 # On Python 3, the Unicode filesystem API will throw errors (#1474)
222 'Assuming --restrict-filenames since file system encoding '
223 'cannot encode all charactes. '
224 'Set the LC_ALL environment variable to fix this.')
225 self.params['restrictfilenames'] = True
227 if '%(stitle)s' in self.params.get('outtmpl', ''):
228 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
232 def add_info_extractor(self, ie):
233 """Add an InfoExtractor object to the end of the list."""
235 self._ies_instances[ie.ie_key()] = ie
236 ie.set_downloader(self)
238 def get_info_extractor(self, ie_key):
240 Get an instance of an IE with name ie_key, it will try to get one from
241 the _ies list, if there's no instance it will create a new one and add
242 it to the extractor list.
244 ie = self._ies_instances.get(ie_key)
246 ie = get_info_extractor(ie_key)()
247 self.add_info_extractor(ie)
250 def add_default_info_extractors(self):
252 Add the InfoExtractors returned by gen_extractors to the end of the list
254 for ie in gen_extractors():
255 self.add_info_extractor(ie)
257 def add_post_processor(self, pp):
258 """Add a PostProcessor object to the end of the chain."""
260 pp.set_downloader(self)
262 def add_progress_hook(self, ph):
263 """Add the progress hook (currently only for the file downloader)"""
264 self._progress_hooks.append(ph)
266 def _bidi_workaround(self, message):
267 if not hasattr(self, '_output_channel'):
270 assert hasattr(self, '_output_process')
271 assert type(message) == type('')
272 line_count = message.count('\n') + 1
273 self._output_process.stdin.write((message + '\n').encode('utf-8'))
274 self._output_process.stdin.flush()
275 res = ''.join(self._output_channel.readline().decode('utf-8')
276 for _ in range(line_count))
277 return res[:-len('\n')]
279 def to_screen(self, message, skip_eol=False):
280 """Print message to stdout if not in quiet mode."""
281 return self.to_stdout(message, skip_eol, check_quiet=True)
283 def to_stdout(self, message, skip_eol=False, check_quiet=False):
284 """Print message to stdout if not in quiet mode."""
285 if self.params.get('logger'):
286 self.params['logger'].debug(message)
287 elif not check_quiet or not self.params.get('quiet', False):
288 message = self._bidi_workaround(message)
289 terminator = ['\n', ''][skip_eol]
290 output = message + terminator
292 write_string(output, self._screen_file)
294 def to_stderr(self, message):
295 """Print message to stderr."""
296 assert type(message) == type('')
297 if self.params.get('logger'):
298 self.params['logger'].error(message)
300 message = self._bidi_workaround(message)
301 output = message + '\n'
302 write_string(output, self._err_file)
304 def to_console_title(self, message):
305 if not self.params.get('consoletitle', False):
307 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
308 # c_wchar_p() might not be necessary if `message` is
309 # already of type unicode()
310 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
311 elif 'TERM' in os.environ:
312 write_string('\033]0;%s\007' % message, self._screen_file)
314 def save_console_title(self):
315 if not self.params.get('consoletitle', False):
317 if 'TERM' in os.environ:
318 # Save the title on stack
319 write_string('\033[22;0t', self._screen_file)
321 def restore_console_title(self):
322 if not self.params.get('consoletitle', False):
324 if 'TERM' in os.environ:
325 # Restore the title from stack
326 write_string('\033[23;0t', self._screen_file)
329 self.save_console_title()
332 def __exit__(self, *args):
333 self.restore_console_title()
335 if self.params.get('cookiefile') is not None:
336 self.cookiejar.save()
338 def trouble(self, message=None, tb=None):
339 """Determine action to take when a download problem appears.
341 Depending on if the downloader has been configured to ignore
342 download errors or not, this method may throw an exception or
343 not when errors are found, after printing the message.
345 tb, if given, is additional traceback information.
347 if message is not None:
348 self.to_stderr(message)
349 if self.params.get('verbose'):
351 if sys.exc_info()[0]: # if .trouble has been called from an except block
353 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
354 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
355 tb += compat_str(traceback.format_exc())
357 tb_data = traceback.format_list(traceback.extract_stack())
358 tb = ''.join(tb_data)
360 if not self.params.get('ignoreerrors', False):
361 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
362 exc_info = sys.exc_info()[1].exc_info
364 exc_info = sys.exc_info()
365 raise DownloadError(message, exc_info)
366 self._download_retcode = 1
368 def report_warning(self, message):
370 Print the message to stderr, it will be prefixed with 'WARNING:'
371 If stderr is a tty file the 'WARNING:' will be colored
373 if self._err_file.isatty() and os.name != 'nt':
374 _msg_header = '\033[0;33mWARNING:\033[0m'
376 _msg_header = 'WARNING:'
377 warning_message = '%s %s' % (_msg_header, message)
378 self.to_stderr(warning_message)
380 def report_error(self, message, tb=None):
382 Do the same as trouble, but prefixes the message with 'ERROR:', colored
383 in red if stderr is a tty file.
385 if self._err_file.isatty() and os.name != 'nt':
386 _msg_header = '\033[0;31mERROR:\033[0m'
388 _msg_header = 'ERROR:'
389 error_message = '%s %s' % (_msg_header, message)
390 self.trouble(error_message, tb)
392 def report_file_already_downloaded(self, file_name):
393 """Report file has already been fully downloaded."""
395 self.to_screen('[download] %s has already been downloaded' % file_name)
396 except UnicodeEncodeError:
397 self.to_screen('[download] The file has already been downloaded')
399 def increment_downloads(self):
400 """Increment the ordinal that assigns a number to each file."""
401 self._num_downloads += 1
403 def prepare_filename(self, info_dict):
404 """Generate the output filename."""
406 template_dict = dict(info_dict)
408 template_dict['epoch'] = int(time.time())
409 autonumber_size = self.params.get('autonumber_size')
410 if autonumber_size is None:
412 autonumber_templ = '%0' + str(autonumber_size) + 'd'
413 template_dict['autonumber'] = autonumber_templ % self._num_downloads
414 if template_dict.get('playlist_index') is not None:
415 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
417 sanitize = lambda k, v: sanitize_filename(
419 restricted=self.params.get('restrictfilenames'),
421 template_dict = dict((k, sanitize(k, v))
422 for k, v in template_dict.items()
424 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
426 tmpl = os.path.expanduser(self.params['outtmpl'])
427 filename = tmpl % template_dict
429 except ValueError as err:
430 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
433 def _match_entry(self, info_dict):
434 """ Returns None iff the file should be downloaded """
436 video_title = info_dict.get('title', info_dict.get('id', 'video'))
437 if 'title' in info_dict:
438 # This can happen when we're just evaluating the playlist
439 title = info_dict['title']
440 matchtitle = self.params.get('matchtitle', False)
442 if not re.search(matchtitle, title, re.IGNORECASE):
443 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
444 rejecttitle = self.params.get('rejecttitle', False)
446 if re.search(rejecttitle, title, re.IGNORECASE):
447 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
448 date = info_dict.get('upload_date', None)
450 dateRange = self.params.get('daterange', DateRange())
451 if date not in dateRange:
452 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
453 view_count = info_dict.get('view_count', None)
454 if view_count is not None:
455 min_views = self.params.get('min_views')
456 if min_views is not None and view_count < min_views:
457 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
458 max_views = self.params.get('max_views')
459 if max_views is not None and view_count > max_views:
460 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
461 age_limit = self.params.get('age_limit')
462 if age_limit is not None:
463 if age_limit < info_dict.get('age_limit', 0):
464 return 'Skipping "' + title + '" because it is age restricted'
465 if self.in_download_archive(info_dict):
466 return '%s has already been recorded in archive' % video_title
470 def add_extra_info(info_dict, extra_info):
471 '''Set the keys from extra_info in info dict if they are missing'''
472 for key, value in extra_info.items():
473 info_dict.setdefault(key, value)
475 def extract_info(self, url, download=True, ie_key=None, extra_info={},
478 Returns a list with a dictionary for each video we find.
479 If 'download', also downloads the videos.
480 extra_info is a dict containing the extra values to add to each result
484 ies = [self.get_info_extractor(ie_key)]
489 if not ie.suitable(url):
493 self.report_warning('The program functionality for this site has been marked as broken, '
494 'and will probably not work.')
497 ie_result = ie.extract(url)
498 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
500 if isinstance(ie_result, list):
501 # Backwards compatibility: old IE result format
503 '_type': 'compat_list',
504 'entries': ie_result,
506 self.add_extra_info(ie_result,
508 'extractor': ie.IE_NAME,
510 'webpage_url_basename': url_basename(url),
511 'extractor_key': ie.ie_key(),
514 return self.process_ie_result(ie_result, download, extra_info)
517 except ExtractorError as de: # An error we somewhat expected
518 self.report_error(compat_str(de), de.format_traceback())
520 except MaxDownloadsReached:
522 except Exception as e:
523 if self.params.get('ignoreerrors', False):
524 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
529 self.report_error('no suitable InfoExtractor: %s' % url)
531 def process_ie_result(self, ie_result, download=True, extra_info={}):
533 Take the result of the ie(may be modified) and resolve all unresolved
534 references (URLs, playlist items).
536 It will also download the videos if 'download'.
537 Returns the resolved ie_result.
540 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
541 if result_type == 'video':
542 self.add_extra_info(ie_result, extra_info)
543 return self.process_video_result(ie_result, download=download)
544 elif result_type == 'url':
545 # We have to add extra_info to the results because it may be
546 # contained in a playlist
547 return self.extract_info(ie_result['url'],
549 ie_key=ie_result.get('ie_key'),
550 extra_info=extra_info)
551 elif result_type == 'url_transparent':
552 # Use the information from the embedding page
553 info = self.extract_info(
554 ie_result['url'], ie_key=ie_result.get('ie_key'),
555 extra_info=extra_info, download=False, process=False)
557 def make_result(embedded_info):
558 new_result = ie_result.copy()
559 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
560 'entries', 'ie_key', 'duration',
561 'subtitles', 'annotations', 'format',
562 'thumbnail', 'thumbnails'):
565 if f in embedded_info:
566 new_result[f] = embedded_info[f]
568 new_result = make_result(info)
570 assert new_result.get('_type') != 'url_transparent'
571 if new_result.get('_type') == 'compat_list':
572 new_result['entries'] = [
573 make_result(e) for e in new_result['entries']]
575 return self.process_ie_result(
576 new_result, download=download, extra_info=extra_info)
577 elif result_type == 'playlist':
578 # We process each entry in the playlist
579 playlist = ie_result.get('title', None) or ie_result.get('id', None)
580 self.to_screen('[download] Downloading playlist: %s' % playlist)
582 playlist_results = []
584 playliststart = self.params.get('playliststart', 1) - 1
585 playlistend = self.params.get('playlistend', None)
586 # For backwards compatibility, interpret -1 as whole list
587 if playlistend == -1:
590 if isinstance(ie_result['entries'], list):
591 n_all_entries = len(ie_result['entries'])
592 entries = ie_result['entries'][playliststart:playlistend]
593 n_entries = len(entries)
595 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
596 (ie_result['extractor'], playlist, n_all_entries, n_entries))
598 assert isinstance(ie_result['entries'], PagedList)
599 entries = ie_result['entries'].getslice(
600 playliststart, playlistend)
601 n_entries = len(entries)
603 "[%s] playlist %s: Downloading %d videos" %
604 (ie_result['extractor'], playlist, n_entries))
606 for i, entry in enumerate(entries, 1):
607 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
609 'playlist': playlist,
610 'playlist_index': i + playliststart,
611 'extractor': ie_result['extractor'],
612 'webpage_url': ie_result['webpage_url'],
613 'webpage_url_basename': url_basename(ie_result['webpage_url']),
614 'extractor_key': ie_result['extractor_key'],
617 reason = self._match_entry(entry)
618 if reason is not None:
619 self.to_screen('[download] ' + reason)
622 entry_result = self.process_ie_result(entry,
625 playlist_results.append(entry_result)
626 ie_result['entries'] = playlist_results
628 elif result_type == 'compat_list':
630 self.add_extra_info(r,
632 'extractor': ie_result['extractor'],
633 'webpage_url': ie_result['webpage_url'],
634 'webpage_url_basename': url_basename(ie_result['webpage_url']),
635 'extractor_key': ie_result['extractor_key'],
638 ie_result['entries'] = [
639 self.process_ie_result(_fixup(r), download, extra_info)
640 for r in ie_result['entries']
644 raise Exception('Invalid result type: %s' % result_type)
646 def select_format(self, format_spec, available_formats):
647 if format_spec == 'best' or format_spec is None:
648 return available_formats[-1]
649 elif format_spec == 'worst':
650 return available_formats[0]
651 elif format_spec == 'bestaudio':
653 f for f in available_formats
654 if f.get('vcodec') == 'none']
656 return audio_formats[-1]
657 elif format_spec == 'worstaudio':
659 f for f in available_formats
660 if f.get('vcodec') == 'none']
662 return audio_formats[0]
664 extensions = ['mp4', 'flv', 'webm', '3gp']
665 if format_spec in extensions:
666 filter_f = lambda f: f['ext'] == format_spec
668 filter_f = lambda f: f['format_id'] == format_spec
669 matches = list(filter(filter_f, available_formats))
674 def process_video_result(self, info_dict, download=True):
675 assert info_dict.get('_type', 'video') == 'video'
677 if 'playlist' not in info_dict:
678 # It isn't part of a playlist
679 info_dict['playlist'] = None
680 info_dict['playlist_index'] = None
682 # This extractors handle format selection themselves
683 if info_dict['extractor'] in ['Youku']:
685 self.process_info(info_dict)
688 # We now pick which formats have to be downloaded
689 if info_dict.get('formats') is None:
690 # There's only one format available
691 formats = [info_dict]
693 formats = info_dict['formats']
695 # We check that all the formats have the format and format_id fields
696 for (i, format) in enumerate(formats):
697 if format.get('format_id') is None:
698 format['format_id'] = compat_str(i)
699 if format.get('format') is None:
700 format['format'] = '{id} - {res}{note}'.format(
701 id=format['format_id'],
702 res=self.format_resolution(format),
703 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
705 # Automatically determine file extension if missing
706 if 'ext' not in format:
707 format['ext'] = determine_ext(format['url'])
709 format_limit = self.params.get('format_limit', None)
711 formats = list(takewhile_inclusive(
712 lambda f: f['format_id'] != format_limit, formats
715 # TODO Central sorting goes here
717 if formats[0] is not info_dict:
718 # only set the 'formats' fields if the original info_dict list them
719 # otherwise we end up with a circular reference, the first (and unique)
720 # element in the 'formats' field in info_dict is info_dict itself,
721 # wich can't be exported to json
722 info_dict['formats'] = formats
723 if self.params.get('listformats', None):
724 self.list_formats(info_dict)
727 req_format = self.params.get('format')
728 if req_format is None:
730 formats_to_download = []
731 # The -1 is for supporting YoutubeIE
732 if req_format in ('-1', 'all'):
733 formats_to_download = formats
735 # We can accept formats requested in the format: 34/5/best, we pick
736 # the first that is available, starting from left
737 req_formats = req_format.split('/')
738 for rf in req_formats:
739 if re.match(r'.+?\+.+?', rf) is not None:
740 # Two formats have been requested like '137+139'
741 format_1, format_2 = rf.split('+')
742 formats_info = (self.select_format(format_1, formats),
743 self.select_format(format_2, formats))
744 if all(formats_info):
746 'requested_formats': formats_info,
748 'ext': formats_info[0]['ext'],
751 selected_format = None
753 selected_format = self.select_format(rf, formats)
754 if selected_format is not None:
755 formats_to_download = [selected_format]
757 if not formats_to_download:
758 raise ExtractorError('requested format not available',
762 if len(formats_to_download) > 1:
763 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
764 for format in formats_to_download:
765 new_info = dict(info_dict)
766 new_info.update(format)
767 self.process_info(new_info)
768 # We update the info dict with the best quality format (backwards compatibility)
769 info_dict.update(formats_to_download[-1])
772 def process_info(self, info_dict):
773 """Process a single resolved IE result."""
775 assert info_dict.get('_type', 'video') == 'video'
776 #We increment the download the download count here to match the previous behaviour.
777 self.increment_downloads()
779 info_dict['fulltitle'] = info_dict['title']
780 if len(info_dict['title']) > 200:
781 info_dict['title'] = info_dict['title'][:197] + '...'
783 # Keep for backwards compatibility
784 info_dict['stitle'] = info_dict['title']
786 if not 'format' in info_dict:
787 info_dict['format'] = info_dict['ext']
789 reason = self._match_entry(info_dict)
790 if reason is not None:
791 self.to_screen('[download] ' + reason)
794 max_downloads = self.params.get('max_downloads')
795 if max_downloads is not None:
796 if self._num_downloads > int(max_downloads):
797 raise MaxDownloadsReached()
799 filename = self.prepare_filename(info_dict)
802 if self.params.get('forcetitle', False):
803 self.to_stdout(info_dict['fulltitle'])
804 if self.params.get('forceid', False):
805 self.to_stdout(info_dict['id'])
806 if self.params.get('forceurl', False):
807 # For RTMP URLs, also include the playpath
808 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
809 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
810 self.to_stdout(info_dict['thumbnail'])
811 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
812 self.to_stdout(info_dict['description'])
813 if self.params.get('forcefilename', False) and filename is not None:
814 self.to_stdout(filename)
815 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
816 self.to_stdout(formatSeconds(info_dict['duration']))
817 if self.params.get('forceformat', False):
818 self.to_stdout(info_dict['format'])
819 if self.params.get('forcejson', False):
820 info_dict['_filename'] = filename
821 self.to_stdout(json.dumps(info_dict))
823 # Do nothing else if in simulate mode
824 if self.params.get('simulate', False):
831 dn = os.path.dirname(encodeFilename(filename))
832 if dn != '' and not os.path.exists(dn):
834 except (OSError, IOError) as err:
835 self.report_error('unable to create directory ' + compat_str(err))
838 if self.params.get('writedescription', False):
839 descfn = filename + '.description'
840 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
841 self.to_screen('[info] Video description is already present')
844 self.to_screen('[info] Writing video description to: ' + descfn)
845 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
846 descfile.write(info_dict['description'])
847 except (KeyError, TypeError):
848 self.report_warning('There\'s no description to write.')
849 except (OSError, IOError):
850 self.report_error('Cannot write description file ' + descfn)
853 if self.params.get('writeannotations', False):
854 annofn = filename + '.annotations.xml'
855 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
856 self.to_screen('[info] Video annotations are already present')
859 self.to_screen('[info] Writing video annotations to: ' + annofn)
860 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
861 annofile.write(info_dict['annotations'])
862 except (KeyError, TypeError):
863 self.report_warning('There are no annotations to write.')
864 except (OSError, IOError):
865 self.report_error('Cannot write annotations file: ' + annofn)
868 subtitles_are_requested = any([self.params.get('writesubtitles', False),
869 self.params.get('writeautomaticsub')])
871 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
872 # subtitles download errors are already managed as troubles in relevant IE
873 # that way it will silently go on when used with unsupporting IE
874 subtitles = info_dict['subtitles']
875 sub_format = self.params.get('subtitlesformat', 'srt')
876 for sub_lang in subtitles.keys():
877 sub = subtitles[sub_lang]
881 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
882 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
883 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
885 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
886 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
888 except (OSError, IOError):
889 self.report_error('Cannot write subtitles file ' + descfn)
892 if self.params.get('writeinfojson', False):
893 infofn = os.path.splitext(filename)[0] + '.info.json'
894 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
895 self.to_screen('[info] Video description metadata is already present')
897 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
899 write_json_file(info_dict, encodeFilename(infofn))
900 except (OSError, IOError):
901 self.report_error('Cannot write metadata to JSON file ' + infofn)
904 if self.params.get('writethumbnail', False):
905 if info_dict.get('thumbnail') is not None:
906 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
907 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
908 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
909 self.to_screen('[%s] %s: Thumbnail is already present' %
910 (info_dict['extractor'], info_dict['id']))
912 self.to_screen('[%s] %s: Downloading thumbnail ...' %
913 (info_dict['extractor'], info_dict['id']))
915 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
916 with open(thumb_filename, 'wb') as thumbf:
917 shutil.copyfileobj(uf, thumbf)
918 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
919 (info_dict['extractor'], info_dict['id'], thumb_filename))
920 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
921 self.report_warning('Unable to download thumbnail "%s": %s' %
922 (info_dict['thumbnail'], compat_str(err)))
924 if not self.params.get('skip_download', False):
925 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
930 fd = get_suitable_downloader(info)(self, self.params)
931 for ph in self._progress_hooks:
932 fd.add_progress_hook(ph)
933 return fd.download(name, info)
934 if info_dict.get('requested_formats') is not None:
937 merger = FFmpegMergerPP(self)
938 if not merger._get_executable():
940 self.report_warning('You have requested multiple '
941 'formats but ffmpeg or avconv are not installed.'
942 ' The formats won\'t be merged')
944 postprocessors = [merger]
945 for f in info_dict['requested_formats']:
946 new_info = dict(info_dict)
948 fname = self.prepare_filename(new_info)
949 fname = prepend_extension(fname, 'f%s' % f['format_id'])
950 downloaded.append(fname)
951 partial_success = dl(fname, new_info)
952 success = success and partial_success
953 info_dict['__postprocessors'] = postprocessors
954 info_dict['__files_to_merge'] = downloaded
957 success = dl(filename, info_dict)
958 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
959 self.report_error('unable to download video data: %s' % str(err))
961 except (OSError, IOError) as err:
962 raise UnavailableVideoError(err)
963 except (ContentTooShortError, ) as err:
964 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
969 self.post_process(filename, info_dict)
970 except (PostProcessingError) as err:
971 self.report_error('postprocessing: %s' % str(err))
974 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download every URL in *url_list*; return the accumulated retcode."""
    # A fixed (non-templated) output filename cannot receive more than one
    # download, unless the user capped downloads at exactly one.
    if (len(url_list) > 1
            and '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # extract_info also performs the actual download.
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Re-run processing/downloading from a previously dumped info JSON file."""
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        # Fall back to a fresh extraction from the original page, if known.
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    chain = []
    # Format-specific postprocessors (e.g. the merger) run before user ones.
    if ie_info.get('__postprocessors') is not None:
        chain.extend(ie_info['__postprocessors'])
    chain.extend(self._pps)
    for processor in chain:
        try:
            wish, _new_info = processor.run(info)
            if wish is not None:
                # A positive wish always wins; a negative one only fills
                # an as-yet-undecided slot (let the IE decide).
                if wish or keep_video is None:
                    keep_video = wish
        except PostProcessingError as pp_err:
            self.report_error(pp_err.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1036 def _make_archive_id(self, info_dict):
1037 # Future-proof against any change in case
1038 # and backwards compatibility with prior versions
1039 extractor = info_dict.get('extractor_key')
1040 if extractor is None:
1041 if 'id' in info_dict:
1042 extractor = info_dict.get('ie_key') # key in a playlist
1043 if extractor is None:
1044 return None # Incomplete video information
1045 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if this video's archive id is already recorded."""
    archive_fn = self.params.get('download_archive')
    if archive_fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_fn, 'r', encoding='utf-8') as archive_file:
            return any(line.strip() == vid_id for line in archive_file)
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append this video's archive id to the download-archive file, if enabled."""
    archive_fn = self.params.get('download_archive')
    if archive_fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    # Callers only record videos whose info is complete enough to identify.
    assert vid_id
    with locked_file(archive_fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Describe a format dict's video resolution as text (e.g. '1280x720', '720p')."""
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Height unknown: show only the horizontal dimension.
        return '?x%d' % width
    return default
def list_formats(self, info_dict):
    """Print a table of all formats available for the given video."""
    def format_note(fdict):
        # Build a short free-text description of one format dict.
        note = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            note += '(unsupported) '
        if fdict.get('format_note') is not None:
            note += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            note += '%4dk ' % fdict['tbr']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            note += '%-5s' % fdict['vcodec']
            if fdict.get('vbr') is not None:
                note += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            note += 'video@'
        if fdict.get('vbr') is not None:
            note += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if note:
                note += ', '
            note += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if note:
                note += ', '
            note += 'audio'
        if fdict.get('abr') is not None:
            note += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            note += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if note:
                note += ', '
            note += format_bytes(fdict['filesize'])
        return note

    def line(format, idlen=20):
        # One fixed-width table row: id, extension, resolution, note.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Formats are sorted worst-to-best; tag both extremes.
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """Open *req* (URL string or Request) through this instance's configured
    opener, so proxies, cookies and the custom handlers all apply."""
    return self._opener.open(req)
def print_debug_header(self):
    """Write verbose debugging info (version, git HEAD, Python, proxy map)
    when the 'verbose' option is set; otherwise do nothing."""
    if not self.params.get('verbose'):
        return
    write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        # Best-effort: report the git revision when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Not a git checkout or git is unavailable: skip silently, but do not
        # swallow SystemExit/KeyboardInterrupt (the previous bare `except:` did).
        pass
    write_string('[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the urllib opener (cookies, proxies, HTTPS, debug tracing) and
    install it on this instance and, for now, globally."""
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    # Cookie jar: in-memory unless a cookie file was requested.
    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(self.cookiejar)

    # Proxy selection: an explicit --proxy (even empty, meaning "none")
    # wins over whatever the environment provides.
    if opts_proxy is not None:
        proxies = {} if opts_proxy == '' else {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)