2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
50 UnavailableVideoError,
57 from .extractor import get_info_extractor, gen_extractors
58 from .downloader import get_suitable_downloader
59 from .postprocessor import FFmpegMergerPP
60 from .version import __version__
63 class YoutubeDL(object):
66 YoutubeDL objects are the ones responsible of downloading the
67 actual video file and writing it to disk if the user has requested
68 it, among some other tasks. In most cases there should be one per
69 program. As, given a video URL, the downloader doesn't know how to
70 extract all the needed information, task that InfoExtractors do, it
71 has to pass the URL to one of them.
73 For this, YoutubeDL objects have a method that allows
74 InfoExtractors to be registered in a given order. When it is passed
75 a URL, the YoutubeDL object handles it to the first InfoExtractor it
76 finds that reports being able to handle it. The InfoExtractor extracts
77 all the information about the video or videos the URL refers to, and
78 YoutubeDL process the extracted information, possibly using a File
79 Downloader to download the video.
81 YoutubeDL objects accept a lot of parameters. In order not to saturate
82 the object constructor with arguments, it receives a dictionary of
83 options instead. These options are available through the params
84 attribute for the InfoExtractors to use. The YoutubeDL also
85 registers itself as the downloader in charge for the InfoExtractors
86 that are added to it, so this is a "mutual registration".
90 username: Username for authentication purposes.
91 password: Password for authentication purposes.
92 videopassword: Password for accessing a video.
93 usenetrc: Use netrc for authentication instead.
94 verbose: Print additional info to stdout.
95 quiet: Do not print messages to stdout.
96 forceurl: Force printing final URL.
97 forcetitle: Force printing title.
98 forceid: Force printing ID.
99 forcethumbnail: Force printing thumbnail URL.
100 forcedescription: Force printing description.
101 forcefilename: Force printing final filename.
102 forceduration: Force printing duration.
103 forcejson: Force printing info_dict as JSON.
104 simulate: Do not download the video files.
105 format: Video format code.
106 format_limit: Highest quality format to try.
107 outtmpl: Template for output names.
108 restrictfilenames: Do not allow "&" and spaces in file names
109 ignoreerrors: Do not stop on download errors.
110 nooverwrites: Prevent overwriting files.
111 playliststart: Playlist item to start at.
112 playlistend: Playlist item to end at.
113 matchtitle: Download only matching titles.
114 rejecttitle: Reject downloads for matching titles.
115 logger: Log messages to a logging.Logger instance.
116 logtostderr: Log messages to stderr instead of stdout.
117 writedescription: Write the video description to a .description file
118 writeinfojson: Write the video description to a .info.json file
119 writeannotations: Write the video annotations to a .annotations.xml file
120 writethumbnail: Write the thumbnail image to a file
121 writesubtitles: Write the video subtitles to a file
122 writeautomaticsub: Write the automatic subtitles to a file
123 allsubtitles: Downloads all the subtitles of the video
124 (requires writesubtitles or writeautomaticsub)
125 listsubtitles: Lists all available subtitles for the video
126 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
127 subtitleslangs: List of languages of the subtitles to download
128 keepvideo: Keep the video file after post-processing
129 daterange: A DateRange object, download only if the upload_date is in the range.
130 skip_download: Skip the actual download of the video file
131 cachedir: Location of the cache files in the filesystem.
132 None to disable filesystem cache.
133 noplaylist: Download single video instead of a playlist if in doubt.
134 age_limit: An integer representing the user's age in years.
135 Unsuitable videos for the given age are skipped.
136 min_views: An integer representing the minimum view count the video
137 must have in order to not be skipped.
138 Videos without view count information are always
139 downloaded. None for no limit.
140 max_views: An integer representing the maximum view count.
141 Videos that are more popular than that are not
143 Videos without view count information are always
144 downloaded. None for no limit.
145 download_archive: File name of a file where all downloads are recorded.
146 Videos already present in the file are not downloaded
148 cookiefile: File name where cookies should be read from and dumped to.
149 nocheckcertificate:Do not verify SSL certificates
150 proxy: URL of the proxy server to use
151 socket_timeout: Time to wait for unresponsive hosts, in seconds
152 bidi_workaround: Work around buggy terminals without bidirectional text
153 support, using fribidi
154 debug_printtraffic:Print out sent and received HTTP traffic
155 include_ads: Download ads as well
156 default_search: Prepend this string if an input url is not valid.
157 'auto' for elaborate guessing
159 The following parameters are not used by YoutubeDL itself, they are used by
161 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
162 noresizebuffer, retries, continuedl, noprogress, consoletitle
164 The following options are used by the post processors:
165 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
166 otherwise prefer avconv.
172 _download_retcode = None
173 _num_downloads = None
176 def __init__(self, params=None):
177 """Create a FileDownloader object with the given options."""
181 self._ies_instances = {}
183 self._progress_hooks = []
184 self._download_retcode = 0
185 self._num_downloads = 0
186 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
187 self._err_file = sys.stderr
190 if params.get('bidi_workaround', False):
193 master, slave = pty.openpty()
194 width = get_term_width()
198 width_args = ['-w', str(width)]
200 stdin=subprocess.PIPE,
202 stderr=self._err_file)
204 self._output_process = subprocess.Popen(
205 ['bidiv'] + width_args, **sp_kwargs
208 self._output_process = subprocess.Popen(
209 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
210 self._output_channel = os.fdopen(master, 'rb')
211 except OSError as ose:
213 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
217 if (sys.version_info >= (3,) and sys.platform != 'win32' and
218 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
219 and not params['restrictfilenames']):
220 # On Python 3, the Unicode filesystem API will throw errors (#1474)
222 'Assuming --restrict-filenames since file system encoding '
223 'cannot encode all charactes. '
224 'Set the LC_ALL environment variable to fix this.')
225 self.params['restrictfilenames'] = True
227 if '%(stitle)s' in self.params.get('outtmpl', ''):
228 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
232 def add_info_extractor(self, ie):
233 """Add an InfoExtractor object to the end of the list."""
235 self._ies_instances[ie.ie_key()] = ie
236 ie.set_downloader(self)
238 def get_info_extractor(self, ie_key):
240 Get an instance of an IE with name ie_key, it will try to get one from
241 the _ies list, if there's no instance it will create a new one and add
242 it to the extractor list.
244 ie = self._ies_instances.get(ie_key)
246 ie = get_info_extractor(ie_key)()
247 self.add_info_extractor(ie)
250 def add_default_info_extractors(self):
252 Add the InfoExtractors returned by gen_extractors to the end of the list
254 for ie in gen_extractors():
255 self.add_info_extractor(ie)
257 def add_post_processor(self, pp):
258 """Add a PostProcessor object to the end of the chain."""
260 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register the progress hook *ph*.

    Hooks are currently only attached to file downloaders, which invoke
    them with status dictionaries as a transfer progresses.
    """
    hooks = self._progress_hooks
    hooks.append(ph)
266 def _bidi_workaround(self, message):
267 if not hasattr(self, '_output_channel'):
270 assert hasattr(self, '_output_process')
271 assert type(message) == type('')
272 line_count = message.count('\n') + 1
273 self._output_process.stdin.write((message + '\n').encode('utf-8'))
274 self._output_process.stdin.flush()
275 res = ''.join(self._output_channel.readline().decode('utf-8')
276 for _ in range(line_count))
277 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print *message* to the screen, suppressed when 'quiet' is set."""
    # Delegates to to_stdout with quiet-mode checking enabled.
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Write *message* to the screen file.

    If a logger was supplied in params, the message goes to it at debug
    level instead.  When check_quiet is true, output is suppressed
    entirely while the 'quiet' option is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    output = message if skip_eol else message + '\n'
    write_string(output, self._screen_file)
294 def to_stderr(self, message):
295 """Print message to stderr."""
296 assert type(message) == type('')
297 if self.params.get('logger'):
298 self.params['logger'].error(message)
300 message = self._bidi_workaround(message)
301 output = message + '\n'
302 write_string(output, self._err_file)
304 def to_console_title(self, message):
305 if not self.params.get('consoletitle', False):
307 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
308 # c_wchar_p() might not be necessary if `message` is
309 # already of type unicode()
310 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
311 elif 'TERM' in os.environ:
312 write_string('\033]0;%s\007' % message, self._screen_file)
314 def save_console_title(self):
315 if not self.params.get('consoletitle', False):
317 if 'TERM' in os.environ:
318 # Save the title on stack
319 write_string('\033[22;0t', self._screen_file)
321 def restore_console_title(self):
322 if not self.params.get('consoletitle', False):
324 if 'TERM' in os.environ:
325 # Restore the title from stack
326 write_string('\033[23;0t', self._screen_file)
329 self.save_console_title()
332 def __exit__(self, *args):
333 self.restore_console_title()
335 if self.params.get('cookiefile') is not None:
336 self.cookiejar.save()
338 def trouble(self, message=None, tb=None):
339 """Determine action to take when a download problem appears.
341 Depending on if the downloader has been configured to ignore
342 download errors or not, this method may throw an exception or
343 not when errors are found, after printing the message.
345 tb, if given, is additional traceback information.
347 if message is not None:
348 self.to_stderr(message)
349 if self.params.get('verbose'):
351 if sys.exc_info()[0]: # if .trouble has been called from an except block
353 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
354 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
355 tb += compat_str(traceback.format_exc())
357 tb_data = traceback.format_list(traceback.extract_stack())
358 tb = ''.join(tb_data)
360 if not self.params.get('ignoreerrors', False):
361 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
362 exc_info = sys.exc_info()[1].exc_info
364 exc_info = sys.exc_info()
365 raise DownloadError(message, exc_info)
366 self._download_retcode = 1
368 def report_warning(self, message):
370 Print the message to stderr, it will be prefixed with 'WARNING:'
371 If stderr is a tty file the 'WARNING:' will be colored
373 if self.params.get('logger') is not None:
374 self.params['logger'].warning(message)
376 if self._err_file.isatty() and os.name != 'nt':
377 _msg_header = '\033[0;33mWARNING:\033[0m'
379 _msg_header = 'WARNING:'
380 warning_message = '%s %s' % (_msg_header, message)
381 self.to_stderr(warning_message)
383 def report_error(self, message, tb=None):
385 Do the same as trouble, but prefixes the message with 'ERROR:', colored
386 in red if stderr is a tty file.
388 if self._err_file.isatty() and os.name != 'nt':
389 _msg_header = '\033[0;31mERROR:\033[0m'
391 _msg_header = 'ERROR:'
392 error_message = '%s %s' % (_msg_header, message)
393 self.trouble(error_message, tb)
395 def report_file_already_downloaded(self, file_name):
396 """Report file has already been fully downloaded."""
398 self.to_screen('[download] %s has already been downloaded' % file_name)
399 except UnicodeEncodeError:
400 self.to_screen('[download] The file has already been downloaded')
402 def prepare_filename(self, info_dict):
403 """Generate the output filename."""
405 template_dict = dict(info_dict)
407 template_dict['epoch'] = int(time.time())
408 autonumber_size = self.params.get('autonumber_size')
409 if autonumber_size is None:
411 autonumber_templ = '%0' + str(autonumber_size) + 'd'
412 template_dict['autonumber'] = autonumber_templ % self._num_downloads
413 if template_dict.get('playlist_index') is not None:
414 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
415 if template_dict.get('resolution') is None:
416 if template_dict.get('width') and template_dict.get('height'):
417 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
418 elif template_dict.get('height'):
419 template_dict['resolution'] = '%sp' % template_dict['height']
420 elif template_dict.get('width'):
421 template_dict['resolution'] = '?x%d' % template_dict['width']
423 sanitize = lambda k, v: sanitize_filename(
425 restricted=self.params.get('restrictfilenames'),
427 template_dict = dict((k, sanitize(k, v))
428 for k, v in template_dict.items()
430 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
432 tmpl = os.path.expanduser(self.params['outtmpl'])
433 filename = tmpl % template_dict
435 except ValueError as err:
436 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
439 def _match_entry(self, info_dict):
440 """ Returns None iff the file should be downloaded """
442 video_title = info_dict.get('title', info_dict.get('id', 'video'))
443 if 'title' in info_dict:
444 # This can happen when we're just evaluating the playlist
445 title = info_dict['title']
446 matchtitle = self.params.get('matchtitle', False)
448 if not re.search(matchtitle, title, re.IGNORECASE):
449 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
450 rejecttitle = self.params.get('rejecttitle', False)
452 if re.search(rejecttitle, title, re.IGNORECASE):
453 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
454 date = info_dict.get('upload_date', None)
456 dateRange = self.params.get('daterange', DateRange())
457 if date not in dateRange:
458 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
459 view_count = info_dict.get('view_count', None)
460 if view_count is not None:
461 min_views = self.params.get('min_views')
462 if min_views is not None and view_count < min_views:
463 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
464 max_views = self.params.get('max_views')
465 if max_views is not None and view_count > max_views:
466 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
467 age_limit = self.params.get('age_limit')
468 if age_limit is not None:
469 if age_limit < info_dict.get('age_limit', 0):
470 return 'Skipping "' + title + '" because it is age restricted'
471 if self.in_download_archive(info_dict):
472 return '%s has already been recorded in archive' % video_title
476 def add_extra_info(info_dict, extra_info):
477 '''Set the keys from extra_info in info dict if they are missing'''
478 for key, value in extra_info.items():
479 info_dict.setdefault(key, value)
481 def extract_info(self, url, download=True, ie_key=None, extra_info={},
484 Returns a list with a dictionary for each video we find.
485 If 'download', also downloads the videos.
486 extra_info is a dict containing the extra values to add to each result
490 ies = [self.get_info_extractor(ie_key)]
495 if not ie.suitable(url):
499 self.report_warning('The program functionality for this site has been marked as broken, '
500 'and will probably not work.')
503 ie_result = ie.extract(url)
504 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
506 if isinstance(ie_result, list):
507 # Backwards compatibility: old IE result format
509 '_type': 'compat_list',
510 'entries': ie_result,
512 self.add_extra_info(ie_result,
514 'extractor': ie.IE_NAME,
516 'webpage_url_basename': url_basename(url),
517 'extractor_key': ie.ie_key(),
520 return self.process_ie_result(ie_result, download, extra_info)
523 except ExtractorError as de: # An error we somewhat expected
524 self.report_error(compat_str(de), de.format_traceback())
526 except MaxDownloadsReached:
528 except Exception as e:
529 if self.params.get('ignoreerrors', False):
530 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
535 self.report_error('no suitable InfoExtractor: %s' % url)
537 def process_ie_result(self, ie_result, download=True, extra_info={}):
539 Take the result of the ie(may be modified) and resolve all unresolved
540 references (URLs, playlist items).
542 It will also download the videos if 'download'.
543 Returns the resolved ie_result.
546 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
547 if result_type == 'video':
548 self.add_extra_info(ie_result, extra_info)
549 return self.process_video_result(ie_result, download=download)
550 elif result_type == 'url':
551 # We have to add extra_info to the results because it may be
552 # contained in a playlist
553 return self.extract_info(ie_result['url'],
555 ie_key=ie_result.get('ie_key'),
556 extra_info=extra_info)
557 elif result_type == 'url_transparent':
558 # Use the information from the embedding page
559 info = self.extract_info(
560 ie_result['url'], ie_key=ie_result.get('ie_key'),
561 extra_info=extra_info, download=False, process=False)
563 def make_result(embedded_info):
564 new_result = ie_result.copy()
565 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
566 'entries', 'ie_key', 'duration',
567 'subtitles', 'annotations', 'format',
568 'thumbnail', 'thumbnails'):
571 if f in embedded_info:
572 new_result[f] = embedded_info[f]
574 new_result = make_result(info)
576 assert new_result.get('_type') != 'url_transparent'
577 if new_result.get('_type') == 'compat_list':
578 new_result['entries'] = [
579 make_result(e) for e in new_result['entries']]
581 return self.process_ie_result(
582 new_result, download=download, extra_info=extra_info)
583 elif result_type == 'playlist':
584 # We process each entry in the playlist
585 playlist = ie_result.get('title', None) or ie_result.get('id', None)
586 self.to_screen('[download] Downloading playlist: %s' % playlist)
588 playlist_results = []
590 playliststart = self.params.get('playliststart', 1) - 1
591 playlistend = self.params.get('playlistend', None)
592 # For backwards compatibility, interpret -1 as whole list
593 if playlistend == -1:
596 if isinstance(ie_result['entries'], list):
597 n_all_entries = len(ie_result['entries'])
598 entries = ie_result['entries'][playliststart:playlistend]
599 n_entries = len(entries)
601 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
602 (ie_result['extractor'], playlist, n_all_entries, n_entries))
604 assert isinstance(ie_result['entries'], PagedList)
605 entries = ie_result['entries'].getslice(
606 playliststart, playlistend)
607 n_entries = len(entries)
609 "[%s] playlist %s: Downloading %d videos" %
610 (ie_result['extractor'], playlist, n_entries))
612 for i, entry in enumerate(entries, 1):
613 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
615 'playlist': playlist,
616 'playlist_index': i + playliststart,
617 'extractor': ie_result['extractor'],
618 'webpage_url': ie_result['webpage_url'],
619 'webpage_url_basename': url_basename(ie_result['webpage_url']),
620 'extractor_key': ie_result['extractor_key'],
623 reason = self._match_entry(entry)
624 if reason is not None:
625 self.to_screen('[download] ' + reason)
628 entry_result = self.process_ie_result(entry,
631 playlist_results.append(entry_result)
632 ie_result['entries'] = playlist_results
634 elif result_type == 'compat_list':
636 self.add_extra_info(r,
638 'extractor': ie_result['extractor'],
639 'webpage_url': ie_result['webpage_url'],
640 'webpage_url_basename': url_basename(ie_result['webpage_url']),
641 'extractor_key': ie_result['extractor_key'],
644 ie_result['entries'] = [
645 self.process_ie_result(_fixup(r), download, extra_info)
646 for r in ie_result['entries']
650 raise Exception('Invalid result type: %s' % result_type)
652 def select_format(self, format_spec, available_formats):
653 if format_spec == 'best' or format_spec is None:
654 return available_formats[-1]
655 elif format_spec == 'worst':
656 return available_formats[0]
657 elif format_spec == 'bestaudio':
659 f for f in available_formats
660 if f.get('vcodec') == 'none']
662 return audio_formats[-1]
663 elif format_spec == 'worstaudio':
665 f for f in available_formats
666 if f.get('vcodec') == 'none']
668 return audio_formats[0]
670 extensions = ['mp4', 'flv', 'webm', '3gp']
671 if format_spec in extensions:
672 filter_f = lambda f: f['ext'] == format_spec
674 filter_f = lambda f: f['format_id'] == format_spec
675 matches = list(filter(filter_f, available_formats))
680 def process_video_result(self, info_dict, download=True):
681 assert info_dict.get('_type', 'video') == 'video'
683 if 'playlist' not in info_dict:
684 # It isn't part of a playlist
685 info_dict['playlist'] = None
686 info_dict['playlist_index'] = None
688 if 'display_id' not in info_dict and 'id' in info_dict:
689 info_dict['display_id'] = info_dict['id']
691 # This extractors handle format selection themselves
692 if info_dict['extractor'] in ['Youku']:
694 self.process_info(info_dict)
697 # We now pick which formats have to be downloaded
698 if info_dict.get('formats') is None:
699 # There's only one format available
700 formats = [info_dict]
702 formats = info_dict['formats']
705 raise ExtractorError('No video formats found!')
707 # We check that all the formats have the format and format_id fields
708 for i, format in enumerate(formats):
709 if format.get('format_id') is None:
710 format['format_id'] = compat_str(i)
711 if format.get('format') is None:
712 format['format'] = '{id} - {res}{note}'.format(
713 id=format['format_id'],
714 res=self.format_resolution(format),
715 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
717 # Automatically determine file extension if missing
718 if 'ext' not in format:
719 format['ext'] = determine_ext(format['url'])
721 format_limit = self.params.get('format_limit', None)
723 formats = list(takewhile_inclusive(
724 lambda f: f['format_id'] != format_limit, formats
727 # TODO Central sorting goes here
729 if formats[0] is not info_dict:
730 # only set the 'formats' fields if the original info_dict list them
731 # otherwise we end up with a circular reference, the first (and unique)
732 # element in the 'formats' field in info_dict is info_dict itself,
733 # wich can't be exported to json
734 info_dict['formats'] = formats
735 if self.params.get('listformats', None):
736 self.list_formats(info_dict)
739 req_format = self.params.get('format')
740 if req_format is None:
742 formats_to_download = []
743 # The -1 is for supporting YoutubeIE
744 if req_format in ('-1', 'all'):
745 formats_to_download = formats
747 # We can accept formats requested in the format: 34/5/best, we pick
748 # the first that is available, starting from left
749 req_formats = req_format.split('/')
750 for rf in req_formats:
751 if re.match(r'.+?\+.+?', rf) is not None:
752 # Two formats have been requested like '137+139'
753 format_1, format_2 = rf.split('+')
754 formats_info = (self.select_format(format_1, formats),
755 self.select_format(format_2, formats))
756 if all(formats_info):
758 'requested_formats': formats_info,
760 'ext': formats_info[0]['ext'],
763 selected_format = None
765 selected_format = self.select_format(rf, formats)
766 if selected_format is not None:
767 formats_to_download = [selected_format]
769 if not formats_to_download:
770 raise ExtractorError('requested format not available',
774 if len(formats_to_download) > 1:
775 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
776 for format in formats_to_download:
777 new_info = dict(info_dict)
778 new_info.update(format)
779 self.process_info(new_info)
780 # We update the info dict with the best quality format (backwards compatibility)
781 info_dict.update(formats_to_download[-1])
784 def process_info(self, info_dict):
785 """Process a single resolved IE result."""
787 assert info_dict.get('_type', 'video') == 'video'
789 max_downloads = self.params.get('max_downloads')
790 if max_downloads is not None:
791 if self._num_downloads >= int(max_downloads):
792 raise MaxDownloadsReached()
794 info_dict['fulltitle'] = info_dict['title']
795 if len(info_dict['title']) > 200:
796 info_dict['title'] = info_dict['title'][:197] + '...'
798 # Keep for backwards compatibility
799 info_dict['stitle'] = info_dict['title']
801 if not 'format' in info_dict:
802 info_dict['format'] = info_dict['ext']
804 reason = self._match_entry(info_dict)
805 if reason is not None:
806 self.to_screen('[download] ' + reason)
809 self._num_downloads += 1
811 filename = self.prepare_filename(info_dict)
814 if self.params.get('forcetitle', False):
815 self.to_stdout(info_dict['fulltitle'])
816 if self.params.get('forceid', False):
817 self.to_stdout(info_dict['id'])
818 if self.params.get('forceurl', False):
819 # For RTMP URLs, also include the playpath
820 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
821 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
822 self.to_stdout(info_dict['thumbnail'])
823 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
824 self.to_stdout(info_dict['description'])
825 if self.params.get('forcefilename', False) and filename is not None:
826 self.to_stdout(filename)
827 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
828 self.to_stdout(formatSeconds(info_dict['duration']))
829 if self.params.get('forceformat', False):
830 self.to_stdout(info_dict['format'])
831 if self.params.get('forcejson', False):
832 info_dict['_filename'] = filename
833 self.to_stdout(json.dumps(info_dict))
835 # Do nothing else if in simulate mode
836 if self.params.get('simulate', False):
843 dn = os.path.dirname(encodeFilename(filename))
844 if dn != '' and not os.path.exists(dn):
846 except (OSError, IOError) as err:
847 self.report_error('unable to create directory ' + compat_str(err))
850 if self.params.get('writedescription', False):
851 descfn = filename + '.description'
852 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
853 self.to_screen('[info] Video description is already present')
856 self.to_screen('[info] Writing video description to: ' + descfn)
857 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
858 descfile.write(info_dict['description'])
859 except (KeyError, TypeError):
860 self.report_warning('There\'s no description to write.')
861 except (OSError, IOError):
862 self.report_error('Cannot write description file ' + descfn)
865 if self.params.get('writeannotations', False):
866 annofn = filename + '.annotations.xml'
867 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
868 self.to_screen('[info] Video annotations are already present')
871 self.to_screen('[info] Writing video annotations to: ' + annofn)
872 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
873 annofile.write(info_dict['annotations'])
874 except (KeyError, TypeError):
875 self.report_warning('There are no annotations to write.')
876 except (OSError, IOError):
877 self.report_error('Cannot write annotations file: ' + annofn)
880 subtitles_are_requested = any([self.params.get('writesubtitles', False),
881 self.params.get('writeautomaticsub')])
883 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
884 # subtitles download errors are already managed as troubles in relevant IE
885 # that way it will silently go on when used with unsupporting IE
886 subtitles = info_dict['subtitles']
887 sub_format = self.params.get('subtitlesformat', 'srt')
888 for sub_lang in subtitles.keys():
889 sub = subtitles[sub_lang]
893 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
894 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
895 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
897 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
898 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
900 except (OSError, IOError):
901 self.report_error('Cannot write subtitles file ' + descfn)
904 if self.params.get('writeinfojson', False):
905 infofn = os.path.splitext(filename)[0] + '.info.json'
906 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
907 self.to_screen('[info] Video description metadata is already present')
909 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
911 write_json_file(info_dict, encodeFilename(infofn))
912 except (OSError, IOError):
913 self.report_error('Cannot write metadata to JSON file ' + infofn)
916 if self.params.get('writethumbnail', False):
917 if info_dict.get('thumbnail') is not None:
918 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
919 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
920 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
921 self.to_screen('[%s] %s: Thumbnail is already present' %
922 (info_dict['extractor'], info_dict['id']))
924 self.to_screen('[%s] %s: Downloading thumbnail ...' %
925 (info_dict['extractor'], info_dict['id']))
927 uf = self.urlopen(info_dict['thumbnail'])
928 with open(thumb_filename, 'wb') as thumbf:
929 shutil.copyfileobj(uf, thumbf)
930 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
931 (info_dict['extractor'], info_dict['id'], thumb_filename))
932 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
933 self.report_warning('Unable to download thumbnail "%s": %s' %
934 (info_dict['thumbnail'], compat_str(err)))
936 if not self.params.get('skip_download', False):
937 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
942 fd = get_suitable_downloader(info)(self, self.params)
943 for ph in self._progress_hooks:
944 fd.add_progress_hook(ph)
945 return fd.download(name, info)
946 if info_dict.get('requested_formats') is not None:
949 merger = FFmpegMergerPP(self)
950 if not merger._get_executable():
952 self.report_warning('You have requested multiple '
953 'formats but ffmpeg or avconv are not installed.'
954 ' The formats won\'t be merged')
956 postprocessors = [merger]
957 for f in info_dict['requested_formats']:
958 new_info = dict(info_dict)
960 fname = self.prepare_filename(new_info)
961 fname = prepend_extension(fname, 'f%s' % f['format_id'])
962 downloaded.append(fname)
963 partial_success = dl(fname, new_info)
964 success = success and partial_success
965 info_dict['__postprocessors'] = postprocessors
966 info_dict['__files_to_merge'] = downloaded
969 success = dl(filename, info_dict)
970 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
971 self.report_error('unable to download video data: %s' % str(err))
973 except (OSError, IOError) as err:
974 raise UnavailableVideoError(err)
975 except (ContentTooShortError, ) as err:
976 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
981 self.post_process(filename, info_dict)
982 except (PostProcessingError) as err:
983 self.report_error('postprocessing: %s' % str(err))
986 self.record_download_archive(info_dict)
988 def download(self, url_list):
989 """Download a given list of URLs."""
990 if (len(url_list) > 1 and
991 '%' not in self.params['outtmpl']
992 and self.params.get('max_downloads') != 1):
993 raise SameFileError(self.params['outtmpl'])
997 #It also downloads the videos
998 self.extract_info(url)
999 except UnavailableVideoError:
1000 self.report_error('unable to download video')
1001 except MaxDownloadsReached:
1002 self.to_screen('[info] Maximum number of downloaded files reached.')
1005 return self._download_retcode
1007 def download_with_info_file(self, info_filename):
1008 with io.open(info_filename, 'r', encoding='utf-8') as f:
1011 self.process_ie_result(info, download=True)
1012 except DownloadError:
1013 webpage_url = info.get('webpage_url')
1014 if webpage_url is not None:
1015 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1016 return self.download([webpage_url])
1019 return self._download_retcode
1021 def post_process(self, filename, ie_info):
1022 """Run all the postprocessors on the given file."""
1023 info = dict(ie_info)
1024 info['filepath'] = filename
1027 if ie_info.get('__postprocessors') is not None:
1028 pps_chain.extend(ie_info['__postprocessors'])
1029 pps_chain.extend(self._pps)
1030 for pp in pps_chain:
1032 keep_video_wish, new_info = pp.run(info)
1033 if keep_video_wish is not None:
1035 keep_video = keep_video_wish
1036 elif keep_video is None:
1037 # No clear decision yet, let IE decide
1038 keep_video = keep_video_wish
1039 except PostProcessingError as e:
1040 self.report_error(e.msg)
1041 if keep_video is False and not self.params.get('keepvideo', False):
1043 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1044 os.remove(encodeFilename(filename))
1045 except (IOError, OSError):
1046 self.report_warning('Unable to remove downloaded video file')
1048 def _make_archive_id(self, info_dict):
1049 # Future-proof against any change in case
1050 # and backwards compatibility with prior versions
1051 extractor = info_dict.get('extractor_key')
1052 if extractor is None:
1053 if 'id' in info_dict:
1054 extractor = info_dict.get('ie_key') # key in a playlist
1055 if extractor is None:
1056 return None # Incomplete video information
1057 return extractor.lower() + ' ' + info_dict['id']
1059 def in_download_archive(self, info_dict):
1060 fn = self.params.get('download_archive')
1064 vid_id = self._make_archive_id(info_dict)
1066 return False # Incomplete video information
1069 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1070 for line in archive_file:
1071 if line.strip() == vid_id:
1073 except IOError as ioe:
1074 if ioe.errno != errno.ENOENT:
1078 def record_download_archive(self, info_dict):
1079 fn = self.params.get('download_archive')
1082 vid_id = self._make_archive_id(info_dict)
1084 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1085 archive_file.write(vid_id + '\n')
1088 def format_resolution(format, default='unknown'):
1089 if format.get('vcodec') == 'none':
1091 if format.get('resolution') is not None:
1092 return format['resolution']
1093 if format.get('height') is not None:
1094 if format.get('width') is not None:
1095 res = '%sx%s' % (format['width'], format['height'])
1097 res = '%sp' % format['height']
1098 elif format.get('width') is not None:
1099 res = '?x%d' % format['width']
1104 def list_formats(self, info_dict):
1105 def format_note(fdict):
1107 if fdict.get('ext') in ['f4f', 'f4m']:
1108 res += '(unsupported) '
1109 if fdict.get('format_note') is not None:
1110 res += fdict['format_note'] + ' '
1111 if fdict.get('tbr') is not None:
1112 res += '%4dk ' % fdict['tbr']
1113 if fdict.get('container') is not None:
1116 res += '%s container' % fdict['container']
1117 if (fdict.get('vcodec') is not None and
1118 fdict.get('vcodec') != 'none'):
1121 res += fdict['vcodec']
1122 if fdict.get('vbr') is not None:
1124 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1126 if fdict.get('vbr') is not None:
1127 res += '%4dk' % fdict['vbr']
1128 if fdict.get('acodec') is not None:
1131 if fdict['acodec'] == 'none':
1134 res += '%-5s' % fdict['acodec']
1135 elif fdict.get('abr') is not None:
1139 if fdict.get('abr') is not None:
1140 res += '@%3dk' % fdict['abr']
1141 if fdict.get('asr') is not None:
1142 res += ' (%5dHz)' % fdict['asr']
1143 if fdict.get('filesize') is not None:
1146 res += format_bytes(fdict['filesize'])
1149 def line(format, idlen=20):
1150 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1151 format['format_id'],
1153 self.format_resolution(format),
1154 format_note(format),
1157 formats = info_dict.get('formats', [info_dict])
1158 idlen = max(len('format code'),
1159 max(len(f['format_id']) for f in formats))
1160 formats_s = [line(f, idlen) for f in formats]
1161 if len(formats) > 1:
1162 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1163 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1165 header_line = line({
1166 'format_id': 'format code', 'ext': 'extension',
1167 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1168 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1169 (info_dict['id'], header_line, '\n'.join(formats_s)))
1171 def urlopen(self, req):
1172 """ Start an HTTP download """
1173 return self._opener.open(req, timeout=self._socket_timeout)
1175 def print_debug_header(self):
1176 if not self.params.get('verbose'):
1178 write_string('[debug] youtube-dl version ' + __version__ + '\n')
1180 sp = subprocess.Popen(
1181 ['git', 'rev-parse', '--short', 'HEAD'],
1182 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1183 cwd=os.path.dirname(os.path.abspath(__file__)))
1184 out, err = sp.communicate()
1185 out = out.decode().strip()
1186 if re.match('[0-9a-f]+', out):
1187 write_string('[debug] Git HEAD: ' + out + '\n')
1193 write_string('[debug] Python version %s - %s' %
1194 (platform.python_version(), platform_name()) + '\n')
1197 for handler in self._opener.handlers:
1198 if hasattr(handler, 'proxies'):
1199 proxy_map.update(handler.proxies)
1200 write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1202 def _setup_opener(self):
1203 timeout_val = self.params.get('socket_timeout')
1204 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1206 opts_cookiefile = self.params.get('cookiefile')
1207 opts_proxy = self.params.get('proxy')
1209 if opts_cookiefile is None:
1210 self.cookiejar = compat_cookiejar.CookieJar()
1212 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1214 if os.access(opts_cookiefile, os.R_OK):
1215 self.cookiejar.load()
1217 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1219 if opts_proxy is not None:
1220 if opts_proxy == '':
1223 proxies = {'http': opts_proxy, 'https': opts_proxy}
1225 proxies = compat_urllib_request.getproxies()
1226 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1227 if 'http' in proxies and 'https' not in proxies:
1228 proxies['https'] = proxies['http']
1229 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1231 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1232 https_handler = make_HTTPS_handler(
1233 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1234 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1235 opener = compat_urllib_request.build_opener(
1236 https_handler, proxy_handler, cookie_processor, ydlh)
1237 # Delete the default user-agent header, which would otherwise apply in
1238 # cases where our custom HTTP handler doesn't come into play
1239 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1240 opener.addheaders = []
1241 self._opener = opener