2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
48 UnavailableVideoError,
53 from .extractor import get_info_extractor, gen_extractors
54 from .FileDownloader import FileDownloader
55 from .version import __version__
58 class YoutubeDL(object):
61 YoutubeDL objects are the ones responsible of downloading the
62 actual video file and writing it to disk if the user has requested
63 it, among some other tasks. In most cases there should be one per
program. Given a video URL, the downloader itself doesn't know how to
extract all the needed information — that is the task of the InfoExtractors —
so it has to pass the URL to one of them.
68 For this, YoutubeDL objects have a method that allows
69 InfoExtractors to be registered in a given order. When it is passed
a URL, the YoutubeDL object hands it over to the first InfoExtractor it
71 finds that reports being able to handle it. The InfoExtractor extracts
72 all the information about the video or videos the URL refers to, and
YoutubeDL processes the extracted information, possibly using a File
74 Downloader to download the video.
76 YoutubeDL objects accept a lot of parameters. In order not to saturate
77 the object constructor with arguments, it receives a dictionary of
78 options instead. These options are available through the params
79 attribute for the InfoExtractors to use. The YoutubeDL also
80 registers itself as the downloader in charge for the InfoExtractors
81 that are added to it, so this is a "mutual registration".
85 username: Username for authentication purposes.
86 password: Password for authentication purposes.
videopassword:     Password for accessing a video.
88 usenetrc: Use netrc for authentication instead.
89 verbose: Print additional info to stdout.
90 quiet: Do not print messages to stdout.
91 forceurl: Force printing final URL.
92 forcetitle: Force printing title.
93 forceid: Force printing ID.
94 forcethumbnail: Force printing thumbnail URL.
95 forcedescription: Force printing description.
96 forcefilename: Force printing final filename.
97 forcejson: Force printing info_dict as JSON.
98 simulate: Do not download the video files.
99 format: Video format code.
100 format_limit: Highest quality format to try.
101 outtmpl: Template for output names.
102 restrictfilenames: Do not allow "&" and spaces in file names
103 ignoreerrors: Do not stop on download errors.
104 nooverwrites: Prevent overwriting files.
105 playliststart: Playlist item to start at.
106 playlistend: Playlist item to end at.
107 matchtitle: Download only matching titles.
108 rejecttitle: Reject downloads for matching titles.
109 logger: Log messages to a logging.Logger instance.
110 logtostderr: Log messages to stderr instead of stdout.
111 writedescription: Write the video description to a .description file
112 writeinfojson: Write the video description to a .info.json file
113 writeannotations: Write the video annotations to a .annotations.xml file
114 writethumbnail: Write the thumbnail image to a file
115 writesubtitles: Write the video subtitles to a file
116 writeautomaticsub: Write the automatic subtitles to a file
117 allsubtitles: Downloads all the subtitles of the video
118 (requires writesubtitles or writeautomaticsub)
119 listsubtitles: Lists all available subtitles for the video
120 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
121 subtitleslangs: List of languages of the subtitles to download
122 keepvideo: Keep the video file after post-processing
123 daterange: A DateRange object, download only if the upload_date is in the range.
124 skip_download: Skip the actual download of the video file
125 cachedir: Location of the cache files in the filesystem.
126 None to disable filesystem cache.
127 noplaylist: Download single video instead of a playlist if in doubt.
128 age_limit: An integer representing the user's age in years.
129 Unsuitable videos for the given age are skipped.
130 min_views: An integer representing the minimum view count the video
131 must have in order to not be skipped.
132 Videos without view count information are always
133 downloaded. None for no limit.
134 max_views: An integer representing the maximum view count.
135 Videos that are more popular than that are not
137 Videos without view count information are always
138 downloaded. None for no limit.
139 download_archive: File name of a file where all downloads are recorded.
140 Videos already present in the file are not downloaded
142 cookiefile: File name where cookies should be read from and dumped to.
143 nocheckcertificate:Do not verify SSL certificates
144 proxy: URL of the proxy server to use
145 socket_timeout: Time to wait for unresponsive hosts, in seconds
146 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
149 The following parameters are not used by YoutubeDL itself, they are used by
151 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
152 noresizebuffer, retries, continuedl, noprogress, consoletitle
158 _download_retcode = None
159 _num_downloads = None
    def __init__(self, params=None):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): several lines of this chunk appear to have been
        # elided (e.g. the try: paired with the `except OSError` below and
        # the `self.report_warning(` call head before the message strings);
        # only comments were added here.
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Bool indexes the pair: screen output goes to stderr when logtostderr is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {} if params is None else params
        if params.get('bidi_workaround', False):
            # Spawn a fribidi subprocess writing through a pty so
            # bidirectional text renders correctly on buggy terminals.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            self._fribidi = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args,
                stdin=subprocess.PIPE,
                stderr=self._err_file)
            self._fribidi_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
        # Force --restrict-filenames when the Python 3 filesystem encoding
        # cannot represent arbitrary characters (see issue #1474).
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params['restrictfilenames']):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

        self.fd = FileDownloader(self, self.params)

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
212 def add_info_extractor(self, ie):
213 """Add an InfoExtractor object to the end of the list."""
215 self._ies_instances[ie.ie_key()] = ie
216 ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # NOTE(review): the `if ie is None:` guard around the two lines
        # below and the final `return ie` appear elided from this chunk.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
237 def add_post_processor(self, pp):
238 """Add a PostProcessor object to the end of the chain."""
240 pp.set_downloader(self)
242 def _bidi_workaround(self, message):
243 if not hasattr(self, '_fribidi_channel'):
246 assert type(message) == type(u'')
247 line_count = message.count(u'\n') + 1
248 self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
249 self._fribidi.stdin.flush()
250 res = u''.join(self._fribidi_channel.readline().decode('utf-8')
251 for _ in range(line_count))
252 return res[:-len(u'\n')]
254 def to_screen(self, message, skip_eol=False):
255 """Print message to stdout if not in quiet mode."""
256 return self.to_stdout(message, skip_eol, check_quiet=True)
258 def to_stdout(self, message, skip_eol=False, check_quiet=False):
259 """Print message to stdout if not in quiet mode."""
260 if self.params.get('logger'):
261 self.params['logger'].debug(message)
262 elif not check_quiet or not self.params.get('quiet', False):
263 message = self._bidi_workaround(message)
264 terminator = [u'\n', u''][skip_eol]
265 output = message + terminator
267 write_string(output, self._screen_file)
269 def to_stderr(self, message):
270 """Print message to stderr."""
271 assert type(message) == type(u'')
272 if self.params.get('logger'):
273 self.params['logger'].error(message)
275 message = self._bidi_workaround(message)
276 output = message + u'\n'
277 write_string(output, self._err_file)
279 def to_console_title(self, message):
280 if not self.params.get('consoletitle', False):
282 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
283 # c_wchar_p() might not be necessary if `message` is
284 # already of type unicode()
285 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
286 elif 'TERM' in os.environ:
287 write_string(u'\033]0;%s\007' % message, self._screen_file)
289 def save_console_title(self):
290 if not self.params.get('consoletitle', False):
292 if 'TERM' in os.environ:
293 # Save the title on stack
294 write_string(u'\033[22;0t', self._screen_file)
296 def restore_console_title(self):
297 if not self.params.get('consoletitle', False):
299 if 'TERM' in os.environ:
300 # Restore the title from stack
301 write_string(u'\033[23;0t', self._screen_file)
    # NOTE(review): the `def __enter__(self):` line appears to have been
    # lost from this chunk; the call below is the body of the
    # context-manager entry hook (which should also `return self`).
        self.save_console_title()

    def __exit__(self, *args):
        # Context-manager exit: restore the terminal title and persist
        # cookies to disk when a cookie file was configured.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        # NOTE(review): the `if tb is None:` initialisation, its `else:`
        # branches and the `self.to_stderr(tb)` call appear elided from
        # this chunk; only comments/docstring delimiters were added.
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]: # if .trouble has been called from an except block
                # A wrapped exception (e.g. ExtractorError) may carry the
                # original exc_info of its cause — include that traceback.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            tb_data = traceback.format_list(traceback.extract_stack())
            tb = u''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info when present.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
343 def report_warning(self, message):
345 Print the message to stderr, it will be prefixed with 'WARNING:'
346 If stderr is a tty file the 'WARNING:' will be colored
348 if self._err_file.isatty() and os.name != 'nt':
349 _msg_header = u'\033[0;33mWARNING:\033[0m'
351 _msg_header = u'WARNING:'
352 warning_message = u'%s %s' % (_msg_header, message)
353 self.to_stderr(warning_message)
355 def report_error(self, message, tb=None):
357 Do the same as trouble, but prefixes the message with 'ERROR:', colored
358 in red if stderr is a tty file.
360 if self._err_file.isatty() and os.name != 'nt':
361 _msg_header = u'\033[0;31mERROR:\033[0m'
363 _msg_header = u'ERROR:'
364 error_message = u'%s %s' % (_msg_header, message)
365 self.trouble(error_message, tb)
367 def report_writedescription(self, descfn):
368 """ Report that the description file is being written """
369 self.to_screen(u'[info] Writing video description to: ' + descfn)
371 def report_writesubtitles(self, sub_filename):
372 """ Report that the subtitles file is being written """
373 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
375 def report_writeinfojson(self, infofn):
376 """ Report that the metadata file has been written """
377 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
379 def report_writeannotations(self, annofn):
380 """ Report that the annotations file has been written. """
381 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
383 def report_file_already_downloaded(self, file_name):
384 """Report file has already been fully downloaded."""
386 self.to_screen(u'[download] %s has already been downloaded' % file_name)
387 except UnicodeEncodeError:
388 self.to_screen(u'[download] The file has already been downloaded')
390 def increment_downloads(self):
391 """Increment the ordinal that assigns a number to each file."""
392 self._num_downloads += 1
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): this chunk is missing the surrounding `try:`, the
        # autonumber_size default assignment, the sanitize() value argument,
        # several closing parentheses, and the final `return filename` /
        # error-path return; comments only were added.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad playlist indices for lexicographic sorting.
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        # Sanitize every value so the rendered template is filesystem-safe.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Any key missing from the info dict renders as u'NA'.
        template_dict = collections.defaultdict(lambda: u'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # NOTE(review): the `if matchtitle:` / `if rejecttitle:` /
        # `if date is not None:` guards and the final `return None`
        # appear elided from this chunk; comments only were added.
        video_title = info_dict.get('title', info_dict.get('id', u'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            # Skip based on the min/max view-count thresholds; videos
            # without a view count are never skipped here.
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return u'%s has already been recorded in archive' % video_title
461 def add_extra_info(info_dict, extra_info):
462 '''Set the keys from extra_info in info dict if they are missing'''
463 for key, value in extra_info.items():
464 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): the signature is truncated here and this chunk is
        # missing the docstring delimiters, the ies selection else-branch,
        # the `for ie in ies:` loop header, the dict-literal braces and
        # several continue/return/raise lines; comments only were added.
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
        self.report_warning(u'The program functionality for this site has been marked as broken, '
                            u'and will probably not work.')
        ie_result = ie.extract(url)
        if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            '_type': 'compat_list',
            'entries': ie_result,
        # Record which extractor produced the result.
        self.add_extra_info(ie_result,
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de: # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        self.report_error(u'no suitable InfoExtractor: %s' % url)
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): scattered lines appear elided from this chunk
        # (dict-literal braces, `continue` statements, an `else:` in the
        # playlist slicing, the `extra` dict binding, the `_fixup` helper
        # definition and several returns); comments only were added.
        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Copy the embedding result, then take the transport-level
                # fields from what the embedded page provided.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'urlhandle', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            self.add_extra_info(r,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            raise Exception('Invalid result type: %s' % result_type)
624 def select_format(self, format_spec, available_formats):
625 if format_spec == 'best' or format_spec is None:
626 return available_formats[-1]
627 elif format_spec == 'worst':
628 return available_formats[0]
630 extensions = [u'mp4', u'flv', u'webm', u'3gp']
631 if format_spec in extensions:
632 filter_f = lambda f: f['ext'] == format_spec
634 filter_f = lambda f: f['format_id'] == format_spec
635 matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        """Pick which formats of a resolved video result to download."""
        # NOTE(review): several `return` / `else:` / closing-paren lines
        # appear elided from this chunk; comments only were added.
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # This extractors handle format selection themselves
        if info_dict['extractor'] in [u'youtube', u'Youku']:
            self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        # Drop everything after the format_limit cutoff (inclusive).
        format_limit = self.params.get('format_limit', None)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                # We only compare the extension if they have the same height and width
                return (f.get('height'), f.get('width'), ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        req_format = self.params.get('format', 'best')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            # We can accept formats requestd in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_info(self, info_dict):
        """Process a single resolved IE result."""
        # NOTE(review): this chunk is missing a number of lines — the
        # `try:` statements paired with the except clauses below, early
        # `return`s after simulate/skip checks, and several `else:`/`raise`
        # lines; comments only were added.

        assert info_dict.get('_type', 'video') == 'video'
        #We increment the download the download count here to match the previous behaviour.
        self.increment_downloads()

        # Truncate over-long titles but keep the original under 'fulltitle'.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if not 'format' in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings for the --get-* / --print-json options.
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

        # Ensure the destination directory exists.
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)

        if self.params.get('writeannotations', False):
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    # NOTE(review): `descfn` here looks like a copy/paste
                    # slip — presumably sub_filename was meant; confirm
                    # against upstream before changing the message.
                    self.report_error(u'Cannot write subtitles file ' + descfn)

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + u'.info.json'
            self.report_writeinfojson(infofn)
            # Strip the non-serialisable urlhandle entry before dumping.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

            self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))

        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        # Refuse to download several videos into one fixed filename.
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        # NOTE(review): the `for url in url_list:` loop header and its
        # `try:` appear elided from this chunk; comments only were added.
        #It also downloads the videos
        self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Re-run processing from a previously written .info.json file."""
        # NOTE(review): the json load that binds `info` and the `try:`
        # around process_ie_result appear elided from this chunk.
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            # Fall back to re-extracting from the original page URL.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # NOTE(review): the info-dict copy, the `keep_video = None`
        # initialisation and the `for pp in self._pps:` / `try:` loop
        # header appear elided from this chunk; comments only were added.
        info['filepath'] = filename
        keep_video_wish, new_info = pp.run(info)
        if keep_video_wish is not None:
            keep_video = keep_video_wish
        elif keep_video is None:
            # No clear decision yet, let IE decide
            keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
        # Delete the intermediate file unless a processor (or -k) vetoed it.
        if keep_video is False and not self.params.get('keepvideo', False):
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
940 def _make_archive_id(self, info_dict):
941 # Future-proof against any change in case
942 # and backwards compatibility with prior versions
943 extractor = info_dict.get('extractor_key')
944 if extractor is None:
945 if 'id' in info_dict:
946 extractor = info_dict.get('ie_key') # key in a playlist
947 if extractor is None:
948 return None # Incomplete video information
949 return extractor.lower() + u' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return whether this video is already recorded in the archive file.

        NOTE(review): the `if fn is None: return False` guard, the `try:`
        and the `return True` on a matching line appear elided from this
        chunk; comments only were added.
        """
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        return False  # Incomplete video information
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file simply means nothing was recorded yet.
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file
        so future runs can skip it."""
        fn = self.params.get('download_archive')
        # NOTE(review): the early return when fn is None is elided here.
        vid_id = self._make_archive_id(info_dict)
        # Open in append mode under a file lock; one id per line.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')
    def format_resolution(format, default='unknown'):
        """Return a human-readable resolution for a format dict:
        'WxH' when both dimensions are known, 'Hp' when only the height
        is (the audio-only and fallback branches are elided in this view)."""
        if format.get('vcodec') == 'none':
        if format.get('_resolution') is not None:
            # An explicit _resolution (e.g. the list_formats header row)
            # wins over values computed from width/height.
            return format['_resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = u'%sx%s' % (format['width'], format['height'])
            res = u'%sp' % format['height']
    def list_formats(self, info_dict):
        """Print a table of the formats available for a video
        (--list-formats): format code, extension, resolution, note."""
        def format_note(fdict):
            # Build the free-text "note" column: extractor-provided note,
            # then video codec/bitrate, audio codec/bitrate and filesize.
            # NOTE(review): the accumulator initialisation and several
            # branches are elided in this view.
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + u' '
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += u'%-5s' % fdict['vcodec']
            elif fdict.get('vbr') is not None:
            if fdict.get('vbr') is not None:
                res += u'@%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                    res += u'%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
            if fdict.get('abr') is not None:
                res += u'@%3dk' % fdict['abr']
            if fdict.get('filesize') is not None:
                res += format_bytes(fdict['filesize'])

        def line(format, idlen=20):
            # One table row, padded so the columns line up; idlen is the
            # width reserved for the format-id column.
            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                format_note(format),

        # A video without a 'formats' list is itself the single format.
        formats = info_dict.get('formats', [info_dict])
        # Id column width: at least the header text, else the longest id.
        idlen = max(len(u'format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are assumed ordered worst-first / best-last.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        # Header row reuses line() with literal column titles.
        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))
1044 def urlopen(self, req):
1045 """ Start an HTTP download """
1046 return self._opener.open(req)
    def print_debug_header(self):
        """Write version/environment diagnostics to stderr, only when
        the 'verbose' option is set."""
        if not self.params.get('verbose'):
        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
        # Best-effort: report the git commit when running from a checkout.
        # NOTE(review): the surrounding try/except for a missing git
        # binary is elided in this view.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        # Only print when the output actually looks like a commit hash.
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
        write_string(u'[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + u'\n')

        # Collect the effective proxy configuration from the opener's
        # handlers (built in _setup_opener); proxy_map init is elided here.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
    def _setup_opener(self):
        """Build the urllib opener (cookie jar, proxies, HTTPS handler)
        and install it on self._opener and as the global default."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 10 minutes.
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No --cookies file given: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            # Otherwise use a Mozilla-format jar backed by the file
            # (the continuation line of this call is elided in this view).
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # Load existing cookies only when the file is readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()
        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                # --proxy '' explicitly disables proxying; the branch
                # assigning the empty proxy dict is elided in this view.
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # Without --proxy, fall back to the environment
            # (http_proxy and friends).
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False))
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)