2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
49 UnavailableVideoError,
55 from .extractor import get_info_extractor, gen_extractors
56 from .FileDownloader import FileDownloader
57 from .version import __version__
60 class YoutubeDL(object):
63 YoutubeDL objects are the ones responsible of downloading the
64 actual video file and writing it to disk if the user has requested
65 it, among some other tasks. In most cases there should be one per
66 program. As, given a video URL, the downloader doesn't know how to
67 extract all the needed information, task that InfoExtractors do, it
68 has to pass the URL to one of them.
70 For this, YoutubeDL objects have a method that allows
71 InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
73 finds that reports being able to handle it. The InfoExtractor extracts
74 all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
76 Downloader to download the video.
78 YoutubeDL objects accept a lot of parameters. In order not to saturate
79 the object constructor with arguments, it receives a dictionary of
80 options instead. These options are available through the params
81 attribute for the InfoExtractors to use. The YoutubeDL also
82 registers itself as the downloader in charge for the InfoExtractors
83 that are added to it, so this is a "mutual registration".
87 username: Username for authentication purposes.
88 password: Password for authentication purposes.
    videopassword:     Password for accessing a video.
90 usenetrc: Use netrc for authentication instead.
91 verbose: Print additional info to stdout.
92 quiet: Do not print messages to stdout.
93 forceurl: Force printing final URL.
94 forcetitle: Force printing title.
95 forceid: Force printing ID.
96 forcethumbnail: Force printing thumbnail URL.
97 forcedescription: Force printing description.
98 forcefilename: Force printing final filename.
99 forceduration: Force printing duration.
100 forcejson: Force printing info_dict as JSON.
101 simulate: Do not download the video files.
102 format: Video format code.
103 format_limit: Highest quality format to try.
104 outtmpl: Template for output names.
105 restrictfilenames: Do not allow "&" and spaces in file names
106 ignoreerrors: Do not stop on download errors.
107 nooverwrites: Prevent overwriting files.
108 playliststart: Playlist item to start at.
109 playlistend: Playlist item to end at.
110 matchtitle: Download only matching titles.
111 rejecttitle: Reject downloads for matching titles.
112 logger: Log messages to a logging.Logger instance.
113 logtostderr: Log messages to stderr instead of stdout.
114 writedescription: Write the video description to a .description file
115 writeinfojson: Write the video description to a .info.json file
116 writeannotations: Write the video annotations to a .annotations.xml file
117 writethumbnail: Write the thumbnail image to a file
118 writesubtitles: Write the video subtitles to a file
119 writeautomaticsub: Write the automatic subtitles to a file
120 allsubtitles: Downloads all the subtitles of the video
121 (requires writesubtitles or writeautomaticsub)
122 listsubtitles: Lists all available subtitles for the video
123 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
124 subtitleslangs: List of languages of the subtitles to download
125 keepvideo: Keep the video file after post-processing
126 daterange: A DateRange object, download only if the upload_date is in the range.
127 skip_download: Skip the actual download of the video file
128 cachedir: Location of the cache files in the filesystem.
129 None to disable filesystem cache.
130 noplaylist: Download single video instead of a playlist if in doubt.
131 age_limit: An integer representing the user's age in years.
132 Unsuitable videos for the given age are skipped.
133 min_views: An integer representing the minimum view count the video
134 must have in order to not be skipped.
135 Videos without view count information are always
136 downloaded. None for no limit.
137 max_views: An integer representing the maximum view count.
138 Videos that are more popular than that are not
140 Videos without view count information are always
141 downloaded. None for no limit.
142 download_archive: File name of a file where all downloads are recorded.
143 Videos already present in the file are not downloaded
145 cookiefile: File name where cookies should be read from and dumped to.
146 nocheckcertificate:Do not verify SSL certificates
147 proxy: URL of the proxy server to use
148 socket_timeout: Time to wait for unresponsive hosts, in seconds
149 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
152 The following parameters are not used by YoutubeDL itself, they are used by
154 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
155 noresizebuffer, retries, continuedl, noprogress, consoletitle
161 _download_retcode = None
162 _num_downloads = None
165 def __init__(self, params=None):
166 """Create a FileDownloader object with the given options."""
168 self._ies_instances = {}
170 self._progress_hooks = []
171 self._download_retcode = 0
172 self._num_downloads = 0
173 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
174 self._err_file = sys.stderr
175 self.params = {} if params is None else params
177 if params.get('bidi_workaround', False):
180 master, slave = pty.openpty()
181 width = get_term_width()
185 width_args = ['-w', str(width)]
187 stdin=subprocess.PIPE,
189 stderr=self._err_file)
191 self._output_process = subprocess.Popen(
192 ['bidiv'] + width_args, **sp_kwargs
195 print('Falling back to fribidi')
196 self._output_process = subprocess.Popen(
197 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
198 self._output_channel = os.fdopen(master, 'rb')
199 except OSError as ose:
201 self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
205 if (sys.version_info >= (3,) and sys.platform != 'win32' and
206 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
207 and not params['restrictfilenames']):
208 # On Python 3, the Unicode filesystem API will throw errors (#1474)
210 u'Assuming --restrict-filenames since file system encoding '
211 u'cannot encode all charactes. '
212 u'Set the LC_ALL environment variable to fix this.')
213 self.params['restrictfilenames'] = True
215 self.fd = FileDownloader(self, self.params)
217 if '%(stitle)s' in self.params.get('outtmpl', ''):
218 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # Index by IE key for later lookup via get_info_extractor(), and
        # register ourselves as the downloader ("mutual registration").
        # NOTE(review): the excerpt omits the line appending ie to self._ies.
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # NOTE(review): the excerpt omits the `if ie is None:` guard around
        # the two lines below and the final `return ie`.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # Give the post-processor a back-reference to this downloader.
        # NOTE(review): the excerpt omits the line appending pp to self._pps.
        pp.set_downloader(self)
    def _bidi_workaround(self, message):
        # Filter *message* through the external bidi process started in
        # __init__ (only active when the 'bidi_workaround' option is set).
        if not hasattr(self, '_output_channel'):
            print('WORKAROUND NOT ENABLED')
            # NOTE(review): the excerpt omits the early `return message` here.

        assert hasattr(self, '_output_process')
        assert type(message) == type(u'')
        line_count = message.count(u'\n') + 1
        self._output_process.stdin.write((message + u'\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as were fed in.
        res = u''.join(self._output_channel.readline().decode('utf-8')
                       for _ in range(line_count))
        # Drop the trailing newline appended above.
        return res[:-len(u'\n')]
266 def to_screen(self, message, skip_eol=False):
267 """Print message to stdout if not in quiet mode."""
268 return self.to_stdout(message, skip_eol, check_quiet=True)
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            # A configured logger takes precedence over direct screen output.
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            # NOTE(review): upstream guards this call with the
            # 'bidi_workaround' option; that guard line is not visible in
            # this excerpt — confirm before relying on it.
            message = self._bidi_workaround(message)
            terminator = [u'\n', u''][skip_eol]
            output = message + terminator

            write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        if self.params.get('logger'):
            self.params['logger'].error(message)
        # NOTE(review): the excerpt omits the `else:` introducing the branch
        # below, and possibly a bidi_workaround guard.
        message = self._bidi_workaround(message)
        output = message + u'\n'
        write_string(output, self._err_file)
    def to_console_title(self, message):
        # Set the terminal/console window title, when enabled.
        # NOTE(review): the excerpt omits the early `return` body of the
        # guard below.
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm escape sequence: set the window title.
            write_string(u'\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        # Push the current console title onto the terminal's title stack.
        # NOTE(review): the excerpt omits the early `return` body of the
        # guard below.
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            write_string(u'\033[22;0t', self._screen_file)
    def restore_console_title(self):
        # Pop the console title saved by save_console_title().
        # NOTE(review): the excerpt omits the early `return` body of the
        # guard below.
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            write_string(u'\033[23;0t', self._screen_file)
316 self.save_console_title()
    def __exit__(self, *args):
        # Context-manager exit: restore the console title and, when a cookie
        # file is configured, persist the cookie jar to disk.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # NOTE(review): the excerpt omits scaffolding lines here (the
            # `if tb is None:` check, tb initialisation, an else branch, and
            # writing tb to stderr).
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    # Prefer the wrapped exception's own traceback when present.
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            # NOTE(review): the excerpt omits the `else:` introducing the
            # fallback assignment below.
            exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI yellow header for interactive terminals (not on Windows).
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        # NOTE(review): the excerpt omits the `else:` introducing the plain
        # header below.
        _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI red header for interactive terminals (not on Windows).
            _msg_header = u'\033[0;31mERROR:\033[0m'
        # NOTE(review): the excerpt omits the `else:` introducing the plain
        # header below.
        _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # NOTE(review): the excerpt omits the `try:` opening this block.
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a message without the (unencodable) filename.
            self.to_screen(u'[download] The file has already been downloaded')
386 def increment_downloads(self):
387 """Increment the ordinal that assigns a number to each file."""
388 self._num_downloads += 1
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): the excerpt omits the try: matched by the ValueError
        # handler below, the return of the prepared filename, and a few
        # scaffolding lines (closing parens, default assignments).
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            # (excerpt omits the default autonumber_size assignment)
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the playlist index so filenames sort correctly.
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Unknown template fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: u'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
420 def _match_entry(self, info_dict):
421 """ Returns None iff the file should be downloaded """
423 video_title = info_dict.get('title', info_dict.get('id', u'video'))
424 if 'title' in info_dict:
425 # This can happen when we're just evaluating the playlist
426 title = info_dict['title']
427 matchtitle = self.params.get('matchtitle', False)
429 if not re.search(matchtitle, title, re.IGNORECASE):
430 return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
431 rejecttitle = self.params.get('rejecttitle', False)
433 if re.search(rejecttitle, title, re.IGNORECASE):
434 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
435 date = info_dict.get('upload_date', None)
437 dateRange = self.params.get('daterange', DateRange())
438 if date not in dateRange:
439 return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
440 view_count = info_dict.get('view_count', None)
441 if view_count is not None:
442 min_views = self.params.get('min_views')
443 if min_views is not None and view_count < min_views:
444 return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
445 max_views = self.params.get('max_views')
446 if max_views is not None and view_count > max_views:
447 return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
448 age_limit = self.params.get('age_limit')
449 if age_limit is not None:
450 if age_limit < info_dict.get('age_limit', 0):
451 return u'Skipping "' + title + '" because it is age restricted'
452 if self.in_download_archive(info_dict):
453 return u'%s has already been recorded in archive' % video_title
457 def add_extra_info(info_dict, extra_info):
458 '''Set the keys from extra_info in info dict if they are missing'''
459 for key, value in extra_info.items():
460 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): the signature's continuation line (the closing paren
        # and, per callers below, a `process` parameter) is not visible in
        # this excerpt; several control-flow lines are also missing and are
        # flagged inline.
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        # (excerpt omits the `if ie_key:` guard, the else using the full
        #  extractor list, and the `for ie in ies:` loop opener)
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
            # (excerpt omits `continue`)
        # (excerpt omits the working/broken check introducing this warning)
        self.report_warning(u'The program functionality for this site has been marked as broken, '
                            u'and will probably not work.')
        # (excerpt omits the try: around extraction)
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            # (excerpt omits `break`)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            # (excerpt omits the dict-literal opener and closer around these)
            '_type': 'compat_list',
            'entries': ie_result,
        self.add_extra_info(ie_result,
            # (excerpt omits the dict-literal opener and a 'webpage_url' entry)
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        # (excerpt omits the `if process:` guard)
        return self.process_ie_result(ie_result, download, extra_info)
        # (excerpt omits the else branch returning the raw ie_result)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            # (excerpt omits `break`)
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                # (excerpt omits `break` and the re-raise branch)
        self.report_error(u'no suitable InfoExtractor: %s' % url)
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): several scaffolding lines are missing from this
        # excerpt (dict-literal braces, a few guards/returns); gaps are
        # flagged inline rather than guessed.
        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            # (excerpt omits a continuation line of this call, presumably the
            #  download argument)
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge selected fields from the embedded page's info into a
                # copy of the outer result.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'urlhandle', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    # (excerpt omits two lines here, likely the loop body
                    #  scaffolding and `return new_result`)
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                # (excerpt omits the assignment resetting playlistend)
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)

            # (excerpt omits the self.to_screen( opener of this message)
                u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                # (excerpt omits the dict opener, e.g. an `extra = {` line)
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)
                    # (excerpt omits `continue`)

                # (excerpt omits this call's continuation lines, presumably
                #  download= and extra_info= arguments)
                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            # (excerpt omits `return ie_result`)
        elif result_type == 'compat_list':
            # (excerpt omits the local fixup-function def and dict opener)
            self.add_extra_info(r,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            # (excerpt omits the closing bracket, a return, and the else:)
        raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        # Pick a single format out of available_formats according to
        # format_spec ('best', 'worst', an extension, or a format_id).
        # available_formats appears to be ordered worst-to-best, given the
        # 'best'/'worst' indexing below.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        # NOTE(review): the excerpt omits the `else:` between the two
        # filter_f assignments and the final return of a match (or None).
        extensions = [u'mp4', u'flv', u'webm', u'3gp']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
        filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        # Resolve format selection for a single video result and hand each
        # chosen format to process_info().
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # These extractors handle format selection themselves
        if info_dict['extractor'] in [u'youtube', u'Youku']:
            self.process_info(info_dict)
            # (excerpt omits the return after delegating)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        # (excerpt omits the `else:` introducing the assignment below)
        formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                # (excerpt omits the closing paren of this .format() call)
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            # (excerpt omits the return)

        format_limit = self.params.get('format_limit', None)
        # (excerpt omits the guard around the truncation and its closing paren)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                # Rank free formats last so sorted() prefers them
                # (higher sorts later; best is taken from the tail).
                # (excerpt omits the try/except around the index lookup)
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                # We only compare the extension if they have the same height and width
                return (f.get('height'), f.get('width'), ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        req_format = self.params.get('format', 'best')
        if req_format is None:
            # (excerpt omits the fallback assignment)
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        # (excerpt omits the `else:` introducing the branch below)
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                # (excerpt omits `break`)
        if not formats_to_download:
            # (excerpt omits the remaining arguments of this raise and a
            #  probable download guard before the loop below)
            raise ExtractorError(u'requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        # (excerpt omits `return info_dict`)
    def process_info(self, info_dict):
        """Process a single resolved IE result."""

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            # Truncate over-long titles so they fit in a filename.
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if not 'format' in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            # (excerpt omits the early return)

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # (excerpt omits the early return)

        # (excerpt omits a few lines here, including the try: matched by the
        #  handler below)
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            # (excerpt omits the directory-creation call)
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            # (excerpt omits the early return)

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen(u'[info] Video description is already present')
            # (excerpt omits the else:/try: introducing the write below)
            self.to_screen(u'[info] Writing video description to: ' + descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                # (excerpt omits the early return)

        if self.params.get('writeannotations', False):
            annofn = filename + u'.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen(u'[info] Video annotations are already present')
            # (excerpt omits the else:/try: introducing the write below)
            self.to_screen(u'[info] Writing video annotations to: ' + annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write annotations file: ' + annofn)
                # (excerpt omits the early return)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                # (excerpt omits a guard on empty subtitles and the try:
                #  matched by the handler below)
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                # (excerpt omits the else: introducing the write below)
                self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    # (excerpt omits the subfile.write call)
                except (OSError, IOError):
                    # NOTE(review): `descfn` here looks like a copy/paste slip;
                    # the subtitles filename is sub_filename — confirm upstream
                    # before changing the message.
                    self.report_error(u'Cannot write subtitles file ' + descfn)
                    # (excerpt omits the early return)

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + u'.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen(u'[info] Video description metadata is already present')
            # (excerpt omits the else: before the branch below)
            self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
            # (excerpt omits the try: matched by the handler below)
            # Drop non-serialisable entries before dumping.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                # (excerpt omits the early return)

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen(u'[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                # (excerpt omits the else: before the download below)
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                # (excerpt omits the try: matched by the handler below)
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                # (excerpt omits the success handling / else: try: lines
                #  around the actual download call)
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                # (excerpt omits the early return)
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                # (excerpt omits the early return and the success guard and
                #  try: around post-processing)
            self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                # (excerpt omits the early return)

        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        # Refuse to download multiple videos into one fixed filename.
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        # (excerpt omits the `for url in url_list:` loop and try: opener)
        # It also downloads the videos
        self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            # (excerpt omits the re-raise)

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        # Re-run processing from a previously dumped .info.json file.
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            # (excerpt omits loading the JSON into `info` and the try:
            #  matched by the handler below)
        self.process_ie_result(info, download=True)
        except DownloadError:
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                # Fall back to a fresh extraction from the original page.
                self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
            # (excerpt omits the else: re-raise)
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # (excerpt omits copying ie_info into `info`)
        info['filepath'] = filename
        # (excerpt omits keep_video initialisation, the loop over the
        #  registered post-processors, and its try: opener)
        keep_video_wish, new_info = pp.run(info)
        if keep_video_wish is not None:
            # (excerpt omits the `if keep_video_wish:` branch header; a
            #  positive wish wins over any previous decision)
            keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            # (excerpt omits the try: matched by the handler below)
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')
957 def _make_archive_id(self, info_dict):
958 # Future-proof against any change in case
959 # and backwards compatibility with prior versions
960 extractor = info_dict.get('extractor_key')
961 if extractor is None:
962 if 'id' in info_dict:
963 extractor = info_dict.get('ie_key') # key in a playlist
964 if extractor is None:
965 return None # Incomplete video information
966 return extractor.lower() + u' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Check whether this video is already recorded in the --download-archive file."""
        fn = self.params.get('download_archive')
        # NOTE(review): the early `return False` when no archive file is
        # configured appears to be missing from this excerpt.
        vid_id = self._make_archive_id(info_dict)
        # NOTE(review): the guard (presumably `if vid_id is None:`) for the
        # return below is missing from this excerpt.
        return False  # Incomplete video information
        # NOTE(review): the `try:` guarding the file access is missing from
        # this excerpt.
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    # One archive id per line; exact match after stripping.
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet;
            # any other I/O error is re-raised (lines missing from excerpt).
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file."""
        fn = self.params.get('download_archive')
        # NOTE(review): the early return when no archive file is configured
        # and the assertion on vid_id appear to be missing from this excerpt.
        vid_id = self._make_archive_id(info_dict)
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')
    def format_resolution(format, default='unknown'):
        """Return a human-readable resolution string for a format dict.

        NOTE(review): this looks like a static helper; a @staticmethod
        decorator above this line may be missing from this excerpt — confirm.
        """
        if format.get('vcodec') == 'none':
            # NOTE(review): the audio-only return value is missing from
            # this excerpt.
        if format.get('_resolution') is not None:
            # An extractor-provided resolution string takes precedence.
            return format['_resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = u'%sx%s' % (format['width'], format['height'])
                # NOTE(review): an `else:` is missing before this line in
                # this excerpt — height alone is rendered like "720p".
                res = u'%sp' % format['height']
    def list_formats(self, info_dict):
        """Print a table of the available formats for info_dict to the screen."""
        def format_note(fdict):
            # Build a short free-text note: extractor note, codecs,
            # bitrates and filesize, space-separated.
            # NOTE(review): the initialisation of `res` (presumably
            # `res = u''`) is missing from this excerpt.
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + u' '
            if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
                res += u'%-5s' % fdict['vcodec']
            elif fdict.get('vbr') is not None:
                # NOTE(review): the body of this branch is missing from
                # this excerpt (presumably a "video" placeholder).
            if fdict.get('vbr') is not None:
                res += u'@%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                # NOTE(review): lines missing from this excerpt before the
                # next statement (presumably a separator).
                res += u'%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
                # NOTE(review): the body of this branch is missing from
                # this excerpt (presumably an "audio" placeholder).
            if fdict.get('abr') is not None:
                res += u'@%3dk' % fdict['abr']
            if fdict.get('filesize') is not None:
                # NOTE(review): lines missing from this excerpt before the
                # next statement.
                res += format_bytes(fdict['filesize'])
            # NOTE(review): the `return res` is missing from this excerpt.

        def line(format, idlen=20):
            # One table row: id, extension, resolution, note — columns
            # padded so they line up; the id column width is idlen+1.
            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                format['format_id'],
                # NOTE(review): the format extension entry of this tuple is
                # missing from this excerpt.
                self.format_resolution(format),
                format_note(format),
                # NOTE(review): the tuple/paren closing lines are missing
                # from this excerpt.

        # When no 'formats' list is present, treat the dict itself as the
        # single available format.
        formats = info_dict.get('formats', [info_dict])
        # Width of the id column: at least as wide as the header text.
        idlen = max(len(u'format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are assumed ordered worst-to-best — annotate the ends.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        # Render the header through the same row formatter so the columns
        # align with the data rows.
        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))
1061 def urlopen(self, req):
1062 """ Start an HTTP download """
1063 return self._opener.open(req)
    def print_debug_header(self):
        """With --verbose, dump version, git revision, platform and proxy map."""
        if not self.params.get('verbose'):
            # NOTE(review): the early `return` is missing from this excerpt.
        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
        # Best effort: report the git commit when running from a checkout.
        # NOTE(review): the `try:` around the git invocation is missing
        # from this excerpt.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # Only print output that actually looks like a commit hash.
            if re.match('[0-9a-f]+', out):
                write_string(u'[debug] Git HEAD: ' + out + u'\n')
        # NOTE(review): the `except` clause(s) for the block above are
        # missing from this excerpt.
        write_string(u'[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + u'\n')
        # Collect proxies from every opener handler that exposes them.
        # NOTE(review): the `proxy_map = {}` initialisation is missing from
        # this excerpt.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
    def _setup_opener(self):
        """Build and install the urllib opener (cookies, proxy, HTTPS) used for all requests."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a generous 600 s socket timeout when none was given.
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No --cookies file: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            # NOTE(review): the `else:` introducing this branch is missing
            # from this excerpt — persistent Mozilla-format cookie jar.
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # NOTE(review): the constructor argument line is missing from
            # this excerpt (presumably opts_cookiefile — confirm).
            if os.access(opts_cookiefile, os.R_OK):
                # Only load when the file exists and is readable.
                self.cookiejar.load()
        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        # NOTE(review): the argument line is missing from this excerpt
        # (presumably self.cookiejar — confirm).
        if opts_proxy is not None:
            if opts_proxy == '':
                # NOTE(review): lines are missing from this excerpt;
                # --proxy "" presumably disables proxies entirely, and an
                # `else:` precedes the line below.
            proxies = {'http': opts_proxy, 'https': opts_proxy}
            # NOTE(review): an `else:` is missing from this excerpt — with
            # no --proxy option, fall back to environment proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False))
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)