2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
49 UnavailableVideoError,
55 from .extractor import get_info_extractor, gen_extractors
56 from .downloader import get_suitable_downloader
57 from .version import __version__
60 class YoutubeDL(object):
63 YoutubeDL objects are the ones responsible for downloading the
64 actual video file and writing it to disk if the user has requested
65 it, among some other tasks. In most cases there should be one per
66 program. As, given a video URL, the downloader doesn't know how to
67 extract all the needed information, task that InfoExtractors do, it
68 has to pass the URL to one of them.
70 For this, YoutubeDL objects have a method that allows
71 InfoExtractors to be registered in a given order. When it is passed
72 a URL, the YoutubeDL object handles it to the first InfoExtractor it
73 finds that reports being able to handle it. The InfoExtractor extracts
74 all the information about the video or videos the URL refers to, and
75 YoutubeDL process the extracted information, possibly using a File
76 Downloader to download the video.
78 YoutubeDL objects accept a lot of parameters. In order not to saturate
79 the object constructor with arguments, it receives a dictionary of
80 options instead. These options are available through the params
81 attribute for the InfoExtractors to use. The YoutubeDL also
82 registers itself as the downloader in charge for the InfoExtractors
83 that are added to it, so this is a "mutual registration".
87 username: Username for authentication purposes.
88 password: Password for authentication purposes.
89 videopassword: Password for accessing a video.
90 usenetrc: Use netrc for authentication instead.
91 verbose: Print additional info to stdout.
92 quiet: Do not print messages to stdout.
93 forceurl: Force printing final URL.
94 forcetitle: Force printing title.
95 forceid: Force printing ID.
96 forcethumbnail: Force printing thumbnail URL.
97 forcedescription: Force printing description.
98 forcefilename: Force printing final filename.
99 forceduration: Force printing duration.
100 forcejson: Force printing info_dict as JSON.
101 simulate: Do not download the video files.
102 format: Video format code.
103 format_limit: Highest quality format to try.
104 outtmpl: Template for output names.
105 restrictfilenames: Do not allow "&" and spaces in file names
106 ignoreerrors: Do not stop on download errors.
107 nooverwrites: Prevent overwriting files.
108 playliststart: Playlist item to start at.
109 playlistend: Playlist item to end at.
110 matchtitle: Download only matching titles.
111 rejecttitle: Reject downloads for matching titles.
112 logger: Log messages to a logging.Logger instance.
113 logtostderr: Log messages to stderr instead of stdout.
114 writedescription: Write the video description to a .description file
115 writeinfojson: Write the video description to a .info.json file
116 writeannotations: Write the video annotations to a .annotations.xml file
117 writethumbnail: Write the thumbnail image to a file
118 writesubtitles: Write the video subtitles to a file
119 writeautomaticsub: Write the automatic subtitles to a file
120 allsubtitles: Downloads all the subtitles of the video
121 (requires writesubtitles or writeautomaticsub)
122 listsubtitles: Lists all available subtitles for the video
123 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
124 subtitleslangs: List of languages of the subtitles to download
125 keepvideo: Keep the video file after post-processing
126 daterange: A DateRange object, download only if the upload_date is in the range.
127 skip_download: Skip the actual download of the video file
128 cachedir: Location of the cache files in the filesystem.
129 None to disable filesystem cache.
130 noplaylist: Download single video instead of a playlist if in doubt.
131 age_limit: An integer representing the user's age in years.
132 Unsuitable videos for the given age are skipped.
133 min_views: An integer representing the minimum view count the video
134 must have in order to not be skipped.
135 Videos without view count information are always
136 downloaded. None for no limit.
137 max_views: An integer representing the maximum view count.
138 Videos that are more popular than that are not
140 Videos without view count information are always
141 downloaded. None for no limit.
142 download_archive: File name of a file where all downloads are recorded.
143 Videos already present in the file are not downloaded
145 cookiefile: File name where cookies should be read from and dumped to.
146 nocheckcertificate:Do not verify SSL certificates
147 proxy: URL of the proxy server to use
148 socket_timeout: Time to wait for unresponsive hosts, in seconds
149 bidi_workaround: Work around buggy terminals without bidirectional text
150 support, using fribidi
152 The following parameters are not used by YoutubeDL itself, they are used by
154 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
155 noresizebuffer, retries, continuedl, noprogress, consoletitle
    # Class-level defaults; real per-instance values are assigned in __init__.
    _download_retcode = None  # exit code accumulated across downloads
    _num_downloads = None     # ordinal counter of files downloaded this session
    def __init__(self, params=None):
        """Create a FileDownloader object with the given options.

        params -- dict of options (see the class docstring). NOTE(review):
        the lines defaulting params to {} and storing self.params are missing
        from this excerpt -- confirm against the full file.
        """
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr when 'logtostderr' is set (bool indexes the pair).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr

        if params.get('bidi_workaround', False):
            # Pipe all screen output through an external bidi filter so RTL
            # text renders on terminals without bidirectional support.
            # NOTE(review): the enclosing try: and several setup lines are
            # missing from this excerpt; fragments below are kept verbatim.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            # Fall back to fribidi when bidiv is unavailable.
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        # On Python 3 with an ASCII filesystem encoding, unrestricted
        # filenames would crash the Unicode filesystem API.
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params['restrictfilenames']):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            # NOTE(review): the self.report_warning( opener for the message
            # below is missing from this excerpt.
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list.

        Also indexes the instance by its ie_key() and registers self as its
        downloader ("mutual registration"; see the class docstring).
        """
        # NOTE(review): the line appending ie to the ordered _ies list is
        # missing from this excerpt.
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # NOTE(review): the `if ie is None:` guard and the final `return ie`
        # are missing from this excerpt; the two lines below only run on a
        # cache miss in the full file.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
239 def add_default_info_extractors(self):
241 Add the InfoExtractors returned by gen_extractors to the end of the list
243 for ie in gen_extractors():
244 self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # NOTE(review): the line appending pp to the processor list (_pps)
        # is missing from this excerpt.
        pp.set_downloader(self)
251 def add_progress_hook(self, ph):
252 """Add the progress hook (currently only for the file downloader)"""
253 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Feed `message` through the external bidi process set up in __init__
        # and return the reshaped text, line for line.
        if not hasattr(self, '_output_channel'):
            # NOTE(review): the early `return message` for the
            # no-workaround case is missing from this excerpt.

        assert hasattr(self, '_output_process')
        assert type(message) == type(u'')
        line_count = message.count(u'\n') + 1
        self._output_process.stdin.write((message + u'\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as we wrote.
        res = u''.join(self._output_channel.readline().decode('utf-8')
                       for _ in range(line_count))
        # Drop the trailing newline we appended before writing.
        return res[:-len(u'\n')]
268 def to_screen(self, message, skip_eol=False):
269 """Print message to stdout if not in quiet mode."""
270 return self.to_stdout(message, skip_eol, check_quiet=True)
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            # A configured logger takes precedence over direct screen output.
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = [u'\n', u''][skip_eol]  # no newline when skip_eol
            output = message + terminator
            # NOTE(review): one line (presumably bytes-mode/encoding
            # handling) is missing from this excerpt before the write.
            write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        if self.params.get('logger'):
            self.params['logger'].error(message)
        # NOTE(review): the `else:` introducing this branch is missing from
        # this excerpt.
            message = self._bidi_workaround(message)
            output = message + u'\n'
            write_string(output, self._err_file)
    def to_console_title(self, message):
        # Set the terminal/console window title when 'consoletitle' is enabled.
        if not self.params.get('consoletitle', False):
            # NOTE(review): the guard's `return` is missing from this excerpt.
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm escape sequence: set window title.
            write_string(u'\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        # Push the current terminal title onto the xterm title stack.
        if not self.params.get('consoletitle', False):
            # NOTE(review): the guard's `return` is missing from this excerpt.
        if 'TERM' in os.environ:
            # Save the title on stack
            write_string(u'\033[22;0t', self._screen_file)
    def restore_console_title(self):
        # Pop the terminal title previously saved by save_console_title().
        if not self.params.get('consoletitle', False):
            # NOTE(review): the guard's `return` is missing from this excerpt.
        if 'TERM' in os.environ:
            # Restore the title from stack
            write_string(u'\033[23;0t', self._screen_file)
318 self.save_console_title()
    def __exit__(self, *args):
        # Context-manager exit: restore the terminal title and persist cookies.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # NOTE(review): the `if tb is None:` header is missing from this
            # excerpt; the branches below compute a traceback when absent.
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the wrapped extractor traceback when one is attached.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            # NOTE(review): the `else:` for the non-exception case is missing
            # from this excerpt.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
            # NOTE(review): the line printing tb to stderr is missing.
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            # NOTE(review): `else:` missing from this excerpt.
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;33mWARNING:\033[0m'  # ANSI yellow
        # NOTE(review): the `else:` is missing from this excerpt.
            _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;31mERROR:\033[0m'  # ANSI red
        # NOTE(review): the `else:` is missing from this excerpt.
            _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # NOTE(review): the `try:` opening this block is missing from this
        # excerpt.
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a filename-free message for unencodable names.
            self.to_screen(u'[download] The file has already been downloaded')
388 def increment_downloads(self):
389 """Increment the ordinal that assigns a number to each file."""
390 self._num_downloads += 1
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): the enclosing try: and the except KeyError handler
        # are missing from this excerpt; fragments are kept verbatim.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            # NOTE(review): the default assignment (presumably 5) is missing
            # from this excerpt -- confirm against the full file.
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad playlist indices so filenames sort lexicographically.
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        # Make every value safe to embed in a filesystem path.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
            # NOTE(review): the value argument and is_id flag lines of this
            # call are missing from this excerpt.
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Unknown template keys render as the literal string 'NA'.
        template_dict = collections.defaultdict(lambda: u'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
        # NOTE(review): `return filename` is missing from this excerpt.
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # Each early return yields a human-readable skip reason; falling
        # through means "download it".
        video_title = info_dict.get('title', info_dict.get('id', u'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            # NOTE(review): the `if matchtitle:` header is missing from this excerpt.
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            # NOTE(review): the `if rejecttitle:` header is missing from this excerpt.
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        # NOTE(review): the `if date is not None:` header is missing from this excerpt.
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                # NOTE(review): `title` is only bound when 'title' is in
                # info_dict -- this line can raise NameError for title-less
                # entries; video_title would be the safe choice.
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return u'%s has already been recorded in archive' % video_title
        # NOTE(review): the final `return None` is missing from this excerpt.
459 def add_extra_info(info_dict, extra_info):
460 '''Set the keys from extra_info in info dict if they are missing'''
461 for key, value in extra_info.items():
462 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     # NOTE(review): the rest of the signature (presumably
                     # `process=True):`) is missing from this excerpt.
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        # NOTE(review): extra_info={} is a shared mutable default; as far as
        # this excerpt shows it is only read -- confirm callees don't mutate it.
        # NOTE(review): the `if ie_key is not None:`/`else:` selection and the
        # `for ie in ies:`/`try:` loop scaffolding are missing below.
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
            # (continue missing from this excerpt)
        self.report_warning(u'The program functionality for this site has been marked as broken, '
                            u'and will probably not work.')

        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            # (break missing from this excerpt)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            # (dict-literal opener missing from this excerpt)
            '_type': 'compat_list',
            'entries': ie_result,
        # Stamp the result with its origin for later processing.
        self.add_extra_info(ie_result,
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            # NOTE(review): the re-raise for the non-ignore case and the
            # for-else are missing from this excerpt.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Overlay selected fields of the embedded result onto a copy
                # of the original ie_result.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                # NOTE(review): `return new_result` is missing from this excerpt.
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1  # 1-based option -> 0-based slice
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                # NOTE(review): the reset of playlistend is missing from this excerpt.

            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)

            # NOTE(review): the self.to_screen( opener for the message below
            # is missing from this excerpt.
                u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                # NOTE(review): the dict-literal opener for the per-entry
                # extra info below is missing from this excerpt.
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)
                    # NOTE(review): the `continue` is missing from this excerpt.

                entry_result = self.process_ie_result(entry,
                # NOTE(review): the remaining call arguments are missing from
                # this excerpt.
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            # NOTE(review): `return ie_result` is missing from this excerpt.
        elif result_type == 'compat_list':
            # Legacy list-of-dicts result: stamp each entry, then process it.
            # NOTE(review): the `def _fixup(r):` header is missing from this excerpt.
                self.add_extra_info(r,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            # NOTE(review): the closing bracket, return, and `else:` header
            # are missing from this excerpt.
            raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        # Resolve a single format spec ('best', 'worst', an extension, or a
        # format_id) against available_formats, which is ordered worst->best.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]  # last entry is the highest quality
        elif format_spec == 'worst':
            return available_formats[0]
        # NOTE(review): the `else:` branch header is missing from this excerpt.
            extensions = [u'mp4', u'flv', u'webm', u'3gp']
            if format_spec in extensions:
                # Spec names a container extension: match on 'ext'.
                filter_f = lambda f: f['ext'] == format_spec
            # NOTE(review): `else:` missing from this excerpt.
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
            # NOTE(review): the final `if matches: return matches[-1]` (and
            # implicit None otherwise) is missing from this excerpt.
    def process_video_result(self, info_dict, download=True):
        # Choose which format(s) of a single resolved video to download and
        # hand each one to process_info().
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # These extractors handle format selection themselves
        if info_dict['extractor'] in [u'Youku']:
            # NOTE(review): the `if download:` guard is missing from this excerpt.
            self.process_info(info_dict)
            # NOTE(review): the early `return` is missing from this excerpt.

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        # NOTE(review): `else:` missing from this excerpt.
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                # NOTE(review): the closing paren is missing from this excerpt.
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        format_limit = self.params.get('format_limit', None)
        # NOTE(review): the `if format_limit:` guard is missing from this excerpt.
            # Drop everything ranked above the requested quality ceiling.
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            # NOTE(review): the closing paren is missing from this excerpt.

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists
            # them; otherwise we end up with a circular reference, the first
            # (and unique) element in the 'formats' field in info_dict being
            # info_dict itself, which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            # NOTE(review): the early `return` is missing from this excerpt.

        req_format = self.params.get('format', 'best')
        if req_format is None:
            # NOTE(review): the fallback assignment (presumably 'best') is
            # missing from this excerpt.
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        # NOTE(review): `else:` missing from this excerpt.
            # We can accept formats requested in the format: 34/5/best, we
            # pick the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    # NOTE(review): the `break` is missing from this excerpt.
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',
            # NOTE(review): the remaining arguments / closing paren are
            # missing from this excerpt.

        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        # NOTE(review): `return info_dict` is missing from this excerpt.
    def process_info(self, info_dict):
        """Process a single resolved IE result."""
        # NOTE(review): many scaffolding lines (try:/else:/return) are
        # missing from this excerpt; gaps are flagged where control flow
        # depends on them.

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            # Trim over-long titles so filenames stay manageable.
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if not 'format' in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            # NOTE(review): the `return` for this skip path is missing.

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings: each --force-* option prints one field to stdout.
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # NOTE(review): the `return` is missing from this excerpt.

        # NOTE(review): the filename guard and `try:` are missing here.
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
                # NOTE(review): the os.makedirs(dn) line is missing.
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            # NOTE(review): the `return` is missing from this excerpt.

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen(u'[info] Video description is already present')
            # NOTE(review): the `else:`/`try:` lines are missing here.
                self.to_screen(u'[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                # NOTE(review): the `return` is missing from this excerpt.

        if self.params.get('writeannotations', False):
            annofn = filename + u'.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen(u'[info] Video annotations are already present')
            # NOTE(review): the `else:`/`try:` lines are missing here.
                self.to_screen(u'[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                # NOTE(review): the empty-subtitle guard and `try:` are missing here.
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    # NOTE(review): the `else:` is missing here.
                        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            # NOTE(review): the subfile.write(sub) line is missing.
                except (OSError, IOError):
                    # NOTE(review): this message names `descfn`, which belongs
                    # to the description section above -- presumably it should
                    # be sub_filename; cannot fix in a doc-only pass.
                    self.report_error(u'Cannot write subtitles file ' + descfn)

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + u'.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen(u'[info] Video description metadata is already present')
            # NOTE(review): the `else:` is missing here.
                self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
            # NOTE(review): the `try:` is missing here.
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                # NOTE(review): the `return` is missing from this excerpt.

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen(u'[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                # NOTE(review): the `else:` is missing here.
                    self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    # NOTE(review): the `try:` is missing here.
                        uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                # NOTE(review): the success assignment and `else:`/`try:`
                # scaffolding are missing here.
                    fd = get_suitable_downloader(info_dict)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    success = fd.download(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    # NOTE(review): the `return` is missing from this excerpt.
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    # NOTE(review): the `return` is missing from this excerpt.

            # NOTE(review): the `if success:`/`try:` lines are missing here.
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error(u'postprocessing: %s' % str(err))
                    # NOTE(review): the `return` is missing from this excerpt.

        # Record a successful download in the archive file, if configured.
        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        # Refuse to download multiple URLs into one fixed (template-free)
        # filename -- they would overwrite each other.
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        # NOTE(review): the `for url in url_list:` and `try:` lines are
        # missing from this excerpt.
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            # NOTE(review): the re-raise that stops the loop is missing from
            # this excerpt.

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        # Re-run processing/download from a previously dumped .info.json file.
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            # NOTE(review): the json.load line is missing from this excerpt.
        # NOTE(review): the `try:` is missing from this excerpt.
            self.process_ie_result(info, download=True)
        except DownloadError:
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                # Fall back to a fresh extraction from the original page URL.
                self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
            # NOTE(review): the `else: raise` is missing from this excerpt.
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # NOTE(review): the lines copying ie_info into `info`, initialising
        # keep_video, and the `for pp in self._pps:`/`try:` loop header are
        # missing from this excerpt.
        info['filepath'] = filename
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                    # NOTE(review): the `if keep_video_wish:` line is missing here.
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        # Delete the original unless a processor or --keep-video wants it kept.
        if keep_video is False and not self.params.get('keepvideo', False):
            # NOTE(review): the `try:` is missing from this excerpt.
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')
960 def _make_archive_id(self, info_dict):
961 # Future-proof against any change in case
962 # and backwards compatibility with prior versions
963 extractor = info_dict.get('extractor_key')
964 if extractor is None:
965 if 'id' in info_dict:
966 extractor = info_dict.get('ie_key') # key in a playlist
967 if extractor is None:
968 return None # Incomplete video information
969 return extractor.lower() + u' ' + info_dict['id']
# Report whether this video is already recorded in the --download-archive
# file. NOTE(review): several lines (973-975, 977, 979-980, 984, 987-989,
# including the `try:` and the `return True`/`return False` paths) are
# elided from this view.
971 def in_download_archive(self, info_dict):
972 fn = self.params.get('download_archive')
976 vid_id = self._make_archive_id(info_dict)
978 return False # Incomplete video information
# Archive format: one '<extractor> <id>' entry per line (see
# _make_archive_id); membership is an exact stripped-line match.
981 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
982 for line in archive_file:
983 if line.strip() == vid_id:
# A missing archive file (ENOENT) is normal on first run; any other
# I/O error is presumably re-raised below (elided).
985 except IOError as ioe:
986 if ioe.errno != errno.ENOENT:
# Append this video's archive key to the --download-archive file so it is
# skipped on later runs. NOTE(review): original lines 992-993 and 995
# (presumably the early return when no archive is configured and an
# assertion on vid_id) are elided from this view.
990 def record_download_archive(self, info_dict):
991 fn = self.params.get('download_archive')
994 vid_id = self._make_archive_id(info_dict)
# locked_file guards against concurrent writers appending simultaneously.
996 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
997 archive_file.write(vid_id + u'\n')
# Render a human-readable resolution string for a format dict, preferring
# an explicit 'resolution' value, then WxH, then 'Hp', then '?xW'.
# NOTE(review): original lines 1002, 1008 and the tail (1012+, including
# the audio-only return and the final `return res`/default fallback) are
# elided from this view.
1000 def format_resolution(format, default='unknown'):
1001 if format.get('vcodec') == 'none':
1003 if format.get('resolution') is not None:
1004 return format['resolution']
1005 if format.get('height') is not None:
1006 if format.get('width') is not None:
1007 res = u'%sx%s' % (format['width'], format['height'])
1009 res = u'%sp' % format['height']
1010 elif format.get('width') is not None:
1011 res = u'?x%d' % format['width']
# Print a table of the available formats for a video (--list-formats).
# NOTE(review): numerous interior lines are elided from this view
# (e.g. 1018, 1029, 1033-1034, 1037-1039, 1043-1044, 1046-1047, 1051,
# 1054-1055, 1063), so some branches below appear truncated.
1016 def list_formats(self, info_dict):
# Build the free-text 'note' column: container support, extractor note,
# total bitrate, then codec@bitrate details and file size.
1017 def format_note(fdict):
1019 if fdict.get('ext') in ['f4f', 'f4m']:
1020 res += u'(unsupported) '
1021 if fdict.get('format_note') is not None:
1022 res += fdict['format_note'] + u' '
1023 if fdict.get('tbr') is not None:
1024 res += u'%4dk ' % fdict['tbr']
1025 if (fdict.get('vcodec') is not None and
1026 fdict.get('vcodec') != 'none'):
1027 res += u'%-5s@' % fdict['vcodec']
1028 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1030 if fdict.get('vbr') is not None:
1031 res += u'%4dk' % fdict['vbr']
1032 if fdict.get('acodec') is not None:
1035 res += u'%-5s' % fdict['acodec']
1036 elif fdict.get('abr') is not None:
1040 if fdict.get('abr') is not None:
1041 res += u'@%3dk' % fdict['abr']
1042 if fdict.get('filesize') is not None:
1045 res += format_bytes(fdict['filesize'])
# One fixed-width table row; idlen sizes the format-id column.
1048 def line(format, idlen=20):
1049 return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
1050 format['format_id'],
1052 self.format_resolution(format),
1053 format_note(format),
# Single-format results have no 'formats' list; treat the info dict
# itself as the one available format.
1056 formats = info_dict.get('formats', [info_dict])
1057 idlen = max(len(u'format code'),
1058 max(len(f['format_id']) for f in formats))
1059 formats_s = [line(f, idlen) for f in formats]
# Formats are assumed sorted worst-to-best; tag the two extremes.
1060 if len(formats) > 1:
1061 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1062 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1064 header_line = line({
1065 'format_id': u'format code', 'ext': u'extension',
1066 'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1067 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1068 (info_dict['id'], header_line, u"\n".join(formats_s)))
1070 def urlopen(self, req):
1071 """ Start an HTTP download """
1072 return self._opener.open(req)
# Write version/environment diagnostics when --verbose is set.
# NOTE(review): original lines 1076, 1078 and 1086-1091 (including the
# early return and the try/except guarding the git call) are elided from
# this view.
1074 def print_debug_header(self):
1075 if not self.params.get('verbose'):
1077 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
# Best-effort: when running from a git checkout, report the current
# commit; failures are presumably swallowed by the elided try/except.
1079 sp = subprocess.Popen(
1080 ['git', 'rev-parse', '--short', 'HEAD'],
1081 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1082 cwd=os.path.dirname(os.path.abspath(__file__)))
1083 out, err = sp.communicate()
1084 out = out.decode().strip()
# Only print output that actually looks like a commit hash.
1085 if re.match('[0-9a-f]+', out):
1086 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1092 write_string(u'[debug] Python version %s - %s' %
1093 (platform.python_version(), platform_name()) + u'\n')
# Collect the effective proxy settings from every handler that has any.
1096 for handler in self._opener.handlers:
1097 if hasattr(handler, 'proxies'):
1098 proxy_map.update(handler.proxies)
1099 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
# Build the urllib opener (cookies, proxy, HTTPS certificate handling)
# used by self.urlopen, then install it globally and set the default
# socket timeout. NOTE(review): original lines 1104, 1107, 1110, 1112,
# 1115, 1117, 1120-1121 and 1123 (several else branches and argument
# continuations) are elided from this view.
1101 def _setup_opener(self):
1102 timeout_val = self.params.get('socket_timeout')
# Default network timeout is 600 seconds unless --socket-timeout is given.
1103 timeout = 600 if timeout_val is None else float(timeout_val)
1105 opts_cookiefile = self.params.get('cookiefile')
1106 opts_proxy = self.params.get('proxy')
# Without --cookies keep cookies in memory only; with a cookie file use
# a Mozilla-format jar, loading it only when the file is readable.
1108 if opts_cookiefile is None:
1109 self.cookiejar = compat_cookiejar.CookieJar()
1111 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1113 if os.access(opts_cookiefile, os.R_OK):
1114 self.cookiejar.load()
1116 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# An explicit --proxy overrides the environment; an empty string
# presumably disables proxying entirely (branch body elided).
1118 if opts_proxy is not None:
1119 if opts_proxy == '':
1122 proxies = {'http': opts_proxy, 'https': opts_proxy}
1124 proxies = compat_urllib_request.getproxies()
1125 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1126 if 'http' in proxies and 'https' not in proxies:
1127 proxies['https'] = proxies['http']
1128 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1129 https_handler = make_HTTPS_handler(
1130 self.params.get('nocheckcertificate', False))
1131 opener = compat_urllib_request.build_opener(
1132 https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1133 # Delete the default user-agent header, which would otherwise apply in
1134 # cases where our custom HTTP handler doesn't come into play
1135 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1136 opener.addheaders = []
1137 self._opener = opener
1139 # TODO remove this global modification
1140 compat_urllib_request.install_opener(opener)
1141 socket.setdefaulttimeout(timeout)