2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
49 UnavailableVideoError,
55 from .extractor import get_info_extractor, gen_extractors
56 from .FileDownloader import FileDownloader
57 from .version import __version__
60 class YoutubeDL(object):
63 YoutubeDL objects are the ones responsible of downloading the
64 actual video file and writing it to disk if the user has requested
65 it, among some other tasks. In most cases there should be one per
66 program. As, given a video URL, the downloader doesn't know how to
67 extract all the needed information, task that InfoExtractors do, it
68 has to pass the URL to one of them.
70 For this, YoutubeDL objects have a method that allows
71 InfoExtractors to be registered in a given order. When it is passed
72 a URL, the YoutubeDL object handles it to the first InfoExtractor it
73 finds that reports being able to handle it. The InfoExtractor extracts
74 all the information about the video or videos the URL refers to, and
75 YoutubeDL process the extracted information, possibly using a File
76 Downloader to download the video.
78 YoutubeDL objects accept a lot of parameters. In order not to saturate
79 the object constructor with arguments, it receives a dictionary of
80 options instead. These options are available through the params
81 attribute for the InfoExtractors to use. The YoutubeDL also
82 registers itself as the downloader in charge for the InfoExtractors
83 that are added to it, so this is a "mutual registration".
87 username: Username for authentication purposes.
88 password: Password for authentication purposes.
videopassword:     Password for accessing a video.
90 usenetrc: Use netrc for authentication instead.
91 verbose: Print additional info to stdout.
92 quiet: Do not print messages to stdout.
93 forceurl: Force printing final URL.
94 forcetitle: Force printing title.
95 forceid: Force printing ID.
96 forcethumbnail: Force printing thumbnail URL.
97 forcedescription: Force printing description.
98 forcefilename: Force printing final filename.
99 forceduration: Force printing duration.
100 forcejson: Force printing info_dict as JSON.
101 simulate: Do not download the video files.
102 format: Video format code.
103 format_limit: Highest quality format to try.
104 outtmpl: Template for output names.
105 restrictfilenames: Do not allow "&" and spaces in file names
106 ignoreerrors: Do not stop on download errors.
107 nooverwrites: Prevent overwriting files.
108 playliststart: Playlist item to start at.
109 playlistend: Playlist item to end at.
110 matchtitle: Download only matching titles.
111 rejecttitle: Reject downloads for matching titles.
112 logger: Log messages to a logging.Logger instance.
113 logtostderr: Log messages to stderr instead of stdout.
114 writedescription: Write the video description to a .description file
115 writeinfojson: Write the video description to a .info.json file
116 writeannotations: Write the video annotations to a .annotations.xml file
117 writethumbnail: Write the thumbnail image to a file
118 writesubtitles: Write the video subtitles to a file
119 writeautomaticsub: Write the automatic subtitles to a file
120 allsubtitles: Downloads all the subtitles of the video
121 (requires writesubtitles or writeautomaticsub)
122 listsubtitles: Lists all available subtitles for the video
123 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
124 subtitleslangs: List of languages of the subtitles to download
125 keepvideo: Keep the video file after post-processing
126 daterange: A DateRange object, download only if the upload_date is in the range.
127 skip_download: Skip the actual download of the video file
128 cachedir: Location of the cache files in the filesystem.
129 None to disable filesystem cache.
130 noplaylist: Download single video instead of a playlist if in doubt.
131 age_limit: An integer representing the user's age in years.
132 Unsuitable videos for the given age are skipped.
133 min_views: An integer representing the minimum view count the video
134 must have in order to not be skipped.
135 Videos without view count information are always
136 downloaded. None for no limit.
137 max_views: An integer representing the maximum view count.
138 Videos that are more popular than that are not
140 Videos without view count information are always
141 downloaded. None for no limit.
142 download_archive: File name of a file where all downloads are recorded.
143 Videos already present in the file are not downloaded
145 cookiefile: File name where cookies should be read from and dumped to.
146 nocheckcertificate:Do not verify SSL certificates
147 proxy: URL of the proxy server to use
148 socket_timeout: Time to wait for unresponsive hosts, in seconds
149 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
152 The following parameters are not used by YoutubeDL itself, they are used by
154 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
155 noresizebuffer, retries, continuedl, noprogress, consoletitle
161 _download_retcode = None
162 _num_downloads = None
def __init__(self, params=None):
    """Create a FileDownloader object with the given options.

    params is the option dictionary described in the class docstring; it is
    stored on self.params and shared with the registered InfoExtractors.
    """
    # NOTE(review): this excerpt appears to be missing several lines
    # (the try/except scaffolding around the bidi-helper setup, parts of
    # the subprocess keyword dict, and the report_warning( call opening
    # the restrict-filenames message) -- confirm against the full file.
    self._ies_instances = {}
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # NOTE(review): 'params' defaults to None but is dereferenced with
    # .get() below before the None-guard assignment -- looks unsafe; confirm.
    # Route screen output to stderr when logtostderr is set (bool indexes
    # the two-element list).
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {} if params is None else params

    # Optional workaround for terminals lacking bidirectional-text support:
    # pipe all screen output through an external 'bidiv'/'fribidi' process.
    if params.get('bidi_workaround', False):
            master, slave = pty.openpty()
            width = get_term_width()
                width_args = ['-w', str(width)]
                stdin=subprocess.PIPE,
                stderr=self._err_file)
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
                self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    # On Python 3 with an ASCII filesystem encoding, force restricted
    # filenames to avoid Unicode filesystem API errors (#1474).
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
        sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
        and not params['restrictfilenames']):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    # The actual file downloader; shares this object's params.
    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    # NOTE(review): the line appending ie to the ordered extractor list
    # (self._ies) appears to be missing from this excerpt -- confirm.
    # Index by key for get_info_extractor() lookups.
    self._ies_instances[ie.ie_key()] = ie
    # Mutual registration: the IE reports back through this downloader.
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    # NOTE(review): the 'if ie is None:' guard and the trailing
    # 'return ie' appear to be missing from this excerpt -- confirm.
    ie = self._ies_instances.get(ie_key)
        # Instantiate the extractor class and register it.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # NOTE(review): the line appending pp to the post-processor chain
    # (self._pps) appears to be missing from this excerpt -- confirm.
    # Mutual registration, mirroring add_info_extractor().
    pp.set_downloader(self)
def _bidi_workaround(self, message):
    # Filter *message* through the external bidi helper process started in
    # __init__ (bidi_workaround option); the helper echoes each input line
    # back in visual (display) order.
    if not hasattr(self, '_output_channel'):
    # NOTE(review): the early 'return message' for the no-helper case
    # appears to be missing from this excerpt -- confirm.

    assert hasattr(self, '_output_process')
    assert type(message) == type(u'')
    # One readline() per input line keeps the pipe in lockstep.
    line_count = message.count(u'\n') + 1
    self._output_process.stdin.write((message + u'\n').encode('utf-8'))
    self._output_process.stdin.flush()
    res = u''.join(self._output_channel.readline().decode('utf-8')
                   for _ in range(line_count))
    # Drop the trailing newline that was appended above.
    return res[:-len(u'\n')]
def to_screen(self, message, skip_eol=False):
    """Show *message* on the screen unless quiet mode suppresses it."""
    # Delegate to to_stdout with quiet-checking enabled so the 'quiet'
    # option is honoured for screen output.
    return self.to_stdout(message, skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Write *message* to the screen file, or hand it to the configured logger.

    When check_quiet is true and the 'quiet' option is set, nothing is
    written.
    """
    logger = self.params.get('logger')
    if logger:
        # A logging.Logger instance takes precedence over direct output.
        logger.debug(message)
        return
    suppressed = check_quiet and self.params.get('quiet', False)
    if not suppressed:
        text = self._bidi_workaround(message)
        ending = u'' if skip_eol else u'\n'
        write_string(text + ending, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert type(message) == type(u'')
    if self.params.get('logger'):
        # A logging.Logger instance takes precedence over direct output.
        self.params['logger'].error(message)
    # NOTE(review): an 'else:' introducing the direct-write branch appears
    # to be missing from this excerpt -- confirm.
        message = self._bidi_workaround(message)
        output = message + u'\n'
        write_string(output, self._err_file)
def to_console_title(self, message):
    # Set the terminal/console window title to *message* (only when the
    # 'consoletitle' option is enabled).
    if not self.params.get('consoletitle', False):
    # NOTE(review): the early 'return' for the disabled case appears to be
    # missing from this excerpt -- confirm.
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm escape sequence: set the window title.
        write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    # Push the current terminal title onto the xterm title stack so it can
    # later be restored by restore_console_title().
    if not self.params.get('consoletitle', False):
    # NOTE(review): the early 'return' for the disabled case appears to be
    # missing from this excerpt -- confirm.
    if 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    # Pop the terminal title previously saved by save_console_title().
    if not self.params.get('consoletitle', False):
    # NOTE(review): the early 'return' for the disabled case appears to be
    # missing from this excerpt -- confirm.
    if 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
314 self.save_console_title()
def __exit__(self, *args):
    """Context-manager exit: restore the terminal title and persist cookies."""
    self.restore_console_title()

    # Only flush the cookie jar when a cookie file was configured.
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    # NOTE(review): several structural lines (the 'if tb is None:' guard,
    # the 'tb = u"..."' initialisation, the 'else:' branches and the
    # 'self.to_stderr(tb)' call) appear to be missing from this excerpt.
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if sys.exc_info()[0]:  # if .trouble has been called from an except block
            # DownloadError wraps the original cause in .exc_info; prefer it.
            if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
            tb += compat_str(traceback.format_exc())
            tb_data = traceback.format_list(traceback.extract_stack())
            tb = u''.join(tb_data)
    if not self.params.get('ignoreerrors', False):
        # Re-raise with the most specific exc_info available.
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # Errors are being ignored: record failure in the process return code.
    self._download_retcode = 1
def report_warning(self, message):
    """
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    """
    # ANSI yellow prefix on colour-capable terminals; never on Windows,
    # where the escape codes would print literally.
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    # NOTE(review): an 'else:' for the plain-prefix branch appears to be
    # missing from this excerpt -- confirm.
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    """
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    """
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
    # NOTE(review): an 'else:' for the plain-prefix branch appears to be
    # missing from this excerpt -- confirm.
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    # trouble() decides between raising and recording the error.
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    # NOTE(review): the 'try:' opening this except block appears to be
    # missing from this excerpt -- confirm.
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The filename may not be representable in the console encoding;
        # fall back to a message without it.
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Advance the ordinal that numbers each downloaded file."""
    self._num_downloads = self._num_downloads + 1
def prepare_filename(self, info_dict):
    """Generate the output filename."""
    # NOTE(review): this excerpt appears to be missing the enclosing
    # 'try:', the autonumber_size default, parts of the sanitize lambda
    # and of the dict comprehension, and the 'return filename' /
    # 'return None' lines -- confirm against the full file.
        # Work on a copy so the caller's info_dict is not polluted with
        # template-only fields.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad so filenames sort in playlist order.
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        # Sanitize every field for filesystem use before substitution.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Fields missing from the template dict render as the literal 'NA'.
        template_dict = collections.defaultdict(lambda: u'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """
    # Returns a human-readable skip reason otherwise.
    # NOTE(review): the 'if matchtitle:' / 'if rejecttitle:' /
    # 'if date is not None:' guards and the final 'return None' appear to
    # be missing from this excerpt -- confirm against the full file.

    video_title = info_dict.get('title', info_dict.get('id', u'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    # View-count filters only apply when the extractor supplied a count.
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # NOTE(review): 'title' may be unbound here when info_dict has
            # no 'title' key (only video_title is always set) -- looks like
            # a latent bug; confirm upstream.
            return u'Skipping "' + title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return u'%s has already been recorded in archive' % video_title
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    # Must be a staticmethod: call sites invoke self.add_extra_info(a, b)
    # with two positional arguments, which would mis-bind on an instance
    # method (self would consume the first slot).
    # Existing keys in info_dict always win; only absent keys are filled.
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={},
    # NOTE(review): the 'process=True):' tail of the signature, the 'else:'
    # selecting all extractors, the 'for ie in ies:' loop, the 'try:' and
    # several dict-literal delimiters appear to be missing from this
    # excerpt -- confirm against the full file. Also note the mutable
    # default for extra_info.
    """
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    """
        # An explicit ie_key restricts extraction to that single extractor.
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                    '_type': 'compat_list',
                    'entries': ie_result,
                # Record provenance of the result on the dict itself.
                self.add_extra_info(ie_result,
                    'extractor': ie.IE_NAME,
                    'webpage_url_basename': url_basename(url),
                    'extractor_key': ie.ie_key(),
                return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
    # No registered extractor accepted the URL.
    self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    # NOTE(review): this excerpt appears to be missing a number of lines
    # (make_result's delete-loop and 'return new_result', the
    # 'playlistend = None' branch, the 'self.to_screen(' call opening the
    # collected-ids message, the 'extra = {' dict literal, 'continue',
    # the _fixup definition, 'return ie_result' and the final 'else:') --
    # confirm against the full file.
    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # Merge fields from the embedded result into a copy of ie_result.
        def make_result(embedded_info):
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'urlhandle', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        # Recurse: the merged result may itself be a playlist etc.
        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # Apply the playliststart/playlistend window (1-based options).
        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:

        entries = ie_result['entries'][playliststart:playlistend]
        n_entries = len(entries)

            u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                'playlist': playlist,
                # Index relative to the full playlist, not the window.
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],

            # Honour title/date/view-count filters per entry.
            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)

            entry_result = self.process_ie_result(entry,
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
    elif result_type == 'compat_list':
            # Tag each legacy entry with provenance before processing.
            self.add_extra_info(r,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
    raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    # Pick one format dict from available_formats (assumed ordered
    # worst-to-best) according to format_spec.
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    # A bare extension selects by 'ext'; anything else is treated as an
    # exact format_id match.
    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    if format_spec in extensions:
        filter_f = lambda f: f['ext'] == format_spec
    # NOTE(review): an 'else:' and the trailing best-match return logic
    # appear to be missing from this excerpt -- confirm.
        filter_f = lambda f: f['format_id'] == format_spec
    matches = list(filter(filter_f, available_formats))
def process_video_result(self, info_dict, download=True):
    """Select the requested format(s) for a single video result and hand
    each chosen format to process_info()."""
    # NOTE(review): this excerpt appears to be missing several lines
    # (a 'return' after the Youku branch, the 'else:' before reading
    # info_dict['formats'], the format_limit guard, the try/except around
    # the free-format ordering, the early 'return' after list_formats,
    # the "req_format = 'best'" default, an 'else:' and 'break' in the
    # selection loop, and the ExtractorError's expected=True argument) --
    # confirm against the full file.
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'Youku']:
        self.process_info(info_dict)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, format) in enumerate(formats):
        if format.get('format_id') is None:
            # Fall back to the position index as a synthetic id.
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = u'{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    # Truncate the list at the configured highest-quality limit.
    format_limit = self.params.get('format_limit', None)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
    if self.params.get('prefer_free_formats'):
        # Reorder so free formats (webm) sort after proprietary ones at
        # equal resolution, making them the 'best' pick.
        def _free_formats_key(f):
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            # We only compare the extension if they have the same height and width
            return (f.get('height') if f.get('height') is not None else -1,
                    f.get('width') if f.get('width') is not None else -1,
        formats = sorted(formats, key=_free_formats_key)

    info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)

    req_format = self.params.get('format', 'best')
    if req_format is None:
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',

    if len(formats_to_download) > 1:
        self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
    for format in formats_to_download:
        # Merge the chosen format's fields over the base info dict.
        new_info = dict(info_dict)
        new_info.update(format)
        self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
def process_info(self, info_dict):
    """Process a single resolved IE result."""
    # Handles forced printing, simulate mode, writing of description /
    # annotations / subtitles / info-json / thumbnail side files, the
    # actual download, post-processing and archive recording.
    # NOTE(review): this excerpt appears to be missing many structural
    # lines (several 'return's, 'try:' openers, 'else:' branches, the
    # os.makedirs call, the subfile.write(sub) line, and the
    # success-guarded post-processing block) -- confirm against the
    # full file.

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        # Keep filenames manageable; full title survives in 'fulltitle'.
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if not 'format' in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printing of requested fields (--get-title, --get-url, ...).
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):

        # Ensure the target directory exists before writing anything.
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))

    if self.params.get('writedescription', False):
        descfn = filename + u'.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen(u'[info] Video description is already present')
                self.to_screen(u'[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # description missing or None
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)

    if self.params.get('writeannotations', False):
        annofn = filename + u'.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen(u'[info] Video annotations are already present')
                self.to_screen(u'[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write annotations file: ' + annofn)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
            except (OSError, IOError):
                # NOTE(review): 'descfn' here looks like a copy/paste slip
                # (should presumably be sub_filename) -- confirm upstream.
                self.report_error(u'Cannot write subtitles file ' + descfn)

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen(u'[info] Video description metadata is already present')
            self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
                # 'urlhandle' is not JSON-serializable; drop it.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen(u'[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnail failures are non-fatal.
                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))

    # Remember the id so future runs skip it (--download-archive).
    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs."""
    # Refuse to download multiple URLs into one fixed output filename.
    if (len(url_list) > 1 and
            '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        raise SameFileError(self.params['outtmpl'])
    # NOTE(review): the 'for url in url_list:' / 'try:' scaffolding and a
    # re-'raise' after the MaxDownloadsReached message appear to be missing
    # from this excerpt -- confirm against the full file.
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')

    return self._download_retcode
def download_with_info_file(self, info_filename):
    # Re-run processing from a previously written .info.json file instead
    # of re-extracting from the network.
    # NOTE(review): the json.load(f) assignment, the 'try:' opener and an
    # 'else: raise' tail appear to be missing from this excerpt -- confirm.
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        self.process_ie_result(info, download=True)
    except DownloadError:
        # Fall back to a fresh extraction from the original page URL.
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    # NOTE(review): the 'info = dict(ie_info)' copy, 'keep_video = None',
    # the 'for pp in self._pps:' / 'try:' scaffolding and the
    # 'if keep_video_wish:' branch appear to be missing from this
    # excerpt -- confirm against the full file.
    info['filepath'] = filename
            # Each post-processor may vote on keeping the original file.
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    # Delete the source file only when a PP said so and -k wasn't given.
    if keep_video is False and not self.params.get('keepvideo', False):
        self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning(u'Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
    """Build the '<extractor> <video id>' key used by the download archive."""
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None and 'id' in info_dict:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return u' '.join((extractor.lower(), info_dict['id']))
969 def in_download_archive(self, info_dict):
970 fn = self.params.get('download_archive')
974 vid_id = self._make_archive_id(info_dict)
976 return False # Incomplete video information
979 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
980 for line in archive_file:
981 if line.strip() == vid_id:
983 except IOError as ioe:
984 if ioe.errno != errno.ENOENT:
988 def record_download_archive(self, info_dict):
989 fn = self.params.get('download_archive')
992 vid_id = self._make_archive_id(info_dict)
994 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
995 archive_file.write(vid_id + u'\n')
998 def format_resolution(format, default='unknown'):
999 if format.get('vcodec') == 'none':
1001 if format.get('_resolution') is not None:
1002 return format['_resolution']
1003 if format.get('height') is not None:
1004 if format.get('width') is not None:
1005 res = u'%sx%s' % (format['width'], format['height'])
1007 res = u'%sp' % format['height']
1012 def list_formats(self, info_dict):
1013 def format_note(fdict):
1015 if fdict.get('format_note') is not None:
1016 res += fdict['format_note'] + u' '
1017 if (fdict.get('vcodec') is not None and
1018 fdict.get('vcodec') != 'none'):
1019 res += u'%-5s' % fdict['vcodec']
1020 elif fdict.get('vbr') is not None:
1022 if fdict.get('vbr') is not None:
1023 res += u'@%4dk' % fdict['vbr']
1024 if fdict.get('acodec') is not None:
1027 res += u'%-5s' % fdict['acodec']
1028 elif fdict.get('abr') is not None:
1032 if fdict.get('abr') is not None:
1033 res += u'@%3dk' % fdict['abr']
1034 if fdict.get('filesize') is not None:
1037 res += format_bytes(fdict['filesize'])
1040 def line(format, idlen=20):
1041 return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
1042 format['format_id'],
1044 self.format_resolution(format),
1045 format_note(format),
1048 formats = info_dict.get('formats', [info_dict])
1049 idlen = max(len(u'format code'),
1050 max(len(f['format_id']) for f in formats))
1051 formats_s = [line(f, idlen) for f in formats]
1052 if len(formats) > 1:
1053 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1054 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1056 header_line = line({
1057 'format_id': u'format code', 'ext': u'extension',
1058 '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1059 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1060 (info_dict['id'], header_line, u"\n".join(formats_s)))
1062 def urlopen(self, req):
1063 """ Start an HTTP download """
1064 return self._opener.open(req)
1066 def print_debug_header(self):
1067 if not self.params.get('verbose'):
1069 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1071 sp = subprocess.Popen(
1072 ['git', 'rev-parse', '--short', 'HEAD'],
1073 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1074 cwd=os.path.dirname(os.path.abspath(__file__)))
1075 out, err = sp.communicate()
1076 out = out.decode().strip()
1077 if re.match('[0-9a-f]+', out):
1078 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1084 write_string(u'[debug] Python version %s - %s' %
1085 (platform.python_version(), platform_name()) + u'\n')
1088 for handler in self._opener.handlers:
1089 if hasattr(handler, 'proxies'):
1090 proxy_map.update(handler.proxies)
1091 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1093 def _setup_opener(self):
1094 timeout_val = self.params.get('socket_timeout')
1095 timeout = 600 if timeout_val is None else float(timeout_val)
1097 opts_cookiefile = self.params.get('cookiefile')
1098 opts_proxy = self.params.get('proxy')
1100 if opts_cookiefile is None:
1101 self.cookiejar = compat_cookiejar.CookieJar()
1103 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1105 if os.access(opts_cookiefile, os.R_OK):
1106 self.cookiejar.load()
1108 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1110 if opts_proxy is not None:
1111 if opts_proxy == '':
1114 proxies = {'http': opts_proxy, 'https': opts_proxy}
1116 proxies = compat_urllib_request.getproxies()
1117 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1118 if 'http' in proxies and 'https' not in proxies:
1119 proxies['https'] = proxies['http']
1120 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1121 https_handler = make_HTTPS_handler(
1122 self.params.get('nocheckcertificate', False))
1123 opener = compat_urllib_request.build_opener(
1124 https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1125 # Delete the default user-agent header, which would otherwise apply in
1126 # cases where our custom HTTP handler doesn't come into play
1127 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1128 opener.addheaders = []
1129 self._opener = opener
1131 # TODO remove this global modification
1132 compat_urllib_request.install_opener(opener)
1133 socket.setdefaulttimeout(timeout)