2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
49 UnavailableVideoError,
55 from .extractor import get_info_extractor, gen_extractors
56 from .downloader import get_suitable_downloader
57 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    # Per-instance state, initialised in __init__; declared here as class
    # attributes for introspection / backwards compatibility.
    _download_retcode = None   # process exit code accumulated across downloads
    _num_downloads = None      # ordinal used for %(autonumber)s in filenames
    def __init__(self, params=None):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): this chunk elides several source lines here (e.g. the
        # ordered extractor list, the post-processor list, and the try/except
        # wrapping the bidi subprocess launch) — confirm against the full file.
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route screen output to stderr when logtostderr is set (bool indexes
        # the two-element list).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {} if params is None else params

        if params.get('bidi_workaround', False):
            # Spawn an external bidi filter (bidiv, falling back to fribidi)
            # and keep a pty pair so its output can be read back line-wise.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params['restrictfilenames']):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # NOTE(review): the chunk elides the line appending `ie` to the
        # ordered extractor list — confirm against the full file.
        self._ies_instances[ie.ie_key()] = ie  # cache keyed by IE key for lookup
        ie.set_downloader(self)  # "mutual registration": give the IE a back-reference
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # NOTE(review): the chunk elides the `if ie is None:` guard and the
        # trailing `return ie` — confirm against the full file.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
238 def add_default_info_extractors(self):
240 Add the InfoExtractors returned by gen_extractors to the end of the list
242 for ie in gen_extractors():
243 self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # NOTE(review): the chunk elides the line appending `pp` to the
        # post-processor chain — confirm against the full file.
        pp.set_downloader(self)  # give the PP a back-reference to this downloader
250 def add_progress_hook(self, ph):
251 """Add the progress hook (currently only for the file downloader)"""
252 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Pipe `message` through the external bidi filter process set up in
        # __init__ and read the transformed text back, line for line.
        # NOTE(review): the chunk elides the early `return message` taken when
        # the workaround is not active — confirm against the full file.
        if not hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert type(message) == type(u'')

        line_count = message.count(u'\n') + 1
        self._output_process.stdin.write((message + u'\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as were written, then drop the
        # trailing newline we appended above.
        res = u''.join(self._output_channel.readline().decode('utf-8')
                       for _ in range(line_count))
        return res[:-len(u'\n')]
267 def to_screen(self, message, skip_eol=False):
268 """Print message to stdout if not in quiet mode."""
269 return self.to_stdout(message, skip_eol, check_quiet=True)
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        # Prefer a configured logging.Logger over direct screen output.
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = [u'\n', u''][skip_eol]  # bool selects '' when skip_eol
            output = message + terminator
            # NOTE(review): the chunk elides one line just before this write —
            # confirm against the full file.
            write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        if self.params.get('logger'):
            self.params['logger'].error(message)
        # NOTE(review): the chunk elides the `else:` introducing this
        # direct-write branch — confirm against the full file.
        message = self._bidi_workaround(message)
        output = message + u'\n'
        write_string(output, self._err_file)
    def to_console_title(self, message):
        # Set the terminal/console window title to `message` if enabled.
        # NOTE(review): the chunk elides the early `return` after this guard.
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm escape sequence: set window title
            write_string(u'\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        # NOTE(review): the chunk elides the early `return` after this guard.
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            write_string(u'\033[22;0t', self._screen_file)
    def restore_console_title(self):
        # NOTE(review): the chunk elides the early `return` after this guard.
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            write_string(u'\033[23;0t', self._screen_file)
        # NOTE(review): this is the interior of __enter__; its `def` line and
        # the `return self` are elided in this chunk — confirm against the
        # full file.
        self.save_console_title()
    def __exit__(self, *args):
        # Context-manager exit: restore the terminal title and persist
        # cookies if a cookie file was configured.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        # NOTE(review): the chunk elides several lines here (the `tb is None`
        # branch selection, `else:` arms, and the verbose traceback print) —
        # confirm against the full file.
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the nested exc_info an ExtractorError carries so the
                # original extractor traceback is shown.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            tb_data = traceback.format_list(traceback.extract_stack())
            tb = u''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record failure via the process retcode.
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI yellow header for colour-capable (non-Windows) terminals.
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        # NOTE(review): the chunk elides the `else:` introducing the plain
        # header — confirm against the full file.
        _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI red header for colour-capable (non-Windows) terminals.
            _msg_header = u'\033[0;31mERROR:\033[0m'
        # NOTE(review): the chunk elides the `else:` introducing the plain
        # header — confirm against the full file.
        _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # NOTE(review): the chunk elides the `try:` opening this block.
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Console encoding cannot represent the filename; report without it.
            self.to_screen(u'[download] The file has already been downloaded')
387 def increment_downloads(self):
388 """Increment the ordinal that assigns a number to each file."""
389 self._num_downloads += 1
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): the chunk elides the surrounding try:, the default
        # autonumber size, the sanitize lambda body and the final return —
        # confirm against the full file.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad playlist indices so filenames sort lexically.
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Missing fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: u'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # Returns a human-readable skip reason (unicode) when any filter
        # (title match/reject, date range, view counts, age limit, archive)
        # rejects the entry.
        # NOTE(review): the chunk elides the `if matchtitle:` /
        # `if rejecttitle:` / `if date is not None:` guards and the final
        # `return None` — confirm against the full file.
        video_title = info_dict.get('title', info_dict.get('id', u'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return u'%s has already been recorded in archive' % video_title
458 def add_extra_info(info_dict, extra_info):
459 '''Set the keys from extra_info in info dict if they are missing'''
460 for key, value in extra_info.items():
461 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): the chunk elides the rest of the signature (a
        # `process` kwarg and closing paren), the `if ie_key:`/`else:`
        # selection of `ies`, the `for ie in ies:` loop with its
        # continue/break lines, and the try: wrapping ie.extract — confirm
        # against the full file.
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
        self.report_warning(u'The program functionality for this site has been marked as broken, '
                            u'and will probably not work.')
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            '_type': 'compat_list',
            'entries': ie_result,
        # Stamp the result with provenance metadata from the extractor.
        self.add_extra_info(ie_result,
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        self.report_error(u'no suitable InfoExtractor: %s' % url)
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): the chunk elides several lines throughout this method
        # (make_result's `return new_result`, the playlistend normalisation,
        # the extra dict literal opener in the playlist loop, the `_fixup`
        # definition in the compat_list branch, and various else:/closing
        # bracket lines) — confirm against the full file.

        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Overlay selected fields from the embedded page onto a copy
                # of the original result.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            # playliststart is 1-based in params, converted to 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)

                u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
                self.add_extra_info(r,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
        raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        # Pick one format from available_formats (assumed sorted worst-first,
        # so 'best' is the last entry) according to format_spec.
        # NOTE(review): the chunk elides the `else:` arms and the final
        # `matches` handling/returns — confirm against the full file.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        extensions = [u'mp4', u'flv', u'webm', u'3gp']
        if format_spec in extensions:
            # A bare extension selects by container rather than format_id.
            filter_f = lambda f: f['ext'] == format_spec
        filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        """Select the requested format(s) for a single video result and hand
        each selected format to process_info."""
        # NOTE(review): the chunk elides several lines (the `return` after the
        # Youku branch, the else: before `formats = info_dict['formats']`, the
        # `if format_limit:` guard, closing brackets, and the error fields for
        # the 'requested format not available' ExtractorError) — confirm
        # against the full file.
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # These extractors handle format selection themselves
        if info_dict['extractor'] in [u'Youku']:
            self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        format_limit = self.params.get('format_limit', None)
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format', 'best')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_info(self, info_dict):
        """Process a single resolved IE result."""
        # NOTE(review): the chunk elides many lines throughout this method
        # (the `return`s after skip reasons and in simulate mode, several
        # `try:`/`else:` openers around the file writes, the subtitle write
        # line, and the success/keep_video handling after the download) —
        # confirm against the full file.

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            # Truncate very long titles so filenames stay manageable.
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if not 'format' in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printing (the --get-* family of options).
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen(u'[info] Video description is already present')
                self.to_screen(u'[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)

        if self.params.get('writeannotations', False):
            annofn = filename + u'.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen(u'[info] Video annotations are already present')
                self.to_screen(u'[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    self.report_error(u'Cannot write subtitles file ' + descfn)

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + u'.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen(u'[info] Video description metadata is already present')
            self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen(u'[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                fd = get_suitable_downloader(info_dict)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                success = fd.download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))

        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        # Refuse to write multiple downloads into one fixed filename.
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        # NOTE(review): the chunk elides the `for url in url_list:` loop
        # header and the `try:` opener here — confirm against the full file.
        # It also downloads the videos
        self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using a previously saved .info.json file."""
        # NOTE(review): the chunk elides the json load of `info` and the
        # `try:` opener around process_ie_result — confirm against the full
        # file.
        with io.open(info_filename, 'r', encoding='utf-8') as f:
        self.process_ie_result(info, download=True)
        except DownloadError:
            # Fall back to re-extracting from the original webpage URL.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # NOTE(review): the chunk elides the construction of `info` from
        # ie_info, the keep_video initialisation, the loop over the
        # post-processor chain with its try: opener, and one branch of the
        # keep_video_wish handling — confirm against the full file.
        info['filepath'] = filename
        keep_video_wish, new_info = pp.run(info)
        if keep_video_wish is not None:
            keep_video = keep_video_wish
        elif keep_video is None:
            # No clear decision yet, let IE decide
            keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
959 def _make_archive_id(self, info_dict):
960 # Future-proof against any change in case
961 # and backwards compatibility with prior versions
962 extractor = info_dict.get('extractor_key')
963 if extractor is None:
964 if 'id' in info_dict:
965 extractor = info_dict.get('ie_key') # key in a playlist
966 if extractor is None:
967 return None # Incomplete video information
968 return extractor.lower() + u' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return True if this video's archive id already appears in the
        download_archive file, i.e. it was downloaded before."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
            # NOTE(review): locked_file presumably serializes access with
            # concurrent writers -- confirm against its implementation.
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet;
            # any other I/O error is unexpected.
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the download_archive file so
        that future runs skip it."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        # Append mode: one id per line, see in_download_archive for lookup.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')
    def format_resolution(format, default='unknown'):
        """Return a short human-readable resolution string for a format
        dict, e.g. u'1280x720', u'720p' or u'?x480'."""
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            # The extractor already supplied a preformatted resolution.
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                # Both dimensions known: WxH.
                res = u'%sx%s' % (format['width'], format['height'])
                res = u'%sp' % format['height']
        elif format.get('width') is not None:
            # Only the width is known.
            res = u'?x%d' % format['width']
    def list_formats(self, info_dict):
        """Print a table of all available formats for the given video
        (format code, extension, resolution and a free-text note)."""
        def format_note(fdict):
            # Build the short note column: codec/bitrate/filesize details.
            # NOTE(review): the next line reads `f`, not the parameter
            # `fdict` -- looks like it should be fdict.get('ext'); confirm.
            if f.get('ext') in ['f4f', 'f4m']:
                res += u'(unsupported) '
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + u' '
            if fdict.get('tbr') is not None:
                res += u'%4dk ' % fdict['tbr']
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += u'%-5s@' % fdict['vcodec']
            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            if fdict.get('vbr') is not None:
                res += u'%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                    res += u'%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
            if fdict.get('abr') is not None:
                res += u'@%3dk' % fdict['abr']
            if fdict.get('filesize') is not None:
                res += format_bytes(fdict['filesize'])

        def line(format, idlen=20):
            # One fixed-width table row; idlen sizes the format-id column.
            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                format_note(format),

        # A video without a 'formats' list is treated as its own only format.
        formats = info_dict.get('formats', [info_dict])
        # Column width: at least len(u'format code'), grown to the longest id.
        idlen = max(len(u'format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # The labels assume formats are sorted worst-first, best-last.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
            'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))
1069 def urlopen(self, req):
1070 """ Start an HTTP download """
1071 return self._opener.open(req)
    def print_debug_header(self):
        """Write debug information (youtube-dl version, git HEAD when run
        from a checkout, Python version, proxy map) when the 'verbose'
        option is set; otherwise do nothing."""
        if not self.params.get('verbose'):
        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
            # Best effort: ask git for the exact revision of a source
            # checkout (cwd is pinned to this module's directory).
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # Only print output that actually looks like a short hash.
            if re.match('[0-9a-f]+', out):
                write_string(u'[debug] Git HEAD: ' + out + u'\n')
        write_string(u'[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + u'\n')

        # Collect the proxies from every opener handler that declares any.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests (cookie
        jar, proxy handling, HTTPS and the custom YoutubeDLHandler),
        store it as self._opener, install it globally and set the default
        socket timeout."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a generous 600 s when no socket_timeout is given.
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # Only load the file when it is actually readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()
        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                # Use the explicitly given proxy for both schemes.
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # No explicit proxy: honour the environment proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        # debug_printtraffic enables urllib's wire-level traffic dumps.
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)