2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
55 UnavailableVideoError,
62 from .cache import Cache
63 from .extractor import get_info_extractor, gen_extractors
64 from .downloader import get_suitable_downloader
65 from .postprocessor import FFmpegMergerPP
66 from .version import __version__
69 class YoutubeDL(object):
72 YoutubeDL objects are the ones responsible of downloading the
73 actual video file and writing it to disk if the user has requested
74 it, among some other tasks. In most cases there should be one per
75 program. As, given a video URL, the downloader doesn't know how to
76 extract all the needed information, a task that InfoExtractors do, it
77 has to pass the URL to one of them.
79 For this, YoutubeDL objects have a method that allows
80 InfoExtractors to be registered in a given order. When it is passed
81 a URL, the YoutubeDL object hands it to the first InfoExtractor it
82 finds that reports being able to handle it. The InfoExtractor extracts
83 all the information about the video or videos the URL refers to, and
84 YoutubeDL processes the extracted information, possibly using a File
85 Downloader to download the video.
87 YoutubeDL objects accept a lot of parameters. In order not to saturate
88 the object constructor with arguments, it receives a dictionary of
89 options instead. These options are available through the params
90 attribute for the InfoExtractors to use. The YoutubeDL also
91 registers itself as the downloader in charge for the InfoExtractors
92 that are added to it, so this is a "mutual registration".
96 username: Username for authentication purposes.
97 password: Password for authentication purposes.
98 videopassword: Password for accessing a video.
99 usenetrc: Use netrc for authentication instead.
100 verbose: Print additional info to stdout.
101 quiet: Do not print messages to stdout.
102 no_warnings: Do not print out anything for warnings.
103 forceurl: Force printing final URL.
104 forcetitle: Force printing title.
105 forceid: Force printing ID.
106 forcethumbnail: Force printing thumbnail URL.
107 forcedescription: Force printing description.
108 forcefilename: Force printing final filename.
109 forceduration: Force printing duration.
110 forcejson: Force printing info_dict as JSON.
111 simulate: Do not download the video files.
112 format: Video format code.
113 format_limit: Highest quality format to try.
114 outtmpl: Template for output names.
115 restrictfilenames: Do not allow "&" and spaces in file names
116 ignoreerrors: Do not stop on download errors.
117 nooverwrites: Prevent overwriting files.
118 playliststart: Playlist item to start at.
119 playlistend: Playlist item to end at.
120 matchtitle: Download only matching titles.
121 rejecttitle: Reject downloads for matching titles.
122 logger: Log messages to a logging.Logger instance.
123 logtostderr: Log messages to stderr instead of stdout.
124 writedescription: Write the video description to a .description file
125 writeinfojson: Write the video description to a .info.json file
126 writeannotations: Write the video annotations to a .annotations.xml file
127 writethumbnail: Write the thumbnail image to a file
128 writesubtitles: Write the video subtitles to a file
129 writeautomaticsub: Write the automatic subtitles to a file
130 allsubtitles: Downloads all the subtitles of the video
131 (requires writesubtitles or writeautomaticsub)
132 listsubtitles: Lists all available subtitles for the video
133 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
134 subtitleslangs: List of languages of the subtitles to download
135 keepvideo: Keep the video file after post-processing
136 daterange: A DateRange object, download only if the upload_date is in the range.
137 skip_download: Skip the actual download of the video file
138 cachedir: Location of the cache files in the filesystem.
139 False to disable filesystem cache.
140 noplaylist: Download single video instead of a playlist if in doubt.
141 age_limit: An integer representing the user's age in years.
142 Unsuitable videos for the given age are skipped.
143 min_views: An integer representing the minimum view count the video
144 must have in order to not be skipped.
145 Videos without view count information are always
146 downloaded. None for no limit.
147 max_views: An integer representing the maximum view count.
148 Videos that are more popular than that are not downloaded.
150 Videos without view count information are always
151 downloaded. None for no limit.
152 download_archive: File name of a file where all downloads are recorded.
153 Videos already present in the file are not downloaded again.
155 cookiefile: File name where cookies should be read from and dumped to.
156 nocheckcertificate:Do not verify SSL certificates
157 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
158 At the moment, this is only supported by YouTube.
159 proxy: URL of the proxy server to use
160 socket_timeout: Time to wait for unresponsive hosts, in seconds
161 bidi_workaround: Work around buggy terminals without bidirectional text
162 support, using fribidi
163 debug_printtraffic:Print out sent and received HTTP traffic
164 include_ads: Download ads as well
165 default_search: Prepend this string if an input url is not valid.
166 'auto' for elaborate guessing
167 encoding: Use this encoding instead of the system-specified.
168 extract_flat: Do not resolve URLs, return the immediate result.
170 The following parameters are not used by YoutubeDL itself, they are used by
172 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
173 noresizebuffer, retries, continuedl, noprogress, consoletitle
175 The following options are used by the post processors:
176 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
177 otherwise prefer avconv.
178 exec_cmd: Arbitrary command to run after downloading
# Class-level defaults; both are re-initialised per instance in __init__.
# _download_retcode: process exit status accumulated across downloads.
# _num_downloads: running count used by the %(autonumber)s template field.
184 _download_retcode = None
185 _num_downloads = None
# Build a YoutubeDL instance from an options dict.
# NOTE(review): this is a sampled listing — the embedded original line
# numbers jump, so several statements are elided here (self._ies and
# self.params setup, the try/except around the bidi helper, the 'else'
# branches, etc.). Do not treat this block as runnable as-is.
188 def __init__(self, params=None):
189 """Create a FileDownloader object with the given options."""
193 self._ies_instances = {}
195 self._progress_hooks = []
196 self._download_retcode = 0
197 self._num_downloads = 0
# Screen output goes to stderr instead of stdout when 'logtostderr' is set.
198 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
199 self._err_file = sys.stderr
201 self.cache = Cache(self)
# Optional workaround for terminals without bidirectional-text support:
# screen output is piped through an external 'bidiv'/'fribidi' process.
203 if params.get('bidi_workaround', False):
206 master, slave = pty.openpty()
207 width = get_term_width()
211 width_args = ['-w', str(width)]
213 stdin=subprocess.PIPE,
215 stderr=self._err_file)
217 self._output_process = subprocess.Popen(
218 ['bidiv'] + width_args, **sp_kwargs
# Fallback helper when 'bidiv' is unavailable (the except is elided here).
221 self._output_process = subprocess.Popen(
222 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
223 self._output_channel = os.fdopen(master, 'rb')
224 except OSError as ose:
226 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Force --restrict-filenames when the filesystem encoding cannot
# represent arbitrary characters (Python 3 raises on such names, #1474).
230 if (sys.version_info >= (3,) and sys.platform != 'win32' and
231 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
232 and not params['restrictfilenames']):
233 # On Python 3, the Unicode filesystem API will throw errors (#1474)
235 'Assuming --restrict-filenames since file system encoding '
236 'cannot encode all charactes. '
237 'Set the LC_ALL environment variable to fix this.')
238 self.params['restrictfilenames'] = True
# Warn about the deprecated %(stitle)s output-template field.
240 if '%(stitle)s' in self.params.get('outtmpl', ''):
241 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
# Register a single InfoExtractor instance with this YoutubeDL.
# NOTE(review): the line that appends ie to self._ies (original line 247)
# is elided in this listing.
245 def add_info_extractor(self, ie):
246 """Add an InfoExtractor object to the end of the list."""
# Index by key for get_info_extractor(), and complete the mutual
# registration described in the class docstring.
248 self._ies_instances[ie.ie_key()] = ie
249 ie.set_downloader(self)
# Lazily fetch (or create and register) the extractor named ie_key.
# NOTE(review): the docstring quotes, the 'if ie is None:' guard and the
# final 'return ie' (original lines 252/256/258/261) are elided here.
251 def get_info_extractor(self, ie_key):
253 Get an instance of an IE with name ie_key, it will try to get one from
254 the _ies list, if there's no instance it will create a new one and add
255 it to the extractor list.
257 ie = self._ies_instances.get(ie_key)
# get_info_extractor (module-level) returns the IE class; instantiate it.
259 ie = get_info_extractor(ie_key)()
260 self.add_info_extractor(ie)
# Register every extractor that gen_extractors() yields, in order.
# NOTE(review): the docstring delimiters (original lines 264/266) are
# elided in this listing, leaving a bare prose line below.
263 def add_default_info_extractors(self):
265 Add the InfoExtractors returned by gen_extractors to the end of the list
267 for ie in gen_extractors():
268 self.add_info_extractor(ie)
# Append a post-processor to the chain and point it back at this downloader.
# NOTE(review): the line appending pp to the post-processor list
# (original line 272) is elided in this listing.
270 def add_post_processor(self, pp):
271 """Add a PostProcessor object to the end of the chain."""
273 pp.set_downloader(self)
275 def add_progress_hook(self, ph):
276 """Add the progress hook (currently only for the file downloader)"""
277 self._progress_hooks.append(ph)
# Pass message through the external bidi helper process (started in
# __init__) and return the reshaped text without its trailing newline.
# NOTE(review): the early 'return message' for the no-workaround case
# (original line 281) is elided in this listing.
279 def _bidi_workaround(self, message):
280 if not hasattr(self, '_output_channel'):
283 assert hasattr(self, '_output_process')
284 assert isinstance(message, compat_str)
# One readline() per input line so multi-line messages round-trip intact.
285 line_count = message.count('\n') + 1
286 self._output_process.stdin.write((message + '\n').encode('utf-8'))
287 self._output_process.stdin.flush()
288 res = ''.join(self._output_channel.readline().decode('utf-8')
289 for _ in range(line_count))
# Strip the '\n' added before writing.
290 return res[:-len('\n')]
292 def to_screen(self, message, skip_eol=False):
293 """Print message to stdout if not in quiet mode."""
294 return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out* using the user-configured output encoding."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
299 def to_stdout(self, message, skip_eol=False, check_quiet=False):
300 """Print message to stdout if not in quiet mode."""
301 if self.params.get('logger'):
302 self.params['logger'].debug(message)
303 elif not check_quiet or not self.params.get('quiet', False):
304 message = self._bidi_workaround(message)
305 terminator = ['\n', ''][skip_eol]
306 output = message + terminator
308 self._write_string(output, self._screen_file)
# Report message on stderr, routed to the configured logger when present.
# NOTE(review): the 'else:' introducing the non-logger branch (original
# line 315) is elided in this listing.
310 def to_stderr(self, message):
311 """Print message to stderr."""
312 assert isinstance(message, compat_str)
313 if self.params.get('logger'):
314 self.params['logger'].error(message)
316 message = self._bidi_workaround(message)
317 output = message + '\n'
318 self._write_string(output, self._err_file)
# Set the terminal/console window title; no-op unless 'consoletitle' set.
# NOTE(review): the early 'return' after the option check (original
# line 322) is elided in this listing.
320 def to_console_title(self, message):
321 if not self.params.get('consoletitle', False):
# Windows consoles need the Win32 API; other terminals get an xterm
# escape sequence.
323 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
324 # c_wchar_p() might not be necessary if `message` is
325 # already of type unicode()
326 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
327 elif 'TERM' in os.environ:
328 self._write_string('\033]0;%s\007' % message, self._screen_file)
# Push the current console title onto the terminal's title stack
# (restored later by restore_console_title); no-op unless 'consoletitle'.
# NOTE(review): the early 'return' (original line 332) is elided here.
330 def save_console_title(self):
331 if not self.params.get('consoletitle', False):
333 if 'TERM' in os.environ:
334 # Save the title on stack
335 self._write_string('\033[22;0t', self._screen_file)
# Pop the console title previously pushed by save_console_title;
# no-op unless 'consoletitle'.
# NOTE(review): the early 'return' (original line 339) is elided here.
337 def restore_console_title(self):
338 if not self.params.get('consoletitle', False):
340 if 'TERM' in os.environ:
341 # Restore the title from stack
342 self._write_string('\033[23;0t', self._screen_file)
345 self.save_console_title()
def __exit__(self, *exc_details):
    """Leave the context manager: restore the console title and, when a
    cookie file is configured, flush cookies back to disk."""
    self.restore_console_title()
    cookie_path = self.params.get('cookiefile')
    if cookie_path is not None:
        self.cookiejar.save()
# Central error dispatcher: print the message, optionally build/append a
# traceback in verbose mode, then either raise DownloadError or record a
# non-zero retcode depending on the 'ignoreerrors' option.
# NOTE(review): several lines are elided in this listing (docstring
# delimiters, the 'if tb is None:' / 'tb = '' ' setup, the 'else:'
# branches before lines 373 and 380, and the stderr write of tb).
354 def trouble(self, message=None, tb=None):
355 """Determine action to take when a download problem appears.
357 Depending on if the downloader has been configured to ignore
358 download errors or not, this method may throw an exception or
359 not when errors are found, after printing the message.
361 tb, if given, is additional traceback information.
363 if message is not None:
364 self.to_stderr(message)
365 if self.params.get('verbose'):
367 if sys.exc_info()[0]: # if .trouble has been called from an except block
# Prefer the wrapped exception's own exc_info when available
# (ExtractorError stores the original exception there).
369 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
370 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
371 tb += compat_str(traceback.format_exc())
373 tb_data = traceback.format_list(traceback.extract_stack())
374 tb = ''.join(tb_data)
376 if not self.params.get('ignoreerrors', False):
377 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
378 exc_info = sys.exc_info()[1].exc_info
380 exc_info = sys.exc_info()
381 raise DownloadError(message, exc_info)
# ignoreerrors: remember the failure in the process exit status instead.
382 self._download_retcode = 1
# Emit a warning on stderr, coloured when stderr is a tty (non-Windows);
# routed to the logger when configured; silenced by 'no_warnings'.
# NOTE(review): the docstring delimiters, two early 'return's (after the
# logger and no_warnings branches) and the 'else:' before line 397 are
# elided in this listing.
384 def report_warning(self, message):
386 Print the message to stderr, it will be prefixed with 'WARNING:'
387 If stderr is a tty file the 'WARNING:' will be colored
389 if self.params.get('logger') is not None:
390 self.params['logger'].warning(message)
392 if self.params.get('no_warnings'):
394 if self._err_file.isatty() and os.name != 'nt':
# ANSI yellow header on capable terminals.
395 _msg_header = '\033[0;33mWARNING:\033[0m'
397 _msg_header = 'WARNING:'
398 warning_message = '%s %s' % (_msg_header, message)
399 self.to_stderr(warning_message)
# Same as trouble(), but prefix the message with a (possibly ANSI-red)
# 'ERROR:' header before delegating.
# NOTE(review): the docstring delimiters and the 'else:' before line 409
# are elided in this listing.
401 def report_error(self, message, tb=None):
403 Do the same as trouble, but prefixes the message with 'ERROR:', colored
404 in red if stderr is a tty file.
406 if self._err_file.isatty() and os.name != 'nt':
407 _msg_header = '\033[0;31mERROR:\033[0m'
409 _msg_header = 'ERROR:'
410 error_message = '%s %s' % (_msg_header, message)
411 self.trouble(error_message, tb)
# Tell the user a file was already fully downloaded; fall back to a
# generic message when the file name cannot be encoded for the console.
# NOTE(review): the 'try:' opening the guarded block (original line 415)
# is elided in this listing.
413 def report_file_already_downloaded(self, file_name):
414 """Report file has already been fully downloaded."""
416 self.to_screen('[download] %s has already been downloaded' % file_name)
417 except UnicodeEncodeError:
418 self.to_screen('[download] The file has already been downloaded')
# Build the output file name by filling the output template ('outtmpl')
# with a sanitised copy of info_dict.
# NOTE(review): several lines are elided in this listing (the 'try:',
# the autonumber_size default, the sanitize lambda's value expression,
# the comprehension filter, the final 'return filename' and the except
# branch's fallback return).
420 def prepare_filename(self, info_dict):
421 """Generate the output filename."""
# Work on a copy so the caller's info_dict is not polluted.
423 template_dict = dict(info_dict)
425 template_dict['epoch'] = int(time.time())
426 autonumber_size = self.params.get('autonumber_size')
427 if autonumber_size is None:
429 autonumber_templ = '%0' + str(autonumber_size) + 'd'
430 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Zero-pad playlist_index to the width of the playlist length.
431 if template_dict.get('playlist_index') is not None:
432 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive a human-readable resolution string when none was provided.
433 if template_dict.get('resolution') is None:
434 if template_dict.get('width') and template_dict.get('height'):
435 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
436 elif template_dict.get('height'):
437 template_dict['resolution'] = '%sp' % template_dict['height']
438 elif template_dict.get('width'):
439 template_dict['resolution'] = '?x%d' % template_dict['width']
441 sanitize = lambda k, v: sanitize_filename(
443 restricted=self.params.get('restrictfilenames'),
445 template_dict = dict((k, sanitize(k, v))
446 for k, v in template_dict.items()
# Missing template fields render as 'NA' instead of raising KeyError.
448 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
450 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
451 tmpl = compat_expanduser(outtmpl)
452 filename = tmpl % template_dict
454 except ValueError as err:
455 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Decide whether info_dict should be downloaded. Returns None to accept,
# or a human-readable string explaining why the entry is skipped
# (title filters, date range, view-count limits, age limit, archive).
# NOTE(review): several lines are elided in this listing ('if matchtitle:',
# 'if rejecttitle:', 'if date is not None:', the actual_age_limit default
# assignment, and the final 'return None').
458 def _match_entry(self, info_dict):
459 """ Returns None iff the file should be downloaded """
461 video_title = info_dict.get('title', info_dict.get('id', 'video'))
462 if 'title' in info_dict:
463 # This can happen when we're just evaluating the playlist
464 title = info_dict['title']
465 matchtitle = self.params.get('matchtitle', False)
467 if not re.search(matchtitle, title, re.IGNORECASE):
468 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
469 rejecttitle = self.params.get('rejecttitle', False)
471 if re.search(rejecttitle, title, re.IGNORECASE):
472 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
473 date = info_dict.get('upload_date', None)
475 dateRange = self.params.get('daterange', DateRange())
476 if date not in dateRange:
477 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count filters only apply when the extractor reported a count.
478 view_count = info_dict.get('view_count', None)
479 if view_count is not None:
480 min_views = self.params.get('min_views')
481 if min_views is not None and view_count < min_views:
482 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
483 max_views = self.params.get('max_views')
484 if max_views is not None and view_count > max_views:
485 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
486 age_limit = self.params.get('age_limit')
487 if age_limit is not None:
488 actual_age_limit = info_dict.get('age_limit')
489 if actual_age_limit is None:
491 if age_limit < actual_age_limit:
492 return 'Skipping "' + title + '" because it is age restricted'
# Skip entries already recorded in the --download-archive file.
493 if self.in_download_archive(info_dict):
494 return '%s has already been recorded in archive' % video_title
498 def add_extra_info(info_dict, extra_info):
499 '''Set the keys from extra_info in info dict if they are missing'''
500 for key, value in extra_info.items():
501 info_dict.setdefault(key, value)
# Resolve url with a suitable InfoExtractor and hand the result to
# process_ie_result; returns a list with one dict per video found.
# NOTE(review): heavily elided in this listing — the 'process' parameter
# continuation, the else-branch selecting self._ies, the for-loop over
# ies, 'continue'/'break' statements, the 'try:' and several except
# bodies are missing (the embedded numbering jumps).
# NOTE(review): extra_info={} is a mutable default argument; the visible
# code never mutates it, but confirm before relying on that.
503 def extract_info(self, url, download=True, ie_key=None, extra_info={},
506 Returns a list with a dictionary for each video we find.
507 If 'download', also downloads the videos.
508 extra_info is a dict containing the extra values to add to each result
# Explicit ie_key restricts extraction to that single extractor.
512 ies = [self.get_info_extractor(ie_key)]
517 if not ie.suitable(url):
521 self.report_warning('The program functionality for this site has been marked as broken, '
522 'and will probably not work.')
525 ie_result = ie.extract(url)
526 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
528 if isinstance(ie_result, list):
529 # Backwards compatibility: old IE result format
531 '_type': 'compat_list',
532 'entries': ie_result,
534 self.add_default_extra_info(ie_result, ie, url)
536 return self.process_ie_result(ie_result, download, extra_info)
539 except ExtractorError as de: # An error we somewhat expected
540 self.report_error(compat_str(de), de.format_traceback())
542 except MaxDownloadsReached:
544 except Exception as e:
545 if self.params.get('ignoreerrors', False):
546 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
551 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Stamp extractor provenance fields onto an IE result (without
# overwriting values the extractor already set).
# NOTE(review): the 'webpage_url': url entry (original line 556) and the
# closing '})' are elided in this listing.
553 def add_default_extra_info(self, ie_result, ie, url):
554 self.add_extra_info(ie_result, {
555 'extractor': ie.IE_NAME,
557 'webpage_url_basename': url_basename(url),
558 'extractor_key': ie.ie_key(),
# Resolve an IE result by dispatching on its '_type': process videos,
# follow 'url'/'url_transparent' references, expand playlists (applying
# playliststart/playlistend and per-entry filters), and upgrade legacy
# 'compat_list' results.
# NOTE(review): heavily elided in this listing — docstring delimiters,
# the extract_flat early return, several 'else:' lines, the entry
# extra-info dict opener, 'continue' after a skipped entry, the _fixup
# helper's def line and the final returns are missing (numbering jumps).
561 def process_ie_result(self, ie_result, download=True, extra_info={}):
563 Take the result of the ie(may be modified) and resolve all unresolved
564 references (URLs, playlist items).
566 It will also download the videos if 'download'.
567 Returns the resolved ie_result.
570 result_type = ie_result.get('_type', 'video')
# extract_flat: return URL references unresolved instead of recursing.
572 if self.params.get('extract_flat', False):
573 if result_type in ('url', 'url_transparent'):
576 if result_type == 'video':
577 self.add_extra_info(ie_result, extra_info)
578 return self.process_video_result(ie_result, download=download)
579 elif result_type == 'url':
580 # We have to add extra_info to the results because it may be
581 # contained in a playlist
582 return self.extract_info(ie_result['url'],
584 ie_key=ie_result.get('ie_key'),
585 extra_info=extra_info)
586 elif result_type == 'url_transparent':
587 # Use the information from the embedding page
588 info = self.extract_info(
589 ie_result['url'], ie_key=ie_result.get('ie_key'),
590 extra_info=extra_info, download=False, process=False)
# Merge the embedded result into a copy of the outer result,
# letting the listed fields from the embedded page win.
592 def make_result(embedded_info):
593 new_result = ie_result.copy()
594 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
595 'entries', 'ie_key', 'duration',
596 'subtitles', 'annotations', 'format',
597 'thumbnail', 'thumbnails'):
600 if f in embedded_info:
601 new_result[f] = embedded_info[f]
603 new_result = make_result(info)
# A url_transparent result must not resolve to another one.
605 assert new_result.get('_type') != 'url_transparent'
606 if new_result.get('_type') == 'compat_list':
607 new_result['entries'] = [
608 make_result(e) for e in new_result['entries']]
610 return self.process_ie_result(
611 new_result, download=download, extra_info=extra_info)
612 elif result_type == 'playlist':
613 # We process each entry in the playlist
614 playlist = ie_result.get('title', None) or ie_result.get('id', None)
615 self.to_screen('[download] Downloading playlist: %s' % playlist)
617 playlist_results = []
# playliststart is 1-based in the options, 0-based for slicing.
619 playliststart = self.params.get('playliststart', 1) - 1
620 playlistend = self.params.get('playlistend', None)
621 # For backwards compatibility, interpret -1 as whole list
622 if playlistend == -1:
# List entries can be sliced directly; PagedList entries are
# sliced lazily via getslice() in the other branch.
625 if isinstance(ie_result['entries'], list):
626 n_all_entries = len(ie_result['entries'])
627 entries = ie_result['entries'][playliststart:playlistend]
628 n_entries = len(entries)
630 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
631 (ie_result['extractor'], playlist, n_all_entries, n_entries))
633 assert isinstance(ie_result['entries'], PagedList)
634 entries = ie_result['entries'].getslice(
635 playliststart, playlistend)
636 n_entries = len(entries)
638 "[%s] playlist %s: Downloading %d videos" %
639 (ie_result['extractor'], playlist, n_entries))
641 for i, entry in enumerate(entries, 1):
642 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
644 'n_entries': n_entries,
645 'playlist': playlist,
646 'playlist_index': i + playliststart,
647 'extractor': ie_result['extractor'],
648 'webpage_url': ie_result['webpage_url'],
649 'webpage_url_basename': url_basename(ie_result['webpage_url']),
650 'extractor_key': ie_result['extractor_key'],
# Apply the user's filters (title/date/views/...) per entry.
653 reason = self._match_entry(entry)
654 if reason is not None:
655 self.to_screen('[download] ' + reason)
658 entry_result = self.process_ie_result(entry,
661 playlist_results.append(entry_result)
662 ie_result['entries'] = playlist_results
664 elif result_type == 'compat_list':
666 self.add_extra_info(r,
668 'extractor': ie_result['extractor'],
669 'webpage_url': ie_result['webpage_url'],
670 'webpage_url_basename': url_basename(ie_result['webpage_url']),
671 'extractor_key': ie_result['extractor_key'],
674 ie_result['entries'] = [
675 self.process_ie_result(_fixup(r), download, extra_info)
676 for r in ie_result['entries']
680 raise Exception('Invalid result type: %s' % result_type)
# Pick one format dict from available_formats according to format_spec:
# 'best'/'worst', 'bestaudio'/'worstaudio' (audio-only streams),
# 'bestvideo'/'worstvideo' (video-only streams), a known extension, or an
# exact format_id.
# NOTE(review): heavily elided — the 'audio_formats = ['/'video_formats = ['
# comprehension openers, the 'if audio_formats:'/'if video_formats:'
# guards, the 'else:' before line 716 and the final matches handling
# (original lines 718-721) are missing from this listing.
682 def select_format(self, format_spec, available_formats):
# available_formats appears ordered worst-to-best (index -1 = best)
# — assumption from the 'best'/'worst' branches; confirm with caller.
683 if format_spec == 'best' or format_spec is None:
684 return available_formats[-1]
685 elif format_spec == 'worst':
686 return available_formats[0]
687 elif format_spec == 'bestaudio':
689 f for f in available_formats
690 if f.get('vcodec') == 'none']
692 return audio_formats[-1]
693 elif format_spec == 'worstaudio':
695 f for f in available_formats
696 if f.get('vcodec') == 'none']
698 return audio_formats[0]
699 elif format_spec == 'bestvideo':
701 f for f in available_formats
702 if f.get('acodec') == 'none']
704 return video_formats[-1]
705 elif format_spec == 'worstvideo':
707 f for f in available_formats
708 if f.get('acodec') == 'none']
710 return video_formats[0]
# A bare extension selects by 'ext'; anything else matches 'format_id'.
712 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
713 if format_spec in extensions:
714 filter_f = lambda f: f['ext'] == format_spec
716 filter_f = lambda f: f['format_id'] == format_spec
717 matches = list(filter(filter_f, available_formats))
# Validate and normalise a single resolved video result (ids, thumbnails,
# upload date, per-format defaults), then select which format(s) to
# download from the 'format' option and feed each to process_info().
# NOTE(review): heavily elided in this listing — 'if thumbnails:' guards,
# 'else:'/'try:' lines, 'if not formats:', the format-merge dict opener,
# the listformats early return, the req_format default, the 'expected'
# kwarg of ExtractorError and the final 'return info_dict' are missing
# (the embedded numbering jumps).
722 def process_video_result(self, info_dict, download=True):
723 assert info_dict.get('_type', 'video') == 'video'
725 if 'id' not in info_dict:
726 raise ExtractorError('Missing "id" field in extractor result')
727 if 'title' not in info_dict:
728 raise ExtractorError('Missing "title" field in extractor result')
730 if 'playlist' not in info_dict:
731 # It isn't part of a playlist
732 info_dict['playlist'] = None
733 info_dict['playlist_index'] = None
# Thumbnails sorted ascending, so [-1] below is the largest one.
735 thumbnails = info_dict.get('thumbnails')
737 thumbnails.sort(key=lambda t: (
738 t.get('width'), t.get('height'), t.get('url')))
740 if 'width' in t and 'height' in t:
741 t['resolution'] = '%dx%d' % (t['width'], t['height'])
743 if thumbnails and 'thumbnail' not in info_dict:
744 info_dict['thumbnail'] = thumbnails[-1]['url']
746 if 'display_id' not in info_dict and 'id' in info_dict:
747 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD) from a UNIX timestamp when absent.
749 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
750 upload_date = datetime.datetime.utcfromtimestamp(
751 info_dict['timestamp'])
752 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
754 # These extractors handle format selection themselves
755 if info_dict['extractor'] in ['Youku']:
757 self.process_info(info_dict)
760 # We now pick which formats have to be downloaded
761 if info_dict.get('formats') is None:
762 # There's only one format available
763 formats = [info_dict]
765 formats = info_dict['formats']
768 raise ExtractorError('No video formats found!')
770 # We check that all the formats have the format and format_id fields
771 for i, format in enumerate(formats):
772 if 'url' not in format:
773 raise ExtractorError('Missing "url" key in result (index %d)' % i)
775 if format.get('format_id') is None:
776 format['format_id'] = compat_str(i)
777 if format.get('format') is None:
778 format['format'] = '{id} - {res}{note}'.format(
779 id=format['format_id'],
780 res=self.format_resolution(format),
781 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
783 # Automatically determine file extension if missing
784 if 'ext' not in format:
785 format['ext'] = determine_ext(format['url']).lower()
# --format-limit: drop everything above the named format.
787 format_limit = self.params.get('format_limit', None)
789 formats = list(takewhile_inclusive(
790 lambda f: f['format_id'] != format_limit, formats
793 # TODO Central sorting goes here
795 if formats[0] is not info_dict:
796 # only set the 'formats' fields if the original info_dict list them
797 # otherwise we end up with a circular reference, the first (and unique)
798 # element in the 'formats' field in info_dict is info_dict itself,
799 # which can't be exported to json
800 info_dict['formats'] = formats
801 if self.params.get('listformats', None):
802 self.list_formats(info_dict)
805 req_format = self.params.get('format')
806 if req_format is None:
808 formats_to_download = []
809 # The -1 is for supporting YoutubeIE
810 if req_format in ('-1', 'all'):
811 formats_to_download = formats
813 for rfstr in req_format.split(','):
814 # We can accept formats requested in the format: 34/5/best, we pick
815 # the first that is available, starting from left
816 req_formats = rfstr.split('/')
817 for rf in req_formats:
818 if re.match(r'.+?\+.+?', rf) is not None:
819 # Two formats have been requested like '137+139'
820 format_1, format_2 = rf.split('+')
821 formats_info = (self.select_format(format_1, formats),
822 self.select_format(format_2, formats))
823 if all(formats_info):
825 'requested_formats': formats_info,
827 'ext': formats_info[0]['ext'],
830 selected_format = None
832 selected_format = self.select_format(rf, formats)
833 if selected_format is not None:
834 formats_to_download.append(selected_format)
836 if not formats_to_download:
837 raise ExtractorError('requested format not available',
841 if len(formats_to_download) > 1:
842 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
843 for format in formats_to_download:
# process_info mutates its argument, so give each format a copy.
844 new_info = dict(info_dict)
845 new_info.update(format)
846 self.process_info(new_info)
847 # We update the info dict with the best quality format (backwards compatibility)
848 info_dict.update(formats_to_download[-1])
851 def process_info(self, info_dict):
852 """Process a single resolved IE result."""
854 assert info_dict.get('_type', 'video') == 'video'
856 max_downloads = self.params.get('max_downloads')
857 if max_downloads is not None:
858 if self._num_downloads >= int(max_downloads):
859 raise MaxDownloadsReached()
861 info_dict['fulltitle'] = info_dict['title']
862 if len(info_dict['title']) > 200:
863 info_dict['title'] = info_dict['title'][:197] + '...'
865 # Keep for backwards compatibility
866 info_dict['stitle'] = info_dict['title']
868 if 'format' not in info_dict:
869 info_dict['format'] = info_dict['ext']
871 reason = self._match_entry(info_dict)
872 if reason is not None:
873 self.to_screen('[download] ' + reason)
876 self._num_downloads += 1
878 filename = self.prepare_filename(info_dict)
881 if self.params.get('forcetitle', False):
882 self.to_stdout(info_dict['fulltitle'])
883 if self.params.get('forceid', False):
884 self.to_stdout(info_dict['id'])
885 if self.params.get('forceurl', False):
886 # For RTMP URLs, also include the playpath
887 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
888 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
889 self.to_stdout(info_dict['thumbnail'])
890 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
891 self.to_stdout(info_dict['description'])
892 if self.params.get('forcefilename', False) and filename is not None:
893 self.to_stdout(filename)
894 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
895 self.to_stdout(formatSeconds(info_dict['duration']))
896 if self.params.get('forceformat', False):
897 self.to_stdout(info_dict['format'])
898 if self.params.get('forcejson', False):
899 info_dict['_filename'] = filename
900 self.to_stdout(json.dumps(info_dict))
902 # Do nothing else if in simulate mode
903 if self.params.get('simulate', False):
910 dn = os.path.dirname(encodeFilename(filename))
911 if dn and not os.path.exists(dn):
913 except (OSError, IOError) as err:
914 self.report_error('unable to create directory ' + compat_str(err))
917 if self.params.get('writedescription', False):
918 descfn = filename + '.description'
919 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
920 self.to_screen('[info] Video description is already present')
923 self.to_screen('[info] Writing video description to: ' + descfn)
924 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
925 descfile.write(info_dict['description'])
926 except (KeyError, TypeError):
927 self.report_warning('There\'s no description to write.')
928 except (OSError, IOError):
929 self.report_error('Cannot write description file ' + descfn)
932 if self.params.get('writeannotations', False):
933 annofn = filename + '.annotations.xml'
934 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
935 self.to_screen('[info] Video annotations are already present')
938 self.to_screen('[info] Writing video annotations to: ' + annofn)
939 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
940 annofile.write(info_dict['annotations'])
941 except (KeyError, TypeError):
942 self.report_warning('There are no annotations to write.')
943 except (OSError, IOError):
944 self.report_error('Cannot write annotations file: ' + annofn)
947 subtitles_are_requested = any([self.params.get('writesubtitles', False),
948 self.params.get('writeautomaticsub')])
950 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
951 # subtitles download errors are already managed as troubles in relevant IE
952 # that way it will silently go on when used with unsupporting IE
953 subtitles = info_dict['subtitles']
954 sub_format = self.params.get('subtitlesformat', 'srt')
955 for sub_lang in subtitles.keys():
956 sub = subtitles[sub_lang]
960 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
961 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
962 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
964 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
965 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
967 except (OSError, IOError):
968 self.report_error('Cannot write subtitles file ' + sub_filename)
971 if self.params.get('writeinfojson', False):
972 infofn = os.path.splitext(filename)[0] + '.info.json'
973 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
974 self.to_screen('[info] Video description metadata is already present')
976 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
978 write_json_file(info_dict, encodeFilename(infofn))
979 except (OSError, IOError):
980 self.report_error('Cannot write metadata to JSON file ' + infofn)
983 if self.params.get('writethumbnail', False):
984 if info_dict.get('thumbnail') is not None:
985 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
986 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
987 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
988 self.to_screen('[%s] %s: Thumbnail is already present' %
989 (info_dict['extractor'], info_dict['id']))
991 self.to_screen('[%s] %s: Downloading thumbnail ...' %
992 (info_dict['extractor'], info_dict['id']))
994 uf = self.urlopen(info_dict['thumbnail'])
995 with open(thumb_filename, 'wb') as thumbf:
996 shutil.copyfileobj(uf, thumbf)
997 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
998 (info_dict['extractor'], info_dict['id'], thumb_filename))
999 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1000 self.report_warning('Unable to download thumbnail "%s": %s' %
1001 (info_dict['thumbnail'], compat_str(err)))
1003 if not self.params.get('skip_download', False):
1004 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1009 fd = get_suitable_downloader(info)(self, self.params)
1010 for ph in self._progress_hooks:
1011 fd.add_progress_hook(ph)
1012 if self.params.get('verbose'):
1013 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1014 return fd.download(name, info)
1015 if info_dict.get('requested_formats') is not None:
1018 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1019 if not merger._get_executable():
1021 self.report_warning('You have requested multiple '
1022 'formats but ffmpeg or avconv are not installed.'
1023 ' The formats won\'t be merged')
1025 postprocessors = [merger]
1026 for f in info_dict['requested_formats']:
1027 new_info = dict(info_dict)
1029 fname = self.prepare_filename(new_info)
1030 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1031 downloaded.append(fname)
1032 partial_success = dl(fname, new_info)
1033 success = success and partial_success
1034 info_dict['__postprocessors'] = postprocessors
1035 info_dict['__files_to_merge'] = downloaded
1037 # Just a single file
1038 success = dl(filename, info_dict)
1039 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1040 self.report_error('unable to download video data: %s' % str(err))
1042 except (OSError, IOError) as err:
1043 raise UnavailableVideoError(err)
1044 except (ContentTooShortError, ) as err:
1045 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1050 self.post_process(filename, info_dict)
1051 except (PostProcessingError) as err:
1052 self.report_error('postprocessing: %s' % str(err))
1055 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns the accumulated download return code.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1 and
            '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        # A template without substitutions would write every video to
        # the same file.
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # extract_info also performs the download itself
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously written .info.json file as if freshly extracted.

    Falls back to re-extracting from the recorded webpage URL when the
    stored info turns out to be stale.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            # Nothing to retry with; propagate the failure.
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    # Per-download postprocessors (e.g. format merging) run before the
    # globally registered ones.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    # Any postprocessor asking to keep the video wins
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1118 def _make_archive_id(self, info_dict):
1119 # Future-proof against any change in case
1120 # and backwards compatibility with prior versions
1121 extractor = info_dict.get('extractor_key')
1122 if extractor is None:
1123 if 'id' in info_dict:
1124 extractor = info_dict.get('ie_key') # key in a playlist
1125 if extractor is None:
1126 return None # Incomplete video information
1127 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True when this video is already listed in the archive file."""
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append this video's archive id to the download archive file."""
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    # locked_file serializes concurrent youtube-dl processes appending
    # to the same archive.
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution label for a format dict."""
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    height = format.get('height')
    width = format.get('width')
    if height is not None:
        if width is not None:
            res = '%sx%s' % (width, height)
        else:
            res = '%sp' % height
    elif width is not None:
        res = '?x%d' % width
    else:
        res = default
    return res
1174 def _format_note(self, fdict):
1176 if fdict.get('ext') in ['f4f', 'f4m']:
1177 res += '(unsupported) '
1178 if fdict.get('format_note') is not None:
1179 res += fdict['format_note'] + ' '
1180 if fdict.get('tbr') is not None:
1181 res += '%4dk ' % fdict['tbr']
1182 if fdict.get('container') is not None:
1185 res += '%s container' % fdict['container']
1186 if (fdict.get('vcodec') is not None and
1187 fdict.get('vcodec') != 'none'):
1190 res += fdict['vcodec']
1191 if fdict.get('vbr') is not None:
1193 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1195 if fdict.get('vbr') is not None:
1196 res += '%4dk' % fdict['vbr']
1197 if fdict.get('acodec') is not None:
1200 if fdict['acodec'] == 'none':
1203 res += '%-5s' % fdict['acodec']
1204 elif fdict.get('abr') is not None:
1208 if fdict.get('abr') is not None:
1209 res += '@%3dk' % fdict['abr']
1210 if fdict.get('asr') is not None:
1211 res += ' (%5dHz)' % fdict['asr']
1212 if fdict.get('filesize') is not None:
1215 res += format_bytes(fdict['filesize'])
1216 elif fdict.get('filesize_approx') is not None:
1219 res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of the available formats for the given video."""
    def line(format, idlen=20):
        # One row: format id, extension, resolution, free-form note.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Extractors sort formats worst-to-best.
        formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download """
    # According to RFC 3986, URLs can not contain non-ASCII characters; however
    # this is not always respected by websites — some hand out URLs with raw
    # (non percent-encoded) non-ASCII characters (see telemb.py, ard.py [#3412]).
    # urllib chokes on such URLs (see http://bugs.python.org/issue3991), so we
    # substitute a percent-encoded equivalent before opening.
    req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
    url = req if req_is_string else req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        if req_is_string:
            req = url_escaped
        else:
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """In verbose mode, write diagnostic info (encodings, version,
    git revision, Python version, proxy map) to the debug output."""
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            sys.stdout.encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        # Best effort: report the git revision when running from a checkout.
        # Fix: catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit are not swallowed here.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Not a git checkout, or git is unavailable — purely informational.
        pass
    self._write_string('[debug] Python version %s - %s' %
                       (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the urllib opener (cookies, proxies, HTTPS and debug
    handlers) and store it on self._opener."""
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        # An explicit empty string disables all proxying.
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode text to the configured output encoding; bytes pass through."""
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
1361 def get_encoding(self):
1362 encoding = self.params.get('encoding')
1363 if encoding is None:
1364 encoding = preferredencoding()