2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
63 UnavailableVideoError,
73 from .cache import Cache
74 from .extractor import get_info_extractor, gen_extractors
75 from .downloader import get_suitable_downloader
76 from .downloader.rtmp import rtmpdump_version
77 from .postprocessor import (
79 FFmpegFixupStretchedPP,
84 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       videos belonging to a playlist.
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * filename: The final filename
                       * status: One of "downloading" and "finished"

                       The dict may also have some of the following entries:

                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * tmpfilename: The filename we're currently writing to
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.
    external_downloader: Executable of the external downloader to call.
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see youtube_dl/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    # Exit status of the most recent run (0 = success); initialised in __init__.
    _download_retcode = None
    # Running count of downloads in this session; feeds %(autonumber)s.
    _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): several statements of this constructor (the
        # ``self._ies``/``self._pps`` initialisers, the ``self.params``
        # assignment, the ``try``/``sp_kwargs`` scaffolding of the bidi
        # workaround and the ``if auto_init:`` guard) are missing from this
        # copy of the file — restore them before treating this as runnable.
        self._ies_instances = {}    # extractor instances, keyed by ie_key()
        self._progress_hooks = []   # callables invoked with download progress dicts
        self._download_retcode = 0
        self._num_downloads = 0
        # Route screen output to stderr when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Pipe output through an external bidi filter (bidiv/fribidi) so
            # right-to-left text renders correctly on buggy terminals.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        # Force --restrict-filenames when the filesystem encoding cannot
        # represent arbitrary characters; Python 3 raises on such names.
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self.print_debug_header()
        self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)   # copy so the caller's dict is not mutated
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
318 def warn_if_short_id(self, argv):
319 # short YouTube ID starting with dash?
321 i for i, a in enumerate(argv)
322 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
326 [a for i, a in enumerate(argv) if i not in idxs] +
327 ['--'] + [argv[i] for i in idxs]
330 'Long argument string detected. '
331 'Use -- to separate parameters and URLs, like this:\n%s\n' %
332 args_to_str(correct_argv))
334 def add_info_extractor(self, ie):
335 """Add an InfoExtractor object to the end of the list."""
337 self._ies_instances[ie.ie_key()] = ie
338 ie.set_downloader(self)
340 def get_info_extractor(self, ie_key):
342 Get an instance of an IE with name ie_key, it will try to get one from
343 the _ies list, if there's no instance it will create a new one and add
344 it to the extractor list.
346 ie = self._ies_instances.get(ie_key)
348 ie = get_info_extractor(ie_key)()
349 self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
359 def add_post_processor(self, pp):
360 """Add a PostProcessor object to the end of the chain."""
362 pp.set_downloader(self)
364 def add_progress_hook(self, ph):
365 """Add the progress hook (currently only for the file downloader)"""
366 self._progress_hooks.append(ph)
368 def _bidi_workaround(self, message):
369 if not hasattr(self, '_output_channel'):
372 assert hasattr(self, '_output_process')
373 assert isinstance(message, compat_str)
374 line_count = message.count('\n') + 1
375 self._output_process.stdin.write((message + '\n').encode('utf-8'))
376 self._output_process.stdin.flush()
377 res = ''.join(self._output_channel.readline().decode('utf-8')
378 for _ in range(line_count))
379 return res[:-len('\n')]
381 def to_screen(self, message, skip_eol=False):
382 """Print message to stdout if not in quiet mode."""
383 return self.to_stdout(message, skip_eol, check_quiet=True)
385 def _write_string(self, s, out=None):
386 write_string(s, out=out, encoding=self.params.get('encoding'))
388 def to_stdout(self, message, skip_eol=False, check_quiet=False):
389 """Print message to stdout if not in quiet mode."""
390 if self.params.get('logger'):
391 self.params['logger'].debug(message)
392 elif not check_quiet or not self.params.get('quiet', False):
393 message = self._bidi_workaround(message)
394 terminator = ['\n', ''][skip_eol]
395 output = message + terminator
397 self._write_string(output, self._screen_file)
399 def to_stderr(self, message):
400 """Print message to stderr."""
401 assert isinstance(message, compat_str)
402 if self.params.get('logger'):
403 self.params['logger'].error(message)
405 message = self._bidi_workaround(message)
406 output = message + '\n'
407 self._write_string(output, self._err_file)
409 def to_console_title(self, message):
410 if not self.params.get('consoletitle', False):
412 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
413 # c_wchar_p() might not be necessary if `message` is
414 # already of type unicode()
415 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
416 elif 'TERM' in os.environ:
417 self._write_string('\033]0;%s\007' % message, self._screen_file)
419 def save_console_title(self):
420 if not self.params.get('consoletitle', False):
422 if 'TERM' in os.environ:
423 # Save the title on stack
424 self._write_string('\033[22;0t', self._screen_file)
426 def restore_console_title(self):
427 if not self.params.get('consoletitle', False):
429 if 'TERM' in os.environ:
430 # Restore the title from stack
431 self._write_string('\033[23;0t', self._screen_file)
        # Body of __enter__ (its ``def`` line is missing from this copy):
        # saves the console title so __exit__ can restore it.
        self.save_console_title()
437 def __exit__(self, *args):
438 self.restore_console_title()
440 if self.params.get('cookiefile') is not None:
441 self.cookiejar.save()
443 def trouble(self, message=None, tb=None):
444 """Determine action to take when a download problem appears.
446 Depending on if the downloader has been configured to ignore
447 download errors or not, this method may throw an exception or
448 not when errors are found, after printing the message.
450 tb, if given, is additional traceback information.
452 if message is not None:
453 self.to_stderr(message)
454 if self.params.get('verbose'):
456 if sys.exc_info()[0]: # if .trouble has been called from an except block
458 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
459 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
460 tb += compat_str(traceback.format_exc())
462 tb_data = traceback.format_list(traceback.extract_stack())
463 tb = ''.join(tb_data)
465 if not self.params.get('ignoreerrors', False):
466 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
467 exc_info = sys.exc_info()[1].exc_info
469 exc_info = sys.exc_info()
470 raise DownloadError(message, exc_info)
471 self._download_retcode = 1
473 def report_warning(self, message):
475 Print the message to stderr, it will be prefixed with 'WARNING:'
476 If stderr is a tty file the 'WARNING:' will be colored
478 if self.params.get('logger') is not None:
479 self.params['logger'].warning(message)
481 if self.params.get('no_warnings'):
483 if self._err_file.isatty() and os.name != 'nt':
484 _msg_header = '\033[0;33mWARNING:\033[0m'
486 _msg_header = 'WARNING:'
487 warning_message = '%s %s' % (_msg_header, message)
488 self.to_stderr(warning_message)
490 def report_error(self, message, tb=None):
492 Do the same as trouble, but prefixes the message with 'ERROR:', colored
493 in red if stderr is a tty file.
495 if self._err_file.isatty() and os.name != 'nt':
496 _msg_header = '\033[0;31mERROR:\033[0m'
498 _msg_header = 'ERROR:'
499 error_message = '%s %s' % (_msg_header, message)
500 self.trouble(error_message, tb)
502 def report_file_already_downloaded(self, file_name):
503 """Report file has already been fully downloaded."""
505 self.to_screen('[download] %s has already been downloaded' % file_name)
506 except UnicodeEncodeError:
507 self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): the enclosing ``try:``, the ``autonumber_size = 5``
        # default, the closing parens of the sanitize/template expressions and
        # the ``return filename`` / ``except KeyError`` tail are missing from
        # this copy of the file.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the playlist index to the width of the playlist size.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        # Derive a human-readable 'resolution' when the extractor did not set one.
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '?x%d' % template_dict['width']

        # Sanitize every template value so it is safe in a filename.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Unknown fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # NOTE(review): several guards (``if matchtitle:``, ``if rejecttitle:``,
        # ``if date is not None:``) and the final ``return None`` are missing
        # from this copy of the file.
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        # Date range filter (--date / --datebefore / --dateafter).
        date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        # View-count filters; entries with no view count are never filtered.
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
582 def add_extra_info(info_dict, extra_info):
583 '''Set the keys from extra_info in info dict if they are missing'''
584 for key, value in extra_info.items():
585 info_dict.setdefault(key, value)
    # NOTE(review): the tail of the signature (``process=True):``), the
    # ``ies = self._ies`` fallback, the ``for ie in ies:`` loop header, the
    # ``try:`` and several break/raise/return lines of this method are
    # missing from this copy of the file.
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        # When the caller already knows the extractor, query only that one.
        ies = [self.get_info_extractor(ie_key)]

        if not ie.suitable(url):

        self.report_warning('The program functionality for this site has been marked as broken, '
                            'and will probably not work.')

        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            '_type': 'compat_list',
            'entries': ie_result,
        self.add_default_extra_info(ie_result, ie, url)

        return self.process_ie_result(ie_result, download, extra_info)

        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except MaxDownloadsReached:
        except Exception as e:
            # Unexpected errors are fatal unless --ignore-errors is set.
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))

        self.report_error('no suitable InfoExtractor for URL %s' % url)
637 def add_default_extra_info(self, ie_result, ie, url):
638 self.add_extra_info(ie_result, {
639 'extractor': ie.IE_NAME,
641 'webpage_url_basename': url_basename(url),
642 'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): a number of statement lines are missing from this copy
        # (e.g. the ``return ie_result`` of the extract_flat branch, the
        # ``extra = {`` / ``}`` wrappers, several ``self.to_screen(`` openers,
        # the ``else:`` of the entries dispatch and the ``_fixup`` definition);
        # restore them from upstream before treating this as runnable.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # With --flat-playlist, URL entries inside a playlist are returned
            # unresolved instead of being extracted.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result take precedence,
            # except for '_type' and 'url' themselves.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Lazily-paged playlists: fetch only the requested slice.
                entries = ie_entries.getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries)
                # Generic iterable of entries: slice lazily.
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Per-entry extra info forwarded to the recursive call below.
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            # Legacy extractors returning a bare list of results.
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
        else:
            raise Exception('Invalid result type: %s' % result_type)
    def _apply_format_filter(self, format_spec, available_formats):
        " Returns a tuple of the remaining format_spec and filtered formats "
        # NOTE(review): the OPERATORS mapping ('<', '<=', '>', ... ->
        # operator.*), the ``if not m:`` guard, the ``try:``/``except`` around
        # the int() parse, the ``raise ValueError(`` opener and the
        # ``def _filter(f):`` header are missing from this copy of the file.
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|filesize)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
        raise ValueError('Invalid format specification %r' % format_spec)

        # Plain integers first; fall back to parsing a human-readable size
        # (e.g. '500K'), retried once more with an explicit 'B' suffix.
        comparison_value = int(m.group('value'))
        comparison_value = parse_filesize(m.group('value'))
        if comparison_value is None:
            comparison_value = parse_filesize(m.group('value') + 'B')
        if comparison_value is None:
            'Invalid value %r in format specification %r' % (
                m.group('value'), format_spec))
        op = OPERATORS[m.group('op')]

        actual_value = f.get(m.group('key'))
        if actual_value is None:
            # A trailing '?' in the spec keeps formats lacking this field.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]

        # Strip the consumed trailing '[...]' filter from the spec.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'

        return (new_format_spec, new_formats)
    def select_format(self, format_spec, available_formats):
        # NOTE(review): the ``return None`` of the empty-formats guard, the
        # ``audio_formats = [`` / ``video_formats = [`` list openers, their
        # ``if audio_formats:`` guards, the final ``else:`` branches and the
        # ``if matches:`` return are missing from this copy of the file.
        # Consume any trailing '[key<op>value]' filters first.
        while format_spec.endswith(']'):
            format_spec, available_formats = self._apply_format_filter(
                format_spec, available_formats)
        if not available_formats:

        # Formats are assumed to be sorted worst-to-best, so [-1] is best.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
            # Audio-only formats are those with no video codec.
            f for f in available_formats
            if f.get('vcodec') == 'none']
            return audio_formats[-1]
        elif format_spec == 'worstaudio':
            f for f in available_formats
            if f.get('vcodec') == 'none']
            return audio_formats[0]
        elif format_spec == 'bestvideo':
            # Video-only formats are those with no audio codec.
            f for f in available_formats
            if f.get('acodec') == 'none']
            return video_formats[-1]
        elif format_spec == 'worstvideo':
            f for f in available_formats
            if f.get('acodec') == 'none']
            return video_formats[0]
        # A known extension selects by 'ext'; anything else by format_id.
        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
872 def _calc_headers(self, info_dict):
873 res = std_headers.copy()
875 add_headers = info_dict.get('http_headers')
877 res.update(add_headers)
879 cookies = self._calc_cookies(info_dict)
881 res['Cookie'] = cookies
    def _calc_cookies(self, info_dict):
        # Minimal stand-in implementing just enough of urllib's Request API
        # for cookiejar.add_cookie_header() to work.
        # NOTE(review): the ``self.url``/``self.headers`` initialisers and the
        # bodies of add_unredirected_header/get_full_url are missing from this
        # copy of the file.
        class _PseudoRequest(object):
            def __init__(self, url):
                self.unverifiable = False

            def add_unredirected_header(self, k, v):

            def get_full_url(self):

            def is_unverifiable(self):
                return self.unverifiable

            def has_header(self, h):
                return h in self.headers

        # Let the cookiejar write its 'Cookie' header into the pseudo-request,
        # then read it back (None when no cookie matched the URL).
        pr = _PseudoRequest(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.headers.get('Cookie')
    def process_video_result(self, info_dict, download=True):
        # Normalise a single extractor result, select the requested format(s)
        # and hand each one to process_info() for downloading.
        # NOTE(review): many statement lines are missing from this copy (the
        # ``if thumbnail:`` and ``for t in thumbnails:`` headers, the
        # ``if download:`` / ``return info_dict`` of the Youku branch, the
        # ``else:`` / ``if not formats:`` around the formats setup, the
        # ``new_info = {`` / ``}`` wrappers of the merge branch, the
        # ``else:`` branches of the format selection and several returns);
        # restore them from upstream before treating this as runnable.
        assert info_dict.get('_type', 'video') == 'video'

        # Extractor results must always carry an id and a title.
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalise a single 'thumbnail' into the 'thumbnails' list.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            thumbnails = [{'url': thumbnail}]
        # Sort worst-to-best so [-1] below picks the best thumbnail.
        thumbnails.sort(key=lambda t: (
            t.get('preference'), t.get('width'), t.get('height'),
            t.get('id'), t.get('url')))
        if 'width' in t and 'height' in t:
            t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # This extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        formats = info_dict['formats']

        raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: keep everything up to (and including) the limit id.
        format_limit = self.params.get('format_limit', None)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)

        req_format = self.params.get('format')
        if req_format is None:

        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                        # Merged output keeps the video stream's container
                        # unless --merge-output-format overrides it.
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                        'requested_formats': formats_info,
                        'ext': formats_info[0]['ext'],
                        # Video fields come from the first (video) format,
                        # audio fields from the second.
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                    selected_format = None
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
        if not formats_to_download:
            raise ExtractorError('requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
1075 def process_info(self, info_dict):
1076 """Process a single resolved IE result."""
# NOTE(review): this numbered listing has gaps — several original source
# lines (early 'return's, 'try:'/'else:' scaffolding, the nested dl()
# body) are elided below; verify against the full file before editing.
1078 assert info_dict.get('_type', 'video') == 'video'
# Enforce --max-downloads before doing any work for this video.
1080 max_downloads = self.params.get('max_downloads')
1081 if max_downloads is not None:
1082 if self._num_downloads >= int(max_downloads):
1083 raise MaxDownloadsReached()
# Preserve the untruncated title, then cap 'title' at 200 chars.
1085 info_dict['fulltitle'] = info_dict['title']
1086 if len(info_dict['title']) > 200:
1087 info_dict['title'] = info_dict['title'][:197] + '...'
1089 # Keep for backwards compatibility
1090 info_dict['stitle'] = info_dict['title']
1092 if 'format' not in info_dict:
1093 info_dict['format'] = info_dict['ext']
# _match_entry returns a human-readable skip reason, or None to proceed.
1095 reason = self._match_entry(info_dict)
1096 if reason is not None:
1097 self.to_screen('[download] ' + reason)
1100 self._num_downloads += 1
1102 filename = self.prepare_filename(info_dict)
# Forced printings: each --force-* / --print-* style option dumps one
# field to stdout (machine-readable output).
1105 if self.params.get('forcetitle', False):
1106 self.to_stdout(info_dict['fulltitle'])
1107 if self.params.get('forceid', False):
1108 self.to_stdout(info_dict['id'])
1109 if self.params.get('forceurl', False):
1110 if info_dict.get('requested_formats') is not None:
1111 for f in info_dict['requested_formats']:
1112 self.to_stdout(f['url'] + f.get('play_path', ''))
1114 # For RTMP URLs, also include the playpath
1115 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1116 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1117 self.to_stdout(info_dict['thumbnail'])
1118 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1119 self.to_stdout(info_dict['description'])
1120 if self.params.get('forcefilename', False) and filename is not None:
1121 self.to_stdout(filename)
1122 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1123 self.to_stdout(formatSeconds(info_dict['duration']))
1124 if self.params.get('forceformat', False):
1125 self.to_stdout(info_dict['format'])
1126 if self.params.get('forcejson', False):
1127 info_dict['_filename'] = filename
1128 self.to_stdout(json.dumps(info_dict))
1129 if self.params.get('dump_single_json', False):
1130 info_dict['_filename'] = filename
1132 # Do nothing else if in simulate mode
1133 if self.params.get('simulate', False):
1136 if filename is None:
# Create the destination directory tree if needed.
1140 dn = os.path.dirname(encodeFilename(filename))
1141 if dn and not os.path.exists(dn):
1143 except (OSError, IOError) as err:
1144 self.report_error('unable to create directory ' + compat_str(err))
# --write-description: dump the description to <filename>.description.
1147 if self.params.get('writedescription', False):
1148 descfn = filename + '.description'
1149 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1150 self.to_screen('[info] Video description is already present')
1151 elif info_dict.get('description') is None:
1152 self.report_warning('There\'s no description to write.')
1155 self.to_screen('[info] Writing video description to: ' + descfn)
1156 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1157 descfile.write(info_dict['description'])
1158 except (OSError, IOError):
1159 self.report_error('Cannot write description file ' + descfn)
# --write-annotations: dump XML annotations next to the video.
1162 if self.params.get('writeannotations', False):
1163 annofn = filename + '.annotations.xml'
1164 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1165 self.to_screen('[info] Video annotations are already present')
1168 self.to_screen('[info] Writing video annotations to: ' + annofn)
1169 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1170 annofile.write(info_dict['annotations'])
1171 except (KeyError, TypeError):
1172 self.report_warning('There are no annotations to write.')
1173 except (OSError, IOError):
1174 self.report_error('Cannot write annotations file: ' + annofn)
# Subtitles: written when either --write-sub or --write-auto-sub is set.
1177 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1178 self.params.get('writeautomaticsub')])
1180 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1181 # subtitles download errors are already managed as troubles in relevant IE
1182 # that way it will silently go on when used with unsupporting IE
1183 subtitles = info_dict['subtitles']
1184 sub_format = self.params.get('subtitlesformat', 'srt')
1185 for sub_lang in subtitles.keys():
1186 sub = subtitles[sub_lang]
1190 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1191 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1192 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1194 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1195 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1197 except (OSError, IOError):
1198 self.report_error('Cannot write subtitles file ' + sub_filename)
# --write-info-json: dump the full info dict as <name>.info.json.
1201 if self.params.get('writeinfojson', False):
1202 infofn = os.path.splitext(filename)[0] + '.info.json'
1203 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1204 self.to_screen('[info] Video description metadata is already present')
1206 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1208 write_json_file(info_dict, infofn)
1209 except (OSError, IOError):
1210 self.report_error('Cannot write metadata to JSON file ' + infofn)
# --write-thumbnail: fetch the thumbnail URL and save it beside the video.
1213 if self.params.get('writethumbnail', False):
1214 if info_dict.get('thumbnail') is not None:
1215 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1216 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1217 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1218 self.to_screen('[%s] %s: Thumbnail is already present' %
1219 (info_dict['extractor'], info_dict['id']))
1221 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1222 (info_dict['extractor'], info_dict['id']))
1224 uf = self.urlopen(info_dict['thumbnail'])
1225 with open(thumb_filename, 'wb') as thumbf:
1226 shutil.copyfileobj(uf, thumbf)
1227 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1228 (info_dict['extractor'], info_dict['id'], thumb_filename))
1229 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1230 self.report_warning('Unable to download thumbnail "%s": %s' %
1231 (info_dict['thumbnail'], compat_str(err)))
# The actual download (skipped entirely by --skip-download).
1233 if not self.params.get('skip_download', False):
# Local helper: pick a FileDownloader for this format and run it,
# forwarding the registered progress hooks.
1236 fd = get_suitable_downloader(info, self.params)(self, self.params)
1237 for ph in self._progress_hooks:
1238 fd.add_progress_hook(ph)
1239 if self.params.get('verbose'):
1240 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1241 return fd.download(name, info)
# Merged request ('137+139'): download each part to its own f<id> file
# and schedule FFmpegMergerPP to join them afterwards.
1242 if info_dict.get('requested_formats') is not None:
1245 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1246 if not merger._executable:
1248 self.report_warning('You have requested multiple '
1249 'formats but ffmpeg or avconv are not installed.'
1250 ' The formats won\'t be merged')
1252 postprocessors = [merger]
1253 for f in info_dict['requested_formats']:
1254 new_info = dict(info_dict)
1256 fname = self.prepare_filename(new_info)
1257 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1258 downloaded.append(fname)
1259 partial_success = dl(fname, new_info)
1260 success = success and partial_success
1261 info_dict['__postprocessors'] = postprocessors
1262 info_dict['__files_to_merge'] = downloaded
1264 # Just a single file
1265 success = dl(filename, info_dict)
1266 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1267 self.report_error('unable to download video data: %s' % str(err))
1269 except (OSError, IOError) as err:
1270 raise UnavailableVideoError(err)
1271 except (ContentTooShortError, ) as err:
1272 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# Fixup phase: optionally repair known container/aspect problems,
# controlled by --fixup (default 'detect_or_warn').
1277 fixup_policy = self.params.get('fixup')
1278 if fixup_policy is None:
1279 fixup_policy = 'detect_or_warn'
1281 stretched_ratio = info_dict.get('stretched_ratio')
1282 if stretched_ratio is not None and stretched_ratio != 1:
1283 if fixup_policy == 'warn':
1284 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1285 info_dict['id'], stretched_ratio))
1286 elif fixup_policy == 'detect_or_warn':
1287 stretched_pp = FFmpegFixupStretchedPP(self)
1288 if stretched_pp.available:
1289 info_dict.setdefault('__postprocessors', [])
1290 info_dict['__postprocessors'].append(stretched_pp)
1292 self.report_warning(
1293 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1294 info_dict['id'], stretched_ratio))
1296 assert fixup_policy in ('ignore', 'never')
# DASH m4a audio needs a container fixup for broad player support.
1298 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1299 if fixup_policy == 'warn':
1300 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1302 elif fixup_policy == 'detect_or_warn':
1303 fixup_pp = FFmpegFixupM4aPP(self)
1304 if fixup_pp.available:
1305 info_dict.setdefault('__postprocessors', [])
1306 info_dict['__postprocessors'].append(fixup_pp)
1308 self.report_warning(
1309 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1312 assert fixup_policy in ('ignore', 'never')
# Run postprocessors, then mark the video done in the download archive.
1315 self.post_process(filename, info_dict)
1316 except (PostProcessingError) as err:
1317 self.report_error('postprocessing: %s' % str(err))
1319 self.record_download_archive(info_dict)
1321 def download(self, url_list):
1322 """Download a given list of URLs."""
# NOTE(review): listing gaps — part of the multi-line 'if' condition and
# the try:/else: scaffolding around extract_info are elided here.
1323 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Refuse to write several videos into one fixed (non-templated) file.
1324 if (len(url_list) > 1 and
1326 and self.params.get('max_downloads') != 1):
1327 raise SameFileError(outtmpl)
1329 for url in url_list:
1331 # It also downloads the videos
1332 res = self.extract_info(url)
1333 except UnavailableVideoError:
1334 self.report_error('unable to download video')
1335 except MaxDownloadsReached:
# Stop processing further URLs once the --max-downloads cap is hit.
1336 self.to_screen('[info] Maximum number of downloaded files reached.')
# --dump-single-json: emit the whole extraction result as one JSON blob.
1339 if self.params.get('dump_single_json', False):
1340 self.to_stdout(json.dumps(res))
# Process exit code accumulated by the error reporters above.
1342 return self._download_retcode
1344 def download_with_info_file(self, info_filename):
# Re-run a download from a previously written --write-info-json file
# instead of extracting afresh.  NOTE(review): the json-load and try:
# lines are elided in this listing.
1345 with io.open(info_filename, 'r', encoding='utf-8') as f:
1348 self.process_ie_result(info, download=True)
1349 except DownloadError:
# The stored info may be stale; fall back to a fresh extraction
# from the original page URL when we have one.
1350 webpage_url = info.get('webpage_url')
1351 if webpage_url is not None:
1352 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1353 return self.download([webpage_url])
1356 return self._download_retcode
1358 def post_process(self, filename, ie_info):
1359 """Run all the postprocessors on the given file."""
# NOTE(review): listing gaps — the pps_chain/keep_video initialisations
# and the try: lines are elided here; verify against the full file.
1360 info = dict(ie_info)
1361 info['filepath'] = filename
# Per-video postprocessors (merge/fixup, under '__postprocessors') run
# before the globally registered ones in self._pps.
1363 if ie_info.get('__postprocessors') is not None:
1364 pps_chain.extend(ie_info['__postprocessors'])
1365 pps_chain.extend(self._pps)
1366 for pp in pps_chain:
1368 old_filename = info['filepath']
# Each PP returns (keep_video_wish, updated_info); the wish votes on
# whether the original file should be preserved.
1370 keep_video_wish, info = pp.run(info)
1371 if keep_video_wish is not None:
1373 keep_video = keep_video_wish
1374 elif keep_video is None:
1375 # No clear decision yet, let IE decide
1376 keep_video = keep_video_wish
1377 except PostProcessingError as e:
1378 self.report_error(e.msg)
# Delete the source file unless some PP asked to keep it or -k was given.
1379 if keep_video is False and not self.params.get('keepvideo', False):
1381 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1382 os.remove(encodeFilename(old_filename))
1383 except (IOError, OSError):
1384 self.report_warning('Unable to remove downloaded video file')
1386 def _make_archive_id(self, info_dict):
1387 # Future-proof against any change in case
1388 # and backwards compatibility with prior versions
1389 extractor = info_dict.get('extractor_key')
1390 if extractor is None:
1391 if 'id' in info_dict:
1392 extractor = info_dict.get('ie_key') # key in a playlist
1393 if extractor is None:
1394 return None # Incomplete video information
1395 return extractor.lower() + ' ' + info_dict['id']
1397 def in_download_archive(self, info_dict):
# Return True when this video's archive id already appears in the
# --download-archive file.  NOTE(review): the 'fn is None'/'vid_id is
# None' guards, the try:, the 'return True' hit and the final returns
# are elided in this listing.
1398 fn = self.params.get('download_archive')
1402 vid_id = self._make_archive_id(info_dict)
1404 return False # Incomplete video information
# locked_file guards against concurrent readers/writers of the archive.
1407 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1408 for line in archive_file:
1409 if line.strip() == vid_id:
1411 except IOError as ioe:
# A missing archive file simply means nothing was recorded yet;
# any other I/O error is re-raised below (line elided).
1412 if ioe.errno != errno.ENOENT:
1416 def record_download_archive(self, info_dict):
# Append this video's archive id to the --download-archive file.
# NOTE(review): the 'fn is None' early return and the vid_id assertion
# are elided in this listing.
1417 fn = self.params.get('download_archive')
1420 vid_id = self._make_archive_id(info_dict)
# locked_file serialises concurrent appenders; one id per line.
1422 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1423 archive_file.write(vid_id + '\n')
1426 def format_resolution(format, default='unknown'):
# Human-readable resolution for a format dict: an explicit 'resolution'
# wins, then 'WxH', then 'Hp' or '?xW' fallbacks.  NOTE(review): the
# audio-only early return, the default branch and the final 'return res'
# (and presumably a @staticmethod decorator) are elided in this listing.
1427 if format.get('vcodec') == 'none':
1429 if format.get('resolution') is not None:
1430 return format['resolution']
1431 if format.get('height') is not None:
1432 if format.get('width') is not None:
1433 res = '%sx%s' % (format['width'], format['height'])
1435 res = '%sp' % format['height']
1436 elif format.get('width') is not None:
1437 res = '?x%d' % format['width']
1442 def _format_note(self, fdict):
# Build the free-form "note" column for --list-formats from whatever
# metadata the format dict carries (bitrates, codecs, fps, filesize...).
# NOTE(review): the 'res' initialisation, the ', ' separator lines and
# the final 'return res' are elided in this listing.
1444 if fdict.get('ext') in ['f4f', 'f4m']:
1445 res += '(unsupported) '
1446 if fdict.get('format_note') is not None:
1447 res += fdict['format_note'] + ' '
# Total bitrate, right-aligned in a 4-char field.
1448 if fdict.get('tbr') is not None:
1449 res += '%4dk ' % fdict['tbr']
1450 if fdict.get('container') is not None:
1453 res += '%s container' % fdict['container']
# Video codec and bitrate.
1454 if (fdict.get('vcodec') is not None and
1455 fdict.get('vcodec') != 'none'):
1458 res += fdict['vcodec']
1459 if fdict.get('vbr') is not None:
1461 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1463 if fdict.get('vbr') is not None:
1464 res += '%4dk' % fdict['vbr']
1465 if fdict.get('fps') is not None:
1466 res += ', %sfps' % fdict['fps']
# Audio codec / bitrate / sample rate.
1467 if fdict.get('acodec') is not None:
1470 if fdict['acodec'] == 'none':
1473 res += '%-5s' % fdict['acodec']
1474 elif fdict.get('abr') is not None:
1478 if fdict.get('abr') is not None:
1479 res += '@%3dk' % fdict['abr']
1480 if fdict.get('asr') is not None:
1481 res += ' (%5dHz)' % fdict['asr']
# Exact filesize preferred; '~' marks an approximation.
1482 if fdict.get('filesize') is not None:
1485 res += format_bytes(fdict['filesize'])
1486 elif fdict.get('filesize_approx') is not None:
1489 res += '~' + format_bytes(fdict['filesize_approx'])
1492 def list_formats(self, info_dict):
# Print the --list-formats table for one video.  NOTE(review): parts of
# the row template and the formats_s list construction are elided here.
1493 def line(format, idlen=20):
# One table row: format_id | extension | resolution | note.
1494 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1495 format['format_id'],
1497 self.format_resolution(format),
1498 self._format_note(format),
# Fall back to the top-level info_dict when no 'formats' list exists.
1501 formats = info_dict.get('formats', [info_dict])
# First column width adapts to the longest format_id.
1502 idlen = max(len('format code'),
1503 max(len(f['format_id']) for f in formats))
# Hide formats deliberately de-prioritised below -1000.
1505 line(f, idlen) for f in formats
1506 if f.get('preference') is None or f['preference'] >= -1000]
# Formats are assumed sorted worst-to-best.
1507 if len(formats) > 1:
1508 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1509 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1511 header_line = line({
1512 'format_id': 'format code', 'ext': 'extension',
1513 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1515 '[info] Available formats for %s:\n%s\n%s' %
1516 (info_dict['id'], header_line, '\n'.join(formats_s)))
1518 def list_thumbnails(self, info_dict):
# Print the --list-thumbnails table; falls back to the single
# 'thumbnail' URL when no 'thumbnails' list is present.
# NOTE(review): the branching/early-return lines are elided here.
1519 thumbnails = info_dict.get('thumbnails')
1521 tn_url = info_dict.get('thumbnail')
# Synthesize a one-entry list from the lone thumbnail URL.
1523 thumbnails = [{'id': '0', 'url': tn_url}]
1526 '[info] No thumbnails present for %s' % info_dict['id'])
1530 '[info] Thumbnails for %s:' % info_dict['id'])
# Unknown dimensions render as the literal string 'unknown'.
1531 self.to_screen(render_table(
1532 ['ID', 'width', 'height', 'URL'],
1533 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1535 def urlopen(self, req):
1536 """ Start an HTTP download """
# NOTE(review): the branch handling a plain-string 'req' after escaping
# is elided in this listing; 'req' may be a URL string or a Request.
1538 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1539 # always respected by websites, some tend to give out URLs with non percent-encoded
1540 # non-ASCII characters (see telemb.py, ard.py [#3412])
1541 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1542 # To work around aforementioned issue we will replace request's original URL with
1543 # percent-encoded one
1544 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1545 url = req if req_is_string else req.get_full_url()
1546 url_escaped = escape_url(url)
1548 # Substitute URL if any change after escaping
1549 if url != url_escaped:
# Rebuild the Request around the escaped URL, preserving payload,
# headers and redirect-safety attributes.
1553 req = compat_urllib_request.Request(
1554 url_escaped, data=req.data, headers=req.headers,
1555 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
# All network traffic funnels through the opener built in _setup_opener.
1557 return self._opener.open(req, timeout=self._socket_timeout)
1559 def print_debug_header(self):
# Dump the '[debug] ...' banner shown with --verbose: encodings,
# version, git HEAD, Python/exe versions, proxy map, optional call-home.
# NOTE(review): early 'return', try:/except scaffolding around the git
# probe, and a few formatting lines are elided in this listing.
1560 if not self.params.get('verbose'):
1563 if type('') is not compat_str:
1564 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1565 self.report_warning(
1566 'Your Python is broken! Update to a newer and supported version')
1568 stdout_encoding = getattr(
1569 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1571 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1572 locale.getpreferredencoding(),
1573 sys.getfilesystemencoding(),
1575 self.get_encoding()))
1576 write_string(encoding_str, encoding=None)
1578 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git commit when running from a checkout.
1580 sp = subprocess.Popen(
1581 ['git', 'rev-parse', '--short', 'HEAD'],
1582 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1583 cwd=os.path.dirname(os.path.abspath(__file__)))
1584 out, err = sp.communicate()
1585 out = out.decode().strip()
1586 if re.match('[0-9a-f]+', out):
1587 self._write_string('[debug] Git HEAD: ' + out + '\n')
1593 self._write_string('[debug] Python version %s - %s\n' % (
1594 platform.python_version(), platform_name()))
# Versions of the external helpers (ffmpeg/avconv family, rtmpdump).
1596 exe_versions = FFmpegPostProcessor.get_versions()
1597 exe_versions['rtmpdump'] = rtmpdump_version()
1598 exe_str = ', '.join(
1600 for exe, v in sorted(exe_versions.items())
1605 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the effective proxy configuration from the opener's handlers.
1608 for handler in self._opener.handlers:
1609 if hasattr(handler, 'proxies'):
1610 proxy_map.update(handler.proxies)
1611 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# --call-home: report public IP and check yt-dl.org for a newer release.
1613 if self.params.get('call_home', False):
1614 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1615 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1616 latest_version = self.urlopen(
1617 'https://yt-dl.org/latest/version').read().decode('utf-8')
1618 if version_tuple(latest_version) > version_tuple(__version__):
1619 self.report_warning(
1620 'You are using an outdated version (newest version: %s)! '
1621 'See https://yt-dl.org/update if you need help updating.' %
1624 def _setup_opener(self):
# Build the urllib opener used by self.urlopen: cookies, proxies, our
# HTTPS and YoutubeDL handlers.  NOTE(review): several 'else:' branch
# lines are elided in this listing.
1625 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds when none is configured.
1626 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1628 opts_cookiefile = self.params.get('cookiefile')
1629 opts_proxy = self.params.get('proxy')
# In-memory jar by default; a Mozilla-format jar when --cookies is set.
1631 if opts_cookiefile is None:
1632 self.cookiejar = compat_cookiejar.CookieJar()
1634 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1636 if os.access(opts_cookiefile, os.R_OK):
1637 self.cookiejar.load()
1639 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# Explicit --proxy wins; empty string disables; otherwise honor the
# environment's proxy settings.
1641 if opts_proxy is not None:
1642 if opts_proxy == '':
1645 proxies = {'http': opts_proxy, 'https': opts_proxy}
1647 proxies = compat_urllib_request.getproxies()
1648 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1649 if 'http' in proxies and 'https' not in proxies:
1650 proxies['https'] = proxies['http']
1651 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --debug-printtraffic turns on urllib's wire-level logging.
1653 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1654 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1655 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1656 opener = compat_urllib_request.build_opener(
1657 https_handler, proxy_handler, cookie_processor, ydlh)
1658 # Delete the default user-agent header, which would otherwise apply in
1659 # cases where our custom HTTP handler doesn't come into play
1660 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1661 opener.addheaders = []
1662 self._opener = opener
1664 def encode(self, s):
# Encode text to bytes using the configured (or platform-preferred)
# encoding; bytes pass through untouched.  NOTE(review): the try: and
# the final raise are elided in this listing.
1665 if isinstance(s, bytes):
1666 return s # Already encoded
1669 return s.encode(self.get_encoding())
1670 except UnicodeEncodeError as err:
# Make the failure actionable before it propagates to the caller.
1671 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1674 def get_encoding(self):
1675 encoding = self.params.get('encoding')
1676 if encoding is None:
1677 encoding = preferredencoding()