X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=bfb4ff225f2376c09b2e595276244d887a5e879c;hb=a88d461dff67205fcec684426afbcbeb4b0e7cf5;hp=d7aa951ff39fc54238c5759ee5a57f2c70d9de0d;hpb=33f3040a3e611f45ad920bd06030691910ddf815;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d7aa951ff..bfb4ff225 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1,10 +1,11 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import absolute_import, unicode_literals import collections import contextlib +import copy import datetime import errno import fileinput @@ -23,14 +24,17 @@ import sys import time import tokenize import traceback +import random + +from string import ascii_letters from .compat import ( compat_basestring, compat_cookiejar, - compat_expanduser, compat_get_terminal_size, compat_http_client, compat_kwargs, + compat_numeric_types, compat_os_name, compat_str, compat_tokenize_tokenize, @@ -51,9 +55,13 @@ from .utils import ( encode_compat_str, encodeFilename, error_to_compat_str, + expand_path, ExtractorError, format_bytes, formatSeconds, + GeoRestrictedError, + int_or_none, + ISO3166Utils, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -64,6 +72,7 @@ from .utils import ( PostProcessingError, preferredencoding, prepend_extension, + register_socks_protocols, render_table, replace_extension, SameFileError, @@ -80,9 +89,10 @@ from .utils import ( write_string, YoutubeDLCookieProcessor, YoutubeDLHandler, + PhantomJSwrapper, ) from .cache import Cache -from .extractor import get_info_extractor, gen_extractors +from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( @@ -129,6 +139,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. + ap_mso: Adobe Pass multiple-system operator identifier. + ap_username: Multiple-system operator account username. + ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. @@ -154,6 +167,7 @@ class YoutubeDL(object): playlistend: Playlist item to end at. playlist_items: Specific indices of playlist to download. playlistreverse: Download playlist items in reverse order. + playlistrandom: Download playlist items in random order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. @@ -195,8 +209,8 @@ class YoutubeDL(object): prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. proxy: URL of the proxy server to use - cn_verification_proxy: URL of the proxy to use for IP address verification - on Chinese sites. (Experimental) + geo_verification_proxy: URL of the proxy to use for IP address verification + on geo-restricted sites. (Experimental) socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi @@ -247,7 +261,16 @@ class YoutubeDL(object): source_address: (Experimental) Client-side IP address to bind to. call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. - sleep_interval: Number of seconds to sleep before each download. + sleep_interval: Number of seconds to sleep before each download when + used alone or a lower bound of a range for randomized + sleep before each download (minimum possible number + of seconds to sleep) when used along with + max_sleep_interval. + max_sleep_interval:Upper bound of a range for randomized sleep before each + download (maximum possible number of seconds to sleep). + Must only be used along with sleep_interval. + Actual sleep time will be a random float from range + [sleep_interval; max_sleep_interval]. listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. match_filter: A function that gets called with the info_dict of @@ -256,11 +279,19 @@ class YoutubeDL(object): If it returns None, the video is downloaded. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. + geo_bypass: Bypass geographic restriction via faking X-Forwarded-For + HTTP header (experimental) + geo_bypass_country: + Two-letter ISO 3166-2 country code that will be used for + explicit geographic restriction bypassing via faking + X-Forwarded-For HTTP header (experimental) The following options determine which downloader is picked: external_downloader: Executable of the external downloader to call. None or unset for standard (built-in) downloader. - hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. + hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv + if True, otherwise use ffmpeg/avconv if False, otherwise + use downloader suggested by extractor if None. The following parameters are not used by YoutubeDL itself, they are used by the downloader (see youtube_dl/downloader/common.py): @@ -275,6 +306,17 @@ class YoutubeDL(object): postprocessor. """ + _NUMERIC_FIELDS = set(( + 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', + 'timestamp', 'upload_year', 'upload_month', 'upload_day', + 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', + 'average_rating', 'comment_count', 'age_limit', + 'start_time', 'end_time', + 'chapter_number', 'season_number', 'episode_number', + 'track_number', 'disc_number', 'release_year', + 'playlist_index', + )) + params = None _ies = [] _pps = [] @@ -301,6 +343,21 @@ class YoutubeDL(object): self.params.update(params) self.cache = Cache(self) + def check_deprecated(param, option, suggestion): + if self.params.get(param) is not None: + self.report_warning( + '%s is deprecated. Use %s instead.' % (option, suggestion)) + return True + return False + + if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): + if self.params.get('geo_verification_proxy') is None: + self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] + + check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits') + check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') + check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') + if params.get('bidi_workaround', False): try: import pty @@ -323,15 +380,15 @@ class YoutubeDL(object): ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: - if ose.errno == 2: + if ose.errno == errno.ENOENT: self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise - if (sys.version_info >= (3,) and sys.platform != 'win32' and + if (sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and not params.get('restrictfilenames', False)): - # On Python 3, the Unicode filesystem API will throw errors (#1474) + # Unicode filesystem API will throw errors (#1474, #13027) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' 'cannot encode all characters. ' @@ -359,6 +416,8 @@ class YoutubeDL(object): for ph in self.params.get('progress_hooks', []): self.add_progress_hook(ph) + register_socks_protocols() + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ @@ -378,8 +437,9 @@ class YoutubeDL(object): def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) - self._ies_instances[ie.ie_key()] = ie - ie.set_downloader(self) + if not isinstance(ie, type): + self._ies_instances[ie.ie_key()] = ie + ie.set_downloader(self) def get_info_extractor(self, ie_key): """ @@ -397,7 +457,7 @@ class YoutubeDL(object): """ Add the InfoExtractors returned by gen_extractors to the end of the list """ - for ie in gen_extractors(): + for ie in gen_extractor_classes(): self.add_info_extractor(ie) def add_post_processor(self, pp): @@ -453,24 +513,25 @@ class YoutubeDL(object): def to_console_title(self, message): if not self.params.get('consoletitle', False): return - if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): - # c_wchar_p() might not be necessary if `message` is - # already of type unicode() - ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) + if compat_os_name == 'nt': + if ctypes.windll.kernel32.GetConsoleWindow(): + # c_wchar_p() might not be necessary if `message` is + # already of type unicode() + ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) elif 'TERM' in os.environ: self._write_string('\033]0;%s\007' % message, self._screen_file) def save_console_title(self): if not self.params.get('consoletitle', False): return - if 'TERM' in os.environ: + if compat_os_name != 'nt' and 'TERM' in os.environ: # Save the title on stack self._write_string('\033[22;0t', self._screen_file) def restore_console_title(self): if not self.params.get('consoletitle', False): return - if 'TERM' in os.environ: + if compat_os_name != 'nt' and 'TERM' in os.environ: # Restore the title from stack self._write_string('\033[23;0t', self._screen_file) @@ -559,10 +620,7 @@ class YoutubeDL(object): autonumber_size = self.params.get('autonumber_size') if autonumber_size is None: autonumber_size = 5 - autonumber_templ = '%0' + str(autonumber_size) + 'd' - template_dict['autonumber'] = autonumber_templ % self._num_downloads - if template_dict.get('playlist_index') is not None: - template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index']) + template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads if template_dict.get('resolution') is None: if template_dict.get('width') and template_dict.get('height'): template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) @@ -574,15 +632,64 @@ class YoutubeDL(object): sanitize = lambda k, v: sanitize_filename( compat_str(v), restricted=self.params.get('restrictfilenames'), - is_id=(k == 'id')) - template_dict = dict((k, sanitize(k, v)) + is_id=(k == 'id' or k.endswith('_id'))) + template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) for k, v in template_dict.items() - if v is not None) + if v is not None and not isinstance(v, (list, tuple, dict))) template_dict = collections.defaultdict(lambda: 'NA', template_dict) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) - tmpl = compat_expanduser(outtmpl) - filename = tmpl % template_dict + + # For fields playlist_index and autonumber convert all occurrences + # of %(field)s to %(field)0Nd for backward compatibility + field_size_compat_map = { + 'playlist_index': len(str(template_dict['n_entries'])), + 'autonumber': autonumber_size, + } + FIELD_SIZE_COMPAT_RE = r'(?autonumber|playlist_index)\)s' + mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl) + if mobj: + outtmpl = re.sub( + FIELD_SIZE_COMPAT_RE, + r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')], + outtmpl) + + # Missing numeric fields used together with integer presentation types + # in format specification will break the argument substitution since + # string 'NA' is returned for missing fields. We will patch output + # template for missing fields to meet string presentation type. + for numeric_field in self._NUMERIC_FIELDS: + if numeric_field not in template_dict: + # As of [1] format syntax is: + # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type + # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting + FORMAT_RE = r'''(?x) + (? 1 and + outtmpl != '-' and '%' not in outtmpl and self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) @@ -1954,6 +2197,8 @@ class YoutubeDL(object): write_string(encoding_str, encoding=None) self._write_string('[debug] youtube-dl version ' + __version__ + '\n') + if _LAZY_LOADER: + self._write_string('[debug] Lazy loading extractors enabled' + '\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -1973,6 +2218,7 @@ class YoutubeDL(object): exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions['rtmpdump'] = rtmpdump_version() + exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( '%s %s' % (exe, v) for exe, v in sorted(exe_versions.items()) @@ -2009,6 +2255,7 @@ class YoutubeDL(object): if opts_cookiefile is None: self.cookiejar = compat_cookiejar.CookieJar() else: + opts_cookiefile = expand_path(opts_cookiefile) self.cookiejar = compat_cookiejar.MozillaCookieJar( opts_cookiefile) if os.access(opts_cookiefile, os.R_OK):