X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=cad6b026e81936e1f80cc527cc43c16bd4b9896d;hb=d5d7bdaeb517f389fff5a6557f072f3586e3c440;hp=5a79e5f1d16cfc33d136574d2eb7b0b42cdf6142;hpb=f5f4a27a964b41646303921104f4d6d6fd2098e4;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5a79e5f1d..cad6b026e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -28,7 +28,6 @@ if os.name == 'nt': import ctypes from .compat import ( - compat_basestring, compat_cookiejar, compat_expanduser, compat_get_terminal_size, @@ -40,7 +39,6 @@ from .compat import ( compat_urllib_request, ) from .utils import ( - escape_url, ContentTooShortError, date_from_str, DateRange, @@ -51,7 +49,6 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, - HEADRequest, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -264,6 +261,8 @@ class YoutubeDL(object): The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, otherwise prefer avconv. + postprocessor_args: A list of additional command-line arguments for the + postprocessor. 
""" params = None @@ -931,6 +930,37 @@ class YoutubeDL(object): else: filter_parts.append(string) + def _remove_unused_ops(tokens): + # Remove operators that we don't use and join them with the surrounding strings + # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' + ALLOWED_OPS = ('/', '+', ',', '(', ')') + last_string, last_start, last_end, last_line = None, None, None, None + for type, string, start, end, line in tokens: + if type == tokenize.OP and string == '[': + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + # everything inside brackets will be handled by _parse_filter + for type, string, start, end, line in tokens: + yield type, string, start, end, line + if type == tokenize.OP and string == ']': + break + elif type == tokenize.OP and string in ALLOWED_OPS: + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + if not last_string: + last_string = string + last_start = start + last_end = end + else: + last_string += string + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors = [] current_selector = None @@ -953,9 +983,13 @@ class YoutubeDL(object): tokens.restore_last_token() break elif string == ',': + if not current_selector: + raise syntax_error('"," must follow a format selector', start) selectors.append(current_selector) current_selector = None elif string == '/': + if not current_selector: + raise syntax_error('"/" must follow a format selector', start) first_choice = current_selector second_choice = _parse_format_selection(tokens, inside_choice=True) current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) 
@@ -972,6 +1006,8 @@ class YoutubeDL(object): elif string == '+': video_selector = current_selector audio_selector = _parse_format_selection(tokens, inside_merge=True) + if not video_selector or not audio_selector: + raise syntax_error('"+" must be between two format selectors', start) current_selector = FormatSelector(MERGE, (video_selector, audio_selector), []) else: raise syntax_error('Operator not recognized: "{0}"'.format(string), start) @@ -1005,6 +1041,9 @@ class YoutubeDL(object): format_spec = selector.selector def selector_function(formats): + formats = list(formats) + if not formats: + return if format_spec == 'all': for f in formats: yield f @@ -1100,7 +1139,7 @@ class YoutubeDL(object): stream = io.BytesIO(format_spec.encode('utf-8')) try: - tokens = list(compat_tokenize_tokenize(stream.readline)) + tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) @@ -1168,7 +1207,7 @@ class YoutubeDL(object): t.get('preference'), t.get('width'), t.get('height'), t.get('id'), t.get('url'))) for i, t in enumerate(thumbnails): - if 'width' in t and 'height' in t: + if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('id') is None: t['id'] = '%d' % i @@ -1262,7 +1301,8 @@ class YoutubeDL(object): if req_format is None: req_format_list = [] if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and - info_dict['extractor'] in ['youtube', 'ted']): + info_dict['extractor'] in ['youtube', 'ted'] and + not info_dict.get('is_live')): merger = FFmpegMergerPP(self) if merger.available and merger.can_merge(): req_format_list.append('bestvideo+bestaudio') @@ -1817,27 +1857,6 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ - - # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not - # always respected by websites, some 
tend to give out URLs with non percent-encoded - # non-ASCII characters (see telemb.py, ard.py [#3412]) - # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) - # To work around aforementioned issue we will replace request's original URL with - # percent-encoded one - req_is_string = isinstance(req, compat_basestring) - url = req if req_is_string else req.get_full_url() - url_escaped = escape_url(url) - - # Substitute URL if any change after escaping - if url != url_escaped: - if req_is_string: - req = url_escaped - else: - req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request - req = req_type( - url_escaped, data=req.data, headers=req.headers, - origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) - return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self):