YoutubeDL: format spec: don't accept a bare '/' (#6124)
[youtube-dl] / youtube_dl / YoutubeDL.py
index 258e612afa5540ebefe4189d91d029768672f0ae..c608ff91a91636bc40b4fa1c99c013aadcc80820 100755 (executable)
@@ -264,6 +264,8 @@ class YoutubeDL(object):
     The following options are used by the post processors:
     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                        otherwise prefer avconv.
+    postprocessor_args: A list of additional command-line arguments for the
+                        postprocessor.
     """
 
     params = None
@@ -920,6 +922,7 @@ class YoutubeDL(object):
         PICKFIRST = 'PICKFIRST'
         MERGE = 'MERGE'
         SINGLE = 'SINGLE'
+        GROUP = 'GROUP'
         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 
         def _parse_filter(tokens):
@@ -930,7 +933,7 @@ class YoutubeDL(object):
                 else:
                     filter_parts.append(string)
 
-        def _parse_format_selection(tokens, endwith=[]):
+        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
             selectors = []
             current_selector = None
             for type, string, start, _, _ in tokens:
@@ -940,26 +943,44 @@ class YoutubeDL(object):
                 elif type in [tokenize.NAME, tokenize.NUMBER]:
                     current_selector = FormatSelector(SINGLE, string, [])
                 elif type == tokenize.OP:
-                    if string in endwith:
+                    if string == ')':
+                        if not inside_group:
+                            # ')' will be handled by the parentheses group
+                            tokens.restore_last_token()
                         break
-                    if string == ',':
+                    elif inside_merge and string in ['/', ',']:
+                        tokens.restore_last_token()
+                        break
+                    elif inside_choice and string == ',':
+                        tokens.restore_last_token()
+                        break
+                    elif string == ',':
+                        if not current_selector:
+                            raise syntax_error('"," must follow a format selector', start)
                         selectors.append(current_selector)
                         current_selector = None
                     elif string == '/':
+                        if not current_selector:
+                            raise syntax_error('"/" must follow a format selector', start)
                         first_choice = current_selector
-                        second_choice = _parse_format_selection(tokens, [','])
-                        current_selector = None
-                        selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), []))
+                        second_choice = _parse_format_selection(tokens, inside_choice=True)
+                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                     elif string == '[':
                         if not current_selector:
                             current_selector = FormatSelector(SINGLE, 'best', [])
                         format_filter = _parse_filter(tokens)
                         current_selector.filters.append(format_filter)
+                    elif string == '(':
+                        if current_selector:
+                            raise syntax_error('Unexpected "("', start)
+                        group = _parse_format_selection(tokens, inside_group=True)
+                        current_selector = FormatSelector(GROUP, group, [])
                     elif string == '+':
                         video_selector = current_selector
-                        audio_selector = _parse_format_selection(tokens, [','])
-                        current_selector = None
-                        selectors.append(FormatSelector(MERGE, (video_selector, audio_selector), []))
+                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
+                        if not video_selector or not audio_selector:
+                            raise syntax_error('"+" must be between two format selectors', start)
+                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                     else:
                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                 elif type == tokenize.ENDMARKER:
@@ -977,6 +998,8 @@ class YoutubeDL(object):
                         for format in f(formats):
                             yield format
                 return selector_function
+            elif selector.type == GROUP:
+                selector_function = _build_selector_function(selector.selector)
             elif selector.type == PICKFIRST:
                 fs = [_build_selector_function(s) for s in selector.selector]
 
@@ -990,6 +1013,9 @@ class YoutubeDL(object):
                 format_spec = selector.selector
 
                 def selector_function(formats):
+                    formats = list(formats)
+                    if not formats:
+                        return
                     if format_spec == 'all':
                         for f in formats:
                             yield f
@@ -1084,8 +1110,32 @@ class YoutubeDL(object):
             return final_selector
 
         stream = io.BytesIO(format_spec.encode('utf-8'))
-        tokens = compat_tokenize_tokenize(stream.readline)
-        parsed_selector = _parse_format_selection(tokens)
+        try:
+            tokens = list(compat_tokenize_tokenize(stream.readline))
+        except tokenize.TokenError:
+            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
+        class TokenIterator(object):
+            def __init__(self, tokens):
+                self.tokens = tokens
+                self.counter = 0
+
+            def __iter__(self):
+                return self
+
+            def __next__(self):
+                if self.counter >= len(self.tokens):
+                    raise StopIteration()
+                value = self.tokens[self.counter]
+                self.counter += 1
+                return value
+
+            next = __next__
+
+            def restore_last_token(self):
+                self.counter -= 1
+
+        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
         return _build_selector_function(parsed_selector)
 
     def _calc_headers(self, info_dict):
@@ -1129,7 +1179,7 @@ class YoutubeDL(object):
                 t.get('preference'), t.get('width'), t.get('height'),
                 t.get('id'), t.get('url')))
             for i, t in enumerate(thumbnails):
-                if 'width' in t and 'height' in t:
+                if t.get('width') and t.get('height'):
                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
                 if t.get('id') is None:
                     t['id'] = '%d' % i
@@ -1223,7 +1273,8 @@ class YoutubeDL(object):
         if req_format is None:
             req_format_list = []
             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    info_dict['extractor'] in ['youtube', 'ted']):
+                    info_dict['extractor'] in ['youtube', 'ted'] and
+                    not info_dict.get('is_live')):
                 merger = FFmpegMergerPP(self)
                 if merger.available and merger.can_merge():
                     req_format_list.append('bestvideo+bestaudio')