YoutubeDL: format spec: don't accept a bare '/' (#6124)

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 258e612afa5540ebefe4189d91d029768672f0ae..c608ff91a91636bc40b4fa1c99c013aadcc80820 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -264,6 +264,8 @@ class YoutubeDL(object):
      The following options are used by the post processors:
      prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                         otherwise prefer avconv.
+    postprocessor_args: A list of additional command-line arguments for the
+                        postprocessor.
      """
  
      params = None
@@ -920,6 +922,7 @@ class YoutubeDL(object):
          PICKFIRST = 'PICKFIRST'
          MERGE = 'MERGE'
          SINGLE = 'SINGLE'
+        GROUP = 'GROUP'
          FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
  
          def _parse_filter(tokens):
@@ -930,7 +933,7 @@ class YoutubeDL(object):
                  else:
                      filter_parts.append(string)
  
-        def _parse_format_selection(tokens, endwith=[]):
+        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
              selectors = []
              current_selector = None
              for type, string, start, _, _ in tokens:
@@ -940,26 +943,44 @@ class YoutubeDL(object):
                  elif type in [tokenize.NAME, tokenize.NUMBER]:
                      current_selector = FormatSelector(SINGLE, string, [])
                  elif type == tokenize.OP:
-                    if string in endwith:
+                    if string == ')':
+                        if not inside_group:
+                            # ')' will be handled by the parentheses group
+                            tokens.restore_last_token()
                          break
-                    if string == ',':
+                    elif inside_merge and string in ['/', ',']:
+                        tokens.restore_last_token()
+                        break
+                    elif inside_choice and string == ',':
+                        tokens.restore_last_token()
+                        break
+                    elif string == ',':
+                        if not current_selector:
+                            raise syntax_error('"," must follow a format selector', start)
                          selectors.append(current_selector)
                          current_selector = None
                      elif string == '/':
+                        if not current_selector:
+                            raise syntax_error('"/" must follow a format selector', start)
                          first_choice = current_selector
-                        second_choice = _parse_format_selection(tokens, [','])
-                        current_selector = None
-                        selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), []))
+                        second_choice = _parse_format_selection(tokens, inside_choice=True)
+                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                      elif string == '[':
                          if not current_selector:
                              current_selector = FormatSelector(SINGLE, 'best', [])
                          format_filter = _parse_filter(tokens)
                          current_selector.filters.append(format_filter)
+                    elif string == '(':
+                        if current_selector:
+                            raise syntax_error('Unexpected "("', start)
+                        group = _parse_format_selection(tokens, inside_group=True)
+                        current_selector = FormatSelector(GROUP, group, [])
                      elif string == '+':
                          video_selector = current_selector
-                        audio_selector = _parse_format_selection(tokens, [','])
-                        current_selector = None
-                        selectors.append(FormatSelector(MERGE, (video_selector, audio_selector), []))
+                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
+                        if not video_selector or not audio_selector:
+                            raise syntax_error('"+" must be between two format selectors', start)
+                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                      else:
                          raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                  elif type == tokenize.ENDMARKER:
@@ -977,6 +998,8 @@ class YoutubeDL(object):
                          for format in f(formats):
                              yield format
                  return selector_function
+            elif selector.type == GROUP:
+                selector_function = _build_selector_function(selector.selector)
              elif selector.type == PICKFIRST:
                  fs = [_build_selector_function(s) for s in selector.selector]
  
@@ -990,6 +1013,9 @@ class YoutubeDL(object):
                  format_spec = selector.selector
  
                  def selector_function(formats):
+                    formats = list(formats)
+                    if not formats:
+                        return
                      if format_spec == 'all':
                          for f in formats:
                              yield f
@@ -1084,8 +1110,32 @@ class YoutubeDL(object):
              return final_selector
  
          stream = io.BytesIO(format_spec.encode('utf-8'))
-        tokens = compat_tokenize_tokenize(stream.readline)
-        parsed_selector = _parse_format_selection(tokens)
+        try:
+            tokens = list(compat_tokenize_tokenize(stream.readline))
+        except tokenize.TokenError:
+            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
+        class TokenIterator(object):  # Iterator over a pre-built token list that can step back one position.
+            def __init__(self, tokens):  # tokens: indexable sequence (supports len() and [i]) to iterate over
+                self.tokens = tokens
+                self.counter = 0  # index of the next token that __next__ will return
+
+            def __iter__(self):  # the object is its own iterator, so iter(obj) returns obj
+                return self
+
+            def __next__(self):  # return the token at `counter` and advance; StopIteration once past the end
+                if self.counter >= len(self.tokens):
+                    raise StopIteration()
+                value = self.tokens[self.counter]
+                self.counter += 1
+                return value
+
+            next = __next__  # alias: Python 2 spells the iterator method `next`
+
+            def restore_last_token(self):  # step back one slot so the token just returned is yielded again
+                self.counter -= 1  # NOTE(review): no lower-bound check; calling before any token is consumed makes this negative
+
+        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
          return _build_selector_function(parsed_selector)
  
      def _calc_headers(self, info_dict):
@@ -1129,7 +1179,7 @@ class YoutubeDL(object):
                  t.get('preference'), t.get('width'), t.get('height'),
                  t.get('id'), t.get('url')))
              for i, t in enumerate(thumbnails):
-                if 'width' in t and 'height' in t:
+                if t.get('width') and t.get('height'):
                      t['resolution'] = '%dx%d' % (t['width'], t['height'])
                  if t.get('id') is None:
                      t['id'] = '%d' % i
@@ -1223,7 +1273,8 @@ class YoutubeDL(object):
          if req_format is None:
              req_format_list = []
              if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    info_dict['extractor'] in ['youtube', 'ted']):
+                    info_dict['extractor'] in ['youtube', 'ted'] and
+                    not info_dict.get('is_live')):
                  merger = FFmpegMergerPP(self)
                  if merger.available and merger.can_merge():
                      req_format_list.append('bestvideo+bestaudio')