+ # Node kinds stored in FormatSelector.type by the format-selection parser:
+ #   PICKFIRST - built for 'a/b'; selector is a (first_choice, second_choice) pair
+ #   MERGE     - built for 'a+b'; selector is a (video_selector, audio_selector) pair
+ #   SINGLE    - built for a NAME/NUMBER token; selector is that token's string
+ #   GROUP     - built for '( ... )'; selector is the result of parsing the inner tokens
PICKFIRST = 'PICKFIRST'
MERGE = 'MERGE'
SINGLE = 'SINGLE'
+ GROUP = 'GROUP'
+ # One node of the parsed selector tree. `filters` is a list; the parser appends
+ # the result of _parse_filter() to it each time it meets a '[' operator.
FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
def _parse_filter(tokens):
else:
filter_parts.append(string)
- def _parse_format_selection(tokens, endwith=[]):
+ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
selectors = []
current_selector = None
for type, string, start, _, _ in tokens:
elif type in [tokenize.NAME, tokenize.NUMBER]:
current_selector = FormatSelector(SINGLE, string, [])
elif type == tokenize.OP:
- if string in endwith:
+ if string == ')':
+ if not inside_group:
+ # ')' will be handled by the parentheses group
+ tokens.restore_last_token()
break
- if string == ',':
+ elif inside_merge and string in ['/', ',']:
+ tokens.restore_last_token()
+ break
+ elif inside_choice and string == ',':
+ tokens.restore_last_token()
+ break
+ elif string == ',':
+ if not current_selector:
+ raise syntax_error('"," must follow a format selector', start)
selectors.append(current_selector)
current_selector = None
elif string == '/':
first_choice = current_selector
- second_choice = _parse_format_selection(tokens, [','])
- current_selector = None
- selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), []))
+ second_choice = _parse_format_selection(tokens, inside_choice=True)
+ current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
elif string == '[':
if not current_selector:
current_selector = FormatSelector(SINGLE, 'best', [])
format_filter = _parse_filter(tokens)
current_selector.filters.append(format_filter)
+ elif string == '(':
+ if current_selector:
+ raise syntax_error('Unexpected "("', start)
+ group = _parse_format_selection(tokens, inside_group=True)
+ current_selector = FormatSelector(GROUP, group, [])
elif string == '+':
video_selector = current_selector
- audio_selector = _parse_format_selection(tokens, [','])
- current_selector = None
- selectors.append(FormatSelector(MERGE, (video_selector, audio_selector), []))
+ audio_selector = _parse_format_selection(tokens, inside_merge=True)
+ if not video_selector or not audio_selector:
+ raise syntax_error('"+" must be between two format selectors', start)
+ current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
else:
raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
elif type == tokenize.ENDMARKER:
for format in f(formats):
yield format
return selector_function
+ elif selector.type == GROUP:
+ selector_function = _build_selector_function(selector.selector)
elif selector.type == PICKFIRST:
fs = [_build_selector_function(s) for s in selector.selector]
format_spec = selector.selector
def selector_function(formats):
- if format_spec in ['best', 'worst', None]:
+ formats = list(formats)
+ if not formats:
+ return
+ if format_spec == 'all':
+ for f in formats:
+ yield f
+ elif format_spec in ['best', 'worst', None]:
format_idx = 0 if format_spec == 'worst' else -1
audiovideo_formats = [
f for f in formats
return final_selector
stream = io.BytesIO(format_spec.encode('utf-8'))
- tokens = compat_tokenize_tokenize(stream.readline)
- parsed_selector = _parse_format_selection(tokens)
+ try:
+ tokens = list(compat_tokenize_tokenize(stream.readline))
+ except tokenize.TokenError:
+ raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
+ # Iterator over an already-materialised token list that supports a one-step
+ # "un-read". _parse_format_selection() calls restore_last_token() before it
+ # breaks on ')', '/' or ',' so that the enclosing parser call receives that
+ # same token again on its next iteration.
+ class TokenIterator(object):
+ # tokens: an indexable sequence (the caller passes a list of tokenize tokens).
+ def __init__(self, tokens):
+ self.tokens = tokens
+ # Index of the next token to hand out.
+ self.counter = 0
+
+ # The object is its own iterator, so position is shared by every loop over it.
+ def __iter__(self):
+ return self
+
+ # Return the token at the current position and advance; stop at the end of the list.
+ def __next__(self):
+ if self.counter >= len(self.tokens):
+ raise StopIteration()
+ value = self.tokens[self.counter]
+ self.counter += 1
+ return value
+
+ # Python 2 spells the iterator-protocol method next(); alias it to __next__.
+ next = __next__
+
+ # Step back one position so the most recently returned token is returned again.
+ # NOTE(review): there is no lower bound; if this is called before any token
+ # has been consumed, counter becomes negative and __next__ would index from
+ # the end of the list. The visible callers only call it after reading a token.
+ def restore_last_token(self):
+ self.counter -= 1
+
+ parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
req_format_list.append('bestvideo+bestaudio')
req_format_list.append('best')
req_format = '/'.join(req_format_list)
- formats_to_download = []
- if req_format == 'all':
- formats_to_download = formats
- else:
- format_selector = self.build_format_selector(req_format)
- formats_to_download = list(format_selector(formats))
+ format_selector = self.build_format_selector(req_format)
+ formats_to_download = list(format_selector(formats))
if not formats_to_download:
raise ExtractorError('requested format not available',
expected=True)