[YoutubeDL] format spec: Fix handling of '+' with '/'
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Tue, 30 Jun 2015 17:45:42 +0000 (19:45 +0200)
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Tue, 30 Jun 2015 17:50:17 +0000 (19:50 +0200)
'bestvideo+bestaudio/best' was incorrectly interpreted as 'bestvideo+(bestaudio/best)', so it would fail if 'bestaudio' doesn't exist instead of falling back to 'best'.

test/test_YoutubeDL.py
youtube_dl/YoutubeDL.py

index 6f374d7ea7b1fbe018f4c0cfcbc7392b67696da6..1e4aaa559422453df745d92a8741e29d95a3837b 100644 (file)
@@ -245,6 +245,14 @@ class TestFormatSelection(unittest.TestCase):
         self.assertEqual(downloaded['format_id'], '137+141')
         self.assertEqual(downloaded['ext'], 'mp4')
 
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], '38')
+
         info_dict = _make_result(list(formats_order), extractor='youtube')
         ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
         yie = YoutubeIE(ydl)
index e5b46f87ec0ca31ca432ce1e2e43906af65ab321..5deb4848e8c9fc29245782a9f4dde55cd6e16133 100755 (executable)
@@ -931,7 +931,7 @@ class YoutubeDL(object):
                 else:
                     filter_parts.append(string)
 
-        def _parse_format_selection(tokens, endwith=[]):
+        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
             selectors = []
             current_selector = None
             for type, string, start, _, _ in tokens:
@@ -941,18 +941,23 @@ class YoutubeDL(object):
                 elif type in [tokenize.NAME, tokenize.NUMBER]:
                     current_selector = FormatSelector(SINGLE, string, [])
                 elif type == tokenize.OP:
-                    if string in endwith:
+                    if string == ')':
+                        if not inside_group:
+                            # ')' will be handled by the parentheses group
+                            tokens.restore_last_token()
                         break
-                    elif string == ')':
-                        # ')' will be handled by the parentheses group
+                    elif inside_merge and string in ['/', ',']:
                         tokens.restore_last_token()
                         break
-                    if string == ',':
+                    elif inside_choice and string == ',':
+                        tokens.restore_last_token()
+                        break
+                    elif string == ',':
                         selectors.append(current_selector)
                         current_selector = None
                     elif string == '/':
                         first_choice = current_selector
-                        second_choice = _parse_format_selection(tokens, [','])
+                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                         current_selector = None
                         selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), []))
                     elif string == '[':
@@ -963,12 +968,12 @@ class YoutubeDL(object):
                     elif string == '(':
                         if current_selector:
                             raise syntax_error('Unexpected "("', start)
-                        current_selector = FormatSelector(GROUP, _parse_format_selection(tokens, [')']), [])
+                        group = _parse_format_selection(tokens, inside_group=True)
+                        current_selector = FormatSelector(GROUP, group, [])
                     elif string == '+':
                         video_selector = current_selector
-                        audio_selector = _parse_format_selection(tokens, [','])
-                        current_selector = None
-                        selectors.append(FormatSelector(MERGE, (video_selector, audio_selector), []))
+                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
+                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                     else:
                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                 elif type == tokenize.ENDMARKER: