[extractor/common] Properly escape % in MPD templates (closes #16867)
author Sergey M․ <dstftw@gmail.com>
Sat, 30 Jun 2018 19:00:16 +0000 (02:00 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sat, 30 Jun 2018 19:11:36 +0000 (02:11 +0700)
youtube_dl/extractor/common.py

index f3fec160dcd0795840725227edcbfbfe3f3673c3..78f053f1829406e721bded4680c22d372085d709 100644 (file)
@@ -2106,7 +2106,21 @@ class InfoExtractor(object):
                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
 
                         def prepare_template(template_name, identifiers):
-                            t = representation_ms_info[template_name]
+                            tmpl = representation_ms_info[template_name]
+                            # First off, % characters outside $...$ templates
+                            # must be escaped by doubling for proper processing
+                            # by % operator string formatting used further (see
+                            # https://github.com/rg3/youtube-dl/issues/16867).
+                            t = ''
+                            in_template = False
+                            for c in tmpl:
+                                t += c
+                                if c == '$':
+                                    in_template = not in_template
+                                elif c == '%' and not in_template:
+                                    t += c
+                            # Next, $...$ templates are translated to their
+                            # %(...) counterparts to be used with % operator
                             t = t.replace('$RepresentationID$', representation_id)
                             t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
                             t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)