[YoutubeDL] Improve default format specification (closes #13704)
[youtube-dl] / youtube_dl / YoutubeDL.py
index 60ee4b7d811419281a6d95be3da03907fd76e167..367ae3533054bc360a82443ea59dc8bd9f9764c4 100755 (executable)
@@ -26,6 +26,8 @@ import tokenize
 import traceback
 import random
 
+from string import ascii_letters
+
 from .compat import (
     compat_basestring,
     compat_cookiejar,
@@ -674,7 +676,19 @@ class YoutubeDL(object):
                         FORMAT_RE.format(numeric_field),
                         r'%({0})s'.format(numeric_field), outtmpl)
 
-            filename = expand_path(outtmpl % template_dict)
+            # expand_path translates '%%' into '%' and '$$' into '$'
+            # correspondingly that is not what we want since we need to keep
+            # '%%' intact for template dict substitution step. Working around
+            # with boundary-alike separator hack.
+            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+            # outtmpl should be expand_path'ed before template dict substitution
+            # because meta fields may contain env variables we don't want to
+            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+            # title "Hello $PATH", we don't want `$PATH` to be expanded.
+            filename = expand_path(outtmpl).replace(sep, '') % template_dict
+
             # Temporary fix for #4787
             # 'Treat' all problem characters by passing filename through preferredencoding
             # to workaround encoding issues with subprocess on python2 @ Windows
@@ -846,7 +860,7 @@ class YoutubeDL(object):
 
             force_properties = dict(
                 (k, v) for k, v in ie_result.items() if v is not None)
-            for f in ('_type', 'url', 'ie_key'):
+            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                 if f in force_properties:
                     del force_properties[f]
             new_result = info.copy()
@@ -1050,6 +1064,25 @@ class YoutubeDL(object):
             return op(actual_value, comparison_value)
         return _filter
 
+    def _default_format_spec(self, info_dict, download=True):
+        req_format_list = []
+
+        def can_have_partial_formats():
+            if self.params.get('simulate', False):
+                return True
+            if not download:
+                return True
+            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+                return False
+            if info_dict.get('is_live'):
+                return False
+            merger = FFmpegMergerPP(self)
+            return merger.available and merger.can_merge()
+        if can_have_partial_formats():
+            req_format_list.append('bestvideo+bestaudio')
+        req_format_list.append('best')
+        return '/'.join(req_format_list)
+
     def build_format_selector(self, format_spec):
         def syntax_error(note, start):
             message = (
@@ -1520,14 +1553,10 @@ class YoutubeDL(object):
 
         req_format = self.params.get('format')
         if req_format is None:
-            req_format_list = []
-            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    not info_dict.get('is_live')):
-                merger = FFmpegMergerPP(self)
-                if merger.available and merger.can_merge():
-                    req_format_list.append('bestvideo+bestaudio')
-            req_format_list.append('best')
-            req_format = '/'.join(req_format_list)
+            req_format = self._default_format_spec(info_dict, download=download)
+            if self.params.get('verbose'):
+                self.to_stdout('[debug] Default format spec: %s' % req_format)
+
         format_selector = self.build_format_selector(req_format)
 
         # While in format selection we may need to have an access to the original