Merge pull request #14225 from Tithen-Firion/openload-phantomjs-method

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index b3a6d4d3b080e2899d1dbb9868340bac39c368a7..bfb4ff225f2376c09b2e595276244d887a5e879c 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -26,6 +26,8 @@ import tokenize
  import traceback
  import random
  
+from string import ascii_letters
+
  from .compat import (
      compat_basestring,
      compat_cookiejar,
@@ -87,6 +89,7 @@ from .utils import (
      write_string,
      YoutubeDLCookieProcessor,
      YoutubeDLHandler,
+    PhantomJSwrapper,
  )
  from .cache import Cache
  from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -674,7 +677,19 @@ class YoutubeDL(object):
                          FORMAT_RE.format(numeric_field),
                          r'%({0})s'.format(numeric_field), outtmpl)
  
-            filename = expand_path(outtmpl % template_dict)
+            # expand_path translates '%%' into '%' and '$$' into '$'
+            # correspondingly that is not what we want since we need to keep
+            # '%%' intact for template dict substitution step. Working around
+            # with boundary-alike separator hack.
+            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+            # outtmpl should be expand_path'ed before template dict substitution
+            # because meta fields may contain env variables we don't want to
+            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+            # title "Hello $PATH", we don't want `$PATH` to be expanded.
+            filename = expand_path(outtmpl).replace(sep, '') % template_dict
+
              # Temporary fix for #4787
              # 'Treat' all problem characters by passing filename through preferredencoding
              # to workaround encoding issues with subprocess on python2 @ Windows
@@ -846,7 +861,7 @@ class YoutubeDL(object):
  
              force_properties = dict(
                  (k, v) for k, v in ie_result.items() if v is not None)
-            for f in ('_type', 'url', 'ie_key'):
+            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                  if f in force_properties:
                      del force_properties[f]
              new_result = info.copy()
@@ -1050,6 +1065,25 @@ class YoutubeDL(object):
              return op(actual_value, comparison_value)
          return _filter
  
+    def _default_format_spec(self, info_dict, download=True):
+        req_format_list = []
+
+        def can_have_partial_formats():
+            if self.params.get('simulate', False):
+                return True
+            if not download:
+                return True
+            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+                return False
+            if info_dict.get('is_live'):
+                return False
+            merger = FFmpegMergerPP(self)
+            return merger.available and merger.can_merge()
+        if can_have_partial_formats():
+            req_format_list.append('bestvideo+bestaudio')
+        req_format_list.append('best')
+        return '/'.join(req_format_list)
+
      def build_format_selector(self, format_spec):
          def syntax_error(note, start):
              message = (
@@ -1450,12 +1484,14 @@ class YoutubeDL(object):
  
          def is_wellformed(f):
              url = f.get('url')
-            valid_url = url and isinstance(url, compat_str)
-            if not valid_url:
+            if not url:
                  self.report_warning(
                      '"url" field is missing or empty - skipping format, '
                      'there is an error in extractor')
-            return valid_url
+                return False
+            if isinstance(url, bytes):
+                sanitize_string_field(f, 'url')
+            return True
  
          # Filter out malformed formats for better extraction robustness
          formats = list(filter(is_wellformed, formats))
@@ -1467,7 +1503,7 @@ class YoutubeDL(object):
              sanitize_string_field(format, 'format_id')
              sanitize_numeric_fields(format)
              format['url'] = sanitize_url(format['url'])
-            if format.get('format_id') is None:
+            if not format.get('format_id'):
                  format['format_id'] = compat_str(i)
              else:
                  # Sanitize format_id from characters used in format selector expression
@@ -1520,14 +1556,10 @@ class YoutubeDL(object):
  
          req_format = self.params.get('format')
          if req_format is None:
-            req_format_list = []
-            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    not info_dict.get('is_live')):
-                merger = FFmpegMergerPP(self)
-                if merger.available and merger.can_merge():
-                    req_format_list.append('bestvideo+bestaudio')
-            req_format_list.append('best')
-            req_format = '/'.join(req_format_list)
+            req_format = self._default_format_spec(info_dict, download=download)
+            if self.params.get('verbose'):
+                self.to_stdout('[debug] Default format spec: %s' % req_format)
+
          format_selector = self.build_format_selector(req_format)
  
          # While in format selection we may need to have an access to the original
@@ -1679,12 +1711,17 @@ class YoutubeDL(object):
          if filename is None:
              return
  
-        try:
-            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
-            if dn and not os.path.exists(dn):
-                os.makedirs(dn)
-        except (OSError, IOError) as err:
-            self.report_error('unable to create directory ' + error_to_compat_str(err))
+        def ensure_dir_exists(path):
+            try:
+                dn = os.path.dirname(path)
+                if dn and not os.path.exists(dn):
+                    os.makedirs(dn)
+                return True
+            except (OSError, IOError) as err:
+                self.report_error('unable to create directory ' + error_to_compat_str(err))
+                return False
+
+        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
              return
  
          if self.params.get('writedescription', False):
@@ -1727,29 +1764,30 @@ class YoutubeDL(object):
              ie = self.get_info_extractor(info_dict['extractor_key'])
              for sub_lang, sub_info in subtitles.items():
                  sub_format = sub_info['ext']
-                if sub_info.get('data') is not None:
-                    sub_data = sub_info['data']
+                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                  else:
-                    try:
-                        sub_data = ie._download_webpage(
-                            sub_info['url'], info_dict['id'], note=False)
-                    except ExtractorError as err:
-                        self.report_warning('Unable to download subtitle for "%s": %s' %
-                                            (sub_lang, error_to_compat_str(err.cause)))
-                        continue
-                try:
-                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
-                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
-                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
+                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+                    if sub_info.get('data') is not None:
+                        try:
+                            # Use newline='' to prevent conversion of newline characters
+                            # See https://github.com/rg3/youtube-dl/issues/10268
+                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+                                subfile.write(sub_info['data'])
+                        except (OSError, IOError):
+                            self.report_error('Cannot write subtitles file ' + sub_filename)
+                            return
                      else:
-                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
-                        # Use newline='' to prevent conversion of newline characters
-                        # See https://github.com/rg3/youtube-dl/issues/10268
-                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
-                            subfile.write(sub_data)
-                except (OSError, IOError):
-                    self.report_error('Cannot write subtitles file ' + sub_filename)
-                    return
+                        try:
+                            sub_data = ie._request_webpage(
+                                sub_info['url'], info_dict['id'], note=False).read()
+                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                                subfile.write(sub_data)
+                        except (ExtractorError, IOError, OSError, ValueError) as err:
+                            self.report_warning('Unable to download subtitle for "%s": %s' %
+                                                (sub_lang, error_to_compat_str(err)))
+                            continue
  
          if self.params.get('writeinfojson', False):
              infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
@@ -1822,8 +1860,11 @@ class YoutubeDL(object):
                          for f in requested_formats:
                              new_info = dict(info_dict)
                              new_info.update(f)
-                            fname = self.prepare_filename(new_info)
-                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+                            fname = prepend_extension(
+                                self.prepare_filename(new_info),
+                                'f%s' % f['format_id'], new_info['ext'])
+                            if not ensure_dir_exists(fname):
+                                return
                              downloaded.append(fname)
                              partial_success = dl(fname, new_info)
                              success = success and partial_success
@@ -1890,7 +1931,7 @@ class YoutubeDL(object):
                          info_dict.get('protocol') == 'm3u8' and
                          self.params.get('hls_prefer_native')):
                      if fixup_policy == 'warn':
-                        self.report_warning('%s: malformated aac bitstream.' % (
+                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                              info_dict['id']))
                      elif fixup_policy == 'detect_or_warn':
                          fixup_pp = FFmpegFixupM3u8PP(self)
@@ -1899,7 +1940,7 @@ class YoutubeDL(object):
                              info_dict['__postprocessors'].append(fixup_pp)
                          else:
                              self.report_warning(
-                                '%s: malformated aac bitstream. %s'
+                                '%s: malformed AAC bitstream detected. %s'
                                  % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                      else:
                          assert fixup_policy in ('ignore', 'never')
@@ -2177,6 +2218,7 @@ class YoutubeDL(object):
  
          exe_versions = FFmpegPostProcessor.get_versions(self)
          exe_versions['rtmpdump'] = rtmpdump_version()
+        exe_versions['phantomjs'] = PhantomJSwrapper._version()
          exe_str = ', '.join(
              '%s %s' % (exe, v)
              for exe, v in sorted(exe_versions.items())