[safari] extract free and preview videos (#7491)

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 50425b8d7ea4855ddf1c67ce17df715b19cc0fc2..b3391088f9003c3a94daa2acd4e130c6f5223cd2 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -24,9 +24,6 @@ import time
  import tokenize
  import traceback
  
-if os.name == 'nt':
-    import ctypes
-
  from .compat import (
      compat_basestring,
      compat_cookiejar,
@@ -34,6 +31,7 @@ from .compat import (
      compat_get_terminal_size,
      compat_http_client,
      compat_kwargs,
+    compat_os_name,
      compat_str,
      compat_tokenize_tokenize,
      compat_urllib_error,
@@ -46,6 +44,7 @@ from .utils import (
      DateRange,
      DEFAULT_OUTTMPL,
      determine_ext,
+    determine_protocol,
      DownloadError,
      encode_compat_str,
      encodeFilename,
@@ -86,6 +85,7 @@ from .extractor import get_info_extractor, gen_extractors
  from .downloader import get_suitable_downloader
  from .downloader.rtmp import rtmpdump_version
  from .postprocessor import (
+    FFmpegFixupM3u8PP,
      FFmpegFixupM4aPP,
      FFmpegFixupStretchedPP,
      FFmpegMergerPP,
@@ -94,6 +94,9 @@ from .postprocessor import (
  )
  from .version import __version__
  
+if compat_os_name == 'nt':
+    import ctypes
+
  
  class YoutubeDL(object):
      """YoutubeDL class.
@@ -262,7 +265,7 @@ class YoutubeDL(object):
      the downloader (see youtube_dl/downloader/common.py):
      nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
      noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize, external_downloader_args.
+    xattr_set_filesize, external_downloader_args, hls_use_mpegts.
  
      The following options are used by the post processors:
      prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
@@ -449,7 +452,7 @@ class YoutubeDL(object):
      def to_console_title(self, message):
          if not self.params.get('consoletitle', False):
              return
-        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
              # c_wchar_p() might not be necessary if `message` is
              # already of type unicode()
              ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
@@ -520,7 +523,7 @@ class YoutubeDL(object):
          else:
              if self.params.get('no_warnings'):
                  return
-            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                  _msg_header = '\033[0;33mWARNING:\033[0m'
              else:
                  _msg_header = 'WARNING:'
@@ -532,7 +535,7 @@ class YoutubeDL(object):
          Do the same as trouble, but prefixes the message with 'ERROR:', colored
          in red if stderr is a tty file.
          '''
-        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
              _msg_header = '\033[0;31mERROR:\033[0m'
          else:
              _msg_header = 'ERROR:'
@@ -565,7 +568,7 @@ class YoutubeDL(object):
                  elif template_dict.get('height'):
                      template_dict['resolution'] = '%sp' % template_dict['height']
                  elif template_dict.get('width'):
-                    template_dict['resolution'] = '?x%d' % template_dict['width']
+                    template_dict['resolution'] = '%dx?' % template_dict['width']
  
              sanitize = lambda k, v: sanitize_filename(
                  compat_str(v),
@@ -604,12 +607,12 @@ class YoutubeDL(object):
              if rejecttitle:
                  if re.search(rejecttitle, title, re.IGNORECASE):
                      return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
-        date = info_dict.get('upload_date', None)
+        date = info_dict.get('upload_date')
          if date is not None:
              dateRange = self.params.get('daterange', DateRange())
              if date not in dateRange:
                  return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
-        view_count = info_dict.get('view_count', None)
+        view_count = info_dict.get('view_count')
          if view_count is not None:
              min_views = self.params.get('min_views')
              if min_views is not None and view_count < min_views:
@@ -706,7 +709,6 @@ class YoutubeDL(object):
          It will also download the videos if 'download'.
          Returns the resolved ie_result.
          """
-
          result_type = ie_result.get('_type', 'video')
  
          if result_type in ('url', 'url_transparent'):
@@ -735,7 +737,7 @@ class YoutubeDL(object):
  
              force_properties = dict(
                  (k, v) for k, v in ie_result.items() if v is not None)
-            for f in ('_type', 'url'):
+            for f in ('_type', 'url', 'ie_key'):
                  if f in force_properties:
                      del force_properties[f]
              new_result = info.copy()
@@ -747,18 +749,18 @@ class YoutubeDL(object):
                  new_result, download=download, extra_info=extra_info)
          elif result_type == 'playlist' or result_type == 'multi_video':
              # We process each entry in the playlist
-            playlist = ie_result.get('title', None) or ie_result.get('id', None)
+            playlist = ie_result.get('title') or ie_result.get('id')
              self.to_screen('[download] Downloading playlist: %s' % playlist)
  
              playlist_results = []
  
              playliststart = self.params.get('playliststart', 1) - 1
-            playlistend = self.params.get('playlistend', None)
+            playlistend = self.params.get('playlistend')
              # For backwards compatibility, interpret -1 as whole list
              if playlistend == -1:
                  playlistend = None
  
-            playlistitems_str = self.params.get('playlist_items', None)
+            playlistitems_str = self.params.get('playlist_items')
              playlistitems = None
              if playlistitems_str is not None:
                  def iter_playlistitems(format):
@@ -782,7 +784,7 @@ class YoutubeDL(object):
                      entries = ie_entries[playliststart:playlistend]
                  n_entries = len(entries)
                  self.to_screen(
-                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
+                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                      (ie_result['extractor'], playlist, n_all_entries, n_entries))
              elif isinstance(ie_entries, PagedList):
                  if playlistitems:
@@ -796,7 +798,7 @@ class YoutubeDL(object):
                          playliststart, playlistend)
                  n_entries = len(entries)
                  self.to_screen(
-                    "[%s] playlist %s: Downloading %d videos" %
+                    '[%s] playlist %s: Downloading %d videos' %
                      (ie_result['extractor'], playlist, n_entries))
              else:  # iterable
                  if playlistitems:
@@ -807,7 +809,7 @@ class YoutubeDL(object):
                          ie_entries, playliststart, playlistend))
                  n_entries = len(entries)
                  self.to_screen(
-                    "[%s] playlist %s: Downloading %d videos" %
+                    '[%s] playlist %s: Downloading %d videos' %
                      (ie_result['extractor'], playlist, n_entries))
  
              if self.params.get('playlistreverse', False):
@@ -898,11 +900,14 @@ class YoutubeDL(object):
              STR_OPERATORS = {
                  '=': operator.eq,
                  '!=': operator.ne,
+                '^=': lambda attr, value: attr.startswith(value),
+                '$=': lambda attr, value: attr.endswith(value),
+                '*=': lambda attr, value: value in attr,
              }
              str_operator_rex = re.compile(r'''(?x)
                  \s*(?P<key>ext|acodec|vcodec|container|protocol)
                  \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
-                \s*(?P<value>[a-zA-Z0-9_-]+)
+                \s*(?P<value>[a-zA-Z0-9._-]+)
                  \s*$
                  ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
              m = str_operator_rex.search(filter_spec)
@@ -1244,6 +1249,12 @@ class YoutubeDL(object):
              except (ValueError, OverflowError, OSError):
                  pass
  
+        # Auto-generate title fields for the *_number fields when the titles are
+        # missing, so titles are always clean. This gap is very common for TV series.
+        for field in ('chapter', 'season', 'episode'):
+            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
          subtitles = info_dict.get('subtitles')
          if subtitles:
              for _, subtitle in subtitles.items():
@@ -1279,6 +1290,9 @@ class YoutubeDL(object):
  
              if format.get('format_id') is None:
                  format['format_id'] = compat_str(i)
+            else:
+                # Sanitize format_id from characters used in format selector expression
+                format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
              format_id = format['format_id']
              if format_id not in formats_dict:
                  formats_dict[format_id] = []
@@ -1300,6 +1314,10 @@ class YoutubeDL(object):
              # Automatically determine file extension if missing
              if 'ext' not in format:
                  format['ext'] = determine_ext(format['url']).lower()
+            # Automatically determine protocol if missing (useful for format
+            # selection purposes)
+            if 'protocol' not in format:
+                format['protocol'] = determine_protocol(format)
              # Add HTTP headers, so that external programs can use them from the
              # json output
              full_format_info = info_dict.copy()
@@ -1312,7 +1330,7 @@ class YoutubeDL(object):
              # only set the 'formats' fields if the original info_dict list them
              # otherwise we end up with a circular reference, the first (and unique)
              # element in the 'formats' field in info_dict is info_dict itself,
-            # wich can't be exported to json
+            # which can't be exported to json
              info_dict['formats'] = formats
          if self.params.get('listformats'):
              self.list_formats(info_dict)
@@ -1325,7 +1343,6 @@ class YoutubeDL(object):
          if req_format is None:
              req_format_list = []
              if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    info_dict['extractor'] in ['youtube', 'ted'] and
                      not info_dict.get('is_live')):
                  merger = FFmpegMergerPP(self)
                  if merger.available and merger.can_merge():
@@ -1616,12 +1633,14 @@ class YoutubeDL(object):
                  self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                  return
  
-            if success:
+            if success and filename != '-':
                  # Fixup content
                  fixup_policy = self.params.get('fixup')
                  if fixup_policy is None:
                      fixup_policy = 'detect_or_warn'
  
+                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
                  stretched_ratio = info_dict.get('stretched_ratio')
                  if stretched_ratio is not None and stretched_ratio != 1:
                      if fixup_policy == 'warn':
@@ -1634,15 +1653,18 @@ class YoutubeDL(object):
                              info_dict['__postprocessors'].append(stretched_pp)
                          else:
                              self.report_warning(
-                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
-                                    info_dict['id'], stretched_ratio))
+                                '%s: Non-uniform pixel ratio (%s). %s'
+                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                      else:
                          assert fixup_policy in ('ignore', 'never')
  
-                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
+                if (info_dict.get('requested_formats') is None and
+                        info_dict.get('container') == 'm4a_dash'):
                      if fixup_policy == 'warn':
-                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
-                            info_dict['id']))
+                        self.report_warning(
+                            '%s: writing DASH m4a. '
+                            'Only some players support this container.'
+                            % info_dict['id'])
                      elif fixup_policy == 'detect_or_warn':
                          fixup_pp = FFmpegFixupM4aPP(self)
                          if fixup_pp.available:
@@ -1650,8 +1672,27 @@ class YoutubeDL(object):
                              info_dict['__postprocessors'].append(fixup_pp)
                          else:
                              self.report_warning(
-                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
-                                    info_dict['id']))
+                                '%s: writing DASH m4a. '
+                                'Only some players support this container. %s'
+                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+                    else:
+                        assert fixup_policy in ('ignore', 'never')
+
+                if (info_dict.get('protocol') == 'm3u8_native' or
+                        info_dict.get('protocol') == 'm3u8' and
+                        self.params.get('hls_prefer_native')):
+                    if fixup_policy == 'warn':
+                        self.report_warning('%s: malformated aac bitstream.' % (
+                            info_dict['id']))
+                    elif fixup_policy == 'detect_or_warn':
+                        fixup_pp = FFmpegFixupM3u8PP(self)
+                        if fixup_pp.available:
+                            info_dict.setdefault('__postprocessors', [])
+                            info_dict['__postprocessors'].append(fixup_pp)
+                        else:
+                            self.report_warning(
+                                '%s: malformated aac bitstream. %s'
+                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                      else:
                          assert fixup_policy in ('ignore', 'never')
  
@@ -1782,7 +1823,7 @@ class YoutubeDL(object):
              else:
                  res = '%sp' % format['height']
          elif format.get('width') is not None:
-            res = '?x%d' % format['width']
+            res = '%dx?' % format['width']
          else:
              res = default
          return res
@@ -1791,6 +1832,10 @@ class YoutubeDL(object):
          res = ''
          if fdict.get('ext') in ['f4f', 'f4m']:
              res += '(unsupported) '
+        if fdict.get('language'):
+            if res:
+                res += ' '
+            res += '[%s]' % fdict['language']
          if fdict.get('format_note') is not None:
              res += fdict['format_note'] + ' '
          if fdict.get('tbr') is not None:
@@ -1811,7 +1856,9 @@ class YoutubeDL(object):
          if fdict.get('vbr') is not None:
              res += '%4dk' % fdict['vbr']
          if fdict.get('fps') is not None:
-            res += ', %sfps' % fdict['fps']
+            if res:
+                res += ', '
+            res += '%sfps' % fdict['fps']
          if fdict.get('acodec') is not None:
              if res:
                  res += ', '
@@ -1982,8 +2029,19 @@ class YoutubeDL(object):
          https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
          ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
          data_handler = compat_urllib_request_DataHandler()
+
+        # Passing our own FileHandler instance stops build_opener from adding
+        # the default FileHandler, which lets us disable the file protocol;
+        # that protocol can be used for malicious purposes (see
+        # https://github.com/rg3/youtube-dl/issues/8227)
+        file_handler = compat_urllib_request.FileHandler()
+
+        def file_open(*args, **kwargs):
+            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
+        file_handler.file_open = file_open
+
          opener = compat_urllib_request.build_opener(
-            proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
+            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
  
          # Delete the default user-agent header, which would otherwise apply in
          # cases where our custom HTTP handler doesn't come into play