[YoutubeDL] Do not override ie_key in url_transparent

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 6afc1b73070cd410992733a59c0cd8bc9b9bff0e..50228bb321d3b4076bdff438624b5136da2f6e87 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -46,6 +46,7 @@ from .utils import (
      DateRange,
      DEFAULT_OUTTMPL,
      determine_ext,
+    determine_protocol,
      DownloadError,
      encode_compat_str,
      encodeFilename,
@@ -241,7 +242,7 @@ class YoutubeDL(object):
                         - "detect_or_warn": check whether we can do anything
                                             about it, warn otherwise (default)
      source_address:    (Experimental) Client-side IP address to bind to.
-    call_home:         Boolean, true if we are allowed to contact the
+    call_home:         Boolean, true iff we are allowed to contact the
                         youtube-dl servers for debugging.
      sleep_interval:    Number of seconds to sleep before each download.
      listformats:       Print an overview of available video formats and exit.
@@ -262,7 +263,7 @@ class YoutubeDL(object):
      the downloader (see youtube_dl/downloader/common.py):
      nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
      noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize, external_downloader_args.
+    xattr_set_filesize, external_downloader_args, hls_use_mpegts.
  
      The following options are used by the post processors:
      prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
@@ -590,7 +591,7 @@ class YoutubeDL(object):
              return None
  
      def _match_entry(self, info_dict, incomplete):
-        """ Returns None if the file should be downloaded """
+        """ Returns None iff the file should be downloaded """
  
          video_title = info_dict.get('title', info_dict.get('id', 'video'))
          if 'title' in info_dict:
@@ -706,7 +707,6 @@ class YoutubeDL(object):
          It will also download the videos if 'download'.
          Returns the resolved ie_result.
          """
-
          result_type = ie_result.get('_type', 'video')
  
          if result_type in ('url', 'url_transparent'):
@@ -735,7 +735,7 @@ class YoutubeDL(object):
  
              force_properties = dict(
                  (k, v) for k, v in ie_result.items() if v is not None)
-            for f in ('_type', 'url'):
+            for f in ('_type', 'url', 'ie_key'):
                  if f in force_properties:
                      del force_properties[f]
              new_result = info.copy()
@@ -898,11 +898,14 @@ class YoutubeDL(object):
              STR_OPERATORS = {
                  '=': operator.eq,
                  '!=': operator.ne,
+                '^=': lambda attr, value: attr.startswith(value),
+                '$=': lambda attr, value: attr.endswith(value),
+                '*=': lambda attr, value: value in attr,
              }
              str_operator_rex = re.compile(r'''(?x)
                  \s*(?P<key>ext|acodec|vcodec|container|protocol)
                  \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
-                \s*(?P<value>[a-zA-Z0-9_-]+)
+                \s*(?P<value>[a-zA-Z0-9._-]+)
                  \s*$
                  ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
              m = str_operator_rex.search(filter_spec)
@@ -1244,6 +1247,12 @@ class YoutubeDL(object):
              except (ValueError, OverflowError, OSError):
                  pass
  
+        # Auto generate title fields corresponding to the *_number fields when missing
+        # in order to always have clean titles. This is very common for TV series.
+        for field in ('chapter', 'season', 'episode'):
+            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
          subtitles = info_dict.get('subtitles')
          if subtitles:
              for _, subtitle in subtitles.items():
@@ -1300,6 +1309,10 @@ class YoutubeDL(object):
              # Automatically determine file extension if missing
              if 'ext' not in format:
                  format['ext'] = determine_ext(format['url']).lower()
+            # Automatically determine protocol if missing (useful for format
+            # selection purposes)
+            if 'protocol' not in format:
+                format['protocol'] = determine_protocol(format)
              # Add HTTP headers, so that external programs can use them from the
              # json output
              full_format_info = info_dict.copy()
@@ -1986,8 +1999,19 @@ class YoutubeDL(object):
          https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
          ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
          data_handler = compat_urllib_request_DataHandler()
+
+        # When passing our own FileHandler instance, build_opener won't add the
+        # default FileHandler and allows us to disable the file protocol, which
+        # can be used for malicious purposes (see
+        # https://github.com/rg3/youtube-dl/issues/8227)
+        file_handler = compat_urllib_request.FileHandler()
+
+        def file_open(*args, **kwargs):
+            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
+        file_handler.file_open = file_open
+
          opener = compat_urllib_request.build_opener(
-            proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
+            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
  
          # Delete the default user-agent header, which would otherwise apply in
          # cases where our custom HTTP handler doesn't come into play