Merge pull request #7320 from remitamine/adobetv

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index b7013a6aaef400633d45ac33c4a7fcb949802be7..d0606b4bcd3d4706912f753441608dff721d7699 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -373,6 +373,13 @@ def sanitize_path(s):
      return os.path.join(*sanitized_path)
  
  
+# Prepend the `http:` scheme to protocol-less URLs in order to reduce the number of
+# unwanted failures due to a missing protocol
+def sanitized_Request(url, *args, **kwargs):
+    return compat_urllib_request.Request(
+        'http:%s' % url if url.startswith('//') else url, *args, **kwargs)
+
+
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
@@ -403,7 +410,7 @@ def _htmlentity_transform(entity):
              pass
  
      # Unknown entity in name, return its literal representation
-    return ('&%s;' % entity)
+    return '&%s;' % entity
  
  
  def unescapeHTML(s):
@@ -656,6 +663,16 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
      return hc
  
  
+def handle_youtubedl_headers(headers):
+    filtered_headers = headers
+
+    if 'Youtubedl-no-compression' in filtered_headers:
+        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
+        del filtered_headers['Youtubedl-no-compression']
+
+    return filtered_headers
+
+
  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      """Handler for HTTP requests and responses.
  
@@ -663,7 +680,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      the standard headers to every HTTP request and handles gzipped and
      deflated responses from web servers. If compression is to be avoided in
      a particular request, the original request in the program code only has
-    to include the HTTP header "Youtubedl-No-Compression", which will be
+    to include the HTTP header "Youtubedl-no-compression", which will be
      removed before making the real request.
  
      Part of this code was copied from:
@@ -724,10 +741,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
              # The dict keys are capitalized because of this bug by urllib
              if h.capitalize() not in req.headers:
                  req.add_header(h, v)
-        if 'Youtubedl-no-compression' in req.headers:
-            if 'Accept-encoding' in req.headers:
-                del req.headers['Accept-encoding']
-            del req.headers['Youtubedl-no-compression']
+
+        req.headers = handle_youtubedl_headers(req.headers)
  
          if sys.version_info < (2, 7) and '#' in req.get_full_url():
              # Python 2.6 is brain-dead when it comes to fragments
@@ -925,6 +940,21 @@ def determine_ext(url, default_ext='unknown_video'):
      guess = url.partition('?')[0].rpartition('.')[2]
      if re.match(r'^[A-Za-z0-9]+$', guess):
          return guess
+    elif guess.rstrip('/') in (
+            'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
+            'flv', 'f4v', 'f4a', 'f4b',
+            'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
+            'mkv', 'mka', 'mk3d',
+            'avi', 'divx',
+            'mov',
+            'asf', 'wmv', 'wma',
+            '3gp', '3g2',
+            'mp3',
+            'flac',
+            'ape',
+            'wav',
+            'f4f', 'f4m', 'm3u8', 'smil'):
+        return guess.rstrip('/')
      else:
          return default_ext
  
@@ -1668,7 +1698,9 @@ def urlencode_postdata(*args, **kargs):
  
  
  def encode_dict(d, encoding='utf-8'):
-    return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items())
+    def encode(v):
+        return v.encode(encoding) if isinstance(v, compat_basestring) else v
+    return dict((encode(k), encode(v)) for k, v in d.items())
  
  
  US_RATINGS = {