[core] Decode environment variables with filesystem encoding (Fixes #3854, Fixes...

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index e0cb1ef75fd5b4fb86638f3f6a872d1e1625b1f0..34a1e3b5c3fcc334b634c276a0e181f5d2cfb7ff 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -24,10 +24,12 @@ if os.name == 'nt':
  
  from .utils import (
      compat_cookiejar,
+    compat_expanduser,
      compat_http_client,
      compat_str,
      compat_urllib_error,
      compat_urllib_request,
+    escape_url,
      ContentTooShortError,
      date_from_str,
      DateRange,
@@ -57,6 +59,7 @@ from .utils import (
      YoutubeDLHandler,
      prepend_extension,
  )
+from .cache import Cache
  from .extractor import get_info_extractor, gen_extractors
  from .downloader import get_suitable_downloader
  from .postprocessor import FFmpegMergerPP
@@ -133,7 +136,7 @@ class YoutubeDL(object):
      daterange:         A DateRange object, download only if the upload_date is in the range.
      skip_download:     Skip the actual download of the video file
      cachedir:          Location of the cache files in the filesystem.
-                       None to disable filesystem cache.
+                       False to disable filesystem cache.
      noplaylist:        Download single video instead of a playlist if in doubt.
      age_limit:         An integer representing the user's age in years.
                         Unsuitable videos for the given age are skipped.
@@ -195,6 +198,7 @@ class YoutubeDL(object):
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
          self._err_file = sys.stderr
          self.params = params
+        self.cache = Cache(self)
  
          if params.get('bidi_workaround', False):
              try:
@@ -425,7 +429,7 @@ class YoutubeDL(object):
              autonumber_templ = '%0' + str(autonumber_size) + 'd'
              template_dict['autonumber'] = autonumber_templ % self._num_downloads
              if template_dict.get('playlist_index') is not None:
-                template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
+                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
              if template_dict.get('resolution') is None:
                  if template_dict.get('width') and template_dict.get('height'):
                      template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
@@ -444,7 +448,7 @@ class YoutubeDL(object):
              template_dict = collections.defaultdict(lambda: 'NA', template_dict)
  
              outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-            tmpl = os.path.expanduser(outtmpl)
+            tmpl = compat_expanduser(outtmpl)
              filename = tmpl % template_dict
              return filename
          except ValueError as err:
@@ -637,6 +641,7 @@ class YoutubeDL(object):
              for i, entry in enumerate(entries, 1):
                  self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                  extra = {
+                    'n_entries': n_entries,
                      'playlist': playlist,
                      'playlist_index': i + playliststart,
                      'extractor': ie_result['extractor'],
@@ -704,7 +709,7 @@ class YoutubeDL(object):
              if video_formats:
                  return video_formats[0]
          else:
-            extensions = ['mp4', 'flv', 'webm', '3gp']
+            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
              if format_spec in extensions:
                  filter_f = lambda f: f['ext'] == format_spec
              else:
@@ -805,28 +810,29 @@ class YoutubeDL(object):
          if req_format in ('-1', 'all'):
              formats_to_download = formats
          else:
-            # We can accept formats requested in the format: 34/5/best, we pick
-            # the first that is available, starting from left
-            req_formats = req_format.split('/')
-            for rf in req_formats:
-                if re.match(r'.+?\+.+?', rf) is not None:
-                    # Two formats have been requested like '137+139'
-                    format_1, format_2 = rf.split('+')
-                    formats_info = (self.select_format(format_1, formats),
-                        self.select_format(format_2, formats))
-                    if all(formats_info):
-                        selected_format = {
-                            'requested_formats': formats_info,
-                            'format': rf,
-                            'ext': formats_info[0]['ext'],
-                        }
+            for rfstr in req_format.split(','):
+                # We can accept formats requested in the format: 34/5/best, we pick
+                # the first that is available, starting from left
+                req_formats = rfstr.split('/')
+                for rf in req_formats:
+                    if re.match(r'.+?\+.+?', rf) is not None:
+                        # Two formats have been requested like '137+139'
+                        format_1, format_2 = rf.split('+')
+                        formats_info = (self.select_format(format_1, formats),
+                            self.select_format(format_2, formats))
+                        if all(formats_info):
+                            selected_format = {
+                                'requested_formats': formats_info,
+                                'format': rf,
+                                'ext': formats_info[0]['ext'],
+                            }
+                        else:
+                            selected_format = None
                      else:
-                        selected_format = None
-                else:
-                    selected_format = self.select_format(rf, formats)
-                if selected_format is not None:
-                    formats_to_download = [selected_format]
-                    break
+                        selected_format = self.select_format(rf, formats)
+                    if selected_format is not None:
+                        formats_to_download.append(selected_format)
+                        break
          if not formats_to_download:
              raise ExtractorError('requested format not available',
                                   expected=True)
@@ -1238,6 +1244,26 @@ class YoutubeDL(object):
  
      def urlopen(self, req):
          """ Start an HTTP download """
+
+        # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
+        # always respected by websites: some give out URLs with non-ASCII characters that are
+        # not percent-encoded (see telemb.py, ard.py [#3412]).
+        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991).
+        # To work around the aforementioned issue, we replace the request's original URL with
+        # a percent-encoded one.
+        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+        url = req if req_is_string else req.get_full_url()
+        url_escaped = escape_url(url)
+
+        # Substitute URL if any change after escaping
+        if url != url_escaped:
+            if req_is_string:
+                req = url_escaped
+            else:
+                req = compat_urllib_request.Request(
+                    url_escaped, data=req.data, headers=req.headers,
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+
          return self._opener.open(req, timeout=self._socket_timeout)
  
      def print_debug_header(self):