Fix typos

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index c608ff91a91636bc40b4fa1c99c013aadcc80820..d50b7cfed3c537a02fd53a9dc46f4e0981b6608b 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -38,20 +38,21 @@ from .compat import (
      compat_tokenize_tokenize,
      compat_urllib_error,
      compat_urllib_request,
+    compat_urllib_request_DataHandler,
  )
  from .utils import (
-    escape_url,
      ContentTooShortError,
      date_from_str,
      DateRange,
      DEFAULT_OUTTMPL,
      determine_ext,
      DownloadError,
+    encode_compat_str,
      encodeFilename,
+    error_to_compat_str,
      ExtractorError,
      format_bytes,
      formatSeconds,
-    HEADRequest,
      locked_file,
      make_HTTPS_handler,
      MaxDownloadsReached,
@@ -65,6 +66,7 @@ from .utils import (
      SameFileError,
      sanitize_filename,
      sanitize_path,
+    sanitized_Request,
      std_headers,
      subtitles_filename,
      UnavailableVideoError,
@@ -72,6 +74,7 @@ from .utils import (
      version_tuple,
      write_json_file,
      write_string,
+    YoutubeDLCookieProcessor,
      YoutubeDLHandler,
      prepend_extension,
      replace_extension,
@@ -157,7 +160,7 @@ class YoutubeDL(object):
      writethumbnail:    Write the thumbnail image to a file
      write_all_thumbnails:  Write all thumbnail formats to files
      writesubtitles:    Write the video subtitles to a file
-    writeautomaticsub: Write the automatic subtitles to a file
+    writeautomaticsub: Write the automatically generated subtitles to a file
      allsubtitles:      Downloads all the subtitles of the video
                         (requires writesubtitles or writeautomaticsub)
      listsubtitles:     Lists all available subtitles for the video
@@ -287,7 +290,11 @@ class YoutubeDL(object):
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
          self._err_file = sys.stderr
-        self.params = params
+        self.params = {
+            # Default parameters
+            'nocheckcertificate': False,
+        }
+        self.params.update(params)
          self.cache = Cache(self)
  
          if params.get('bidi_workaround', False):
@@ -490,7 +497,7 @@ class YoutubeDL(object):
                      tb = ''
                      if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                          tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
-                    tb += compat_str(traceback.format_exc())
+                    tb += encode_compat_str(traceback.format_exc())
                  else:
                      tb_data = traceback.format_list(traceback.extract_stack())
                      tb = ''.join(tb_data)
@@ -569,7 +576,7 @@ class YoutubeDL(object):
                                   if v is not None)
              template_dict = collections.defaultdict(lambda: 'NA', template_dict)
  
-            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
+            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
              tmpl = compat_expanduser(outtmpl)
              filename = tmpl % template_dict
              # Temporary fix for #4787
@@ -577,7 +584,7 @@ class YoutubeDL(object):
              # to workaround encoding issues with subprocess on python2 @ Windows
              if sys.version_info < (3, 0) and sys.platform == 'win32':
                  filename = encodeFilename(filename, True).decode(preferredencoding())
-            return filename
+            return sanitize_path(filename)
          except ValueError as err:
              self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
              return None
@@ -669,14 +676,14 @@ class YoutubeDL(object):
                      return self.process_ie_result(ie_result, download, extra_info)
                  else:
                      return ie_result
-            except ExtractorError as de:  # An error we somewhat expected
-                self.report_error(compat_str(de), de.format_traceback())
+            except ExtractorError as e:  # An error we somewhat expected
+                self.report_error(compat_str(e), e.format_traceback())
                  break
              except MaxDownloadsReached:
                  raise
              except Exception as e:
                  if self.params.get('ignoreerrors', False):
-                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
+                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                      break
                  else:
                      raise
@@ -830,6 +837,7 @@ class YoutubeDL(object):
                                                        extra_info=extra)
                  playlist_results.append(entry_result)
              ie_result['entries'] = playlist_results
+            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
              return ie_result
          elif result_type == 'compat_list':
              self.report_warning(
@@ -933,6 +941,37 @@ class YoutubeDL(object):
                  else:
                      filter_parts.append(string)
  
+        def _remove_unused_ops(tokens):
+            # Merge runs of NAME/NUMBER tokens and operators outside ALLOWED_OPS into one NAME token,
+            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+            ALLOWED_OPS = ('/', '+', ',', '(', ')')
+            last_string, last_start, last_end, last_line = None, None, None, None  # last_line stays None: nothing below reassigns it
+            for type, string, start, end, line in tokens:
+                if type == tokenize.OP and string == '[':
+                    if last_string:
+                        yield tokenize.NAME, last_string, last_start, last_end, last_line
+                        last_string = None
+                    yield type, string, start, end, line
+                    # pass tokens through unchanged up to and including ']'; the bracket contents are handled by _parse_filter
+                    for type, string, start, end, line in tokens:
+                        yield type, string, start, end, line
+                        if type == tokenize.OP and string == ']':
+                            break
+                elif type == tokenize.OP and string in ALLOWED_OPS:
+                    if last_string:
+                        yield tokenize.NAME, last_string, last_start, last_end, last_line
+                        last_string = None
+                    yield type, string, start, end, line
+                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+                    if not last_string:
+                        last_string = string
+                        last_start = start
+                        last_end = end
+                    else:
+                        last_string += string  # NOTE(review): last_end is not advanced for appended pieces
+            if last_string:
+                yield tokenize.NAME, last_string, last_start, last_end, last_line
+
          def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
              selectors = []
              current_selector = None
@@ -1073,6 +1112,12 @@ class YoutubeDL(object):
                                            'contain the video, try using '
                                            '"-f %s+%s"' % (format_2, format_1))
                          return
+                    # Formats must be opposite (video+audio)
+                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
+                        self.report_error(
+                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
+                            % (format_1, format_2))
+                        return
                      output_ext = (
                          formats_info[0]['ext']
                          if self.params.get('merge_output_format') is None
@@ -1111,7 +1156,7 @@ class YoutubeDL(object):
  
          stream = io.BytesIO(format_spec.encode('utf-8'))
          try:
-            tokens = list(compat_tokenize_tokenize(stream.readline))
+            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
          except tokenize.TokenError:
              raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
  
@@ -1152,7 +1197,7 @@ class YoutubeDL(object):
          return res
  
      def _calc_cookies(self, info_dict):
-        pr = compat_urllib_request.Request(info_dict['url'])
+        pr = sanitized_Request(info_dict['url'])
          self.cookiejar.add_cookie_header(pr)
          return pr.get_header('Cookie')
  
@@ -1199,13 +1244,20 @@ class YoutubeDL(object):
              except (ValueError, OverflowError, OSError):
                  pass
  
+        subtitles = info_dict.get('subtitles')
+        if subtitles:
+            for _, subtitle in subtitles.items():
+                for subtitle_format in subtitle:
+                    if 'ext' not in subtitle_format:
+                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
          if self.params.get('listsubtitles', False):
              if 'automatic_captions' in info_dict:
                  self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
-            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
              return
          info_dict['requested_subtitles'] = self.process_subtitles(
-            info_dict['id'], info_dict.get('subtitles'),
+            info_dict['id'], subtitles,
              info_dict.get('automatic_captions'))
  
          # We now pick which formats have to be downloaded
@@ -1260,7 +1312,7 @@ class YoutubeDL(object):
              # only set the 'formats' fields if the original info_dict list them
              # otherwise we end up with a circular reference, the first (and unique)
              # element in the 'formats' field in info_dict is info_dict itself,
-            # wich can't be exported to json
+            # which can't be exported to json
              info_dict['formats'] = formats
          if self.params.get('listformats'):
              self.list_formats(info_dict)
@@ -1409,7 +1461,7 @@ class YoutubeDL(object):
              if dn and not os.path.exists(dn):
                  os.makedirs(dn)
          except (OSError, IOError) as err:
-            self.report_error('unable to create directory ' + compat_str(err))
+            self.report_error('unable to create directory ' + error_to_compat_str(err))
              return
  
          if self.params.get('writedescription', False):
@@ -1460,7 +1512,7 @@ class YoutubeDL(object):
                              sub_info['url'], info_dict['id'], note=False)
                      except ExtractorError as err:
                          self.report_warning('Unable to download subtitle for "%s": %s' %
-                                            (sub_lang, compat_str(err.cause)))
+                                            (sub_lang, error_to_compat_str(err.cause)))
                          continue
                  try:
                      sub_filename = subtitles_filename(filename, sub_lang, sub_format)
@@ -1739,6 +1791,10 @@ class YoutubeDL(object):
          res = ''
          if fdict.get('ext') in ['f4f', 'f4m']:
              res += '(unsupported) '
+        if fdict.get('language'):
+            if res:
+                res += ' '
+            res += '[%s]' % fdict['language']
          if fdict.get('format_note') is not None:
              res += fdict['format_note'] + ' '
          if fdict.get('tbr') is not None:
@@ -1829,27 +1885,8 @@ class YoutubeDL(object):
  
      def urlopen(self, req):
          """ Start an HTTP download """
-
-        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
-        # always respected by websites, some tend to give out URLs with non percent-encoded
-        # non-ASCII characters (see telemb.py, ard.py [#3412])
-        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
-        # To work around aforementioned issue we will replace request's original URL with
-        # percent-encoded one
-        req_is_string = isinstance(req, compat_basestring)
-        url = req if req_is_string else req.get_full_url()
-        url_escaped = escape_url(url)
-
-        # Substitute URL if any change after escaping
-        if url != url_escaped:
-            if req_is_string:
-                req = url_escaped
-            else:
-                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
-                req = req_type(
-                    url_escaped, data=req.data, headers=req.headers,
-                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
-
+        if isinstance(req, compat_basestring):
+            req = sanitized_Request(req)
          return self._opener.open(req, timeout=self._socket_timeout)
  
      def print_debug_header(self):
@@ -1932,8 +1969,7 @@ class YoutubeDL(object):
              if os.access(opts_cookiefile, os.R_OK):
                  self.cookiejar.load()
  
-        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
-            self.cookiejar)
+        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
          if opts_proxy is not None:
              if opts_proxy == '':
                  proxies = {}
@@ -1949,8 +1985,9 @@ class YoutubeDL(object):
          debuglevel = 1 if self.params.get('debug_printtraffic') else 0
          https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
          ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+        data_handler = compat_urllib_request_DataHandler()
          opener = compat_urllib_request.build_opener(
-            proxy_handler, https_handler, cookie_processor, ydlh)
+            proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
  
          # Delete the default user-agent header, which would otherwise apply in
          # cases where our custom HTTP handler doesn't come into play
@@ -2002,10 +2039,10 @@ class YoutubeDL(object):
                                 (info_dict['extractor'], info_dict['id'], thumb_display_id))
                  try:
                      uf = self.urlopen(t['url'])
-                    with open(thumb_filename, 'wb') as thumbf:
+                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                          shutil.copyfileobj(uf, thumbf)
                      self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                     (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                      self.report_warning('Unable to download thumbnail "%s": %s' %
-                                        (t['url'], compat_str(err)))
+                                        (t['url'], error_to_compat_str(err)))