lazy extractors: specify the encoding

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 8c651cd52375e1dcf986307b57f447fce4025543..f18a8e840553528703e79cf3dd2415173a568d0e 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -39,6 +39,8 @@ from .compat import (
      compat_urllib_request_DataHandler,
  )
  from .utils import (
      compat_urllib_request_DataHandler,
  )
  from .utils import (
+    age_restricted,
+    args_to_str,
      ContentTooShortError,
      date_from_str,
      DateRange,
      ContentTooShortError,
      date_from_str,
      DateRange,
@@ -58,13 +60,16 @@ from .utils import (
      PagedList,
      parse_filesize,
      PerRequestProxyHandler,
      PagedList,
      parse_filesize,
      PerRequestProxyHandler,
-    PostProcessingError,
      platform_name,
      platform_name,
+    PostProcessingError,
      preferredencoding,
      preferredencoding,
+    prepend_extension,
      render_table,
      render_table,
+    replace_extension,
      SameFileError,
      sanitize_filename,
      sanitize_path,
      SameFileError,
      sanitize_filename,
      sanitize_path,
+    sanitize_url,
      sanitized_Request,
      std_headers,
      subtitles_filename,
      sanitized_Request,
      std_headers,
      subtitles_filename,
@@ -75,13 +80,9 @@ from .utils import (
      write_string,
      YoutubeDLCookieProcessor,
      YoutubeDLHandler,
      write_string,
      YoutubeDLCookieProcessor,
      YoutubeDLHandler,
-    prepend_extension,
-    replace_extension,
-    args_to_str,
-    age_restricted,
  )
  from .cache import Cache
  )
  from .cache import Cache
-from .extractor import get_info_extractor, gen_extractors
+from .extractor import get_info_extractor, gen_extractor_classes
  from .downloader import get_suitable_downloader
  from .downloader.rtmp import rtmpdump_version
  from .postprocessor import (
  from .downloader import get_suitable_downloader
  from .downloader.rtmp import rtmpdump_version
  from .postprocessor import (
@@ -377,8 +378,9 @@ class YoutubeDL(object):
      def add_info_extractor(self, ie):
          """Add an InfoExtractor object to the end of the list."""
          self._ies.append(ie)
      def add_info_extractor(self, ie):
          """Add an InfoExtractor object to the end of the list."""
          self._ies.append(ie)
-        self._ies_instances[ie.ie_key()] = ie
-        ie.set_downloader(self)
+        if not isinstance(ie, type):
+            self._ies_instances[ie.ie_key()] = ie
+            ie.set_downloader(self)
  
      def get_info_extractor(self, ie_key):
          """
  
      def get_info_extractor(self, ie_key):
          """
@@ -396,7 +398,7 @@ class YoutubeDL(object):
          """
          Add the InfoExtractors returned by gen_extractors to the end of the list
          """
          """
          Add the InfoExtractors returned by gen_extractors to the end of the list
          """
-        for ie in gen_extractors():
+        for ie in gen_extractor_classes():
              self.add_info_extractor(ie)
  
      def add_post_processor(self, pp):
              self.add_info_extractor(ie)
  
      def add_post_processor(self, pp):
@@ -660,6 +662,7 @@ class YoutubeDL(object):
              if not ie.suitable(url):
                  continue
  
              if not ie.suitable(url):
                  continue
  
+            ie = self.get_info_extractor(ie.ie_key())
              if not ie.working():
                  self.report_warning('The program functionality for this site has been marked as broken, '
                                      'and will probably not work.')
              if not ie.working():
                  self.report_warning('The program functionality for this site has been marked as broken, '
                                      'and will probably not work.')
@@ -905,7 +908,7 @@ class YoutubeDL(object):
                  '*=': lambda attr, value: value in attr,
              }
              str_operator_rex = re.compile(r'''(?x)
                  '*=': lambda attr, value: value in attr,
              }
              str_operator_rex = re.compile(r'''(?x)
-                \s*(?P<key>ext|acodec|vcodec|container|protocol)
+                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
                  \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                  \s*(?P<value>[a-zA-Z0-9._-]+)
                  \s*$
                  \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                  \s*(?P<value>[a-zA-Z0-9._-]+)
                  \s*$
@@ -1229,6 +1232,7 @@ class YoutubeDL(object):
                  t.get('preference'), t.get('width'), t.get('height'),
                  t.get('id'), t.get('url')))
              for i, t in enumerate(thumbnails):
                  t.get('preference'), t.get('width'), t.get('height'),
                  t.get('id'), t.get('url')))
              for i, t in enumerate(thumbnails):
+                t['url'] = sanitize_url(t['url'])
                  if t.get('width') and t.get('height'):
                      t['resolution'] = '%dx%d' % (t['width'], t['height'])
                  if t.get('id') is None:
                  if t.get('width') and t.get('height'):
                      t['resolution'] = '%dx%d' % (t['width'], t['height'])
                  if t.get('id') is None:
@@ -1238,7 +1242,10 @@ class YoutubeDL(object):
              self.list_thumbnails(info_dict)
              return
  
              self.list_thumbnails(info_dict)
              return
  
-        if thumbnails and 'thumbnail' not in info_dict:
+        thumbnail = info_dict.get('thumbnail')
+        if thumbnail:
+            info_dict['thumbnail'] = sanitize_url(thumbnail)
+        elif thumbnails:
              info_dict['thumbnail'] = thumbnails[-1]['url']
  
          if 'display_id' not in info_dict and 'id' in info_dict:
              info_dict['thumbnail'] = thumbnails[-1]['url']
  
          if 'display_id' not in info_dict and 'id' in info_dict:
@@ -1263,6 +1270,8 @@ class YoutubeDL(object):
          if subtitles:
              for _, subtitle in subtitles.items():
                  for subtitle_format in subtitle:
          if subtitles:
              for _, subtitle in subtitles.items():
                  for subtitle_format in subtitle:
+                    if subtitle_format.get('url'):
+                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                      if 'ext' not in subtitle_format:
                          subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
  
                      if 'ext' not in subtitle_format:
                          subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
  
@@ -1292,6 +1301,8 @@ class YoutubeDL(object):
              if 'url' not in format:
                  raise ExtractorError('Missing "url" key in result (index %d)' % i)
  
              if 'url' not in format:
                  raise ExtractorError('Missing "url" key in result (index %d)' % i)
  
+            format['url'] = sanitize_url(format['url'])
+
              if format.get('format_id') is None:
                  format['format_id'] = compat_str(i)
              else:
              if format.get('format_id') is None:
                  format['format_id'] = compat_str(i)
              else:
@@ -1836,7 +1847,7 @@ class YoutubeDL(object):
          if fdict.get('language'):
              if res:
                  res += ' '
          if fdict.get('language'):
              if res:
                  res += ' '
-            res += '[%s]' % fdict['language']
+            res += '[%s] ' % fdict['language']
          if fdict.get('format_note') is not None:
              res += fdict['format_note'] + ' '
          if fdict.get('tbr') is not None:
          if fdict.get('format_note') is not None:
              res += fdict['format_note'] + ' '
          if fdict.get('tbr') is not None: