Add temporary _sort_formats helper function

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 1fc0624a35a70c830f144d6749bfcc19e5bf47fb..280693d1d83312a00da78fa0852fd1a6cfa6be3a 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
      sanitize_filename,
      unescapeHTML,
  )
+_NO_DEFAULT = object()  # Sentinel meaning "no default given", so that None can be passed as an explicit default
  
  
  class InfoExtractor(object):
@@ -36,10 +37,12 @@ class InfoExtractor(object):
      id:             Video identifier.
      title:          Video title, unescaped.
  
-    Additionally, it must contain either a formats entry or url and ext:
+    Additionally, it must contain either a formats entry or a url one:
  
-    formats:        A list of dictionaries for each format available, it must
-                    be ordered from worst to best quality. Potential fields:
+    formats:        A list of dictionaries for each format available, ordered
+                    from worst to best quality.
+
+                    Potential fields:
                      * url        Mandatory. The URL of the video file
                      * ext        Will be calculated from url if missing
                      * format     A human-readable description of the format
@@ -52,18 +55,21 @@ class InfoExtractor(object):
                                   ("3D" or "DASH video")
                      * width      Width of the video, if known
                      * height     Height of the video, if known
+                    * resolution Textual description of width and height
                      * abr        Average audio bitrate in KBit/s
                      * acodec     Name of the audio codec in use
                      * vbr        Average video bitrate in KBit/s
                      * vcodec     Name of the video codec in use
                      * filesize   The number of bytes, if known in advance
                      * player_url SWF Player URL (used for rtmpdump).
+                    * preference Order number of this format. If this field is
+                                 present and not None, formats are sorted by it.
+                                 -1 for default (order by other properties),
+                                 -2 or smaller for less than default.
      url:            Final video URL.
      ext:            Video filename extension.
      format:         The video format, defaults to ext (used for --get-format)
      player_url:     SWF Player URL (used for rtmpdump).
-    urlhandle:      [internal] The urlHandle to be used to download the file,
-                    like returned by urllib.request.urlopen
  
      The following fields are optional:
  
@@ -169,6 +175,8 @@ class InfoExtractor(object):
          try:
              return self._downloader.urlopen(url_or_request)
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            if errnote is False:
+                return False
              if errnote is None:
                  errnote = u'Unable to download webpage'
              errmsg = u'%s: %s' % (errnote, compat_str(err))
@@ -241,6 +249,11 @@ class InfoExtractor(object):
              xml_string = transform_source(xml_string)
          return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
  
+    def report_warning(self, msg, video_id=None):
+        """Report msg as a warning, prefixed with '[ie_name]' and the video id, if any"""
+        prefix = u'%s: ' % video_id if video_id is not None else u''
+        self._downloader.report_warning(u'[%s] %s%s' % (self.IE_NAME, prefix, msg))
+
      def to_screen(self, msg):
          """Print msg to screen, prefixing it with '[ie_name]'"""
          self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@@ -262,7 +275,8 @@ class InfoExtractor(object):
          self.to_screen(u'Logging in')
  
      #Methods for following #608
-    def url_result(self, url, ie=None, video_id=None):
+    @staticmethod
+    def url_result(url, ie=None, video_id=None):
          """Returns a url that points to a page that should be processed"""
          #TODO: ie should be the class used for getting the info
          video_info = {'_type': 'url',
@@ -271,7 +285,8 @@ class InfoExtractor(object):
          if video_id is not None:
              video_info['id'] = video_id
          return video_info
-    def playlist_result(self, entries, playlist_id=None, playlist_title=None):
+    @staticmethod
+    def playlist_result(entries, playlist_id=None, playlist_title=None):
          """Returns a playlist"""
          video_info = {'_type': 'playlist',
                        'entries': entries}
@@ -281,7 +296,7 @@ class InfoExtractor(object):
              video_info['title'] = playlist_title
          return video_info
  
-    def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
          """
          Perform a regex search on the given string, using a single or a list of
          patterns returning the first matching group.
@@ -303,7 +318,7 @@ class InfoExtractor(object):
          if mobj:
              # return the first matching group
              return next(g for g in mobj.groups() if g is not None)
-        elif default is not None:
+        elif default is not _NO_DEFAULT:
              return default
          elif fatal:
              raise RegexNotFoundError(u'Unable to extract %s' % _name)
@@ -312,7 +327,7 @@ class InfoExtractor(object):
                  u'please report this issue on http://yt-dl.org/bug' % _name)
              return None
  
-    def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
          """
          Like _search_regex, but strips HTML tags and unescapes entities.
          """
@@ -421,6 +436,47 @@ class InfoExtractor(object):
          }
          return RATING_TABLE.get(rating.lower(), None)
  
+    def _sort_formats(self, formats):
+        """Sort formats in place, from worst to best quality.
+
+        Sort keys, in order: preference (guessed from the URL scheme and ext
+        when absent), height, width, ext, vbr, abr, audio ext, filesize,
+        format_id. Unknown values sort below known ones.
+        """
+        prefer_free = self._downloader.params.get('prefer_free_formats')
+
+        def _rank(order, ext):
+            # Index of ext in order; a missing or unknown ext ranks lowest
+            return order.index(ext) if ext in order else -1
+
+        def _num(f, field):
+            return f[field] if f.get(field) is not None else -1
+
+        def _formats_key(f):
+            preference, ext = f.get('preference'), f.get('ext')
+            if preference is None:
+                preference = 0 if f.get('url', '').startswith('http') else -0.1
+                if ext in ['f4f', 'f4m']:  # Not yet supported
+                    preference -= 0.5
+
+            ext_preference = audio_ext_preference = 0
+            if f.get('vcodec') == 'none':  # audio only
+                audio_ext_preference = _rank(
+                    [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] if prefer_free
+                    else [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'], ext)
+            else:
+                ext_preference = _rank(
+                    [u'flv', u'mp4', u'webm'] if prefer_free
+                    else [u'webm', u'flv', u'mp4'], ext)
+
+            format_id = f.get('format_id')
+            return (
+                preference, _num(f, 'height'), _num(f, 'width'), ext_preference,
+                _num(f, 'vbr'), _num(f, 'abr'), audio_ext_preference,
+                _num(f, 'filesize'),
+                # None cannot be ordered against a str on Python 3
+                format_id if format_id is not None else u'')
+        formats.sort(key=_formats_key)
  
  
  class SearchInfoExtractor(InfoExtractor):