[util] Move compatibility functions out of util

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 60cab6f4e731df18a7e828d64cf5f471ccda9e25..9e1d62c2b908f0c8ac826a5d16fba10c4645b8b2 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,6 +1,7 @@
  from __future__ import unicode_literals
  
  import base64
+import datetime
  import hashlib
  import json
  import netrc
@@ -11,16 +12,18 @@ import sys
  import time
  import xml.etree.ElementTree
  
-from ..utils import (
+from ..compat import (
      compat_http_client,
      compat_urllib_error,
      compat_urllib_parse_urlparse,
      compat_urlparse,
      compat_str,
-
+)
+from ..utils import (
      clean_html,
      compiled_regex_type,
      ExtractorError,
+    float_or_none,
      int_or_none,
      RegexNotFoundError,
      sanitize_filename,
@@ -70,6 +73,7 @@ class InfoExtractor(object):
                      * acodec     Name of the audio codec in use
                      * asr        Audio sampling rate in Hertz
                      * vbr        Average video bitrate in KBit/s
+                    * fps        Frame rate
                      * vcodec     Name of the video codec in use
                      * container  Name of the container format
                      * filesize   The number of bytes, if known in advance
@@ -87,6 +91,10 @@ class InfoExtractor(object):
                                   format, irrespective of the file format.
                                   -1 for default (order by other properties),
                                   -2 or smaller for less than default.
+                    * source_preference  Order number for this video source
+                                  (quality takes higher priority)
+                                 -1 for default (order by other properties),
+                                 -2 or smaller for less than default.
                      * http_referer  HTTP Referer header value to set.
                      * http_method  HTTP method to use for the download.
                      * http_headers  A dictionary of additional HTTP headers
@@ -136,6 +144,8 @@ class InfoExtractor(object):
  
      Unless mentioned otherwise, the fields should be Unicode strings.
  
+    Unless mentioned otherwise, None is equivalent to absence of information.
+
      Subclasses of this one should re-define the _real_initialize() and
      _real_extract() methods and define a _VALID_URL regexp.
      Probably, they should also be added to the list of extractors.
@@ -164,6 +174,14 @@ class InfoExtractor(object):
              cls._VALID_URL_RE = re.compile(cls._VALID_URL)
          return cls._VALID_URL_RE.match(url) is not None
  
+    @classmethod
+    def _match_id(cls, url):
+        """ Return the 'id' group that _VALID_URL captures from url.
+
+        The compiled pattern is stored on the class as _VALID_URL_RE the
+        first time it is needed. url is asserted to match _VALID_URL.
+        """
+        # vars(cls) only lists attributes set on this exact class, so a
+        # pattern cached on a base class is not picked up here.
+        if '_VALID_URL_RE' not in vars(cls):
+            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        match = cls._VALID_URL_RE.match(url)
+        assert match
+        return match.group('id')
+
      @classmethod
      def working(cls):
          """Getter method for _WORKING."""
@@ -226,7 +244,6 @@ class InfoExtractor(object):
  
      def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
          """ Returns a tuple (page content as string, URL handle) """
-
          # Strip hashes from the URL (#1038)
          if isinstance(url_or_request, (compat_str, str)):
              url_or_request = url_or_request.partition('#')[0]
@@ -235,6 +252,10 @@ class InfoExtractor(object):
          if urlh is False:
              assert not fatal
              return False
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+        return (content, urlh)
+
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
          content_type = urlh.headers.get('Content-Type', '')
          webpage_bytes = urlh.read()
          m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@@ -269,6 +290,12 @@ class InfoExtractor(object):
              raw_filename = basen + '.dump'
              filename = sanitize_filename(raw_filename, restricted=True)
              self.to_screen('Saving request to ' + filename)
+            # Working around MAX_PATH limitation on Windows (see
+            # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+            if os.name == 'nt':
+                absfilepath = os.path.abspath(filename)
+                if len(absfilepath) > 259:
+                    filename = '\\\\?\\' + absfilepath
              with open(filename, 'wb') as outf:
                  outf.write(webpage_bytes)
  
@@ -287,7 +314,7 @@ class InfoExtractor(object):
                  msg += ' Visit %s for more details' % blocked_iframe
              raise ExtractorError(msg, expected=True)
  
-        return (content, urlh)
+        return content
  
      def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
          """ Returns the data of the page as a string """
@@ -324,7 +351,11 @@ class InfoExtractor(object):
          try:
              return json.loads(json_string)
          except ValueError as ve:
-            raise ExtractorError('Failed to download JSON', cause=ve)
+            errmsg = '%s: Failed to parse JSON ' % video_id
+            if fatal:
+                raise ExtractorError(errmsg, cause=ve)
+            else:
+                self.report_warning(errmsg + str(ve))
  
      def report_warning(self, msg, video_id=None):
          idstr = '' if video_id is None else '%s: ' % video_id
@@ -589,14 +620,16 @@ class InfoExtractor(object):
                  f.get('vbr') if f.get('vbr') is not None else -1,
                  f.get('abr') if f.get('abr') is not None else -1,
                  audio_ext_preference,
+                f.get('fps') if f.get('fps') is not None else -1,
                  f.get('filesize') if f.get('filesize') is not None else -1,
                  f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+                f.get('source_preference') if f.get('source_preference') is not None else -1,
                  f.get('format_id'),
              )
          formats.sort(key=_formats_key)
  
      def http_scheme(self):
-        """ Either "https:" or "https:", depending on the user's preferences """
+        """ Either "http:" or "https:", depending on the user's preferences """
          return (
              'http:'
              if self._downloader.params.get('prefer_insecure', False)
@@ -659,7 +692,10 @@ class InfoExtractor(object):
              if re.match(r'^https?://', u)
              else compat_urlparse.urljoin(m3u8_url, u))
  
-        m3u8_doc = self._download_webpage(m3u8_url, video_id)
+        m3u8_doc = self._download_webpage(
+            m3u8_url, video_id,
+            note='Downloading m3u8 information',
+            errnote='Failed to download m3u8 information')
          last_info = None
          kv_rex = re.compile(
              r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
@@ -705,6 +741,34 @@ class InfoExtractor(object):
          self._sort_formats(formats)
          return formats
  
+    def _live_title(self, name):
+        """ Build the title for a live video: name, a space, the current time.
+
+        The time comes from datetime.datetime.now() and is formatted as
+        "YYYY-MM-DD HH:MM".
+        """
+        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
+        return name + ' ' + timestamp
+
+    def _int(self, v, name, fatal=False, **kwargs):
+        """ Parse v as an integer and report a failure to parse it.
+
+        v and any extra keyword arguments are handed to int_or_none. name
+        is only used to label the value in the failure message. When the
+        result is None, an ExtractorError is raised if fatal is true;
+        otherwise a warning is reported through the downloader and None
+        is returned.
+        """
+        res = int_or_none(v, **kwargs)
+        if res is None:
+            msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+            if fatal:
+                raise ExtractorError(msg)
+            else:
+                self._downloader.report_warning(msg)
+        return res
+
+    def _float(self, v, name, fatal=False, **kwargs):
+        """ Parse v as a float and report a failure to parse it.
+
+        v and any extra keyword arguments are handed to float_or_none. name
+        is only used to label the value in the failure message. When the
+        result is None, an ExtractorError is raised if fatal is true;
+        otherwise a warning is reported through the downloader and None
+        is returned.
+        """
+        parsed = float_or_none(v, **kwargs)
+        if parsed is not None:
+            return parsed
+        msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+        if fatal:
+            raise ExtractorError(msg)
+        self._downloader.report_warning(msg)
+        return None
+
  
  class SearchInfoExtractor(InfoExtractor):
      """