X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=9e1d62c2b908f0c8ac826a5d16fba10c4645b8b2;hb=8c25f81beea169c9d6540eea1a6f71dc045da6ed;hp=f43a0a569a3d90d555a27cece8ac3e68951c5106;hpb=0217aee154e9f0f5f0c82c3253046c925d4073eb;p=youtube-dl

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f43a0a569..9e1d62c2b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -12,13 +12,14 @@ import sys
 import time
 import xml.etree.ElementTree
 
-from ..utils import (
+from ..compat import (
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
     compat_urlparse,
     compat_str,
-
+)
+from ..utils import (
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -72,6 +73,7 @@ class InfoExtractor(object):
                     * acodec     Name of the audio codec in use
                     * asr        Audio sampling rate in Hertz
                     * vbr        Average video bitrate in KBit/s
+                    * fps        Frame rate
                     * vcodec     Name of the video codec in use
                     * container  Name of the container format
                     * filesize   The number of bytes, if known in advance
@@ -89,6 +91,10 @@ class InfoExtractor(object):
                                  format, irrespective of the file format.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
+                    * source_preference  Order number for this video source
+                                  (quality takes higher priority)
+                                 -1 for default (order by other properties),
+                                 -2 or smaller for less than default.
                     * http_referer  HTTP Referer header value to set.
                     * http_method  HTTP method to use for the download.
                     * http_headers  A dictionary of additional HTTP headers
@@ -138,6 +144,8 @@ class InfoExtractor(object):
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 
+    Unless mentioned otherwise, None is equivalent to absence of information.
+
     Subclasses of this one should re-define the _real_initialize() and
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
@@ -236,7 +244,6 @@ class InfoExtractor(object):
 
     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
         """ Returns a tuple (page content as string, URL handle) """
-
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
             url_or_request = url_or_request.partition('#')[0]
@@ -245,6 +252,10 @@ class InfoExtractor(object):
         if urlh is False:
             assert not fatal
             return False
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+        return (content, urlh)
+
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
         content_type = urlh.headers.get('Content-Type', '')
         webpage_bytes = urlh.read()
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@@ -279,6 +290,12 @@ class InfoExtractor(object):
             raw_filename = basen + '.dump'
             filename = sanitize_filename(raw_filename, restricted=True)
             self.to_screen('Saving request to ' + filename)
+            # Working around MAX_PATH limitation on Windows (see
+            # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+            if os.name == 'nt':
+                absfilepath = os.path.abspath(filename)
+                if len(absfilepath) > 259:
+                    filename = '\\\\?\\' + absfilepath
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
 
@@ -297,7 +314,7 @@ class InfoExtractor(object):
                 msg += ' Visit %s for more details' % blocked_iframe
             raise ExtractorError(msg, expected=True)
 
-        return (content, urlh)
+        return content
 
     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
         """ Returns the data of the page as a string """
@@ -334,7 +351,11 @@ class InfoExtractor(object):
         try:
             return json.loads(json_string)
         except ValueError as ve:
-            raise ExtractorError('Failed to download JSON', cause=ve)
+            errmsg = '%s: Failed to parse JSON ' % video_id
+            if fatal:
+                raise ExtractorError(errmsg, cause=ve)
+            else:
+                self.report_warning(errmsg + str(ve))
 
     def report_warning(self, msg, video_id=None):
         idstr = '' if video_id is None else '%s: ' % video_id
@@ -599,14 +620,16 @@ class InfoExtractor(object):
                 f.get('vbr') if f.get('vbr') is not None else -1,
                 f.get('abr') if f.get('abr') is not None else -1,
                 audio_ext_preference,
+                f.get('fps') if f.get('fps') is not None else -1,
                 f.get('filesize') if f.get('filesize') is not None else -1,
                 f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+                f.get('source_preference') if f.get('source_preference') is not None else -1,
                 f.get('format_id'),
             )
         formats.sort(key=_formats_key)
 
     def http_scheme(self):
-        """ Either "https:" or "https:", depending on the user's preferences """
+        """ Either "http:" or "https:", depending on the user's preferences """
         return (
             'http:'
             if self._downloader.params.get('prefer_insecure', False)
@@ -669,7 +692,10 @@ class InfoExtractor(object):
             if re.match(r'^https?://', u)
             else compat_urlparse.urljoin(m3u8_url, u))
 
-        m3u8_doc = self._download_webpage(m3u8_url, video_id)
+        m3u8_doc = self._download_webpage(
+            m3u8_url, video_id,
+            note='Downloading m3u8 information',
+            errnote='Failed to download m3u8 information')
         last_info = None
         kv_rex = re.compile(
             r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')