X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=f7478d4598e8f15ea802ec9451aff53ce452fe7a;hb=a3978a615950af6e990313820f93baddce067ee4;hp=0a90382de116bdffdb88edfc0ebe1a38fb758dd6;hpb=e6812ac99dfc9117f3dd78425bf4bbe01601ecfd;p=youtube-dl

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 0a90382de..f7478d459 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,4 +1,6 @@
 import base64
+import hashlib
+import json
 import os
 import re
 import socket
@@ -9,6 +11,7 @@ import xml.etree.ElementTree
 from ..utils import (
     compat_http_client,
     compat_urllib_error,
+    compat_urllib_parse_urlparse,
     compat_str,
 
     clean_html,
@@ -50,20 +53,32 @@ class InfoExtractor(object):
                                  Calculated from the format_id, width, height.
                                  and format_note fields if missing.
                     * format_id  A short description of the format
-                                 ("mp4_h264_opus" or "19")
+                                 ("mp4_h264_opus" or "19").
+                                Technically optional, but strongly recommended.
                     * format_note Additional info about the format
                                  ("3D" or "DASH video")
                     * width      Width of the video, if known
                     * height     Height of the video, if known
                     * resolution Textual description of width and height
+                    * tbr        Average bitrate of audio and video in KBit/s
                     * abr        Average audio bitrate in KBit/s
                     * acodec     Name of the audio codec in use
+                    * asr        Audio sampling rate in Hertz
                     * vbr        Average video bitrate in KBit/s
                     * vcodec     Name of the video codec in use
+                    * container  Name of the container format
                     * filesize   The number of bytes, if known in advance
                     * player_url SWF Player URL (used for rtmpdump).
+                    * protocol   The protocol that will be used for the actual
+                                 download, lower-case.
+                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
                     * preference Order number of this format. If this field is
-                                 present, the formats get sorted by this field.
+                                 present and not None, the formats get sorted
+                                 by this field.
+                                 -1 for default (order by other properties),
+                                 -2 or smaller for less than default.
+                    * quality    Order number of the video quality of this
+                                 format, irrespective of the file format.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
     url:            Final video URL.
@@ -207,6 +222,8 @@ class InfoExtractor(object):
                           webpage_bytes[:1024])
             if m:
                 encoding = m.group(1).decode('ascii')
+            elif webpage_bytes.startswith(b'\xff\xfe'):
+                encoding = 'utf-16'
             else:
                 encoding = 'utf-8'
         if self._downloader.params.get('dump_intermediate_pages', False):
@@ -222,6 +239,9 @@ class InfoExtractor(object):
                 url = url_or_request.get_full_url()
             except AttributeError:
                 url = url_or_request
+            if len(url) > 200:
+                h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
+                url = url[:200 - len(h)] + h
             raw_filename = ('%s_%s.dump' % (video_id, url))
             filename = sanitize_filename(raw_filename, restricted=True)
             self.to_screen(u'Saving request to ' + filename)
@@ -249,6 +269,15 @@ class InfoExtractor(object):
             xml_string = transform_source(xml_string)
         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
 
+    def _download_json(self, url_or_request, video_id,
+                       note=u'Downloading JSON metadata',
+                       errnote=u'Unable to download JSON metadata'):
+        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        try:
+            return json.loads(json_string)
+        except ValueError as ve:
+            raise ExtractorError('Failed to download JSON', cause=ve)
+
     def report_warning(self, msg, video_id=None):
         idstr = u'' if video_id is None else u'%s: ' % video_id
         self._downloader.report_warning(
@@ -371,7 +400,7 @@ class InfoExtractor(object):
     @staticmethod
     def _og_regexes(prop):
         content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
-        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+        property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
         template = r'<meta[^>]+?%s[^>]+?%s'
         return [
             template % (property_re, content_re),
@@ -437,6 +466,9 @@ class InfoExtractor(object):
         return RATING_TABLE.get(rating.lower(), None)
 
     def _sort_formats(self, formats):
+        if not formats:
+            raise ExtractorError(u'No video formats found')
+
         def _formats_key(f):
             # TODO remove the following workaround
             from ..utils import determine_ext
@@ -445,7 +477,11 @@ class InfoExtractor(object):
 
             preference = f.get('preference')
             if preference is None:
-                preference = 0 if f.get('url', '').startswith('http') else -0.1
+                proto = f.get('protocol')
+                if proto is None:
+                    proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme
+
+                preference = 0 if proto in ['http', 'https'] else -0.1
                 if f.get('ext') in ['f4f', 'f4m']:  # Not yet supported
                     preference -= 0.5
 
@@ -472,9 +508,11 @@ class InfoExtractor(object):
 
             return (
                 preference,
+                f.get('quality') if f.get('quality') is not None else -1,
                 f.get('height') if f.get('height') is not None else -1,
                 f.get('width') if f.get('width') is not None else -1,
                 ext_preference,
+                f.get('tbr') if f.get('tbr') is not None else -1,
                 f.get('vbr') if f.get('vbr') is not None else -1,
                 f.get('abr') if f.get('abr') is not None else -1,
                 audio_ext_preference,