X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=84fca8ba0b2577696877c117a13fcc0a5ce40735;hb=280bc5dad651728e493b3b25a672a9aaef590683;hp=f498bcf6f5af1feb05eb8df0184f77e6336ac878;hpb=cc14dfb8ecb73be9905f5adab2d7f1f92d435e2f;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f498bcf6f..84fca8ba0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,4 +1,6 @@ import base64 +import hashlib +import json import os import re import socket @@ -61,18 +63,24 @@ class InfoExtractor(object): * tbr Average bitrate of audio and video in KBit/s * abr Average audio bitrate in KBit/s * acodec Name of the audio codec in use + * asr Audio sampling rate in Hertz * vbr Average video bitrate in KBit/s * vcodec Name of the video codec in use + * container Name of the container format * filesize The number of bytes, if known in advance * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual download, lower-case. - "http", "https", "rtsp", "rtmp" or so. + "http", "https", "rtsp", "rtmp", "m3u8" or so. * preference Order number of this format. If this field is present and not None, the formats get sorted by this field. -1 for default (order by other properties), -2 or smaller for less than default. + * quality Order number of the video quality of this + format, irrespective of the file format. + -1 for default (order by other properties), + -2 or smaller for less than default. url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) @@ -214,6 +222,8 @@ class InfoExtractor(object): webpage_bytes[:1024]) if m: encoding = m.group(1).decode('ascii') + elif webpage_bytes.startswith(b'\xff\xfe'): + encoding = 'utf-16' else: encoding = 'utf-8' if self._downloader.params.get('dump_intermediate_pages', False): @@ -229,6 +239,9 @@ class InfoExtractor(object): url = url_or_request.get_full_url() except AttributeError: url = url_or_request + if len(url) > 200: + h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest() + url = url[:200 - len(h)] + h raw_filename = ('%s_%s.dump' % (video_id, url)) filename = sanitize_filename(raw_filename, restricted=True) self.to_screen(u'Saving request to ' + filename) @@ -256,6 +269,18 @@ class InfoExtractor(object): xml_string = transform_source(xml_string) return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) + def _download_json(self, url_or_request, video_id, + note=u'Downloading JSON metadata', + errnote=u'Unable to download JSON metadata', + transform_source=None): + json_string = self._download_webpage(url_or_request, video_id, note, errnote) + if transform_source: + json_string = transform_source(json_string) + try: + return json.loads(json_string) + except ValueError as ve: + raise ExtractorError('Failed to download JSON', cause=ve) + def report_warning(self, msg, video_id=None): idstr = u'' if video_id is None else u'%s: ' % video_id self._downloader.report_warning( @@ -377,7 +402,7 @@ class InfoExtractor(object): # Helper functions for extracting OpenGraph info @staticmethod def _og_regexes(prop): - content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')' + content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')' property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop) template = r']+?%s[^>]+?%s' return [ @@ -443,7 +468,14 @@ class InfoExtractor(object): } return RATING_TABLE.get(rating.lower(), None) + def _twitter_search_player(self, html): + return self._html_search_meta('twitter:player', html, + 'twitter card player') + def _sort_formats(self, formats): + if not formats: + raise ExtractorError(u'No video formats found') + def _formats_key(f): # TODO remove the following workaround from ..utils import determine_ext @@ -483,9 +515,11 @@ class InfoExtractor(object): return ( preference, + f.get('quality') if f.get('quality') is not None else -1, f.get('height') if f.get('height') is not None else -1, f.get('width') if f.get('width') is not None else -1, ext_preference, + f.get('tbr') if f.get('tbr') is not None else -1, f.get('vbr') if f.get('vbr') is not None else -1, f.get('abr') if f.get('abr') is not None else -1, audio_ext_preference,