X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=f498bcf6f5af1feb05eb8df0184f77e6336ac878;hb=a7c26e7338ceed06b579775f315b078644a7482b;hp=ba46a7bc77d17ed4bcf4dcf7764b1d39f4799958;hpb=aa94a6d3159af8333b56d16f3ed0bc3a164a882a;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ba46a7bc7..f498bcf6f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -9,6 +9,7 @@ import xml.etree.ElementTree from ..utils import ( compat_http_client, compat_urllib_error, + compat_urllib_parse_urlparse, compat_str, clean_html, @@ -37,10 +38,12 @@ class InfoExtractor(object): id: Video identifier. title: Video title, unescaped. - Additionally, it must contain either a formats entry or url and ext: + Additionally, it must contain either a formats entry or a url one: - formats: A list of dictionaries for each format available, it must - be ordered from worst to best quality. Potential fields: + formats: A list of dictionaries for each format available, ordered + from worst to best quality. + + Potential fields: * url Mandatory. The URL of the video file * ext Will be calculated from url if missing * format A human-readable description of the format @@ -48,23 +51,32 @@ class InfoExtractor(object): Calculated from the format_id, width, height. and format_note fields if missing. * format_id A short description of the format - ("mp4_h264_opus" or "19") + ("mp4_h264_opus" or "19"). + Technically optional, but strongly recommended. 
* format_note Additional info about the format ("3D" or "DASH video") * width Width of the video, if known * height Height of the video, if known + * resolution Textual description of width and height + * tbr Average bitrate of audio and video in KBit/s * abr Average audio bitrate in KBit/s * acodec Name of the audio codec in use * vbr Average video bitrate in KBit/s * vcodec Name of the video codec in use * filesize The number of bytes, if known in advance * player_url SWF Player URL (used for rtmpdump). + * protocol The protocol that will be used for the actual + download, lower-case. + "http", "https", "rtsp", "rtmp" or so. + * preference Order number of this format. If this field is + present and not None, the formats get sorted + by this field. + -1 for default (order by other properties), + -2 or smaller for less than default. url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) player_url: SWF Player URL (used for rtmpdump). 
def report_warning(self, msg, video_id=None):
    """Report a warning via the downloader, prefixed with '[ie_name]'.

    If video_id is not None, 'video_id: ' is inserted between the
    extractor name prefix and msg.
    """
    # Wrap in a 1-tuple so that a tuple-valued video_id is formatted as a
    # single value instead of being unpacked as format arguments.
    idstr = u'' if video_id is None else u'%s: ' % (video_id,)
    self._downloader.report_warning(
        u'[%s] %s%s' % (self.IE_NAME, idstr, msg))


def to_screen(self, msg):
    """Print msg to screen, prefixing it with '[ie_name]'"""
    self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))


def _sort_formats(self, formats):
    """Sort the list of format dictionaries in place, worst to best.

    Formats are ordered by the following criteria, in decreasing priority:

    * preference: the explicit 'preference' field if it is present and
      not None. Otherwise 0 for http/https and -0.1 for any other
      protocol (taken from the 'protocol' field, or from the URL scheme
      if that field is missing), minus 0.5 for the not yet supported
      f4f/f4m extensions.
    * height, then width
    * position of the extension in the video extension ranking
      (0 for audio-only formats)
    * vbr, then abr
    * position of the extension in the audio extension ranking
      (0 for formats that are not audio-only)
    * filesize
    * format_id

    Numeric fields that are missing or None rank as -1, a missing
    format_id ranks as the empty string, and an extension that is missing
    or not listed in the applicable ranking ranks as -1.  The rankings
    depend on the downloader's 'prefer_free_formats' parameter.

    Side effect: a format without 'ext' but with 'url' gets its 'ext'
    field filled in via determine_ext(url).

    Returns None; the formats list is modified in place.
    """

    def _known(value, default=-1):
        # Unknown (None) properties must sort below every known value and
        # must never be compared with a real value directly: None is not
        # orderable against int or str on Python 3.
        return default if value is None else value

    def _rank(order, ext):
        # Position of ext in order; -1 if ext is None or not listed.
        try:
            return order.index(ext)
        except ValueError:
            return -1

    def _formats_key(f):
        # TODO remove the following workaround
        if not f.get('ext') and 'url' in f:
            # Imported lazily and only when the workaround is needed.
            from ..utils import determine_ext
            f['ext'] = determine_ext(f['url'])
        ext = f.get('ext')

        preference = f.get('preference')
        if preference is None:
            proto = f.get('protocol')
            if proto is None:
                proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme

            preference = 0 if proto in ('http', 'https') else -0.1
            if ext in ('f4f', 'f4m'):  # Not yet supported
                preference -= 0.5

        prefer_free = self._downloader.params.get('prefer_free_formats')
        if f.get('vcodec') == 'none':  # audio only
            if prefer_free:
                order = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
            else:
                order = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
            ext_preference = 0
            audio_ext_preference = _rank(order, ext)
        else:
            if prefer_free:
                order = [u'flv', u'mp4', u'webm']
            else:
                order = [u'webm', u'flv', u'mp4']
            ext_preference = _rank(order, ext)
            audio_ext_preference = 0

        return (
            preference,
            _known(f.get('height')),
            _known(f.get('width')),
            ext_preference,
            _known(f.get('vbr')),
            _known(f.get('abr')),
            audio_ext_preference,
            _known(f.get('filesize')),
            # Last tie-breaker; '' keeps formats without an id first (as
            # None did on Python 2) without a None-vs-str comparison.
            _known(f.get('format_id'), u''),
        )

    formats.sort(key=_formats_key)