X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2Fextractor%2Fcommon.py;h=280693d1d83312a00da78fa0852fd1a6cfa6be3a;hb=4bcc7bd1f21c479a3ef613e62868a9fe6d9c370f;hp=1fc0624a35a70c830f144d6749bfcc19e5bf47fb;hpb=88bb52ee18275adf46f764e8fbc7e31b271cf254;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1fc0624a3..280693d1d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -18,6 +18,7 @@ from ..utils import ( sanitize_filename, unescapeHTML, ) +_NO_DEFAULT = object() class InfoExtractor(object): @@ -36,10 +37,12 @@ class InfoExtractor(object): id: Video identifier. title: Video title, unescaped. - Additionally, it must contain either a formats entry or url and ext: + Additionally, it must contain either a formats entry or a url one: - formats: A list of dictionaries for each format available, it must - be ordered from worst to best quality. Potential fields: + formats: A list of dictionaries for each format available, ordered + from worst to best quality. + + Potential fields: * url Mandatory. The URL of the video file * ext Will be calculated from url if missing * format A human-readable description of the format @@ -52,18 +55,21 @@ class InfoExtractor(object): ("3D" or "DASH video") * width Width of the video, if known * height Height of the video, if known + * resolution Textual description of width and height * abr Average audio bitrate in KBit/s * acodec Name of the audio codec in use * vbr Average video bitrate in KBit/s * vcodec Name of the video codec in use * filesize The number of bytes, if known in advance * player_url SWF Player URL (used for rtmpdump). + * preference Order number of this format. If this field is + present, the formats get sorted by this field. + -1 for default (order by other properties), + -2 or smaller for less than default. url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) player_url: SWF Player URL (used for rtmpdump). - urlhandle: [internal] The urlHandle to be used to download the file, - like returned by urllib.request.urlopen The following fields are optional: @@ -169,6 +175,8 @@ class InfoExtractor(object): try: return self._downloader.urlopen(url_or_request) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + if errnote is False: + return False if errnote is None: errnote = u'Unable to download webpage' errmsg = u'%s: %s' % (errnote, compat_str(err)) @@ -241,6 +249,11 @@ class InfoExtractor(object): xml_string = transform_source(xml_string) return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) + def report_warning(self, msg, video_id=None): + idstr = u'' if video_id is None else u'%s: ' % video_id + self._downloader.report_warning( + u'[%s] %s%s' % (self.IE_NAME, idstr, msg)) + def to_screen(self, msg): """Print msg to screen, prefixing it with '[ie_name]'""" self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) @@ -262,7 +275,8 @@ class InfoExtractor(object): self.to_screen(u'Logging in') #Methods for following #608 - def url_result(self, url, ie=None, video_id=None): + @staticmethod + def url_result(url, ie=None, video_id=None): """Returns a url that points to a page that should be processed""" #TODO: ie should be the class used for getting the info video_info = {'_type': 'url', @@ -271,7 +285,8 @@ class InfoExtractor(object): if video_id is not None: video_info['id'] = video_id return video_info - def playlist_result(self, entries, playlist_id=None, playlist_title=None): + @staticmethod + def playlist_result(entries, playlist_id=None, playlist_title=None): """Returns a playlist""" video_info = {'_type': 'playlist', 'entries': entries} @@ -281,7 +296,7 @@ class InfoExtractor(object): video_info['title'] = playlist_title return video_info - def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0): + def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): """ Perform a regex search on the given string, using a single or a list of patterns returning the first matching group. @@ -303,7 +318,7 @@ class InfoExtractor(object): if mobj: # return the first matching group return next(g for g in mobj.groups() if g is not None) - elif default is not None: + elif default is not _NO_DEFAULT: return default elif fatal: raise RegexNotFoundError(u'Unable to extract %s' % _name) @@ -312,7 +327,7 @@ class InfoExtractor(object): u'please report this issue on http://yt-dl.org/bug' % _name) return None - def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0): + def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): """ Like _search_regex, but strips HTML tags and unescapes entities. """ @@ -421,6 +436,47 @@ class InfoExtractor(object): } return RATING_TABLE.get(rating.lower(), None) + def _sort_formats(self, formats): + def _formats_key(f): + preference = f.get('preference') + if preference is None: + preference = 0 if f.get('url', '').startswith('http') else -0.1 + if f.get('ext') in ['f4f', 'f4m']: # Not yet supported + preference -= 0.5 + + if f.get('vcodec') == 'none': # audio only + if self._downloader.params.get('prefer_free_formats'): + ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] + else: + ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'] + ext_preference = 0 + try: + audio_ext_preference = ORDER.index(f['ext']) + except ValueError: + audio_ext_preference = -1 + else: + if self._downloader.params.get('prefer_free_formats'): + ORDER = [u'flv', u'mp4', u'webm'] + else: + ORDER = [u'webm', u'flv', u'mp4'] + try: + ext_preference = ORDER.index(f['ext']) + except ValueError: + ext_preference = -1 + audio_ext_preference = 0 + + return ( + preference, + f.get('height') if f.get('height') is not None else -1, + f.get('width') if f.get('width') is not None else -1, + ext_preference, + f.get('vbr') if f.get('vbr') is not None else -1, + f.get('abr') if f.get('abr') is not None else -1, + audio_ext_preference, + f.get('filesize') if f.get('filesize') is not None else -1, + f.get('format_id'), + ) + formats.sort(key=_formats_key) class SearchInfoExtractor(InfoExtractor):