X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcommon.py;h=602601b24360766c4d37e33300ee01df0eaf122a;hb=ba322d82090bd1126774e772b699283121ffa4b8;hp=7b7a832dc34a37b70d2c2165dfe1c28185237e89;hpb=8940b8608e567dba09b3ea146b89b297190ec6d6;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7b7a832dc..602601b24 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,6 +14,7 @@ import xml.etree.ElementTree from ..compat import ( compat_cookiejar, + compat_HTTPError, compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, @@ -26,6 +27,7 @@ from ..utils import ( compiled_regex_type, ExtractorError, float_or_none, + HEADRequest, int_or_none, RegexNotFoundError, sanitize_filename, @@ -87,7 +89,8 @@ class InfoExtractor(object): * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual download, lower-case. - "http", "https", "rtsp", "rtmp", "m3u8" or so. + "http", "https", "rtsp", "rtmp", "rtmpe", + "m3u8", or "m3u8_native". * preference Order number of this format. If this field is present and not None, the formats get sorted by this field, regardless of all other values. @@ -114,8 +117,11 @@ class InfoExtractor(object): * http_post_data Additional data to send with a POST request. * stretched_ratio If given and not 1, indicates that the - video's pixels are not square. - width : height ratio as float. + video's pixels are not square. + width : height ratio as float. + * no_resume The server does not support resuming the + (HTTP or RTMP) download. Boolean. + url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) @@ -139,6 +145,7 @@ class InfoExtractor(object): thumbnail: Full URL to a video thumbnail image. description: Full video description. uploader: Full name of the video uploader. + creator: The main artist who created the video. timestamp: UNIX timestamp of the moment the video became available. upload_date: Video upload date (YYYYMMDD). If not explicitly set, calculated from timestamp. @@ -698,11 +705,11 @@ class InfoExtractor(object): preference, f.get('language_preference') if f.get('language_preference') is not None else -1, f.get('quality') if f.get('quality') is not None else -1, - f.get('height') if f.get('height') is not None else -1, - f.get('width') if f.get('width') is not None else -1, - ext_preference, f.get('tbr') if f.get('tbr') is not None else -1, f.get('vbr') if f.get('vbr') is not None else -1, + ext_preference, + f.get('height') if f.get('height') is not None else -1, + f.get('width') if f.get('width') is not None else -1, f.get('abr') if f.get('abr') is not None else -1, audio_ext_preference, f.get('fps') if f.get('fps') is not None else -1, @@ -713,6 +720,27 @@ class InfoExtractor(object): ) formats.sort(key=_formats_key) + def _check_formats(self, formats, video_id): + if formats: + formats[:] = filter( + lambda f: self._is_valid_url( + f['url'], video_id, + item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'), + formats) + + def _is_valid_url(self, url, video_id, item='video'): + try: + self._request_webpage( + HEADRequest(url), video_id, + 'Checking %s URL' % item) + return True + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self.report_warning( + '%s URL is invalid, skipping' % item, video_id) + return False + raise + def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ return ( @@ -833,10 +861,13 @@ class InfoExtractor(object): return formats # TODO: improve extraction - def _extract_smil_formats(self, smil_url, video_id): + def _extract_smil_formats(self, smil_url, video_id, fatal=True): smil = self._download_xml( smil_url, video_id, 'Downloading SMIL file', - 'Unable to download SMIL file') + 'Unable to download SMIL file', fatal=fatal) + if smil is False: + assert not fatal + return [] base = smil.find('./head/meta').get('base')