X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fspiegel.py;h=f345883c767438a91412e0619a993a70e3a21a92;hb=c8dfe360eb642b2957116814bfdef67686ab000d;hp=9586a7da2226ff822992cb21bc1698952c14b29e;hpb=e4bdb37ec6c463df236cf7178046b8653c70b78e;p=youtube-dl diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 9586a7da2..f345883c7 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,7 +4,15 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_HTTPError, +) +from ..utils import ( + HEADRequest, + ExtractorError, +) +from .spiegeltv import SpiegeltvIE class SpiegelIE(InfoExtractor): @@ -42,7 +50,11 @@ class SpiegelIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage, handle = self._download_webpage_handle(url, video_id) + + # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html + if SpiegeltvIE.suitable(handle.geturl()): + return self.url_result(handle.geturl(), 'Spiegeltv') title = re.sub(r'\s+', ' ', self._html_search_regex( r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)', @@ -55,21 +67,31 @@ class SpiegelIE(InfoExtractor): xml_url = base_url + video_id + '.xml' idoc = self._download_xml(xml_url, video_id) - formats = [ - { - 'format_id': n.tag.rpartition('type')[2], - 'url': base_url + n.find('./filename').text, - 'width': int(n.find('./width').text), - 'height': int(n.find('./height').text), - 'abr': int(n.find('./audiobitrate').text), - 'vbr': int(n.find('./videobitrate').text), - 'vcodec': n.find('./codec').text, - 'acodec': 'MP4A', - } - for n in list(idoc) - # Blacklist type 6, it's extremely LQ and not available on the same server - if n.tag.startswith('type') and n.tag != 'type6' - ] + formats = [] + for n in list(idoc): + if n.tag.startswith('type') and n.tag != 'type6': + format_id = n.tag.rpartition('type')[2] + video_url = base_url + n.find('./filename').text + # Test video URLs beforehand as some of them are invalid + try: + self._request_webpage( + HEADRequest(video_url), video_id, + 'Checking %s video URL' % format_id) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + self.report_warning( + '%s video URL is invalid, skipping' % format_id, video_id) + continue + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'width': int(n.find('./width').text), + 'height': int(n.find('./height').text), + 'abr': int(n.find('./audiobitrate').text), + 'vbr': int(n.find('./videobitrate').text), + 'vcodec': n.find('./codec').text, + 'acodec': 'MP4A', + }) duration = float(idoc[0].findall('./duration')[0].text) self._sort_formats(formats)