X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fspiegel.py;h=39a7aaf9d630203dc1796b3b5621aad3c433f575;hb=3047121c639428235191ff5f7afbda7ecda38779;hp=f345883c767438a91412e0619a993a70e3a21a92;hpb=8940b8608e567dba09b3ea146b89b297190ec6d6;p=youtube-dl diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index f345883c7..39a7aaf9d 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,19 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_HTTPError, -) -from ..utils import ( - HEADRequest, - ExtractorError, -) +from ..compat import compat_urlparse from .spiegeltv import SpiegeltvIE class SpiegelIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$' + _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' _TESTS = [{ 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', 'md5': '2c2754212136f35fb4b19767d242f66e', @@ -46,6 +39,9 @@ class SpiegelIE(InfoExtractor): 'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. 
Hier kommen seine Antworten auf die besten sechs Fragen.', 'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"', } + }, { + 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -62,7 +58,8 @@ class SpiegelIE(InfoExtractor): description = self._html_search_meta('description', webpage, 'description') base_url = self._search_regex( - r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL') + [r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'], + webpage, 'server URL', group='url') xml_url = base_url + video_id + '.xml' idoc = self._download_xml(xml_url, video_id) @@ -72,16 +69,6 @@ class SpiegelIE(InfoExtractor): if n.tag.startswith('type') and n.tag != 'type6': format_id = n.tag.rpartition('type')[2] video_url = base_url + n.find('./filename').text - # Test video URLs beforehand as some of them are invalid - try: - self._request_webpage( - HEADRequest(video_url), video_id, - 'Checking %s video URL' % format_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - self.report_warning( - '%s video URL is invalid, skipping' % format_id, video_id) - continue formats.append({ 'format_id': format_id, 'url': video_url, @@ -94,6 +81,7 @@ class SpiegelIE(InfoExtractor): }) duration = float(idoc[0].findall('./duration')[0].text) + self._check_formats(formats, video_id) self._sort_formats(formats) return {