X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fard.py;h=ef73d5a933f2ae08bfff028f0bd58afc0433ce54;hb=f4cc03d60b5dd713fb8964cd9ecf8ca2b1a8a556;hp=c15cf1575fa63eb903e6f79146fecb1843b02454;hpb=a66a73ee905cb1ef9cc63c86d68f5b87cbfe2582;p=youtube-dl diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index c15cf1575..ef73d5a93 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from .generic import GenericIE +from ..compat import compat_str from ..utils import ( determine_ext, ExtractorError, @@ -73,6 +74,7 @@ class ARDMediathekIE(InfoExtractor): 'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb', 'duration': 3287, }, + 'skip': 'Video is no longer available', }] def _extract_media_info(self, media_info_url, webpage, video_id): @@ -92,6 +94,7 @@ class ARDMediathekIE(InfoExtractor): duration = int_or_none(media_info.get('_duration')) thumbnail = media_info.get('_previewImage') + is_live = media_info.get('_isLive') is True subtitles = {} subtitle_url = media_info.get('_subtitleUrl') @@ -105,6 +108,7 @@ class ARDMediathekIE(InfoExtractor): 'id': video_id, 'duration': duration, 'thumbnail': thumbnail, + 'is_live': is_live, 'formats': formats, 'subtitles': subtitles, } @@ -123,6 +127,8 @@ class ARDMediathekIE(InfoExtractor): quality = stream.get('_quality') server = stream.get('_server') for stream_url in stream_urls: + if not isinstance(stream_url, compat_str) or '//' not in stream_url: + continue ext = determine_ext(stream_url) if quality != 'auto' and ext in ('f4m', 'm3u8'): continue @@ -143,13 +149,11 @@ class ARDMediathekIE(InfoExtractor): 'play_path': stream_url, 'format_id': 'a%s-rtmp-%s' % (num, quality), } - elif stream_url.startswith('http'): + else: f = { 'url': stream_url, 'format_id': 'a%s-%s-%s' % (num, ext, quality) } - else: - continue m = re.search(r'_(?P\d+)x(?P\d+)\.mp4$', stream_url) if m: f.update({ @@ -165,19 +169,25 @@ class ARDMediathekIE(InfoExtractor): # determine video id from url m = re.match(self._VALID_URL, url) + document_id = None + numid = re.search(r'documentId=([0-9]+)', url) if numid: - video_id = numid.group(1) + document_id = video_id = numid.group(1) else: video_id = m.group('video_id') webpage = self._download_webpage(url, video_id) - if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: - raise ExtractorError('Video %s is no longer available' % video_id, expected=True) + ERRORS = ( + ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'), + ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<', + 'Video %s is no longer available'), + ) - if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage: - raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True) + for pattern, message in ERRORS: + if pattern in webpage: + raise ExtractorError(message % video_id, expected=True) if re.search(r'[\?&]rss($|[=&])', url): doc = compat_etree_fromstring(webpage.encode('utf-8')) @@ -186,7 +196,7 @@ class ARDMediathekIE(InfoExtractor): title = self._html_search_regex( [r'(.*?)', - r'', + r'', r'

(.*?)

'], webpage, 'title') description = self._html_search_meta( @@ -223,12 +233,16 @@ class ARDMediathekIE(InfoExtractor): 'formats': formats, } else: # request JSON file + if not document_id: + video_id = self._search_regex( + r'/play/(?:config|media)/(\d+)', webpage, 'media id') info = self._extract_media_info( - 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id) + 'http://www.ardmediathek.de/play/media/%s' % video_id, + webpage, video_id) info.update({ 'id': video_id, - 'title': title, + 'title': self._live_title(title) if info.get('is_live') else title, 'description': description, 'thumbnail': thumbnail, }) @@ -237,7 +251,7 @@ class ARDMediathekIE(InfoExtractor): class ARDIE(InfoExtractor): - _VALID_URL = '(?Phttps?://(www\.)?daserste\.de/[^?#]+/videos/(?P[^/?#]+)-(?P[0-9]+))\.html' + _VALID_URL = r'(?Phttps?://(www\.)?daserste\.de/[^?#]+/videos/(?P[^/?#]+)-(?P[0-9]+))\.html' _TEST = { 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', 'md5': 'd216c3a86493f9322545e045ddc3eb35', @@ -248,7 +262,7 @@ class ARDIE(InfoExtractor): 'duration': 2600, 'title': 'Die Story im Ersten: Mission unter falscher Flagge', 'upload_date': '20140804', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'HTTP Error 404: Not Found', }