X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fwdr.py;h=ef096cbd248def4c67a1046c326ce7d1fd6a8796;hb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;hp=8e25ecf280769166a49d18cfd2508bd6d90caa74;hpb=2f15832f569834a37ac3ee6140a3b997407c04cd;p=youtube-dl diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 8e25ecf28..ef096cbd2 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -9,8 +10,8 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( - determine_ext, unified_strdate, + qualities, ) @@ -27,10 +28,12 @@ class WDRIE(InfoExtractor): 'title': 'Servicezeit', 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', 'upload_date': '20140310', + 'is_live': False }, 'params': { 'skip_download': True, }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', @@ -40,10 +43,12 @@ class WDRIE(InfoExtractor): 'title': 'Marga Spiegel ist tot', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20140311', + 'is_live': False }, 'params': { 'skip_download': True, }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', @@ -54,6 +59,7 @@ class WDRIE(InfoExtractor): 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20091129', + 'is_live': False }, }, { @@ -65,8 +71,31 @@ class WDRIE(InfoExtractor): 'title': 'Flavia Coelho: Amar é Amar', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'upload_date': '20140717', + 'is_live': False }, + 'skip': 'Page Not Found', }, + { + 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', + 'playlist_mincount': 146, + 'info_dict': { + 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', + } + }, + { + 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html', + 'info_dict': { + 'id': 'mdb-103364', + 'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', + 'ext': 'flv', + 'upload_date': '20150101', + 'is_live': True + }, + 'params': { + 'skip_download': True, + }, + } ] def _real_extract(self, url): @@ -81,6 +110,27 @@ class WDRIE(InfoExtractor): self.url_result(page_url + href, 'WDR') for href in re.findall(r'\s*]*>\s*\s*]+href="([^"]+)"', webpage, 'm3u8 url', default=None) + if m3u8_url: + m3u8_formats = self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + + direct_urls = re.findall(r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) + if direct_urls: + for quality, video_url in direct_urls: + formats.append({ + 'url': video_url, + 'preference': preference(quality), + 'http_headers': { + 'User-Agent': 'mobile', + }, + }) + + self._sort_formats(formats) description = self._html_search_meta('Description', webpage, 'description') return { 'id': page_id, - 'url': video_url, - 'ext': ext, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, + 'is_live': is_live } @@ -143,7 +232,9 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, - 'user_agent': 'mobile', + 'http_headers': { + 'User-Agent': 'mobile', + }, } @@ -172,8 +263,7 @@ class WDRMausIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) param_code = self._html_search_regex( @@ -224,5 +314,3 @@ class WDRMausIE(InfoExtractor): 'thumbnail': thumbnail, 'upload_date': upload_date, } - -# TODO test _1