X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fwdr.py;h=ef096cbd248def4c67a1046c326ce7d1fd6a8796;hb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;hp=45466e31b7445f8dd8da742308dcc69f2ff1152f;hpb=5f0d813d9395848e92a1c6d83335360652d654c1;p=youtube-dl diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 45466e31b..ef096cbd2 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,8 +10,8 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( - determine_ext, unified_strdate, + qualities, ) @@ -28,10 +28,12 @@ class WDRIE(InfoExtractor): 'title': 'Servicezeit', 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', 'upload_date': '20140310', + 'is_live': False }, 'params': { 'skip_download': True, }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', @@ -41,10 +43,12 @@ class WDRIE(InfoExtractor): 'title': 'Marga Spiegel ist tot', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20140311', + 'is_live': False }, 'params': { 'skip_download': True, }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', @@ -55,6 +59,7 @@ class WDRIE(InfoExtractor): 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20091129', + 'is_live': False }, }, { @@ -66,11 +71,30 @@ class WDRIE(InfoExtractor): 'title': 'Flavia Coelho: Amar é Amar', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'upload_date': '20140717', + 'is_live': False }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', 'playlist_mincount': 146, + 'info_dict': { + 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', + } + }, + { + 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html', + 'info_dict': { + 'id': 'mdb-103364', + 'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', + 'ext': 'flv', + 'upload_date': '20150101', + 'is_live': True + }, + 'params': { + 'skip_download': True, + }, } ] @@ -116,6 +140,10 @@ class WDRIE(InfoExtractor): video_url = flashvars['dslSrc'][0] title = flashvars['trackerClipTitle'][0] thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None + is_live = flashvars.get('isLive', ['0'])[0] == '1' + + if is_live: + title = self._live_title(title) if 'trackerClipAirTime' in flashvars: upload_date = flashvars['trackerClipAirTime'][0] @@ -125,22 +153,57 @@ class WDRIE(InfoExtractor): if upload_date: upload_date = unified_strdate(upload_date) + formats = [] + preference = qualities(['S', 'M', 'L', 'XL']) + if video_url.endswith('.f4m'): - video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' - ext = 'flv' + f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id, f4m_id='hds', fatal=False) + if f4m_formats: + formats.extend(f4m_formats) + elif video_url.endswith('.smil'): + smil_formats = self._extract_smil_formats(video_url, page_id, False, { + 'hdcore': '3.3.0', + 'plugin': 'aasp-3.3.0.99.43', + }) + if smil_formats: + formats.extend(smil_formats) else: - ext = determine_ext(video_url) + formats.append({ + 'url': video_url, + 'http_headers': { + 'User-Agent': 'mobile', + }, + }) + + m3u8_url = self._search_regex(r'rel="adaptiv"[^>]+href="([^"]+)"', webpage, 'm3u8 url', default=None) + if m3u8_url: + m3u8_formats = self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + + direct_urls = re.findall(r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) + if direct_urls: + for quality, video_url in direct_urls: + formats.append({ + 'url': video_url, + 'preference': preference(quality), + 'http_headers': { + 'User-Agent': 'mobile', + }, + }) + + self._sort_formats(formats) description = self._html_search_meta('Description', webpage, 'description') return { 'id': page_id, - 'url': video_url, - 'ext': ext, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, + 'is_live': is_live } @@ -169,7 +232,9 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, - 'user_agent': 'mobile', + 'http_headers': { + 'User-Agent': 'mobile', + }, }