X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fwdr.py;h=ef096cbd248def4c67a1046c326ce7d1fd6a8796;hb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;hp=feeb44b45ff32b4738957e44a6603cde93c8b9a4;hpb=7b93c2c204d7bad9285b5c9db12ff2acb01338e6;p=youtube-dl diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index feeb44b45..ef096cbd2 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,13 +1,17 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_urlparse, - determine_ext, +) +from ..utils import ( unified_strdate, + qualities, ) @@ -24,10 +28,12 @@ class WDRIE(InfoExtractor): 'title': 'Servicezeit', 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', 'upload_date': '20140310', + 'is_live': False }, 'params': { 'skip_download': True, }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', @@ -37,10 +43,12 @@ class WDRIE(InfoExtractor): 'title': 'Marga Spiegel ist tot', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20140311', + 'is_live': False }, 'params': { 'skip_download': True, }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', @@ -51,19 +59,43 @@ class WDRIE(InfoExtractor): 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20091129', + 'is_live': False }, }, { - 'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', - 'md5': 'cfff440d4ee64114083ac44676df5d15', + 'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html', + 'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa', 'info_dict': { - 'id': 'mdb-363068', + 'id': 'mdb-478135', 'ext': 'mp3', - 'title': 'Grenzenlos lecker - Baklava', + 'title': 'Flavia Coelho: Amar é Amar', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', - 'upload_date': '20140311', + 'upload_date': '20140717', + 'is_live': False }, + 'skip': 'Page Not Found', }, + { + 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', + 'playlist_mincount': 146, + 'info_dict': { + 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', + } + }, + { + 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html', + 'info_dict': { + 'id': 'mdb-103364', + 'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', + 'ext': 'flv', + 'upload_date': '20150101', + 'is_live': True + }, + 'params': { + 'skip_download': True, + }, + } ] def _real_extract(self, url): @@ -78,15 +110,40 @@ class WDRIE(InfoExtractor): self.url_result(page_url + href, 'WDR') for href in re.findall(r'\s*]*>\s*\s*]+href="([^"]+)"', webpage, 'm3u8 url', default=None) + if m3u8_url: + m3u8_formats = self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + + direct_urls = re.findall(r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) + if direct_urls: + for quality, video_url in direct_urls: + formats.append({ + 'url': video_url, + 'preference': preference(quality), + 'http_headers': { + 'User-Agent': 'mobile', + }, + }) + + self._sort_formats(formats) description = self._html_search_meta('Description', webpage, 'description') return { 'id': page_id, - 'url': video_url, - 'ext': ext, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, + 'is_live': is_live } @@ -127,9 +219,10 @@ class WDRMobileIE(InfoExtractor): 'info_dict': { 'title': '4283021', 'id': '421735', + 'ext': 'mp4', 'age_limit': 0, }, - '_skip': 'Will be depublicized shortly' + 'skip': 'Problems with loading data.' } def _real_extract(self, url): @@ -139,7 +232,9 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, - 'user_agent': 'mobile', + 'http_headers': { + 'User-Agent': 'mobile', + }, } @@ -168,8 +263,7 @@ class WDRMausIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) param_code = self._html_search_regex( @@ -220,5 +314,3 @@ class WDRMausIE(InfoExtractor): 'thumbnail': thumbnail, 'upload_date': upload_date, } - -# TODO test _1 \ No newline at end of file