X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fnpo.py;h=eb12fb8102deaf438621d303589488b0e831d774;hb=11bed5827dace09b5483b159476ce9f8c29d6078;hp=1c823ec7f4a5e5d2a88825a9545008f26a87019b;hpb=e118031ef827e851e537daa5b439cf5c249ca88d;p=youtube-dl diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 1c823ec7f..eb12fb810 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( fix_xml_ampersands, @@ -7,7 +9,6 @@ from ..utils import ( qualities, strip_jsonp, unified_strdate, - url_basename, ) @@ -39,7 +40,19 @@ class NPOBaseIE(InfoExtractor): class NPOIE(NPOBaseIE): IE_NAME = 'npo' IE_DESC = 'npo.nl and ntr.nl' - _VALID_URL = r'https?://(?:www\.)?(?:npo|ntr)\.nl/(?!live|radio)(?:[^/]+/){2,}(?P[^/?#]+)' + _VALID_URL = r'''(?x) + (?: + npo:| + https?:// + (?:www\.)? + (?: + npo\.nl/(?!live|radio)(?:[^/]+/){2}| + ntr\.nl/(?:[^/]+/){2,}| + omroepwnl\.nl/video/fragment/[^/]+__ + ) + ) + (?P[^/?#]+) + ''' _TESTS = [ { @@ -59,7 +72,7 @@ class NPOIE(NPOBaseIE): 'info_dict': { 'id': 'VARA_101191800', 'ext': 'm4v', - 'title': 'De Mega Mike & Mega Thomas show', + 'title': 'De Mega Mike & Mega Thomas show: The best of.', 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', 'upload_date': '20090227', 'duration': 2400, @@ -71,8 +84,8 @@ class NPOIE(NPOBaseIE): 'info_dict': { 'id': 'VPWON_1169289', 'ext': 'm4v', - 'title': 'Tegenlicht', - 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', + 'title': 'Tegenlicht: De toekomst komt uit Afrika', + 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', 'upload_date': '20130225', 'duration': 3000, }, @@ -112,6 +125,18 @@ class NPOIE(NPOBaseIE): 'upload_date': '20150508', 'duration': 599, }, + }, + { + 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', + 'md5': 'd30cd8417b8b9bca1fdff27428860d08', + 'info_dict': { + 'id': 'POW_00996502', + 'ext': 'm4v', + 'title': '''"Dit is wel een 'landslide'..."''', + 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', + 'upload_date': '20150508', + 'duration': 462, + }, } ] @@ -127,6 +152,18 @@ class NPOIE(NPOBaseIE): transform_source=strip_jsonp, ) + # For some videos actual video id (prid) is different (e.g. for + # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698 + # video id is POMS_WNL_853698 but prid is POW_00996502) + video_id = metadata.get('prid') or video_id + + # titel is too generic in some cases so utilize aflevering_titel as well + # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html) + title = metadata['titel'] + sub_title = metadata.get('aflevering_titel') + if sub_title and sub_title != title: + title += ': %s' % sub_title + token = self._get_token(video_id) formats = [] @@ -199,8 +236,8 @@ class NPOIE(NPOBaseIE): return { 'id': video_id, - 'title': metadata['titel'], - 'description': metadata['info'], + 'title': title, + 'description': metadata.get('info'), 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], 'upload_date': unified_strdate(metadata.get('gidsdatum')), 'duration': parse_duration(metadata.get('tijdsduur')), @@ -369,9 +406,9 @@ class NPORadioFragmentIE(InfoExtractor): } -class TegenlichtVproIE(NPOIE): - IE_NAME = 'tegenlicht.vpro.nl' - _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' +class VPROIE(NPOIE): + IE_NAME = 'vpro' + _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P[^/]+)\.html' _TESTS = [ { @@ -380,17 +417,72 @@ class TegenlichtVproIE(NPOIE): 'info_dict': { 'id': 'VPWON_1169289', 'ext': 'm4v', - 'title': 'Tegenlicht', - 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', + 'title': 'De toekomst komt uit Afrika', + 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', 'upload_date': '20130225', }, }, + { + 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html', + 'info_dict': { + 'id': 'sergio-herman', + 'title': 'Sergio Herman: Fucking perfect', + }, + 'playlist_count': 2, + }, + { + # playlist with youtube embed + 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html', + 'info_dict': { + 'id': 'education-education', + 'title': '2Doc', + }, + 'playlist_count': 2, + } ] def _real_extract(self, url): - name = url_basename(url) - webpage = self._download_webpage(url, name) - urn = self._html_search_meta('mediaurn', webpage) - info_page = self._download_json( - 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) - return self._get_info(info_page['mid']) + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) + for video_id in re.findall(r'data-media-id="([^"]+)"', webpage) + ] + + playlist_title = self._search_regex( + r'\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*', + webpage, 'playlist title', default=None) or self._og_search_title(webpage) + + return self.playlist_result(entries, playlist_id, playlist_title) + + +class WNLIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P[^/]+)__\d+' + + _TEST = { + 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', + 'info_dict': { + 'id': 'vandaag-de-dag-6-mei', + 'title': 'Vandaag de Dag 6 mei', + }, + 'playlist_count': 4, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id, 'NPO') + for video_id, part in re.findall( + r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage) + ] + + playlist_title = self._html_search_regex( + r'(?s)]+class="subject"[^>]*>(.+?)', + webpage, 'playlist title') + + return self.playlist_result(entries, playlist_id, playlist_title)