X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fonet.py;h=58da1bc27b120e6636e61237dc1b0b61972bf96e;hb=a61ce71468cb222338ccd8039dc631f3619dc585;hp=801aadbffd3fe36a0923c318aa237830766f1189;hpb=43a3d9edfcdad8eb33758c4a7f4f912322001b8a;p=youtube-dl diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 801aadbff..58da1bc27 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -11,6 +11,7 @@ from ..utils import ( get_element_by_class, int_or_none, js_to_json, + NO_DEFAULT, parse_iso8601, remove_start, strip_or_none, @@ -184,7 +185,7 @@ class OnetChannelIE(OnetBaseIE): class OnetPlIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?onet\.pl/(?:[^/]+/)+(?P[0-9a-z]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P[0-9a-z]+)' IE_NAME = 'onet.pl' _TESTS = [{ @@ -198,18 +199,52 @@ class OnetPlIE(InfoExtractor): 'upload_date': '20170214', 'timestamp': 1487078046, }, + }, { + # embedded via pulsembed + 'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0', + 'info_dict': { + 'id': '501235.965429946', + 'ext': 'mp4', + 'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu', + 'upload_date': '20170622', + 'timestamp': 1498159955, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', 'only_matching': True, + }, { + 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e', + 'only_matching': True, + }, { + 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk', + 'only_matching': True, + }, { + 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89', + 'only_matching': True, }] + def _search_mvp_id(self, webpage, default=NO_DEFAULT): + return self._search_regex( + r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id', + default=default) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - mvp_id = self._search_regex( - r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id') + mvp_id = self._search_mvp_id(webpage, default=None) + + if not mvp_id: + pulsembed_url = self._search_regex( + r'data-src=(["\'])(?P(?:https?:)?//pulsembed\.eu/.+?)\1', + webpage, 'pulsembed url', group='url') + webpage = self._download_webpage( + pulsembed_url, video_id, 'Downloading pulsembed webpage') + mvp_id = self._search_mvp_id(webpage) return self.url_result( 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)