X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2Fextractor%2Fnrk.py;h=ea7be005a9640b631c03381778792251fca574a5;hb=baa3e1845b26d9756642325bbb0d58e22025b2ec;hp=c89aac63ee90f133074d8ade8b7af23cf020f148;hpb=fb27d0ce5e91216296e3406d461fe5b7af78c477;p=youtube-dl diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index c89aac63e..ea7be005a 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -48,6 +48,13 @@ class NRKBaseIE(InfoExtractor): entries = [] + conviva = data.get('convivaStatistics') or {} + live = (data.get('mediaElementType') == 'Live' or + data.get('isLive') is True or conviva.get('isLive')) + + def make_title(t): + return self._live_title(t) if live else t + media_assets = data.get('mediaAssets') if media_assets and isinstance(media_assets, list): def video_id_and_title(idx): @@ -61,6 +68,13 @@ class NRKBaseIE(InfoExtractor): if not formats: continue self._sort_formats(formats) + + # Some f4m streams may not work with hdcore in fragments' URLs + for f in formats: + extra_param = f.get('extra_param_to_segment_url') + if extra_param and 'hdcore' in extra_param: + del f['extra_param_to_segment_url'] + entry_id, entry_title = video_id_and_title(num) duration = parse_duration(asset.get('duration')) subtitles = {} @@ -72,7 +86,7 @@ class NRKBaseIE(InfoExtractor): }) entries.append({ 'id': asset.get('carrierId') or entry_id, - 'title': entry_title, + 'title': make_title(entry_title), 'duration': duration, 'subtitles': subtitles, 'formats': formats, @@ -87,7 +101,7 @@ class NRKBaseIE(InfoExtractor): duration = parse_duration(data.get('duration')) entries = [{ 'id': video_id, - 'title': title, + 'title': make_title(title), 'duration': duration, 'formats': formats, }] @@ -111,7 +125,6 @@ class NRKBaseIE(InfoExtractor): message_type, message_type)), expected=True) - conviva = data.get('convivaStatistics') or {} series = conviva.get('seriesName') or data.get('seriesTitle') episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') @@ -194,7 +207,15 @@ class NRKIE(NRKBaseIE): class NRKTVIE(NRKBaseIE): IE_DESC = 'NRK TV and NRK Radio' - _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' + _EPISODE_RE = r'(?P[a-zA-Z]{4}\d{8})' + _VALID_URL = r'''(?x) + https?:// + (?:tv|radio)\.nrk(?:super)?\.no/ + (?:serie/[^/]+|program)/ + (?![Ee]pisodes)%s + (?:/\d{2}-\d{2}-\d{4})? + (?:\#del=(?P\d+))? + ''' % _EPISODE_RE _API_HOST = 'psapi-we.nrk.no' _TESTS = [{ @@ -260,9 +281,43 @@ class NRKTVIE(NRKBaseIE): }] -class NRKPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P[^/]+)' +class NRKTVDirekteIE(NRKTVIE): + IE_DESC = 'NRK TV Direkte and NRK Radio Direkte' + _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://tv.nrk.no/direkte/nrk1', + 'only_matching': True, + }, { + 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus', + 'only_matching': True, + }] + + +class NRKPlaylistBaseIE(InfoExtractor): + def _extract_description(self, webpage): + pass + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('nrk:%s' % video_id, NRKIE.ie_key()) + for video_id in re.findall(self._ITEM_RE, webpage) + ] + + playlist_title = self. _extract_title(webpage) + playlist_description = self._extract_description(webpage) + + return self.playlist_result( + entries, playlist_id, playlist_title, playlist_description) + + +class NRKPlaylistIE(NRKPlaylistBaseIE): + _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P[^/]+)' + _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"' _TESTS = [{ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', 'info_dict': { @@ -281,23 +336,28 @@ class NRKPlaylistIE(InfoExtractor): 'playlist_count': 5, }] - def _real_extract(self, url): - playlist_id = self._match_id(url) + def _extract_title(self, webpage): + return self._og_search_title(webpage, fatal=False) - webpage = self._download_webpage(url, playlist_id) + def _extract_description(self, webpage): + return self._og_search_description(webpage) - entries = [ - self.url_result('nrk:%s' % video_id, 'NRK') - for video_id in re.findall( - r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"', - webpage) - ] - playlist_title = self._og_search_title(webpage) - playlist_description = self._og_search_description(webpage) +class NRKTVEpisodesIE(NRKPlaylistBaseIE): + _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P\d+)' + _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE + _TESTS = [{ + 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031', + 'info_dict': { + 'id': '69031', + 'title': 'Nytt på nytt, sesong: 201210', + }, + 'playlist_count': 4, + }] - return self.playlist_result( - entries, playlist_id, playlist_title, playlist_description) + def _extract_title(self, webpage): + return self._html_search_regex( + r'

([^<]+)

', webpage, 'title', fatal=False) class NRKSkoleIE(InfoExtractor):