X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fnrk.py;h=bff36f9d3f24cad293144d8e216faf5eeefed92c;hb=674fb0fcc54c72448f80a0573f7fd116f220827e;hp=321ce5ce707c7006ad1a8f5979e01afab1bddd23;hpb=c24dfef63c55ef1a5424d11b485c3b76245448a4;p=youtube-dl diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 321ce5ce7..bff36f9d3 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, float_or_none, + parse_duration, unified_strdate, ) @@ -72,7 +74,7 @@ class NRKIE(InfoExtractor): class NRKTVIE(InfoExtractor): - _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' + _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' _TESTS = [ { @@ -147,6 +149,30 @@ class NRKTVIE(InfoExtractor): } ] + def _debug_print(self, txt): + if self._downloader.params.get('verbose', False): + self.to_screen('[debug] %s' % txt) + + def _get_subtitles(self, subtitlesurl, video_id, baseurl): + url = "%s%s" % (baseurl, subtitlesurl) + self._debug_print('%s: Subtitle url: %s' % (video_id, url)) + captions = self._download_xml( + url, video_id, 'Downloading subtitles', + transform_source=lambda s: s.replace(r'<br />', '\r\n')) + lang = captions.get('lang', 'no') + ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}')) + srt = '' + for pos, p in enumerate(ps): + begin = parse_duration(p.get('begin')) + duration = parse_duration(p.get('dur')) + starttime = self._subtitles_timecode(begin) + endtime = self._subtitles_timecode(begin + duration) + srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text) + return {lang: [ + {'ext': 'ttml', 'url': url}, + {'ext': 'srt', 'data': srt}, + ]} + def _extract_f4m(self, manifest_url, video_id): return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) @@ -154,6 +180,7 @@ class NRKTVIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') part_id = mobj.group('part_id') + baseurl = mobj.group('baseurl') webpage = self._download_webpage(url, video_id) @@ -210,9 +237,15 @@ class NRKTVIE(InfoExtractor): m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage) if m3u8_url: formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4')) - self._sort_formats(formats) + subtitles_url = self._html_search_regex( + r'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', + webpage, 'subtitle URL', default=None) + subtitles = None + if subtitles_url: + subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl) + return { 'id': video_id, 'title': title, @@ -221,4 +254,5 @@ class NRKTVIE(InfoExtractor): 'upload_date': upload_date, 'duration': duration, 'formats': formats, + 'subtitles': subtitles, }